In [1]:
import gc
import torch
import torch.optim as optim
from torchinfo import summary

from KSI_models import KSI, ModifiedKSI, CNN
from KSI_utils import load_KSI_data, train_model, test_model

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:
# Experiment configuration shared by every run in this notebook.
n_embedding = 100    # embedding size passed to the CNN / KSI / ModifiedKSI constructors
batch_size = 32      # mini-batch size handed to load_KSI_data and torchinfo.summary
n_epochs = 25        # forwarded to train_model as the number of epochs
save = True          # when True, each trained model is written to disk with torch.save
profile = False      # forwarded to train_model; when True, profiler tables are printed
model_type = 'CNN'   # used to build log paths and saved-model file names
early_stopping = 5   # forwarded to train_model (presumably a patience in epochs - TODO confirm)

# Seed PyTorch's global RNG so weight initialisation and dropout are repeatable
# on a fresh "Restart & Run All". Any RNG used inside KSI_utils other than
# torch's global generator is not covered by this call.
seed = 42
torch.manual_seed(seed)

In [3]:
# Load the train/val/test loaders plus the wiki matrix and vocabulary index
# from the original data directory.
dir = 'data/original/'
loaders, wikivec, word_to_ix = load_KSI_data(
    dir=dir,
    batch_size=batch_size,
    train=True,
    val=True,
    test=True,
    device=DEVICE,
)
train_dataloader, val_dataloader, test_dataloader = (
    loaders['train'],
    loaders['val'],
    loaders['test'],
)

# Sizes needed to construct the models: rows/columns of the wiki matrix and
# the number of entries in the word index.
n_wiki, n_vocab = wikivec.shape
n_words = len(word_to_ix)

In [4]:
# Disabled snippet: collects n.shape[1] for every batch of the training loader
# and takes the rounded mean as an int. It relies on numpy (`np`), which is not
# imported in the cells shown here, so it would need `import numpy as np` to run.
# note_lengths = []
# for data in train_dataloader:
#     n, _, _ = data
#     note_lengths.append(n.shape[1])
# avg_note_size = np.round(np.array(note_lengths).mean()).astype(int)

# Hard-coded value, presumably the result of the snippet above - TODO confirm.
# Used below as the second dimension of the first input passed to torchinfo.summary.
avg_note_size = 2455

In [5]:
# Baseline CNN (no KSI module) on the selected device, with a torchinfo summary
# for one batch of notes and one batch of vocabulary-sized vectors.
base_model = CNN(n_words, n_wiki, n_embedding).to(DEVICE)
base_summary = summary(
    base_model,
    input_size=[(batch_size, avg_note_size), (batch_size, n_vocab)],
    dtypes=[torch.int, torch.float],
)

base_summary

Layer (type:depth-idx)                   Output Shape              Param #
CNN                                      --                        --
├─Embedding: 1-1                         [32, 2455, 100]           4,796,200
├─Dropout: 1-2                           [32, 2455, 100]           --
├─Conv1d: 1-3                            [32, 100, 2453]           30,100
├─Conv1d: 1-4                            [32, 100, 2452]           40,100
├─Conv1d: 1-5                            [32, 100, 2451]           50,100
├─Linear: 1-6                            [32, 344]                 103,544
Total params: 5,020,044
Trainable params: 5,020,044
Non-trainable params: 0
Total mult-adds (G): 9.60
Input size (MB): 1.87
Forward/backward pass size (MB): 251.25
Params size (MB): 20.08
Estimated Total Size (MB): 273.20

In [6]:
# Train the baseline CNN with Adam at its default settings. No wikivec is
# passed for this model; the returned object is read later when `profile` is set.
optimizer = optim.Adam(params=base_model.parameters())
prof_base = train_model(
    base_model,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    optimizer=optimizer,
    n_epochs=n_epochs,
    early_stopping=early_stopping,
    profile=profile,
    log_path=f'./log/{model_type}',
    device=DEVICE,
)

Epoch: 001, Train Recall@10: 0.6604, Val Recall@10: 0.6532, Train Micro F1: 0.5292, Val Micro F1: 0.5207, Train Macro F1: 0.0579, Val Macro F1: 0.0682, Train Micro AUC: 0.9578, Val Micro AUC: 0.9468, Train Macro AUC: 0.7287, Val Macro AUC: 0.7194
Epoch: 002, Train Recall@10: 0.7283, Val Recall@10: 0.7156, Train Micro F1: 0.5933, Val Micro F1: 0.5803, Train Macro F1: 0.0950, Val Macro F1: 0.1080, Train Micro AUC: 0.9696, Val Micro AUC: 0.9595, Train Macro AUC: 0.8358, Val Macro AUC: 0.7841
Epoch: 003, Train Recall@10: 0.7642, Val Recall@10: 0.7477, Train Micro F1: 0.6309, Val Micro F1: 0.6136, Train Macro F1: 0.1322, Val Macro F1: 0.1511, Train Micro AUC: 0.9749, Val Micro AUC: 0.9646, Train Macro AUC: 0.8968, Val Macro AUC: 0.8129
Epoch: 004, Train Recall@10: 0.7843, Val Recall@10: 0.7647, Train Micro F1: 0.6521, Val Micro F1: 0.6293, Train Macro F1: 0.1557, Val Macro F1: 0.1709, Train Micro AUC: 0.9784, Val Micro AUC: 0.9672, Train Macro AUC: 0.9326, Val Macro AUC: 0.8302
Epoch: 005, 

In [7]:
# Optionally pickle the whole baseline module into the current data directory.
if save:
    torch.save(base_model, f'{dir}{model_type}_model.pt')
# Optionally print the profiler averages returned by train_model.
if profile:
    base_averages = prof_base.key_averages(group_by_stack_n=5)
    print(base_averages.table(sort_by='self_cuda_time_total'))

In [8]:
# Evaluate the baseline CNN on the test loader and report the aggregate metrics;
# the sixth return value is kept as label_aucs_base.
(tt_recall_at_k, tt_micro_f1, tt_macro_f1,
 tt_micro_auc, tt_macro_auc, label_aucs_base) = test_model(base_model,
                                                           test_dataloader,
                                                           wikivec,
                                                           device=DEVICE)
print(f'Test Recall@10: {tt_recall_at_k:.4f}, Test Micro F1: {tt_micro_f1:.4f}, Test Macro F1: {tt_macro_f1:.4f}' +
      f', Test Micro AUC: {tt_micro_auc:.4f}, Test Macro AUC: {tt_macro_auc:.4f}')

# Drop this notebook's reference to the model and force a collection before
# the next experiment is built.
del base_model
gc.collect()
# DEVICE is a torch.device, which never compares equal to the string 'cuda'
# (the old `DEVICE == 'cuda'` check was always False). Test its `.type` so the
# CUDA caching allocator is actually emptied on GPU runs.
if DEVICE.type == 'cuda':
    torch.cuda.empty_cache()

Test Recall@10: 0.7959, Test Micro F1: 0.6550, Test Macro F1: 0.2527, Test Micro AUC: 0.9753, Test Macro AUC: 0.8500


In [9]:
# CNN with a KSI module attached via `ksi=`. The summary takes a third input
# with the same (n_wiki, n_vocab) shape as wikivec.
ksi = KSI(n_embedding, n_vocab)
ksi.to(DEVICE)
model = CNN(n_words, n_wiki, n_embedding, ksi=ksi).to(DEVICE)
ksi_summary = summary(
    model,
    input_size=[(batch_size, avg_note_size), (batch_size, n_vocab), (n_wiki, n_vocab)],
    dtypes=[torch.int, torch.float, torch.float],
)

ksi_summary

Layer (type:depth-idx)                   Output Shape              Param #
CNN                                      --                        --
├─KSI: 1-1                               --                        --
│    └─Linear: 2-1                       --                        (recursive)
│    └─Linear: 2-2                       --                        (recursive)
│    └─Linear: 2-3                       --                        (recursive)
├─Embedding: 1-2                         [32, 2455, 100]           4,796,200
├─Dropout: 1-3                           [32, 2455, 100]           --
├─Conv1d: 1-4                            [32, 100, 2453]           30,100
├─Conv1d: 1-5                            [32, 100, 2452]           40,100
├─Conv1d: 1-6                            [32, 100, 2451]           50,100
├─Linear: 1-7                            [32, 344]                 103,544
├─KSI: 1-1                               --                        --
│    └─Linear: 2-4                

In [10]:
# Train the CNN+KSI model with a fresh default Adam optimizer; wikivec is
# passed to train_model for this model.
optimizer = optim.Adam(params=model.parameters())
prof_ksi = train_model(
    model,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    wikivec=wikivec,
    optimizer=optimizer,
    n_epochs=n_epochs,
    early_stopping=early_stopping,
    profile=profile,
    log_path=f'./log/{model_type}_KSI',
    device=DEVICE,
)

Epoch: 001, Train Recall@10: 0.7342, Val Recall@10: 0.7240, Train Micro F1: 0.5626, Val Micro F1: 0.5508, Train Macro F1: 0.1101, Val Macro F1: 0.1261, Train Micro AUC: 0.9733, Val Micro AUC: 0.9648, Train Macro AUC: 0.8603, Val Macro AUC: 0.8419
Epoch: 002, Train Recall@10: 0.7826, Val Recall@10: 0.7668, Train Micro F1: 0.6270, Val Micro F1: 0.6073, Train Macro F1: 0.1825, Val Macro F1: 0.1984, Train Micro AUC: 0.9801, Val Micro AUC: 0.9716, Train Macro AUC: 0.9123, Val Macro AUC: 0.8735
Epoch: 003, Train Recall@10: 0.8036, Val Recall@10: 0.7793, Train Micro F1: 0.6557, Val Micro F1: 0.6294, Train Macro F1: 0.2318, Val Macro F1: 0.2234, Train Micro AUC: 0.9830, Val Micro AUC: 0.9738, Train Macro AUC: 0.9385, Val Macro AUC: 0.8843
Epoch: 004, Train Recall@10: 0.8163, Val Recall@10: 0.7860, Train Micro F1: 0.6722, Val Micro F1: 0.6401, Train Macro F1: 0.2689, Val Macro F1: 0.2516, Train Micro AUC: 0.9850, Val Micro AUC: 0.9748, Train Macro AUC: 0.9547, Val Macro AUC: 0.8895
Epoch: 005, 

In [11]:
# Optionally pickle the whole CNN+KSI module into the current data directory.
if save:
    torch.save(model, f'{dir}{model_type}_KSI_model.pt')
# Optionally print the profiler averages returned by train_model.
if profile:
    ksi_averages = prof_ksi.key_averages(group_by_stack_n=5)
    print(ksi_averages.table(sort_by='self_cuda_time_total'))

In [12]:
# Evaluate the CNN+KSI model on the test loader and report the aggregate
# metrics; the sixth return value is kept as label_aucs_ksi.
(tt_recall_at_k, tt_micro_f1, tt_macro_f1,
 tt_micro_auc, tt_macro_auc, label_aucs_ksi) = test_model(model,
                                                          test_dataloader,
                                                          wikivec,
                                                          device=DEVICE)
print(f'Test Recall@10: {tt_recall_at_k:.4f}, Test Micro F1: {tt_micro_f1:.4f}, Test Macro F1: {tt_macro_f1:.4f}' +
      f', Test Micro AUC: {tt_micro_auc:.4f}, Test Macro AUC: {tt_macro_auc:.4f}')

# Drop this notebook's reference to the model and force a collection before
# the next experiment is built.
del model
gc.collect()
# DEVICE is a torch.device, which never compares equal to the string 'cuda'
# (the old `DEVICE == 'cuda'` check was always False). Test its `.type` so the
# CUDA caching allocator is actually emptied on GPU runs.
if DEVICE.type == 'cuda':
    torch.cuda.empty_cache()

Test Recall@10: 0.7946, Test Micro F1: 0.6482, Test Macro F1: 0.2571, Test Micro AUC: 0.9771, Test Macro AUC: 0.8923


In [13]:
# Run the modified KSI on frequency vectors instead of binary vectors:
# reload the loaders, wiki matrix and word index from the frequency data directory.
dir = 'data/original_freqs/'
loaders, wikivec, word_to_ix = load_KSI_data(
    dir=dir,
    batch_size=batch_size,
    train=True,
    val=True,
    test=True,
    device=DEVICE,
)
train_dataloader, val_dataloader, test_dataloader = (
    loaders['train'],
    loaders['val'],
    loaders['test'],
)

# Refresh the sizes used to construct the models from the newly loaded data.
n_wiki, n_vocab = wikivec.shape
n_words = len(word_to_ix)

In [18]:
# CNN with a ModifiedKSI module attached via `ksi=`. The summary takes a third
# input with the same (n_wiki, n_vocab) shape as wikivec.
mod_ksi = ModifiedKSI(n_embedding, n_vocab)
mod_ksi.to(DEVICE)
mod_model = CNN(n_words, n_wiki, n_embedding, ksi=mod_ksi).to(DEVICE)
mod_summary = summary(
    mod_model,
    input_size=[(batch_size, avg_note_size), (batch_size, n_vocab), (n_wiki, n_vocab)],
    dtypes=[torch.int, torch.float, torch.float],
)

mod_summary

Layer (type:depth-idx)                   Output Shape              Param #
CNN                                      --                        --
├─ModifiedKSI: 1-1                       --                        --
│    └─Linear: 2-1                       --                        (recursive)
│    └─Linear: 2-2                       --                        (recursive)
│    └─Linear: 2-3                       --                        (recursive)
│    └─Linear: 2-4                       --                        (recursive)
├─Embedding: 1-2                         [32, 2455, 100]           4,796,200
├─Dropout: 1-3                           [32, 2455, 100]           --
├─Conv1d: 1-4                            [32, 100, 2453]           30,100
├─Conv1d: 1-5                            [32, 100, 2452]           40,100
├─Conv1d: 1-6                            [32, 100, 2451]           50,100
├─Linear: 1-7                            [32, 344]                 103,544
├─ModifiedKSI: 1-1       

In [19]:
# Train the CNN+ModifiedKSI model (frequency vectors) with a fresh default
# Adam optimizer; wikivec is passed to train_model for this model.
optimizer = optim.Adam(params=mod_model.parameters())
prof_mod_ksi = train_model(
    mod_model,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    wikivec=wikivec,
    optimizer=optimizer,
    n_epochs=n_epochs,
    early_stopping=early_stopping,
    profile=profile,
    log_path=f'./log/{model_type}_ModifiedKSI',
    device=DEVICE,
)

Epoch: 001, Train Recall@10: 0.7488, Val Recall@10: 0.7409, Train Micro F1: 0.5869, Val Micro F1: 0.5776, Train Macro F1: 0.1438, Val Macro F1: 0.1753, Train Micro AUC: 0.9756, Val Micro AUC: 0.9690, Train Macro AUC: 0.8802, Val Macro AUC: 0.8805
Epoch: 002, Train Recall@10: 0.7833, Val Recall@10: 0.7714, Train Micro F1: 0.6306, Val Micro F1: 0.6188, Train Macro F1: 0.1849, Val Macro F1: 0.2148, Train Micro AUC: 0.9802, Val Micro AUC: 0.9736, Train Macro AUC: 0.9105, Val Macro AUC: 0.8982
Epoch: 003, Train Recall@10: 0.7993, Val Recall@10: 0.7873, Train Micro F1: 0.6495, Val Micro F1: 0.6348, Train Macro F1: 0.2218, Val Macro F1: 0.2409, Train Micro AUC: 0.9824, Val Micro AUC: 0.9755, Train Macro AUC: 0.9309, Val Macro AUC: 0.9037
Epoch: 004, Train Recall@10: 0.8095, Val Recall@10: 0.7938, Train Micro F1: 0.6634, Val Micro F1: 0.6442, Train Macro F1: 0.2493, Val Macro F1: 0.2592, Train Micro AUC: 0.9841, Val Micro AUC: 0.9766, Train Macro AUC: 0.9482, Val Macro AUC: 0.9061
Epoch: 005, 

In [20]:
# Optionally pickle the whole CNN+ModifiedKSI module into the current data directory.
if save:
    torch.save(mod_model, f'{dir}{model_type}_ModifiedKSI_model.pt')
# Optionally print the profiler averages returned by train_model.
if profile:
    mod_averages = prof_mod_ksi.key_averages(group_by_stack_n=5)
    print(mod_averages.table(sort_by='self_cuda_time_total'))

In [21]:
# Evaluate the CNN+ModifiedKSI (frequency vectors) model on the test loader and
# report the aggregate metrics; the sixth return value is kept as label_aucs_mod.
(tt_recall_at_k, tt_micro_f1, tt_macro_f1,
 tt_micro_auc, tt_macro_auc, label_aucs_mod) = test_model(mod_model,
                                                          test_dataloader,
                                                          wikivec,
                                                          device=DEVICE)
print(f'Test Recall@10: {tt_recall_at_k:.4f}, Test Micro F1: {tt_micro_f1:.4f}, Test Macro F1: {tt_macro_f1:.4f}' +
      f', Test Micro AUC: {tt_micro_auc:.4f}, Test Macro AUC: {tt_macro_auc:.4f}')

# Drop this notebook's reference to the model and force a collection before
# the next experiment is built.
del mod_model
gc.collect()
# DEVICE is a torch.device, which never compares equal to the string 'cuda'
# (the old `DEVICE == 'cuda'` check was always False). Test its `.type` so the
# CUDA caching allocator is actually emptied on GPU runs.
if DEVICE.type == 'cuda':
    torch.cuda.empty_cache()

Test Recall@10: 0.8066, Test Micro F1: 0.6571, Test Macro F1: 0.3023, Test Micro AUC: 0.9799, Test Macro AUC: 0.9061


In [22]:
# Run the modified KSI on tf-idf vectors instead of binary vectors:
# reload the loaders, wiki matrix and word index from the tf-idf data directory.
dir = 'data/original_tfidf/'
loaders, wikivec, word_to_ix = load_KSI_data(
    dir=dir,
    batch_size=batch_size,
    train=True,
    val=True,
    test=True,
    device=DEVICE,
)
train_dataloader, val_dataloader, test_dataloader = (
    loaders['train'],
    loaders['val'],
    loaders['test'],
)

# Refresh the sizes used to construct the models from the newly loaded data.
n_wiki, n_vocab = wikivec.shape
n_words = len(word_to_ix)

In [23]:
# CNN with a second ModifiedKSI instance attached via `ksi=`, built for the
# tf-idf data. The summary takes a third input with the same
# (n_wiki, n_vocab) shape as wikivec.
mod_ksi2 = ModifiedKSI(n_embedding, n_vocab)
mod_ksi2.to(DEVICE)
tfidf_model = CNN(n_words, n_wiki, n_embedding, ksi=mod_ksi2).to(DEVICE)
tfidf_summary = summary(
    tfidf_model,
    input_size=[(batch_size, avg_note_size), (batch_size, n_vocab), (n_wiki, n_vocab)],
    dtypes=[torch.int, torch.float, torch.float],
)

tfidf_summary

Layer (type:depth-idx)                   Output Shape              Param #
CNN                                      --                        --
├─ModifiedKSI: 1-1                       --                        --
│    └─Linear: 2-1                       --                        (recursive)
│    └─Linear: 2-2                       --                        (recursive)
│    └─Linear: 2-3                       --                        (recursive)
│    └─Linear: 2-4                       --                        (recursive)
├─Embedding: 1-2                         [32, 2455, 100]           4,796,200
├─Dropout: 1-3                           [32, 2455, 100]           --
├─Conv1d: 1-4                            [32, 100, 2453]           30,100
├─Conv1d: 1-5                            [32, 100, 2452]           40,100
├─Conv1d: 1-6                            [32, 100, 2451]           50,100
├─Linear: 1-7                            [32, 344]                 103,544
├─ModifiedKSI: 1-1       

In [24]:
# Train the CNN+ModifiedKSI model (tf-idf vectors) with a fresh default Adam
# optimizer; wikivec is passed to train_model for this model.
optimizer = optim.Adam(params=tfidf_model.parameters())
prof_tfidf_ksi = train_model(
    tfidf_model,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    wikivec=wikivec,
    optimizer=optimizer,
    n_epochs=n_epochs,
    early_stopping=early_stopping,
    profile=profile,
    log_path=f'./log/{model_type}_ModifiedKSI_tfidf',
    device=DEVICE,
)

Epoch: 001, Train Recall@10: 0.7539, Val Recall@10: 0.7474, Train Micro F1: 0.5916, Val Micro F1: 0.5822, Train Macro F1: 0.1546, Val Macro F1: 0.1834, Train Micro AUC: 0.9764, Val Micro AUC: 0.9700, Train Macro AUC: 0.8784, Val Macro AUC: 0.8774
Epoch: 002, Train Recall@10: 0.7839, Val Recall@10: 0.7715, Train Micro F1: 0.6310, Val Micro F1: 0.6197, Train Macro F1: 0.1956, Val Macro F1: 0.2235, Train Micro AUC: 0.9807, Val Micro AUC: 0.9741, Train Macro AUC: 0.9125, Val Macro AUC: 0.8949
Epoch: 003, Train Recall@10: 0.8003, Val Recall@10: 0.7870, Train Micro F1: 0.6491, Val Micro F1: 0.6331, Train Macro F1: 0.2255, Val Macro F1: 0.2452, Train Micro AUC: 0.9829, Val Micro AUC: 0.9759, Train Macro AUC: 0.9331, Val Macro AUC: 0.9023
Epoch: 004, Train Recall@10: 0.8121, Val Recall@10: 0.7958, Train Micro F1: 0.6633, Val Micro F1: 0.6442, Train Macro F1: 0.2570, Val Macro F1: 0.2615, Train Micro AUC: 0.9846, Val Micro AUC: 0.9772, Train Macro AUC: 0.9478, Val Macro AUC: 0.9044
Epoch: 005, 

In [25]:
# Optionally pickle the whole tf-idf CNN+ModifiedKSI module into the current data directory.
if save:
    torch.save(tfidf_model, f'{dir}{model_type}_ModifiedKSI_tfidf_model.pt')
# Optionally print the profiler averages returned by train_model.
if profile:
    tfidf_averages = prof_tfidf_ksi.key_averages(group_by_stack_n=5)
    print(tfidf_averages.table(sort_by='self_cuda_time_total'))

In [26]:
# Evaluate the CNN+ModifiedKSI (tf-idf vectors) model on the test loader and
# report the aggregate metrics; the sixth return value is kept under its own
# name, label_aucs_tfidf.
(tt_recall_at_k, tt_micro_f1, tt_macro_f1,
 tt_micro_auc, tt_macro_auc, label_aucs_tfidf) = test_model(tfidf_model,
                                                            test_dataloader,
                                                            wikivec,
                                                            device=DEVICE)
# NOTE(review): this cell originally assigned the tf-idf result to
# `label_aucs_mod`, replacing the frequency-vector model's value. The alias
# keeps that binding for any later cell that reads it; drop it once such
# cells (if any) use `label_aucs_tfidf`.
label_aucs_mod = label_aucs_tfidf
print(f'Test Recall@10: {tt_recall_at_k:.4f}, Test Micro F1: {tt_micro_f1:.4f}, Test Macro F1: {tt_macro_f1:.4f}' +
      f', Test Micro AUC: {tt_micro_auc:.4f}, Test Macro AUC: {tt_macro_auc:.4f}')

# Drop this notebook's reference to the model and force a collection.
del tfidf_model
gc.collect()
# DEVICE is a torch.device, which never compares equal to the string 'cuda'
# (the old `DEVICE == 'cuda'` check was always False). Test its `.type` so the
# CUDA caching allocator is actually emptied on GPU runs.
if DEVICE.type == 'cuda':
    torch.cuda.empty_cache()

Test Recall@10: 0.8060, Test Micro F1: 0.6548, Test Macro F1: 0.3156, Test Micro AUC: 0.9800, Test Macro AUC: 0.9003
