In [1]:
import gc
import torch
import torch.optim as optim
from torchinfo import summary

from KSI_models import KSI, ModifiedKSI, LSTM
from KSI_utils import load_KSI_data, train_model, test_model

# Prefer the GPU when CUDA is usable; otherwise everything runs on the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE = torch.device(_device_name)

In [2]:
# --- Model dimensions (passed to LSTM / KSI / ModifiedKSI constructors) ---
n_embedding = 100
n_hidden = 100  # 200 in paper, but too intensive for my machine

# --- Training schedule ---
batch_size = 32      # forwarded to load_KSI_data and to the torchinfo summaries
n_epochs = 25        # forwarded to train_model
early_stopping = 5   # forwarded to train_model (presumably a patience in epochs; confirm in KSI_utils)

# --- Run switches ---
save = True          # when True, each trained model is written with torch.save
profile = False      # when True, train_model is asked to profile and a table is printed

# Tag used in the log directories and the saved-model file names.
model_type = 'LSTM'

In [3]:
# Dataset variant used by the baseline and the original KSI runs.
# NOTE(review): `dir` shadows the Python builtin; it is kept because the later
# save cells build their output paths from this exact name.
dir = 'data/original/'

# Request all three splits in one call; the loader also returns the wiki
# matrix and the word-to-index vocabulary.
loaders, wikivec, word_to_ix = load_KSI_data(
    dir=dir,
    batch_size=batch_size,
    train=True,
    val=True,
    test=True,
    device=DEVICE,
)
train_dataloader, val_dataloader, test_dataloader = (
    loaders['train'],
    loaders['val'],
    loaders['test'],
)

# Sizes needed to build the models: rows/columns of the wiki matrix and the
# number of entries in the vocabulary.
n_wiki, n_vocab = wikivec.shape
n_words = len(word_to_ix)

In [3]:
# Sequence length used only for the torchinfo summaries below. The value was
# obtained once with the snippet kept here for reference (it averages the second
# dimension of the first element of every training batch). Re-running it needs
# `import numpy as np`, which this notebook does not import.
#
#     note_lengths = []
#     for data in train_dataloader:
#         n, _, _ = data
#         note_lengths.append(n.shape[1])
#     avg_note_size = np.round(np.array(note_lengths).mean()).astype(int)

avg_note_size = 2455

In [5]:
# Baseline LSTM, built without a KSI component and moved to the target device.
# NOTE(review): batch_size is passed as the fifth positional argument here,
# while the KSI variants pass `ksi=` by keyword instead; confirm against the
# LSTM signature in KSI_models.
base_model = LSTM(n_words, n_wiki, n_embedding, n_hidden, batch_size).to(DEVICE)

# Summarise the network for two inputs: an integer tensor of shape
# (batch_size, avg_note_size) and a float tensor of shape (batch_size, n_vocab).
base_summary = summary(
    base_model,
    input_size=[(batch_size, avg_note_size), (batch_size, n_vocab)],
    dtypes=[torch.int, torch.float],
)

# Bare expression so the notebook renders the summary table.
base_summary

Layer (type:depth-idx)                   Output Shape              Param #
LSTM                                     --                        --
├─Embedding: 1-1                         [32, 2455, 100]           4,796,200
├─Dropout: 1-2                           [2455, 32, 100]           --
├─LSTM: 1-3                              [2455, 32, 100]           80,800
├─Linear: 1-4                            [32, 344]                 34,744
Total params: 4,911,744
Trainable params: 4,911,744
Non-trainable params: 0
Total mult-adds (G): 6.50
Input size (MB): 1.87
Forward/backward pass size (MB): 125.78
Params size (MB): 19.65
Estimated Total Size (MB): 147.30

In [6]:
# Adam with its default settings over every parameter of the baseline.
optimizer = optim.Adam(base_model.parameters())

# Train the baseline; no wikivec is supplied for this run. The value returned
# by train_model is kept for the optional profiling report printed later.
prof_base = train_model(
    base_model,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    optimizer=optimizer,
    n_epochs=n_epochs,
    early_stopping=early_stopping,
    init_hidden=True,
    device=DEVICE,
    profile=profile,
    log_path=f'./log/{model_type}',
)

Epoch: 001, Train Recall@10: 0.4447, Val Recall@10: 0.4500, Train Micro F1: 0.0000, Val Micro F1: 0.0000, Train Macro F1: 0.0000, Val Macro F1: 0.0000, Train Micro AUC: 0.9265, Val Micro AUC: 0.9120, Train Macro AUC: 0.5737, Val Macro AUC: 0.5759
Epoch: 002, Train Recall@10: 0.4519, Val Recall@10: 0.4566, Train Micro F1: 0.0000, Val Micro F1: 0.0000, Train Macro F1: 0.0000, Val Macro F1: 0.0000, Train Micro AUC: 0.9292, Val Micro AUC: 0.9150, Train Macro AUC: 0.5886, Val Macro AUC: 0.5944
Epoch: 003, Train Recall@10: 0.4587, Val Recall@10: 0.4627, Train Micro F1: 0.0395, Val Micro F1: 0.0419, Train Macro F1: 0.0018, Val Macro F1: 0.0023, Train Micro AUC: 0.9310, Val Micro AUC: 0.9170, Train Macro AUC: 0.6103, Val Macro AUC: 0.6185
Epoch: 004, Train Recall@10: 0.4833, Val Recall@10: 0.4871, Train Micro F1: 0.1279, Val Micro F1: 0.1276, Train Macro F1: 0.0051, Val Macro F1: 0.0061, Train Micro AUC: 0.9355, Val Micro AUC: 0.9223, Train Macro AUC: 0.6526, Val Macro AUC: 0.6536
Epoch: 005, 

In [7]:
# Write the whole baseline model object into the dataset directory when
# saving is switched on.
if save:
    torch.save(base_model, f'{dir}{model_type}_model.pt')

# Print the profiler table (sorted by self CUDA time) only when profiling was
# requested for the training run.
if profile:
    base_averages = prof_base.key_averages(group_by_stack_n=5)
    print(base_averages.table(sort_by='self_cuda_time_total'))

In [8]:
# Evaluate the baseline LSTM on the test loader and report the summary metrics.
# The sixth return value is stored as label_aucs_base.
(tt_recall_at_k,
 tt_micro_f1,
 tt_macro_f1,
 tt_micro_auc,
 tt_macro_auc,
 label_aucs_base) = test_model(base_model,
                               test_dataloader,
                               wikivec,
                               device=DEVICE,
                               init_hidden=True)
print(f'Test Recall@10: {tt_recall_at_k:.4f}, Test Micro F1: {tt_micro_f1:.4f}, Test Macro F1: {tt_macro_f1:.4f}' +
      f', Test Micro AUC: {tt_micro_auc:.4f}, Test Macro AUC: {tt_macro_auc:.4f}')

# Drop the model before the next experiment is built.
# NOTE(review): `optimizer` still references this model's parameters until it
# is rebound in the next training cell, so their memory may not be released yet.
del base_model
gc.collect()
# DEVICE is a torch.device, which never compares equal to a plain string, so
# the previous `DEVICE == 'cuda'` test was always False and the CUDA cache was
# never emptied. Checking the device type fixes that (and also matches 'cuda:0').
if DEVICE.type == 'cuda':
    torch.cuda.empty_cache()

Test Recall@10: 0.7142, Test Micro F1: 0.5831, Test Macro F1: 0.0813, Test Micro AUC: 0.9654, Test Macro AUC: 0.8224


In [9]:
# Original KSI component, moved to the target device before being attached.
ksi = KSI(n_embedding, n_vocab)
ksi.to(DEVICE)

# Same LSTM as the baseline, but with the KSI component passed in by keyword.
model = LSTM(n_words, n_wiki, n_embedding, n_hidden, ksi=ksi).to(DEVICE)

# Summarise with three inputs: integer (batch_size, avg_note_size), float
# (batch_size, n_vocab), and float (n_wiki, n_vocab).
ksi_summary = summary(
    model,
    input_size=[
        (batch_size, avg_note_size),
        (batch_size, n_vocab),
        (n_wiki, n_vocab),
    ],
    dtypes=[torch.int, torch.float, torch.float],
)

# Bare expression so the notebook renders the summary table.
ksi_summary

Layer (type:depth-idx)                   Output Shape              Param #
LSTM                                     --                        --
├─KSI: 1-1                               --                        --
│    └─Linear: 2-1                       --                        (recursive)
│    └─Linear: 2-2                       --                        (recursive)
│    └─Linear: 2-3                       --                        (recursive)
├─Embedding: 1-2                         [32, 2455, 100]           4,796,200
├─Dropout: 1-3                           [2455, 32, 100]           --
├─LSTM: 1-4                              [2455, 32, 100]           80,800
├─Linear: 1-5                            [32, 344]                 34,744
├─KSI: 1-1                               --                        --
│    └─Linear: 2-4                       [32, 344, 100]            1,217,400
│    └─Linear: 2-5                       [32, 344, 100]            10,100
│    └─Linear: 2-6              

In [10]:
# Fresh Adam optimizer (default settings) for the LSTM+KSI model.
optimizer = optim.Adam(model.parameters())

# Train with the wiki matrix supplied; logs go to a KSI-specific directory.
prof_ksi = train_model(
    model,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    wikivec=wikivec,
    optimizer=optimizer,
    n_epochs=n_epochs,
    early_stopping=early_stopping,
    init_hidden=True,
    device=DEVICE,
    profile=profile,
    log_path=f'./log/{model_type}_KSI',
)

Epoch: 001, Train Recall@10: 0.6637, Val Recall@10: 0.6596, Train Micro F1: 0.4001, Val Micro F1: 0.3931, Train Macro F1: 0.0586, Val Macro F1: 0.0697, Train Micro AUC: 0.9673, Val Micro AUC: 0.9592, Train Macro AUC: 0.8271, Val Macro AUC: 0.8142
Epoch: 002, Train Recall@10: 0.7000, Val Recall@10: 0.6918, Train Micro F1: 0.4708, Val Micro F1: 0.4597, Train Macro F1: 0.0888, Val Macro F1: 0.1007, Train Micro AUC: 0.9721, Val Micro AUC: 0.9636, Train Macro AUC: 0.8625, Val Macro AUC: 0.8371
Epoch: 003, Train Recall@10: 0.7356, Val Recall@10: 0.7224, Train Micro F1: 0.5342, Val Micro F1: 0.5202, Train Macro F1: 0.1227, Val Macro F1: 0.1339, Train Micro AUC: 0.9763, Val Micro AUC: 0.9677, Train Macro AUC: 0.8863, Val Macro AUC: 0.8515
Epoch: 004, Train Recall@10: 0.7603, Val Recall@10: 0.7424, Train Micro F1: 0.5753, Val Micro F1: 0.5559, Train Macro F1: 0.1569, Val Macro F1: 0.1573, Train Micro AUC: 0.9793, Val Micro AUC: 0.9703, Train Macro AUC: 0.9033, Val Macro AUC: 0.8617
Epoch: 005, 

In [11]:
# Write the whole LSTM+KSI model object into the dataset directory when
# saving is switched on.
if save:
    torch.save(model, f'{dir}{model_type}_KSI_model.pt')

# Print the profiler table (sorted by self CUDA time) only when profiling was
# requested for the training run.
if profile:
    ksi_averages = prof_ksi.key_averages(group_by_stack_n=5)
    print(ksi_averages.table(sort_by='self_cuda_time_total'))

In [12]:
# Evaluate the LSTM+KSI model on the test loader and report the summary metrics.
# The sixth return value is stored as label_aucs_ksi.
(tt_recall_at_k,
 tt_micro_f1,
 tt_macro_f1,
 tt_micro_auc,
 tt_macro_auc,
 label_aucs_ksi) = test_model(model,
                              test_dataloader,
                              wikivec,
                              device=DEVICE,
                              init_hidden=True)
print(f'Test Recall@10: {tt_recall_at_k:.4f}, Test Micro F1: {tt_micro_f1:.4f}, Test Macro F1: {tt_macro_f1:.4f}' +
      f', Test Micro AUC: {tt_micro_auc:.4f}, Test Macro AUC: {tt_macro_auc:.4f}')

# Drop the model before the next experiment is built.
# NOTE(review): `optimizer` still references this model's parameters until it
# is rebound in the next training cell, so their memory may not be released yet.
del model
gc.collect()
# DEVICE is a torch.device, which never compares equal to a plain string, so
# the previous `DEVICE == 'cuda'` test was always False and the CUDA cache was
# never emptied. Checking the device type fixes that (and also matches 'cuda:0').
if DEVICE.type == 'cuda':
    torch.cuda.empty_cache()

Test Recall@10: 0.7622, Test Micro F1: 0.5933, Test Macro F1: 0.1892, Test Micro AUC: 0.9743, Test Macro AUC: 0.8796


In [13]:
# Modified KSI experiment: switch to the dataset built from frequency vectors
# instead of binary vectors. Every data-dependent name below is rebound.
# NOTE(review): `dir` shadows the Python builtin; it is kept because the later
# save cell builds its output path from this exact name.
dir = 'data/original_freqs/'

loaders, wikivec, word_to_ix = load_KSI_data(
    dir=dir,
    batch_size=batch_size,
    train=True,
    val=True,
    test=True,
    device=DEVICE,
)
train_dataloader, val_dataloader, test_dataloader = (
    loaders['train'],
    loaders['val'],
    loaders['test'],
)

# Recompute the sizes from the newly loaded wiki matrix and vocabulary.
n_wiki, n_vocab = wikivec.shape
n_words = len(word_to_ix)

In [14]:
# ModifiedKSI component for the frequency-vector data, moved to the target
# device before being attached.
mod_ksi = ModifiedKSI(n_embedding, n_vocab)
mod_ksi.to(DEVICE)

# LSTM with the ModifiedKSI component passed in by keyword.
mod_model = LSTM(n_words, n_wiki, n_embedding, n_hidden, ksi=mod_ksi).to(DEVICE)

# Summarise with three inputs: integer (batch_size, avg_note_size), float
# (batch_size, n_vocab), and float (n_wiki, n_vocab).
mod_summary = summary(
    mod_model,
    input_size=[
        (batch_size, avg_note_size),
        (batch_size, n_vocab),
        (n_wiki, n_vocab),
    ],
    dtypes=[torch.int, torch.float, torch.float],
)

# Bare expression so the notebook renders the summary table.
mod_summary

Layer (type:depth-idx)                   Output Shape              Param #
LSTM                                     --                        --
├─ModifiedKSI: 1-1                       --                        --
│    └─Linear: 2-1                       --                        (recursive)
│    └─Linear: 2-2                       --                        (recursive)
│    └─Linear: 2-3                       --                        (recursive)
│    └─Linear: 2-4                       --                        (recursive)
├─Embedding: 1-2                         [32, 2455, 100]           4,796,200
├─Dropout: 1-3                           [2455, 32, 100]           --
├─LSTM: 1-4                              [2455, 32, 100]           80,800
├─Linear: 1-5                            [32, 344]                 34,744
├─ModifiedKSI: 1-1                       --                        --
│    └─Linear: 2-5                       [32, 344, 12173, 1]       2
│    └─Linear: 2-6                 

In [15]:
# Fresh Adam optimizer (default settings) for the LSTM+ModifiedKSI model.
optimizer = optim.Adam(mod_model.parameters())

# Train on the frequency-vector loaders with the matching wiki matrix.
prof_mod_ksi = train_model(
    mod_model,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    wikivec=wikivec,
    optimizer=optimizer,
    n_epochs=n_epochs,
    early_stopping=early_stopping,
    init_hidden=True,
    device=DEVICE,
    profile=profile,
    log_path=f'./log/{model_type}_ModifiedKSI',
)

Epoch: 001, Train Recall@10: 0.6959, Val Recall@10: 0.6950, Train Micro F1: 0.4661, Val Micro F1: 0.4611, Train Macro F1: 0.0663, Val Macro F1: 0.0821, Train Micro AUC: 0.9708, Val Micro AUC: 0.9646, Train Macro AUC: 0.8380, Val Macro AUC: 0.8439
Epoch: 002, Train Recall@10: 0.7223, Val Recall@10: 0.7196, Train Micro F1: 0.5010, Val Micro F1: 0.4954, Train Macro F1: 0.1046, Val Macro F1: 0.1285, Train Micro AUC: 0.9748, Val Micro AUC: 0.9690, Train Macro AUC: 0.8624, Val Macro AUC: 0.8605
Epoch: 003, Train Recall@10: 0.7405, Val Recall@10: 0.7356, Train Micro F1: 0.5245, Val Micro F1: 0.5162, Train Macro F1: 0.1325, Val Macro F1: 0.1497, Train Micro AUC: 0.9770, Val Micro AUC: 0.9713, Train Macro AUC: 0.8767, Val Macro AUC: 0.8694
Epoch: 004, Train Recall@10: 0.7575, Val Recall@10: 0.7528, Train Micro F1: 0.5501, Val Micro F1: 0.5429, Train Macro F1: 0.1538, Val Macro F1: 0.1657, Train Micro AUC: 0.9789, Val Micro AUC: 0.9731, Train Macro AUC: 0.8889, Val Macro AUC: 0.8774
Epoch: 005, 

In [16]:
# Write the whole LSTM+ModifiedKSI model object into the frequency-vector
# dataset directory when saving is switched on.
if save:
    torch.save(mod_model, f'{dir}{model_type}_ModifiedKSI_model.pt')

# Print the profiler table (sorted by self CUDA time) only when profiling was
# requested for the training run.
if profile:
    mod_averages = prof_mod_ksi.key_averages(group_by_stack_n=5)
    print(mod_averages.table(sort_by='self_cuda_time_total'))

In [17]:
# Evaluate the LSTM+ModifiedKSI (frequency vectors) model on the test loader and
# report the summary metrics. The sixth return value is stored as label_aucs_mod.
(tt_recall_at_k,
 tt_micro_f1,
 tt_macro_f1,
 tt_micro_auc,
 tt_macro_auc,
 label_aucs_mod) = test_model(mod_model,
                              test_dataloader,
                              wikivec,
                              device=DEVICE,
                              init_hidden=True)
print(f'Test Recall@10: {tt_recall_at_k:.4f}, Test Micro F1: {tt_micro_f1:.4f}, Test Macro F1: {tt_macro_f1:.4f}' +
      f', Test Micro AUC: {tt_micro_auc:.4f}, Test Macro AUC: {tt_macro_auc:.4f}')

# Drop the model before the next experiment is built.
# NOTE(review): `optimizer` still references this model's parameters until it
# is rebound in the next training cell, so their memory may not be released yet.
del mod_model
gc.collect()
# DEVICE is a torch.device, which never compares equal to a plain string, so
# the previous `DEVICE == 'cuda'` test was always False and the CUDA cache was
# never emptied. Checking the device type fixes that (and also matches 'cuda:0').
if DEVICE.type == 'cuda':
    torch.cuda.empty_cache()

Test Recall@10: 0.7940, Test Micro F1: 0.6226, Test Macro F1: 0.2437, Test Micro AUC: 0.9798, Test Macro AUC: 0.9000


In [4]:
# Modified KSI experiment: switch to the dataset built from tf-idf vectors
# instead of binary vectors. Every data-dependent name below is rebound.
# NOTE(review): `dir` shadows the Python builtin; it is kept because the later
# save cell builds its output path from this exact name.
dir = 'data/original_tfidf/'

loaders, wikivec, word_to_ix = load_KSI_data(
    dir=dir,
    batch_size=batch_size,
    train=True,
    val=True,
    test=True,
    device=DEVICE,
)
train_dataloader, val_dataloader, test_dataloader = (
    loaders['train'],
    loaders['val'],
    loaders['test'],
)

# Recompute the sizes from the newly loaded wiki matrix and vocabulary.
n_wiki, n_vocab = wikivec.shape
n_words = len(word_to_ix)

In [5]:
# Second ModifiedKSI component, this time for the tf-idf data, moved to the
# target device before being attached.
mod_ksi2 = ModifiedKSI(n_embedding, n_vocab)
mod_ksi2.to(DEVICE)

# LSTM with the tf-idf ModifiedKSI component passed in by keyword.
tfidf_model = LSTM(n_words, n_wiki, n_embedding, n_hidden, ksi=mod_ksi2).to(DEVICE)

# Summarise with three inputs: integer (batch_size, avg_note_size), float
# (batch_size, n_vocab), and float (n_wiki, n_vocab).
tfidf_summary = summary(
    tfidf_model,
    input_size=[
        (batch_size, avg_note_size),
        (batch_size, n_vocab),
        (n_wiki, n_vocab),
    ],
    dtypes=[torch.int, torch.float, torch.float],
)

# Bare expression so the notebook renders the summary table.
tfidf_summary

Layer (type:depth-idx)                   Output Shape              Param #
LSTM                                     --                        --
├─ModifiedKSI: 1-1                       --                        --
│    └─Linear: 2-1                       --                        (recursive)
│    └─Linear: 2-2                       --                        (recursive)
│    └─Linear: 2-3                       --                        (recursive)
│    └─Linear: 2-4                       --                        (recursive)
├─Embedding: 1-2                         [32, 2455, 100]           4,796,200
├─Dropout: 1-3                           [2455, 32, 100]           --
├─LSTM: 1-4                              [2455, 32, 100]           80,800
├─Linear: 1-5                            [32, 344]                 34,744
├─ModifiedKSI: 1-1                       --                        --
│    └─Linear: 2-5                       [32, 344, 12173, 1]       2
│    └─Linear: 2-6                 

In [6]:
# Fresh Adam optimizer (default settings) for the tf-idf ModifiedKSI model.
optimizer = optim.Adam(tfidf_model.parameters())

# Train on the tf-idf loaders with the matching wiki matrix.
prof_tfidf_ksi = train_model(
    tfidf_model,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    wikivec=wikivec,
    optimizer=optimizer,
    n_epochs=n_epochs,
    early_stopping=early_stopping,
    init_hidden=True,
    device=DEVICE,
    profile=profile,
    log_path=f'./log/{model_type}_ModifiedKSI_tfidf',
)

Epoch: 001, Train Recall@10: 0.7107, Val Recall@10: 0.7099, Train Micro F1: 0.4842, Val Micro F1: 0.4788, Train Macro F1: 0.0854, Val Macro F1: 0.1029, Train Micro AUC: 0.9733, Val Micro AUC: 0.9675, Train Macro AUC: 0.8474, Val Macro AUC: 0.8533
Epoch: 002, Train Recall@10: 0.7286, Val Recall@10: 0.7252, Train Micro F1: 0.5027, Val Micro F1: 0.4965, Train Macro F1: 0.1223, Val Macro F1: 0.1403, Train Micro AUC: 0.9756, Val Micro AUC: 0.9697, Train Macro AUC: 0.8641, Val Macro AUC: 0.8623
Epoch: 003, Train Recall@10: 0.7407, Val Recall@10: 0.7352, Train Micro F1: 0.5210, Val Micro F1: 0.5133, Train Macro F1: 0.1462, Val Macro F1: 0.1613, Train Micro AUC: 0.9772, Val Micro AUC: 0.9712, Train Macro AUC: 0.8744, Val Macro AUC: 0.8687
Epoch: 004, Train Recall@10: 0.7533, Val Recall@10: 0.7465, Train Micro F1: 0.5416, Val Micro F1: 0.5323, Train Macro F1: 0.1646, Val Macro F1: 0.1711, Train Micro AUC: 0.9787, Val Micro AUC: 0.9727, Train Macro AUC: 0.8859, Val Macro AUC: 0.8767
Epoch: 005, 

In [7]:
# Write the whole tf-idf ModifiedKSI model object into the tf-idf dataset
# directory when saving is switched on.
if save:
    torch.save(tfidf_model, f'{dir}{model_type}_ModifiedKSI_tfidf_model.pt')

# Print the profiler table (sorted by self CUDA time) only when profiling was
# requested for the training run.
if profile:
    tfidf_averages = prof_tfidf_ksi.key_averages(group_by_stack_n=5)
    print(tfidf_averages.table(sort_by='self_cuda_time_total'))

In [8]:
# Evaluate the LSTM+ModifiedKSI (tf-idf vectors) model on the test loader and
# report the summary metrics.
# NOTE(review): the sixth return value is stored in `label_aucs_mod`, the same
# name the frequency-vector run used, so that earlier result is overwritten.
# The name is left unchanged in case later cells read it; consider a distinct
# name such as `label_aucs_tfidf`.
(tt_recall_at_k,
 tt_micro_f1,
 tt_macro_f1,
 tt_micro_auc,
 tt_macro_auc,
 label_aucs_mod) = test_model(tfidf_model,
                              test_dataloader,
                              wikivec,
                              device=DEVICE,
                              init_hidden=True)
print(f'Test Recall@10: {tt_recall_at_k:.4f}, Test Micro F1: {tt_micro_f1:.4f}, Test Macro F1: {tt_macro_f1:.4f}' +
      f', Test Micro AUC: {tt_micro_auc:.4f}, Test Macro AUC: {tt_macro_auc:.4f}')

# Drop the model now that its evaluation is done.
# NOTE(review): `optimizer` still references this model's parameters, so their
# memory may not be released until that name is rebound or deleted.
del tfidf_model
gc.collect()
# DEVICE is a torch.device, which never compares equal to a plain string, so
# the previous `DEVICE == 'cuda'` test was always False and the CUDA cache was
# never emptied. Checking the device type fixes that (and also matches 'cuda:0').
if DEVICE.type == 'cuda':
    torch.cuda.empty_cache()

Test Recall@10: 0.7887, Test Micro F1: 0.6141, Test Macro F1: 0.2484, Test Micro AUC: 0.9792, Test Macro AUC: 0.8961
