In [1]:
from ncn.model import *
from ncn.training import *

In [2]:
# Seed every RNG this run relies on so that results can be reproduced.
# `random`, `torch` and `SEED` are presumably provided by the star imports
# in the first cell — confirm against ncn.model / ncn.training.
random.seed(SEED)
torch.manual_seed(SEED)
# Ask cuDNN for deterministic kernels and explicitly switch off its autotuner:
# with benchmarking enabled cuDNN may select different convolution algorithms
# from run to run, which defeats the deterministic flag.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
# Set up training data: load the CSV and build the iterators and vocabularies.
# The path is relative, so it is resolved against the current working directory.
data = get_bucketized_iterators(
    "ncn/arxiv_data_2.csv",
    batch_size=64,
    len_context_vocab=20000,
    len_title_vocab=20000,
    len_aut_vocab=20000,
)

# Padding index, looked up in the title vocabulary.
# NOTE(review): this single index is later handed to the whole model, so all
# three vocabularies presumably share the same '<pad>' index — confirm.
PAD_IDX = data.ttl.vocab.stoi['<pad>']

# Actual vocabulary sizes, used below to size the model's embedding layers.
ttl_vocab_len = len(data.ttl.vocab)
aut_vocab_len = len(data.aut.vocab)
cntxt_vocab_len = len(data.cntxt.vocab)

INFO:ncn.data:Getting fields...
INFO:ncn.data:Loading dataset...
INFO:ncn.data:Building vocab...


In [4]:
# Instantiate the Neural Citation Network and move it to the target device.
net = NeuralCitationNetwork(
    # Filter widths of the convolutional (TDNN) encoders, one entry per filter.
    context_filters=[4, 4, 5, 6, 7],
    title_filters=[2, 2, 3],  # title filters added by Thi
    author_filters=[1, 2],
    num_filters=128,
    authors=True,
    # Vocabulary sizes and padding index computed in the data cell.
    context_vocab_size=cntxt_vocab_len,
    title_vocab_size=ttl_vocab_len,
    author_vocab_size=aut_vocab_len,
    pad_idx=PAD_IDX,
    # Embedding / recurrent layer dimensions and regularisation.
    embed_size=128,
    hidden_size=128,
    num_layers=1,
    dropout_p=0.2,
    show_attention=False,
)
# Kept as the cell's last expression so the notebook displays the module summary.
net.to(DEVICE)



NeuralCitationNetwork(
  (encoder): NCNEncoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (context_embedding): Embedding(20002, 128, padding_idx=1)
    (context_encoder): TDNNEncoder(
      (encoder): ModuleList(
        (0): TDNN(
          (conv): Conv2d(1, 128, kernel_size=(128, 4), stride=(1, 1), bias=False)
        )
        (1): TDNN(
          (conv): Conv2d(1, 128, kernel_size=(128, 4), stride=(1, 1), bias=False)
        )
        (2): TDNN(
          (conv): Conv2d(1, 128, kernel_size=(128, 5), stride=(1, 1), bias=False)
        )
        (3): TDNN(
          (conv): Conv2d(1, 128, kernel_size=(128, 6), stride=(1, 1), bias=False)
        )
        (4): TDNN(
          (conv): Conv2d(1, 128, kernel_size=(128, 7), stride=(1, 1), bias=False)
        )
      )
      (fc): Linear(in_features=640, out_features=640, bias=True)
    )
    (title_embedding): Embedding(20004, 128, padding_idx=1)
    (title_encoder): TDNNEncoder(
      (encoder): ModuleList(
        (0): TDNN(
    

In [5]:
# Train the network and keep the returned training / validation losses.
# The run label states the hidden size the model is actually built with
# (hidden_size=128; the training log also reports "Hidden size = 128")
# rather than the stale "hid_256" tag, so the run is not mislabelled.
# NOTE(review): model_name is presumably used to name saved artefacts —
# update any consumer that still looks for the old "hid_256" name.
train_losses, valid_losses = train_model(
    model=net,
    train_iterator=data.train_iter,
    valid_iterator=data.valid_iter,
    lr=0.001,
    pad=PAD_IDX,
    model_name="embed_128_hid_128_1_GRU",
)

INFO:ncn.training:INITIALIZING NEURAL CITATION NETWORK WITH AUTHORS = True
Running on: cpu
Number of model parameters: 19,452,964
Encoders: # Filters = 128, Context filter length = [4, 4, 5, 6, 7],  Author filter length = [1, 2]
Embeddings: Dimension = 128, Pad index = 1, Context vocab = 20002, Author vocab = 20002, Title vocab = 20004
Decoder: # GRU cells = 1, Hidden size = 128
Parameters: Dropout = 0.2, Show attention = False
-------------------------------------------------
TRAINING SETTINGS
Seed = 34, # Epochs = 20, Batch size = 64, Initial lr = 0.001


Epochs:   0%|          | 0/20 [00:00<?, ?it/s]

Thi epoch =  0


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 01 | Time: 28m 43s
INFO:ncn.training:	Train Loss: 5.820
INFO:ncn.training:	 Val. Loss: 5.098
Thi epoch =  1


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 02 | Time: 25m 59s
INFO:ncn.training:	Train Loss: 4.764
INFO:ncn.training:	 Val. Loss: 4.591
Thi epoch =  2


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 03 | Time: 26m 0s
INFO:ncn.training:	Train Loss: 4.290
INFO:ncn.training:	 Val. Loss: 4.354
Thi epoch =  3


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 04 | Time: 26m 10s
INFO:ncn.training:	Train Loss: 4.005
INFO:ncn.training:	 Val. Loss: 4.209
Thi epoch =  4


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 05 | Time: 26m 6s
INFO:ncn.training:	Train Loss: 3.812
INFO:ncn.training:	 Val. Loss: 4.120
Thi epoch =  5


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 06 | Time: 26m 2s
INFO:ncn.training:	Train Loss: 3.670
INFO:ncn.training:	 Val. Loss: 4.047
Thi epoch =  6


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 07 | Time: 26m 5s
INFO:ncn.training:	Train Loss: 3.557
INFO:ncn.training:	 Val. Loss: 3.999
Thi epoch =  7


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 08 | Time: 26m 3s
INFO:ncn.training:	Train Loss: 3.464
INFO:ncn.training:	 Val. Loss: 3.968
Thi epoch =  8


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 09 | Time: 26m 7s
INFO:ncn.training:	Train Loss: 3.388
INFO:ncn.training:	 Val. Loss: 3.939
Thi epoch =  9


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 10 | Time: 26m 5s
INFO:ncn.training:	Train Loss: 3.320
INFO:ncn.training:	 Val. Loss: 3.911
Thi epoch =  10


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 11 | Time: 26m 4s
INFO:ncn.training:	Train Loss: 3.261
INFO:ncn.training:	 Val. Loss: 3.890
Thi epoch =  11


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 12 | Time: 35m 42s
INFO:ncn.training:	Train Loss: 3.209
INFO:ncn.training:	 Val. Loss: 3.868
Thi epoch =  12


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 13 | Time: 31m 35s
INFO:ncn.training:	Train Loss: 3.163
INFO:ncn.training:	 Val. Loss: 3.848
Thi epoch =  13


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 14 | Time: 26m 20s
INFO:ncn.training:	Train Loss: 3.118
INFO:ncn.training:	 Val. Loss: 3.838
Thi epoch =  14


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 15 | Time: 26m 18s
INFO:ncn.training:	Train Loss: 3.080
INFO:ncn.training:	 Val. Loss: 3.825
Thi epoch =  15


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 16 | Time: 26m 22s
INFO:ncn.training:	Train Loss: 3.044
INFO:ncn.training:	 Val. Loss: 3.820
Thi epoch =  16


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 17 | Time: 26m 21s
INFO:ncn.training:	Train Loss: 3.012
INFO:ncn.training:	 Val. Loss: 3.796
Thi epoch =  17


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 18 | Time: 26m 40s
INFO:ncn.training:	Train Loss: 2.981
INFO:ncn.training:	 Val. Loss: 3.793
Thi epoch =  18


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 19 | Time: 26m 25s
INFO:ncn.training:	Train Loss: 2.950
INFO:ncn.training:	 Val. Loss: 3.795
Thi epoch =  19


Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Training batches:   0%|          | 0/1313 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/165 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 20 | Time: 26m 24s
INFO:ncn.training:	Train Loss: 2.924
INFO:ncn.training:	 Val. Loss: 3.780
