In [1]:
from ncn.model import *
from ncn.training import *

In [2]:
# Seed Python's and PyTorch's random number generators with the shared SEED.
random.seed(SEED)
torch.manual_seed(SEED)
# Ask cuDNN for deterministic kernels and switch off its auto-tuner; the
# PyTorch reproducibility notes recommend setting both flags together because
# the benchmark mode may pick different algorithms between runs.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
# Set up training: build the bucketized iterators together with the
# context, title and author vocabularies.
# The same vocabulary size setting is passed for all three fields, so it is
# named once here instead of being repeated as a literal.
MAX_VOCAB_TOKENS = 20000
data = get_bucketized_iterators("ncn/arxiv_data.csv",
                                batch_size=64,
                                len_context_vocab=MAX_VOCAB_TOKENS,
                                len_title_vocab=MAX_VOCAB_TOKENS,
                                len_aut_vocab=MAX_VOCAB_TOKENS)
# The padding index is looked up in the title vocabulary.
PAD_IDX = data.ttl.vocab.stoi['<pad>']
# Actual vocabulary lengths, used below to size the model's embeddings.
cntxt_vocab_len = len(data.cntxt.vocab)
aut_vocab_len = len(data.aut.vocab)
ttl_vocab_len = len(data.ttl.vocab)

INFO:ncn.data:Getting fields...
INFO:ncn.data:Loading dataset...
INFO:ncn.data:Building vocab...


In [4]:
# Build the Neural Citation Network; keyword arguments are grouped by purpose.
net = NeuralCitationNetwork(
    # vocabulary sizes and padding index taken from the data setup cell
    context_vocab_size=cntxt_vocab_len,
    title_vocab_size=ttl_vocab_len,
    author_vocab_size=aut_vocab_len,
    pad_idx=PAD_IDX,
    # encoder settings
    context_filters=[4, 4, 5, 6, 7],
    author_filters=[1, 2],
    num_filters=256,
    authors=True,
    # embedding / decoder sizes
    embed_size=128,
    hidden_size=256,
    num_layers=1,
    # regularisation and attention output
    dropout_p=0.2,
    show_attention=False,
)
# Move the model to DEVICE; left as the final expression so the cell
# displays the module summary.
net.to(DEVICE)



NeuralCitationNetwork(
  (encoder): NCNEncoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (context_embedding): Embedding(20002, 128, padding_idx=1)
    (context_encoder): TDNNEncoder(
      (encoder): ModuleList(
        (0): TDNN(
          (conv): Conv2d(1, 256, kernel_size=(128, 4), stride=(1, 1), bias=False)
        )
        (1): TDNN(
          (conv): Conv2d(1, 256, kernel_size=(128, 4), stride=(1, 1), bias=False)
        )
        (2): TDNN(
          (conv): Conv2d(1, 256, kernel_size=(128, 5), stride=(1, 1), bias=False)
        )
        (3): TDNN(
          (conv): Conv2d(1, 256, kernel_size=(128, 6), stride=(1, 1), bias=False)
        )
        (4): TDNN(
          (conv): Conv2d(1, 256, kernel_size=(128, 7), stride=(1, 1), bias=False)
        )
      )
      (fc): Linear(in_features=1280, out_features=1280, bias=True)
    )
    (author_embedding): Embedding(20002, 128, padding_idx=1)
    (citing_author_encoder): TDNNEncoder(
      (encoder): ModuleList(
        (0):

In [5]:
# Train the network on the bucketized iterators and keep the two values
# returned by train_model (named here as training and validation losses).
train_losses, valid_losses = train_model(
    model=net,
    train_iterator=data.train_iter,
    valid_iterator=data.valid_iter,
    pad=PAD_IDX,
    lr=0.001,
    model_name="embed_128_hid_256_1_GRU",
)

INFO:ncn.training:INITIALIZING NEURAL CITATION NETWORK WITH AUTHORS = True
Running on: cpu
Number of model parameters: 24,341,796
Encoders: # Filters = 256, Context filter length = [4, 4, 5, 6, 7],  Context filter length = [1, 2]
Embeddings: Dimension = 128, Pad index = 1, Context vocab = 20002, Author vocab = 20002, Title vocab = 20004
Decoder: # GRU cells = 1, Hidden size = 256
Parameters: Dropout = 0.2, Show attention = False
-------------------------------------------------
TRAINING SETTINGS
Seed = 34, # Epochs = 20, Batch size = 64, Initial lr = 0.001


Epochs:   0%|          | 0/20 [00:00<?, ?it/s]

Training batches:   0%|          | 0/6280 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/785 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 01 | Time: 232m 40s
INFO:ncn.training:	Train Loss: 5.028
INFO:ncn.training:	 Val. Loss: 4.284


Training batches:   0%|          | 0/6280 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/785 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 02 | Time: 218m 58s
INFO:ncn.training:	Train Loss: 4.196
INFO:ncn.training:	 Val. Loss: 3.971


Training batches:   0%|          | 0/6280 [00:00<?, ?it/s]

Evaluating batches:   0%|          | 0/785 [00:00<?, ?it/s]

INFO:ncn.training:Epoch: 03 | Time: 237m 36s
INFO:ncn.training:	Train Loss: 3.949
INFO:ncn.training:	 Val. Loss: 3.824


Training batches:   0%|          | 0/6280 [00:00<?, ?it/s]

KeyboardInterrupt: 