In [1]:
# Enable IPython's autoreload extension. Mode 2 re-imports modules before each
# cell is executed, so edits to the project modules imported in this notebook
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import tensorflow as tf

from model import SiameseNet

from data_utils import QuoraDataset, DataIterator

from config_msrp import ConfigMSRP
from config import Config
from embeddings import load_embeddings

  return f(*args, **kwds)
Using TensorFlow backend.


In [3]:
### Configuration and pretrained GloVe vectors
config = ConfigMSRP()

# load_embeddings hands back the pretrained matrix plus a pair of vocabulary
# lookups, bound below as w2idx / idx2w.
loaded_embeddings, vocab_maps = load_embeddings(config.glove_filename, binary=False)
w2idx, idx2w = vocab_maps

Loading from saved word_embeddings
Loading vocab


In [4]:
### Loading the training split
# The QuoraDataset class is used as the loader, but the file paths come from
# the ConfigMSRP instance created above.
qd_train = QuoraDataset(config.train_filename, save_path=config.train_save)
w2idx_train, idx2w_train = qd_train.w2idx, qd_train.idx2w

# One row per word of the training vocabulary, config.we_dim columns, filled
# with small Gaussian noise. A dedicated, seeded generator is used instead of
# the global NumPy RNG so the initial matrix is identical on every
# "Restart Kernel -> Run All".
EMBEDDING_INIT_SEED = 0
embedding_rng = np.random.RandomState(EMBEDDING_INIT_SEED)
embeddings = embedding_rng.normal(scale=0.001, size=(len(w2idx_train), config.we_dim))

In [5]:
# Overwrite the random rows with pretrained vectors wherever the training
# word also exists in the pretrained vocabulary; other rows keep their
# random initialisation.
for word, train_row in w2idx_train.items():
    pretrained_row = w2idx.get(word)
    if pretrained_row is None:
        continue
    embeddings[train_row] = loaded_embeddings[pretrained_row]

In [6]:
# Dev and test splits are built with the training vocabulary passed in as
# w2idx (presumably so their token ids index the same rows of `embeddings`;
# confirm against QuoraDataset).
qd_dev = QuoraDataset(
    config.dev_filename,
    w2idx=w2idx_train,
    save_path=config.dev_save,
)
qd_test = QuoraDataset(
    config.test_filename,
    w2idx=w2idx_train,
    save_path=config.test_save,
)

In [7]:
# Extract the data of each split with the same padding length, in
# train -> dev -> test order.
train_data, dev_data, test_data = (
    split.data(padlen=config.padlen) for split in (qd_train, qd_dev, qd_test)
)

In [8]:
# Sanity check: show the top-left 10x10 corner of the first component of the
# dev data (an integer matrix in the saved output).
dev_first_component = np.array(dev_data[0])
dev_first_component[:10, :10]

array([[    0,    21,  1507,  1847,  2499,     5,  3800,     0,     5,
          157],
       [   10,   809,    21,   247,   403,     0,   110,   136,   368,
          875],
       [ 1325,    25,    10,   284,  1388,    29,  1389,  1390,   157,
         1391],
       [   59,   268,   269,  6093,  1876,    27,    10,  2991,    13,
         7215],
       [   10,   117,  1157,   115,     0,    10,  2247,    13,    10,
         6394],
       [   10, 12243,   202,   153,   310, 14590,   136,  2872,  4481,
          304],
       [ 1164,     0,     0,   110,    10,     0,  5445,    38,     0,
         8756],
       [   59,   600,  1315,  1832,   690,   398,    10,     0,    80,
         3385],
       [12912,   157,    59,   187,  9940,   656,   136, 10107, 14690,
           25],
       [   10,   920,   348,   191,   349,    21,   350,   145,   351,
          352]])

In [None]:
### SiameseNet
# Instantiate the network from the run configuration and the embedding matrix
# assembled in the cells above (random init, overwritten with pretrained rows
# for words found in the pretrained vocabulary).
model = SiameseNet(config, embeddings)
# SiameseNet comes from the project's `model` module; build() is not shown in
# this notebook (presumably it constructs the TensorFlow graph; TODO confirm).
model.build()

In [None]:
# Train on the objects returned by the .data(padlen=...) calls above.
# NOTE(review): in the saved output the "dev acc" and "test acc" values are
# identical at every logged epoch (e.g. 66.96/66.96, 72.58/72.58). Verify that
# config.dev_filename and config.test_filename point to different splits and
# that model.train does not evaluate the same data twice.
model.train(train_data, dev_data, test_data)

  0%|          | 0/16 [00:00<?, ?it/s]

Training in hid-1024_feats-dist_lr-adam-0.0005-relu_bs-256_drop-0.0_bn-1_emb-0_padlen-40/msrp/
Epoch 1/50 :
Step 0/16
dev acc 66.96


  6%|▋         | 1/16 [00:02<00:34,  2.31s/it]

test acc 66.96


100%|██████████| 16/16 [00:08<00:00,  2.48it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

New best score on dev !
Epoch 2/50 :
Step 0/16
dev acc 72.58


  6%|▋         | 1/16 [00:02<00:31,  2.08s/it]

test acc 72.58


100%|██████████| 16/16 [00:08<00:00,  2.46it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

New best score on dev !
Epoch 3/50 :
Step 0/16
dev acc 73.62


  6%|▋         | 1/16 [00:01<00:29,  1.95s/it]

test acc 73.62


100%|██████████| 16/16 [00:08<00:00,  2.42it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

New best score on dev !
Epoch 4/50 :
Step 0/16
dev acc 72.58


  6%|▋         | 1/16 [00:02<00:30,  2.04s/it]

test acc 72.58


100%|██████████| 16/16 [00:08<00:00,  2.40it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch 5/50 :
Step 0/16
dev acc 74.32


  6%|▋         | 1/16 [00:02<00:31,  2.09s/it]

test acc 74.32


100%|██████████| 16/16 [00:08<00:00,  2.41it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

New best score on dev !
Epoch 6/50 :
Step 0/16
dev acc 73.74


  6%|▋         | 1/16 [00:02<00:31,  2.10s/it]

test acc 73.74


100%|██████████| 16/16 [00:08<00:00,  2.36it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch 7/50 :
Step 0/16
dev acc 74.84


  6%|▋         | 1/16 [00:02<00:31,  2.09s/it]

test acc 74.84


100%|██████████| 16/16 [00:08<00:00,  2.31it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

New best score on dev !
Epoch 8/50 :
Step 0/16
dev acc 74.96


  6%|▋         | 1/16 [00:02<00:30,  2.01s/it]

test acc 74.96


100%|██████████| 16/16 [00:08<00:00,  2.38it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

New best score on dev !
Epoch 9/50 :
Step 0/16
dev acc 74.61


  6%|▋         | 1/16 [00:02<00:30,  2.05s/it]

test acc 74.61


100%|██████████| 16/16 [00:08<00:00,  2.35it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch 10/50 :
Step 0/16
dev acc 75.36


  6%|▋         | 1/16 [00:02<00:31,  2.08s/it]

test acc 75.36


100%|██████████| 16/16 [00:08<00:00,  2.37it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

New best score on dev !
Epoch 11/50 :
Step 0/16
dev acc 75.88


  6%|▋         | 1/16 [00:02<00:30,  2.00s/it]

test acc 75.88


100%|██████████| 16/16 [00:08<00:00,  2.40it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

New best score on dev !
Epoch 12/50 :
Step 0/16
dev acc 75.48


  6%|▋         | 1/16 [00:02<00:30,  2.05s/it]

test acc 75.48


 25%|██▌       | 4/16 [00:03<00:11,  1.01it/s]