## LM Evaluation

In [1]:
from fastai.text import *
import html

In [2]:
PATH=Path('data/aclImdb/')
CLAS_PATH=Path('data/imdb_clas/')
LM_PATH=Path('data/imdb_lm/')

In [3]:
trn_lm = np.load(LM_PATH/'tmp'/'trn_ids.npy')
val_lm = np.load(LM_PATH/'tmp'/'val_ids.npy')
itos = pickle.load(open(LM_PATH/'tmp'/'itos.pkl', 'rb'))

In [4]:
max_vocab = 60000
min_freq = 2

In [5]:
stoi = collections.defaultdict(lambda:0, {v:k for k,v in enumerate(itos)})
len(itos)

60002

In [6]:
vs=len(itos)
vs,len(trn_lm)

(60002, 90000)

In [7]:
em_sz,nh,nl = 400,1150,3

## Load existing LM model and weights

In [8]:
PRE_PATH = PATH/'models'/'wt103'
PRE_LM_PATH = PRE_PATH/'fwd_wt103.h5'

In [9]:
wgts = torch.load(PRE_LM_PATH, map_location=lambda storage, loc: storage)

In [10]:
wd=1e-7
bptt=70
bs=250
opt_fn = partial(optim.Adam, betas=(0.8, 0.99))

In [11]:
trn_dl = LanguageModelLoader(np.concatenate(trn_lm), bs, bptt)
val_dl = LanguageModelLoader(np.concatenate(val_lm), bs, bptt)
md = LanguageModelData(PATH, 1, vs, trn_dl, val_dl, bs=bs, bptt=bptt)

In [12]:
drops = np.array([0.25, 0.1, 0.2, 0.02, 0.15])*0.7

In [13]:
learner= md.get_model(opt_fn, em_sz, nh, nl, 
    dropouti=drops[0], dropout=drops[1], wdrop=drops[2], dropoute=drops[3], dropouth=drops[4])

learner.metrics = [accuracy]
learner.unfreeze()

## Replace weights from classifier-encoder (not LM)

In [14]:
learner.load('lm1')
#learner.load_encoder('lm1_enc')
learner.load_encoder('clas_2_enc')

## Let's evaluate

Our goal: Encode 3 sentences using a pre-trained encoder and check the similarity scores between each pair of sentences. We use 2 methods to calculate semantic similarity: cosine similarity and inner product of encodings.

In [15]:
# cosine similarity - to check quality of our sentence encoder
def cos_sim(v1,v2):
    return F.cosine_similarity(T(v1).unsqueeze(0),T(v2).unsqueeze(0)).mean()

### Round 1 - simple sentences

In [16]:
x_inp = ["i like apples",
         "i want to buy some apples",
         "where is your cell phone"]

In [17]:
tok = Tokenizer().proc_all_mp(partition_by_cores(x_inp))

In [18]:
tok

[['i', 'like', 'apples'],
 ['i', 'want', 'to', 'buy', 'some', 'apples'],
 ['where', 'is', 'your', 'cell', 'phone']]

In [19]:
X = [[stoi[o1] for o1 in o] for o in tok]; X

[[12, 52, 13154], [12, 203, 8, 808, 64, 13154], [134, 9, 146, 2739, 1668]]

In [20]:
m = learner.model

# Set batch size to 1
#m[0].bs=1
# Turn off dropout
#m.eval()
# Reset hidden state
#m.reset()

kk0=m[0](V(T([X[0]]))) #first sentence in X - sentence level encoding....10 words 400 dim vecs
kk1=m[0](V(T([X[1]]))) #second sentence in X - sentence level encoding....10 words 400 dim vecs
kk2=m[0](V(T([X[2]]))) #third sentence in X - sentence level encoding....10 words 400 dim vecs


kk0=to_np(kk0)
kk1=to_np(kk1)
kk2=to_np(kk2)


kk0 = (kk0[0][2][0][-1]) # 1st sentence encoding 400 dims. -1 is the last element that's supposed to have the final encoded state
kk1 = (kk1[0][2][0][-1]) # 2nd sentence encoding 400 dims
kk2 = (kk2[0][2][0][-1]) # 3rd sentence encoding 400 dims


kk1.shape

(400,)

In [21]:
x_inp

['i like apples', 'i want to buy some apples', 'where is your cell phone']

In [22]:
cos_sim(kk0,kk1), cos_sim(kk1,kk2), cos_sim(kk0,kk2)

(0.9012137055397034, 0.10716721415519714, 0.10006072372198105)

In [23]:
np.inner(kk0,kk1)

2.2242155

In [24]:
np.inner(kk1,kk2)

0.43152505

In [25]:
np.inner(kk0,kk2)

0.48829192

### Round 2 - increase sentence complexity

In [26]:
x_inp = ["i like apples and oranges",
         "i hate all fruits especially apples and oranges",
         "i am going to buy some apples and oranges"]

In [27]:
tok = Tokenizer().proc_all_mp(partition_by_cores(x_inp))

In [28]:
tok

[['i', 'like', 'apples', 'and', 'oranges'],
 ['i', 'hate', 'all', 'fruits', 'especially', 'apples', 'and', 'oranges'],
 ['i', 'am', 'going', 'to', 'buy', 'some', 'apples', 'and', 'oranges']]

In [29]:
X = [[stoi[o1] for o1 in o] for o in tok]; X

[[12, 52, 13154, 5, 20864],
 [12, 738, 43, 22144, 280, 13154, 5, 20864],
 [12, 261, 182, 8, 808, 64, 13154, 5, 20864]]

In [30]:
m = learner.model

# Set batch size to 1
#m[0].bs=1
# Turn off dropout
#m.eval()
# Reset hidden state
#m.reset()

kk0=m[0](V(T([X[0]]))) #first sentence in X - sentence level encoding....400 dim vecs
kk1=m[0](V(T([X[1]]))) #second sentence in X - sentence level encoding....400 dim vecs
kk2=m[0](V(T([X[2]]))) #third sentence in X - sentence level encoding....400 dim vecs


kk0=to_np(kk0)
kk1=to_np(kk1)
kk2=to_np(kk2)


kk0 = (kk0[0][2][0][-1]) # 1st sentence encoding 400 dims. -1 is the last element that's supposed to have the final encoded state
kk1 = (kk1[0][2][0][-1]) # 2nd sentence encoding 400 dims
kk2 = (kk2[0][2][0][-1]) # 3rd sentence encoding 400 dims


kk1.shape

(400,)

In [31]:
x_inp

['i like apples and oranges',
 'i hate all fruits especially apples and oranges',
 'i am going to buy some apples and oranges']

In [32]:
cos_sim(kk0,kk1), cos_sim(kk1,kk2), cos_sim(kk0,kk2)

(0.43747785687446594, 0.8603231906890869, 0.4448050260543823)

In [33]:
np.inner(kk0,kk1)

0.7238263

In [34]:
np.inner(kk1,kk2)

1.9672345

In [35]:
np.inner(kk0,kk2)

0.62568533

### Round 3 - more complex!

In [36]:
x_inp = ["let's talk about fruits for a second. Apples are nice. Oranges too. I kinda like them.",
         "i compared the prices of apples and oranges at walmart and kroger stores",
         "oh you wanna talk about apples. sure. i am not sure if i have said this before but i do like them and oranges."]

In [37]:
tok = Tokenizer().proc_all_mp(partition_by_cores(x_inp))

In [38]:
tok

[['let',
  "'s",
  'talk',
  'about',
  'fruits',
  'for',
  'a',
  'second',
  '.',
  'apples',
  'are',
  'nice',
  '.',
  'oranges',
  'too',
  '.',
  'i',
  'kinda',
  'like',
  'them',
  '.'],
 ['i',
  'compared',
  'the',
  'prices',
  'of',
  'apples',
  'and',
  'oranges',
  'at',
  'walmart',
  'and',
  'kroger',
  'stores'],
 ['oh',
  'you',
  'wanna',
  'talk',
  'about',
  'apples',
  '.',
  'sure',
  '.',
  'i',
  'am',
  'not',
  'sure',
  'if',
  'i',
  'have',
  'said',
  'this',
  'before',
  'but',
  'i',
  'do',
  'like',
  'them',
  'and',
  'oranges',
  '.']]

In [39]:
X = [[stoi[o1] for o1 in o] for o in tok]; X

[[302,
  16,
  713,
  58,
  22144,
  22,
  6,
  349,
  3,
  13154,
  33,
  358,
  3,
  20864,
  116,
  3,
  12,
  2040,
  52,
  110,
  3],
 [12, 1128, 2, 12023, 7, 13154, 5, 20864, 44, 17680, 5, 0, 5400],
 [452,
  26,
  2890,
  713,
  58,
  13154,
  3,
  273,
  3,
  12,
  261,
  32,
  273,
  62,
  12,
  36,
  326,
  13,
  176,
  24,
  12,
  57,
  52,
  110,
  5,
  20864,
  3]]

In [40]:
m = learner.model

# Set batch size to 1
#m[0].bs=1
# Turn off dropout
#m.eval()
# Reset hidden state
#m.reset()

kk0=m[0](V(T([X[0]]))) #first sentence in X - sentence level encoding....400 dim vecs
kk1=m[0](V(T([X[1]]))) #second sentence in X - sentence level encoding....400 dim vecs
kk2=m[0](V(T([X[2]]))) #third sentence in X - sentence level encoding....400 dim vecs


kk0=to_np(kk0)
kk1=to_np(kk1)
kk2=to_np(kk2)


kk0 = (kk0[0][2][0][-1]) # 1st sentence encoding 400 dims. -1 is the last element that's supposed to have the final encoded state
kk1 = (kk1[0][2][0][-1]) # 2nd sentence encoding 400 dims
kk2 = (kk2[0][2][0][-1]) # 3rd sentence encoding 400 dims


kk1.shape

(400,)

In [41]:
x_inp

["let's talk about fruits for a second. Apples are nice. Oranges too. I kinda like them.",
 'i compared the prices of apples and oranges at walmart and kroger stores',
 'oh you wanna talk about apples. sure. i am not sure if i have said this before but i do like them and oranges.']

In [42]:
cos_sim(kk0,kk1), cos_sim(kk1,kk2), cos_sim(kk0,kk2)

(0.23472610116004944, 0.2449840009212494, 0.9800421595573425)

In [43]:
np.inner(kk0,kk1)

0.47004822

In [44]:
np.inner(kk1,kk2)

0.45767003

In [45]:
np.inner(kk0,kk2)

1.5888402

### Round 4 - really complex

In [46]:
x_inp = ["there is no comparison here. you are comparing apples to oranges",
         "i compared the prices of apples and oranges at walmart and kroger stores",
         "i don't see anything common between these two categories."]

In [47]:
tok = Tokenizer().proc_all_mp(partition_by_cores(x_inp))

In [48]:
tok

[['there',
  'is',
  'no',
  'comparison',
  'here',
  '.',
  'you',
  'are',
  'comparing',
  'apples',
  'to',
  'oranges'],
 ['i',
  'compared',
  'the',
  'prices',
  'of',
  'apples',
  'and',
  'oranges',
  'at',
  'walmart',
  'and',
  'kroger',
  'stores'],
 ['i',
  'do',
  "n't",
  'see',
  'anything',
  'common',
  'between',
  'these',
  'two',
  'categories',
  '.']]

In [49]:
X = [[stoi[o1] for o1 in o] for o in tok]; X

[[53, 9, 73, 1884, 148, 3, 26, 33, 4324, 13154, 8, 20864],
 [12, 1128, 2, 12023, 7, 13154, 5, 20864, 44, 17680, 5, 0, 5400],
 [12, 57, 29, 83, 255, 1116, 222, 150, 126, 9281, 3]]

In [50]:
m = learner.model

# Set batch size to 1
#m[0].bs=1
# Turn off dropout
#m.eval()
# Reset hidden state
#m.reset()

kk0=m[0](V(T([X[0]]))) #first sentence in X - sentence level encoding....400 dim vecs
kk1=m[0](V(T([X[1]]))) #second sentence in X - sentence level encoding....400 dim vecs
kk2=m[0](V(T([X[2]]))) #third sentence in X - sentence level encoding....400 dim vecs


kk0=to_np(kk0)
kk1=to_np(kk1)
kk2=to_np(kk2)


kk0 = (kk0[0][2][0][-1]) # 1st sentence encoding 400 dims. -1 is the last element that's supposed to have the final encoded state
kk1 = (kk1[0][2][0][-1]) # 2nd sentence encoding 400 dims
kk2 = (kk2[0][2][0][-1]) # 3rd sentence encoding 400 dims


kk1.shape

(400,)

In [51]:
x_inp

['there is no comparison here. you are comparing apples to oranges',
 'i compared the prices of apples and oranges at walmart and kroger stores',
 "i don't see anything common between these two categories."]

In [52]:
cos_sim(kk0,kk1), cos_sim(kk1,kk2), cos_sim(kk0,kk2)

(0.47336602210998535, 0.24636584520339966, 0.11633665859699249)

In [53]:
np.inner(kk0,kk1)

1.4132059

In [54]:
np.inner(kk1,kk2)

0.46766788

In [55]:
np.inner(kk0,kk2)

0.23948124

# Quora dataset

In [56]:
QUESTION_PAIRS_FILE = '/home/a8an18/.keras/datasets/quora_duplicate_questions.tsv'
#print("Processing", QUESTION_PAIRS_FILE)

question1 = []
question2 = []
is_duplicate = []
with open(QUESTION_PAIRS_FILE, encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile, delimiter='\t')
    for row in reader:
        question1.append(row['question1'])
        question2.append(row['question2'])
        is_duplicate.append(int(row['is_duplicate']))

print('Question pairs: %d' % len(question1))

Question pairs: 404290


In [57]:
question1[:10]

['What is the step by step guide to invest in share market in india?',
 'What is the story of Kohinoor (Koh-i-Noor) Diamond?',
 'How can I increase the speed of my internet connection while using a VPN?',
 'Why am I mentally very lonely? How can I solve it?',
 'Which one dissolve in water quikly sugar, salt, methane and carbon di oxide?',
 'Astrology: I am a Capricorn Sun Cap moon and cap rising...what does that say about me?',
 'Should I buy tiago?',
 'How can I be a good geologist?',
 'When do you use シ instead of し?',
 'Motorola (company): Can I hack my Charter Motorolla DCX3400?']

In [58]:
question2[:10]

['What is the step by step guide to invest in share market?',
 'What would happen if the Indian government stole the Kohinoor (Koh-i-Noor) diamond back?',
 'How can Internet speed be increased by hacking through DNS?',
 'Find the remainder when [math]23^{24}[/math] is divided by 24,23?',
 'Which fish would survive in salt water?',
 "I'm a triple Capricorn (Sun, Moon and ascendant in Capricorn) What does this say about me?",
 'What keeps childern active and far from phone and video games?',
 'What should I do to be a great geologist?',
 'When do you use "&" instead of "and"?',
 'How do I hack Motorola DCX3400 for free internet?']

In [59]:
is_duplicate[:10]

[0, 0, 0, 0, 0, 1, 0, 1, 0, 0]

In [60]:
chunksize=24000

In [61]:
tok_q1 = Tokenizer.proc_all_mp(partition_by_cores(question1))

In [62]:
tok_q2 = Tokenizer.proc_all_mp(partition_by_cores(question2))

In [63]:
tok_q2[:10]

[['what',
  'is',
  'the',
  'step',
  'by',
  'step',
  'guide',
  'to',
  'invest',
  'in',
  'share',
  'market',
  '?'],
 ['what',
  'would',
  'happen',
  'if',
  'the',
  'indian',
  'government',
  'stole',
  'the',
  'kohinoor',
  '(',
  'koh',
  '-',
  'i',
  '-',
  'noor',
  ')',
  'diamond',
  'back',
  '?'],
 ['how',
  'can',
  'internet',
  'speed',
  'be',
  'increased',
  'by',
  'hacking',
  'through',
  't_up',
  'dns',
  '?'],
 ['find',
  'the',
  'remainder',
  'when',
  '[',
  'math]23^{24',
  '}',
  '[',
  '/',
  'math',
  ']',
  'is',
  'divided',
  'by',
  '24,23',
  '?'],
 ['which', 'fish', 'would', 'survive', 'in', 'salt', 'water', '?'],
 ['i',
  "'m",
  'a',
  'triple',
  'capricorn',
  '(',
  'sun',
  ',',
  'moon',
  'and',
  'ascendant',
  'in',
  'capricorn',
  ')',
  'what',
  'does',
  'this',
  'say',
  'about',
  'me',
  '?'],
 ['what',
  'keeps',
  'childern',
  'active',
  'and',
  'far',
  'from',
  'phone',
  'and',
  'video',
  'games',
  '?'],
 [

In [64]:
ques = tok_q1 + tok_q2

In [65]:
freq = Counter(p for o in ques for p in o)

In [66]:
freq.most_common(25)

[('?', 852054),
 ('the', 377634),
 ('what', 324433),
 ('is', 271122),
 ('i', 223363),
 ('how', 220656),
 ('a', 211277),
 ('to', 205717),
 ('in', 196940),
 ('do', 169773),
 ('of', 159862),
 ('are', 146580),
 ('and', 133925),
 ('can', 114550),
 ('for', 104498),
 (',', 98321),
 ('t_up', 97217),
 ('you', 93102),
 ('why', 84030),
 ('it', 71057),
 ('my', 70930),
 ('best', 70596),
 ('on', 60715),
 ('does', 59502),
 ('.', 49499)]

In [67]:
max_vocab = 60000
min_freq = 2

In [68]:
itos = [o for o,c in freq.most_common(max_vocab) if c>min_freq]
itos.insert(0, '_pad_')
itos.insert(0, '_unk_')

In [69]:
itos[:10]

['_unk_', '_pad_', '?', 'the', 'what', 'is', 'i', 'how', 'a', 'to']

In [70]:
len(itos)

41665

In [71]:
stoi = collections.defaultdict(lambda:0, {v:k for k,v in enumerate(itos)})
list(stoi)[:10]

['_unk_', '_pad_', '?', 'the', 'what', 'is', 'i', 'how', 'a', 'to']

In [72]:
q1 = np.array([[stoi[o] for o in p] for p in tok_q1])
q2 = np.array([[stoi[o] for o in p] for p in tok_q2])

In [73]:
q1.shape,q2.shape

((404290,), (404290,))

In [74]:
str(q1[0])

'[4, 5, 3, 1254, 69, 1254, 2576, 9, 589, 10, 773, 390, 10, 43, 2]'

In [75]:
itos_arr = np.array(itos)
itos_arr[q1[0]]

array(['what', 'is', 'the', 'step', 'by', 'step', 'guide', 'to', 'invest', 'in', 'share', 'market', 'in',
       'india', '?'], dtype='<U65')

In [76]:
np.save('q1.npy', q1)
np.save('q2.npy', q2)
pickle.dump(itos, open('itos.pkl', 'wb'))

In [77]:
vs=len(itos) #vocab size
vs,len(q1)

(41665, 404290)

In [78]:
itos2 = pickle.load((PRE_PATH/'itos_wt103.pkl').open('rb'))
stoi2 = collections.defaultdict(lambda:-1, {v:k for k,v in enumerate(itos2)})

In [79]:
wgts = torch.load(PRE_LM_PATH, map_location=lambda storage, loc: storage)

In [80]:
wgts.keys()

odict_keys(['0.encoder.weight', '0.encoder_with_dropout.embed.weight', '0.rnns.0.module.weight_ih_l0', '0.rnns.0.module.bias_ih_l0', '0.rnns.0.module.bias_hh_l0', '0.rnns.0.module.weight_hh_l0_raw', '0.rnns.1.module.weight_ih_l0', '0.rnns.1.module.bias_ih_l0', '0.rnns.1.module.bias_hh_l0', '0.rnns.1.module.weight_hh_l0_raw', '0.rnns.2.module.weight_ih_l0', '0.rnns.2.module.bias_ih_l0', '0.rnns.2.module.bias_hh_l0', '0.rnns.2.module.weight_hh_l0_raw', '1.decoder.weight'])

In [81]:
enc_wgts = to_np(wgts['0.encoder.weight'])
row_m = enc_wgts.mean(0) 
row_m.shape, row_m[:10]

((400,),
 array([-0.0183 , -0.13826,  0.01438, -0.01285,  0.00407,  0.01944,  0.01149, -0.13282, -0.02295, -0.01722],
       dtype=float32))

In [82]:
# Create embedding matrix and take token weights from wikitext103 if available
# Use 60002 instead of 41665 for future embedding matrix where backbone encoder needs to be loaded.
# not needed for simple model
new_w = np.zeros((60002, em_sz), dtype=np.float32)
for i,w in enumerate(itos):
    r = stoi2[w]
    new_w[i] = enc_wgts[r] if r>=0 else row_m

In [83]:
new_w.shape

(60002, 400)

In [84]:
wgts['0.encoder.weight'] = T(new_w)
wgts['0.encoder_with_dropout.embed.weight'] = T(np.copy(new_w))
wgts['1.decoder.weight'] = T(np.copy(new_w))

In [85]:
wgts['1.decoder.weight'].shape

torch.Size([60002, 400])

In [86]:
trn_keep = np.random.rand(len(q1))>0.1

In [87]:
q1_trn = q1[trn_keep]
q2_trn = q2[trn_keep]
lbl_trn = np.asarray(is_duplicate)[trn_keep]

In [88]:
np.asarray([lbl_trn]).T

array([[0],
       [0],
       [0],
       ...,
       [0],
       [0],
       [0]])

In [89]:
(T(np.array([lbl_trn[101]]).T)).float()


 0
[torch.FloatTensor of size 1]

In [90]:
q1_trn.shape

(363951,)

In [91]:
q1_val = q1[~trn_keep]
q2_val = q2[~trn_keep]
lbl_val = np.asarray(is_duplicate)[~trn_keep]

In [92]:
lbl_val = np.asarray([lbl_val]).T
lbl_val.shape

(40339, 1)

In [93]:
lbl_trn = np.asarray([lbl_trn]).T
lbl_trn.shape

(363951, 1)

In [94]:
lbl_val = lbl_val.T
lbl_val.shape

(1, 40339)

In [95]:
lbl_trn = lbl_trn.T
lbl_trn.shape

(1, 363951)

In [96]:
vs,em_sz

(41665, 400)

In [97]:
class PairDataset(Dataset):
    def __init__(self, X, y): self.x1,self.x2,self.y = X[0],X[1],y
    def __getitem__(self, idx): return A(self.x1[idx], self.x2[idx], (T(self.y[idx]).float()))
    def __len__(self): return len(self.x1)

In [98]:
trn_ds = PairDataset(X=[q1_trn,q2_trn],y=(lbl_trn).T)
val_ds = PairDataset(X=[q1_val,q2_val],y=(lbl_val).T)

In [99]:
trn_ds.__getitem__(15)

[array([   4,   52,    8,   96, 1159,  113,   16,  488,  507,  853,  420,  299,   24,   38, 3639,  527,    2]),
 array([   7,   44,    8,   96, 1159,  332,    3,  299, 6219,   10,  117,   27, 1112,    9,  225,   10,  117,
           2]),
 array([1.], dtype=float32)]

In [111]:
# Unable to run with larger bs because of DataLoader transpose issue
bs=15
#bs=1

In [101]:
??DataLoader

In [112]:
trn_dl = DataLoader(trn_ds, bs, transpose=False, transpose_y=False, num_workers=1, 
                    pad_idx=1, pre_pad=False) #, sampler=trn_samp)
val_dl = DataLoader(val_ds, bs, transpose=False, transpose_y=False, num_workers=1, 
                    pad_idx=1, pre_pad=False) #, sampler=val_samp)
md = ModelData(PATH, trn_dl, val_dl)

In [113]:
it = iter(trn_dl)
its = [next(it) for i in range(5)]
#[(len(x1),len(x2),len(y.permute(1,0))) for x1,x2,y in its]
#[((y)) for x1,x2,y in its]
next(it)

[
 
 Columns 0 to 10 
     30     89    913      9    436      8   1496   6446   2136   1141     10
     20     11   3401  10382   3778      2      1      1      1      1      1
      4     13     31     12      3    249    395    945    432     10      3
      7     15      6    229     22    492     76   1677    547      2      1
      4    133      3    328    350     12      3   1570    903     17     14
      4      5      3     83     60      0     14  11686      2      1      1
     28      5      3     23   1148    268    206   3908     18    848      2
      4      5     41    447     12      3    359   7229     82  10718   8040
      4      5      3     23   2440    173     16    618    369   3134      2
    846    679     12    495     17  12677     82      7      5      3    283
      4     13     31     12      3     23   1518    196     10    116      2
      4    524      8  12678      2      1      1      1      1      1      1
      4      5   4706  31016     10   2522

### ISSUE: How do it get y transposed to shape 1X15 instead of 15X1 ?

# Create model - simple

In [104]:
def create_emb(vecs, itos, em_sz):
    emb = nn.Embedding(len(itos), em_sz, padding_idx=1)
    wgts = emb.weight.data
    miss = []
    for i,w in enumerate(itos):
        try: wgts[i] = torch.from_numpy(vecs[i])
        except: miss.append(w)
    print(len(miss),miss[5:10])
    return emb

In [105]:
nh,nl = 256,2

In [106]:
class PairRNN(nn.Module):
    #def __init__(self, vecs_enc, itos_enc, em_sz_enc, vecs_dec, itos_dec, em_sz_dec, nh, out_sl, nl=2):
    def __init__(self, vecs, itos, em_sz, nh, out_sl=75, nl=2, bs=100):
        super().__init__()
        self.nl,self.nh,self.out_sl,self.bs = nl,nh,out_sl,bs
        self.emb = create_emb(vecs, itos, em_sz)
        self.emb_drop = nn.Dropout(0.15)
        self.gru = nn.GRU(em_sz, nh, num_layers=nl, dropout=0.25)
        self.out = nn.Linear(nh, em_sz, bias=False)
        
    def forward(self, inp1, inp2):
        bs = self.bs
        h = self.initHidden(bs)
        emb1 = self.emb_drop(self.emb(inp1.permute(1,0)))
        emb2 = self.emb_drop(self.emb(inp2.permute(1,0)))
        out_1, h1 = self.gru(emb1, h)
        out_2, h2 = self.gru(emb2, h)
        h1 = self.out(h1)
        h2 = self.out(h2)
        return F.cosine_similarity(h1.unsqueeze(0),h2.unsqueeze(0)).mean()
    
    def initHidden(self, bs): return V(torch.zeros(self.nl, bs, self.nh))

In [107]:
opt_fn = partial(optim.Adam, betas=(0.8, 0.99))

In [108]:
rnn = PairRNN(new_w, itos, em_sz, nh, bs=bs)
learn = RNN_Learner(md, SingleModel(to_gpu(rnn)), opt_fn=opt_fn)
learn.crit = nn.L1Loss()
#nn.CosineEmbeddingLoss()

0 []


In [109]:
#learn.lr_find()
learn.fit(lr, 1, cycle_len=12, use_clr=(20,10))

NameError: name 'lr' is not defined

# Create model - with backbone

In [None]:
# TBD