In [2]:
%load_ext autoreload
%autoreload 2
import numpy as np
from tqdm import tqdm_notebook

## Initialization

In [3]:
def initialize_wrd_emb(vocab_size, emb_size):
    WRD_EMB = np.random.randn(vocab_size, emb_size) * 0.01
    return WRD_EMB

def initialize_dense(input_size, output_size):
    W = np.random.randn(output_size, input_size) * 0.01
    b = np.random.randn(output_size, 1) * 0.01
    return W, b

def initialize_parameters(vocab_size, emb_size):
    WRD_EMB = initialize_wrd_emb(vocab_size, emb_size)
    W, b = initialize_dense(emb_size, vocab_size)
    
    parameters = {}
    parameters['WRD_EMB'] = WRD_EMB
    parameters['W'] = W
    parameters['b'] = b
    
    return parameters

## Forward Propagation

In [4]:
def ind_to_word_vecs(inds, parameters):
    """
    inds -- shape: (1, number of examples)
    """
    m = inds.shape[1]
    WRD_EMB = parameters['WRD_EMB']
    word_vec = WRD_EMB[inds.flatten(), :].T
    
    assert(word_vec.shape == (WRD_EMB.shape[1], m))
    
    return word_vec

def linear_dense(word_vec, parameters):
    m = word_vec.shape[1]
    W = parameters['W']
    Z = np.dot(W, word_vec)
    
    assert(Z.shape == (W.shape[0], m))
    
    return W, b, Z

def softmax(Z):
    softmax_out = np.divide(np.exp(Z), np.sum(np.exp(Z), axis=0, keepdims=True) + 0.001)
    
    assert(softmax_out.shape == Z.shape)

    return softmax_out

def forward_propagation(inds, parameters):
    word_vec = ind_to_word_vecs(inds, parameters)
    W, b, Z = linear_dense(word_vec, parameters)
    softmax_out = softmax(Z)
    
    caches = {}
    caches['inds'] = inds
    caches['word_vec'] = word_vec
    caches['W'] = W
    caches['b'] = b
    caches['Z'] = Z
    
    return softmax_out, caches

## Cost Function

In [5]:
def cross_entropy(softmax_out, Y):
    m = softmax_out.shape[1]
    cost = -(1 / m) * np.sum(np.sum(Y * np.log(softmax_out + 0.001), axis=0, keepdims=True), axis=1)
    return cost

## Backward Propagation

In [6]:
def softmax_backward(Y, caches):
    Z = caches['Z']
    dL_dZ = Z - Y
    
    assert(dL_dZ.shape == Z.shape)
    
    return dL_dZ

def dense_backward(dL_dZ, caches):
    """
    Z = W * X + b
    """
    W = caches['W']
    b = caches['b']
    word_vec = caches['word_vec']
    m = word_vec.shape[1]
    
    dL_dW = (1 / m) * np.dot(dL_dZ, word_vecs_mean.T)
    dL_db = (1 / m) * np.sum(dL_dZ, axis=1, keepdims=True)
    dL_dword_vec = np.dot(W.T, dL_dZ)

    assert(W.shape == dL_dW.shape)
    assert(b.shape == dL_db.shape)
    assert(word_vec.shape == dL_dword_vec.shape)
    
    return dL_dW, dL_db, dL_dword_vec

def backward_propagation(Y, caches):
    dL_dZ = softmax_backward(Y, caches)
    dL_dW, dL_db, dL_dword_vec = dense_backward(dL_dZ, caches)
    
    gradients = dict()
    gradients['dL_dZ'] = dL_dZ
    gradients['dL_dW'] = dL_dW
    gradients['dL_db'] = dL_db
    gradients['dL_dword_vec'] = dL_dword_vec
    
    return gradients

def update_parameters(parameters, caches, gradients, learning_rate):
    CBOW_N = caches['inds'].shape[0]
    vocab_size, emb_size = parameters['WRD_EMB'].shape
    inds = caches['inds']
    WRD_EMB = parameters['WRD_EMB']
    dL_dword_vec = gradients['dL_dword_vecs']
    m = inds.shape[-1]
    
    WRD_EMB[inds.flatten(), :] -= dL_dword_vec.T * learning_rate

    parameters['W'] -= learning_rate * gradients['dL_dW']
    parameters['b'] -= learning_rate * gradients['dL_db']
    

In [48]:
from sys import getsizeof
import gc


def cbow_model(X, Y, vocab_size, emb_size, learning_rate, epochs, batch_size=256, parameters=None, print_cost=False):
    costs = []
    m = X.shape[1]
    if parameters is None:
        parameters = initialize_parameters(vocab_size, emb_size)

#     batch_inds = list(range(0, m, batch_size))
    for epoch in range(epochs):
#         np.random.shuffle(batch_inds)
        for b, i in enumerate(range(0, m, batch_size)):
            X_batch = X[:, i:i+batch_size]
            Y_batch = Y[:, i:i+batch_size]

            softmax_out, caches = forward_propagation(X_batch, parameters)
            gradients = backward_propagation(Y_batch, caches)
            update_parameters(parameters, caches, gradients, learning_rate)
            cost = cross_entropy(softmax_out, Y_batch)
#             if b % 100 == 0:
#                 print('epoch {}, {}/{} - Cost: {}'.format(epoch, b, 1000, np.squeeze(cost)))
#             gc.collect()
#             del X_batch
#             del Y_batch
#             del caches
#             del gradients
            
            
        costs.append(cost)
        if print_cost and epoch % 1000 == 0:
            print("Cost after epoch {}: {}".format(epoch, np.squeeze(cost)))
        
    return parameters

### Toy data
Sentence: I(0) would(1) like(2) to(3) get(4) a(5) better(6) job(7).  
vocab_size = 8  
```
[0, 2] [1]  
[1, 3] [2]  
[2, 4] [3]  
[3, 5] [4]  
[4, 6] [5]  
[5, 7] [6]
```

### Initialization Test

In [8]:
parameters = initialize_parameters(31295, 100)

### Forward Probagation Test

In [11]:
X[:, 0]

array([ 4520, 16807,  3646, 27552, 22269, 23619, 29791, 11870, 31053,
       27600])

In [12]:
softmax_out, caches = forward_propagation(X[:, 0].reshape(-1, 1), parameters)

In [13]:
np.sum(softmax_out)

0.9999999680468006

### Compute Cost Test

In [14]:
cost = cross_entropy(softmax_out, Y_one_hot[:, 0].reshape(-1, 1))

In [104]:
cost

array([6.87648511])

### Backward Probagation Test

In [25]:
gradients = backward_propagation(Y_one_hot[:, 0].reshape(-1, 1), caches)

### Model Test

In [42]:
cbow_model(X[:, 0].reshape(-1, 1), Y_one_hot[:, 0].reshape(-1, 1), 31295, 100, 0.05, 1, batch_size=256, parameters=None, print_cost=False)

epoch 0, 0/1000 - Cost: 6.876591503818192


{'W': array([[-1.47086901e-02,  2.46776711e-03,  8.18906010e-04, ...,
          7.11362578e-03, -1.51344408e-02,  2.04960263e-02],
        [-8.39211344e-04, -6.51600397e-03,  3.82367199e-03, ...,
          6.86077946e-03, -3.04584365e-02, -5.93998251e-03],
        [ 2.44059705e-03,  1.71246057e-02, -7.00175285e-03, ...,
          4.45684667e-03, -8.81435529e-03, -8.60441116e-03],
        ...,
        [-7.68705416e-03,  9.26354933e-03,  1.38015740e-02, ...,
         -6.52921729e-03,  1.65386648e-02,  4.51150994e-03],
        [-1.99787661e-03, -4.76608309e-03,  1.25434277e-02, ...,
          6.25772980e-03,  1.02281155e-02,  9.27558552e-03],
        [ 5.29647943e-03, -5.31701038e-05, -7.73642428e-03, ...,
          3.97533109e-03, -6.43360645e-03, -1.15651844e-02]]),
 'WRD_EMB': array([[-0.02116959,  0.01432641,  0.01467965, ..., -0.0030714 ,
          0.00436741,  0.00781568],
        [-0.00391822,  0.00905689, -0.00870093, ...,  0.00275129,
         -0.00073205, -0.00215951],
        [

## Stack Overflow data

In [7]:
import pickle

with open('pickles/X.pkl', 'rb') as file:
    X = pickle.load(file)
    
with open('pickles/Y.pkl', 'rb') as file:
    Y = pickle.load(file)
    
assert(X.shape[-1] == Y.shape[-1])

In [8]:
vocab_size = 31295
m = X.shape[-1]
batch_size = 256
emb_size = 100

Y_one_hot = np.zeros((vocab_size, m))
Y_one_hot[Y.flatten(), np.arange(m)] = 1

In [17]:
# ran_sam_inds = np.random.randint(m, size=2)
X_sample, Y_one_hot_sample = X[:, np.arange(10000)], Y_one_hot[:, np.arange(10000)]

In [18]:
print(X_sample.shape, Y_one_hot_sample.shape)

(10, 10000) (31295, 10000)


In [49]:
parameters = cbow_model(X, Y_one_hot, vocab_size, emb_size, 0.05, 2000, batch_size=126, print_cost=True)

NameError: name 'Y_one_hot' is not defined

In [None]:
WRD_EMB = parameters['WRD_EMB']

# with open('pickles/word_to_id.pkl', 'rb') as file:
#     word_to_id = pickle.load(file)
    
# with open('pickles/id_to_word.pkl', 'rb') as file:
#     id_to_word = pickle.load(file)

In [None]:
def find_top_n_similar(word, wrd_emb, n=10):
    id_ = word_to_id[word]
    vec_word = wrd_emb[id_, :]
    norm_vec_word = np.linalg.norm(vec_word)
    cos_sim = np.dot(wrd_emb, vec_word.T) / (np.linalg.norm(wrd_emb, axis=1) * norm_vec_word)
    top_n_ind = np.argsort(cos_sim)[-n:][::-1]
    return top_n_ind

In [None]:
inds = find_top_n_similar('sort', WRD_EMB, 20)

In [None]:
[id_to_word[id_] for id_ in inds]

In [None]:
id_to_word[156]

In [None]:
word_to_id['bfs']

## Toy Data: the quick brown fox jumped over the lazy dog

### CBOW

In [56]:
text_toy = ['the','quick','brown','fox','jumped','over','the','lazy','dog']
id_to_word_toy = dict()
word_to_id_toy = dict()
i = 0
for token in text_toy:
    if token in word_to_id_toy:
        continue
    id_to_word_toy[i] = token
    word_to_id_toy[token] = i
    i += 1
window_size = 1
example_len = 2 * 1 + 1
X_toy = []
Y_toy = []

for i in range(len(text_toy) - example_len + 1):
    X_toy.extend([word_to_id_toy[word] for word in text_toy[i:i+1] + text_toy[i+2:i+3]])
    Y_toy.append(word_to_id_toy[text_toy[i+1]])
        
X_toy = np.array(X_toy)
X_toy = X_toy.reshape(-1, window_size * 2).T
Y_toy = np.array(Y_toy)
Y_toy = Y_toy.reshape(1, -1)

In [57]:
vocab_size = 8
m = X_toy.shape[-1]
emb_size = 6

Y_toy_one_hot = np.zeros((vocab_size, m))
Y_toy_one_hot[Y_toy.flatten(), np.arange(m)] = 1

In [58]:
print(X_toy.shape, Y_toy_one_hot.shape)

(2, 7) (8, 7)


In [None]:
parameters = cbow_model(X_toy, Y_toy_one_hot, vocab_size, emb_size, 0.0025, 10000000, parameters=parameters, batch_size=126, print_cost=True)

Cost after epoch 0: 1.8594929193940735
Cost after epoch 1000: 1.856402710540856
Cost after epoch 2000: 1.8563665576270325
Cost after epoch 3000: 1.8563304119137587
Cost after epoch 4000: 1.8562942733990428
Cost after epoch 5000: 1.8562581420809632
Cost after epoch 6000: 1.8562220179576037
Cost after epoch 7000: 1.8561859010270512
Cost after epoch 8000: 1.856149791287392
Cost after epoch 9000: 1.8561136887367136
Cost after epoch 10000: 1.8560775933731033
Cost after epoch 11000: 1.85604150519465
Cost after epoch 12000: 1.856005424199442
Cost after epoch 13000: 1.8559693503855694
Cost after epoch 14000: 1.8559332837511224
Cost after epoch 15000: 1.855897224294191
Cost after epoch 16000: 1.8558611720128675
Cost after epoch 17000: 1.855825126905243
Cost after epoch 18000: 1.8557890889694106
Cost after epoch 19000: 1.855753058203463
Cost after epoch 20000: 1.8557170346054948
Cost after epoch 21000: 1.855681018173599
Cost after epoch 22000: 1.8556450089058714
Cost after epoch 23000: 1.8556090

Cost after epoch 191000: 1.8496608166257265
Cost after epoch 192000: 1.8496259980619913
Cost after epoch 193000: 1.8495911863451868
Cost after epoch 194000: 1.8495563814735063
Cost after epoch 195000: 1.8495215834451444
Cost after epoch 196000: 1.8494867922582954
Cost after epoch 197000: 1.8494520079111543
Cost after epoch 198000: 1.8494172304019179
Cost after epoch 199000: 1.8493824597287833
Cost after epoch 200000: 1.849347695889946
Cost after epoch 201000: 1.8493129388836045
Cost after epoch 202000: 1.8492781887079566
Cost after epoch 203000: 1.8492434453612012
Cost after epoch 204000: 1.849208708841538
Cost after epoch 205000: 1.8491739791471662
Cost after epoch 206000: 1.849139256276286
Cost after epoch 207000: 1.8491045402270996
Cost after epoch 208000: 1.849069830997807
Cost after epoch 209000: 1.8490351285866113
Cost after epoch 210000: 1.8490004329917142
Cost after epoch 211000: 1.8489657442113199
Cost after epoch 212000: 1.8489310622436312
Cost after epoch 213000: 1.848896387

Cost after epoch 379000: 1.8432333411493238
Cost after epoch 380000: 1.8431997786549792
Cost after epoch 381000: 1.8431662226775647
Cost after epoch 382000: 1.8431326732153774
Cost after epoch 383000: 1.8430991302667117
Cost after epoch 384000: 1.8430655938298632
Cost after epoch 385000: 1.84303206390313
Cost after epoch 386000: 1.8429985404848077
Cost after epoch 387000: 1.8429650235731958
Cost after epoch 388000: 1.842931513166591
Cost after epoch 389000: 1.842898009263293
Cost after epoch 390000: 1.8428645118616003
Cost after epoch 391000: 1.8428310209598129
Cost after epoch 392000: 1.842797536556231
Cost after epoch 393000: 1.8427640586491558
Cost after epoch 394000: 1.8427305872368882
Cost after epoch 395000: 1.8426971223177304
Cost after epoch 396000: 1.842663663889984
Cost after epoch 397000: 1.8426302119519526
Cost after epoch 398000: 1.8425967665019398
Cost after epoch 399000: 1.8425633275382491
Cost after epoch 400000: 1.842529895059185
Cost after epoch 401000: 1.842496469063

Cost after epoch 567000: 1.8370363146974242
Cost after epoch 568000: 1.8370039479763558
Cost after epoch 569000: 1.8369715874606658
Cost after epoch 570000: 1.8369392331487433
Cost after epoch 571000: 1.8369068850389805
Cost after epoch 572000: 1.8368745431297675
Cost after epoch 573000: 1.8368422074194946
Cost after epoch 574000: 1.8368098779065551
Cost after epoch 575000: 1.8367775545893412
Cost after epoch 576000: 1.8367452374662454
Cost after epoch 577000: 1.8367129265356628
Cost after epoch 578000: 1.836680621795985
Cost after epoch 579000: 1.8366483232456072
Cost after epoch 580000: 1.836616030882924
Cost after epoch 581000: 1.8365837447063313
Cost after epoch 582000: 1.8365514647142254
Cost after epoch 583000: 1.8365191909050016
Cost after epoch 584000: 1.8364869232770573
Cost after epoch 585000: 1.8364546618287891
Cost after epoch 586000: 1.8364224065585955
Cost after epoch 587000: 1.8363901574648742
Cost after epoch 588000: 1.8363579145460251
Cost after epoch 589000: 1.8363256

Cost after epoch 755000: 1.8310587189422995
Cost after epoch 756000: 1.8310274910555633
Cost after epoch 757000: 1.8309962690799284
Cost after epoch 758000: 1.8309650530138746
Cost after epoch 759000: 1.8309338428558806
Cost after epoch 760000: 1.8309026386044258
Cost after epoch 761000: 1.830871440257992
Cost after epoch 762000: 1.8308402478150578
Cost after epoch 763000: 1.8308090612741061
Cost after epoch 764000: 1.8307778806336183
Cost after epoch 765000: 1.8307467058920757
Cost after epoch 766000: 1.830715537047962
Cost after epoch 767000: 1.8306843740997598
Cost after epoch 768000: 1.830653217045952
Cost after epoch 769000: 1.8306220658850223
Cost after epoch 770000: 1.8305909206154567
Cost after epoch 771000: 1.8305597812357386
Cost after epoch 772000: 1.8305286477443536
Cost after epoch 773000: 1.830497520139788
Cost after epoch 774000: 1.8304663984205283
Cost after epoch 775000: 1.83043528258506
Cost after epoch 776000: 1.8304041726318712
Cost after epoch 777000: 1.83037306855

Cost after epoch 943000: 1.8252901466271387
Cost after epoch 944000: 1.8252600037808435
Cost after epoch 945000: 1.825229866567563
Cost after epoch 946000: 1.825199734985859
Cost after epoch 947000: 1.825169609034293
Cost after epoch 948000: 1.8251394887114305
Cost after epoch 949000: 1.8251093740158335
Cost after epoch 950000: 1.8250792649460665
Cost after epoch 951000: 1.8250491615006939
Cost after epoch 952000: 1.8250190636782797
Cost after epoch 953000: 1.82498897147739
Cost after epoch 954000: 1.8249588848965903
Cost after epoch 955000: 1.8249288039344471
Cost after epoch 956000: 1.8248987285895264
Cost after epoch 957000: 1.8248686588603953
Cost after epoch 958000: 1.8248385947456218
Cost after epoch 959000: 1.8248085362437725
Cost after epoch 960000: 1.8247784833534169
Cost after epoch 961000: 1.8247484360731228
Cost after epoch 962000: 1.8247183944014598
Cost after epoch 963000: 1.824688358336997
Cost after epoch 964000: 1.8246583278783053
Cost after epoch 965000: 1.82462830302

Cost after epoch 1129000: 1.8197789965279494
Cost after epoch 1130000: 1.8197498771318454
Cost after epoch 1131000: 1.8197207631085675
Cost after epoch 1132000: 1.8196916544567547
Cost after epoch 1133000: 1.8196625511750473
Cost after epoch 1134000: 1.8196334532620864
Cost after epoch 1135000: 1.8196043607165118
Cost after epoch 1136000: 1.8195752735369661
Cost after epoch 1137000: 1.8195461917220896
Cost after epoch 1138000: 1.819517115270525
Cost after epoch 1139000: 1.8194880441809147
Cost after epoch 1140000: 1.819458978451901
Cost after epoch 1141000: 1.8194299180821278
Cost after epoch 1142000: 1.8194008630702385
Cost after epoch 1143000: 1.8193718134148773
Cost after epoch 1144000: 1.8193427691146877
Cost after epoch 1145000: 1.8193137301683153
Cost after epoch 1146000: 1.8192846965744058
Cost after epoch 1147000: 1.8192556683316048
Cost after epoch 1148000: 1.8192266454385582
Cost after epoch 1149000: 1.8191976278939108
Cost after epoch 1150000: 1.819168615696312
Cost after ep

Cost after epoch 1313000: 1.8145101134650137
Cost after epoch 1314000: 1.8144819601707511
Cost after epoch 1315000: 1.8144538120056497
Cost after epoch 1316000: 1.8144256689684215
Cost after epoch 1317000: 1.814397531057778
Cost after epoch 1318000: 1.8143693982724312
Cost after epoch 1319000: 1.814341270611094
Cost after epoch 1320000: 1.8143131480724792
Cost after epoch 1321000: 1.8142850306553007
Cost after epoch 1322000: 1.814256918358272
Cost after epoch 1323000: 1.8142288111801066
Cost after epoch 1324000: 1.81420070911952
Cost after epoch 1325000: 1.8141726121752266
Cost after epoch 1326000: 1.8141445203459419
Cost after epoch 1327000: 1.8141164336303812
Cost after epoch 1328000: 1.8140883520272615
Cost after epoch 1329000: 1.814060275535298
Cost after epoch 1330000: 1.814032204153208
Cost after epoch 1331000: 1.814004137879709
Cost after epoch 1332000: 1.8139760767135176
Cost after epoch 1333000: 1.8139480206533531
Cost after epoch 1334000: 1.813919969697933
Cost after epoch 13

Cost after epoch 1497000: 1.8094149631183418
Cost after epoch 1498000: 1.8093877322775218
Cost after epoch 1499000: 1.8093605063350262
Cost after epoch 1500000: 1.8093332852896342
Cost after epoch 1501000: 1.8093060691401248
Cost after epoch 1502000: 1.8092788578852788
Cost after epoch 1503000: 1.809251651523875
Cost after epoch 1504000: 1.8092244500546955
Cost after epoch 1505000: 1.8091972534765195
Cost after epoch 1506000: 1.8091700617881294
Cost after epoch 1507000: 1.8091428749883065
Cost after epoch 1508000: 1.809115693075833
Cost after epoch 1509000: 1.8090885160494898
Cost after epoch 1510000: 1.8090613439080616
Cost after epoch 1511000: 1.8090341766503306
Cost after epoch 1512000: 1.8090070142750785
Cost after epoch 1513000: 1.8089798567810913
Cost after epoch 1514000: 1.8089527041671518
Cost after epoch 1515000: 1.8089255564320446
Cost after epoch 1516000: 1.8088984135745552
Cost after epoch 1517000: 1.8088712755934675
Cost after epoch 1518000: 1.8088441424875676
Cost after e

Cost after epoch 1681000: 1.8044857258300926
Cost after epoch 1682000: 1.8044593760838097
Cost after epoch 1683000: 1.8044330310170775
Cost after epoch 1684000: 1.8044066906287386
Cost after epoch 1685000: 1.8043803549176354
Cost after epoch 1686000: 1.804354023882612
Cost after epoch 1687000: 1.8043276975225107
Cost after epoch 1688000: 1.804301375836177
Cost after epoch 1689000: 1.804275058822455
Cost after epoch 1690000: 1.8042487464801877
Cost after epoch 1691000: 1.804222438808222
Cost after epoch 1692000: 1.8041961358054028
Cost after epoch 1693000: 1.8041698374705755
Cost after epoch 1694000: 1.8041435438025861
Cost after epoch 1695000: 1.8041172548002808
Cost after epoch 1696000: 1.8040909704625072
Cost after epoch 1697000: 1.8040646907881102
Cost after epoch 1698000: 1.8040384157759393
Cost after epoch 1699000: 1.8040121454248408
Cost after epoch 1700000: 1.8039858797336632
Cost after epoch 1701000: 1.803959618701255
Cost after epoch 1702000: 1.8039333623264646
Cost after epoc

Cost after epoch 1865000: 1.799714990618899
Cost after epoch 1866000: 1.79968948276173
Cost after epoch 1867000: 1.7996639793766864
Cost after epoch 1868000: 1.7996384804626704
Cost after epoch 1869000: 1.7996129860185852
Cost after epoch 1870000: 1.7995874960433338
Cost after epoch 1871000: 1.79956201053582
Cost after epoch 1872000: 1.7995365294949468
Cost after epoch 1873000: 1.7995110529196188
Cost after epoch 1874000: 1.7994855808087395
Cost after epoch 1875000: 1.7994601131612147
Cost after epoch 1876000: 1.7994346499759486
Cost after epoch 1877000: 1.799409191251847
Cost after epoch 1878000: 1.799383736987815
Cost after epoch 1879000: 1.7993582871827587
Cost after epoch 1880000: 1.7993328418355845
Cost after epoch 1881000: 1.7993074009451988
Cost after epoch 1882000: 1.7992819645105083
Cost after epoch 1883000: 1.7992565325304202
Cost after epoch 1884000: 1.7992311050038428
Cost after epoch 1885000: 1.7992056819296824
Cost after epoch 1886000: 1.7991802633068483
Cost after epoch 

Cost after epoch 2049000: 1.7950957309984068
Cost after epoch 2050000: 1.7950710278515118
Cost after epoch 2051000: 1.79504632898
Cost after epoch 2052000: 1.795021634382831
Cost after epoch 2053000: 1.7949969440589633
Cost after epoch 2054000: 1.794972258007356
Cost after epoch 2055000: 1.7949475762269689
Cost after epoch 2056000: 1.7949228987167622
Cost after epoch 2057000: 1.7948982254756958
Cost after epoch 2058000: 1.7948735565027312
Cost after epoch 2059000: 1.7948488917968284
Cost after epoch 2060000: 1.7948242313569485
Cost after epoch 2061000: 1.7947995751820531
Cost after epoch 2062000: 1.794774923271105
Cost after epoch 2063000: 1.7947502756230647
Cost after epoch 2064000: 1.7947256322368956
Cost after epoch 2065000: 1.7947009931115598
Cost after epoch 2066000: 1.7946763582460203
Cost after epoch 2067000: 1.7946517276392404
Cost after epoch 2068000: 1.794627101290183
Cost after epoch 2069000: 1.7946024791978135
Cost after epoch 2070000: 1.7945778613610948
Cost after epoch 20

Cost after epoch 2234000: 1.7905973487029039
Cost after epoch 2235000: 1.7905734190842706
Cost after epoch 2236000: 1.7905494935533524
Cost after epoch 2237000: 1.7905255721091609
Cost after epoch 2238000: 1.7905016547507073
Cost after epoch 2239000: 1.7904777414770063
Cost after epoch 2240000: 1.7904538322870696
Cost after epoch 2241000: 1.7904299271799107
Cost after epoch 2242000: 1.7904060261545438
Cost after epoch 2243000: 1.790382129209982
Cost after epoch 2244000: 1.7903582363452386
Cost after epoch 2245000: 1.7903343475593292
Cost after epoch 2246000: 1.7903104628512678
Cost after epoch 2247000: 1.7902865822200702
Cost after epoch 2248000: 1.7902627056647502
Cost after epoch 2249000: 1.7902388331843244
Cost after epoch 2250000: 1.7902149647778078
Cost after epoch 2251000: 1.7901911004442173
Cost after epoch 2252000: 1.7901672401825681
Cost after epoch 2253000: 1.790143383991878
Cost after epoch 2254000: 1.7901195318711636
Cost after epoch 2255000: 1.7900956838194415
Cost after e

In [61]:
WRD_EMB = parameters['WRD_EMB']

In [62]:
def find_top_n_similar(word, wrd_emb, n=10):
    id_ = word_to_id_toy[word]
    vec_word = wrd_emb[id_, :]
    norm_vec_word = np.linalg.norm(vec_word)
    cos_sim = np.dot(wrd_emb, vec_word.T) / (np.linalg.norm(wrd_emb, axis=1) * norm_vec_word)
    top_n_ind = np.argsort(cos_sim)[-n:][::-1]
    return [id_to_word_toy[id_] for id_ in top_n_ind]

In [66]:
find_top_n_similar('lazy', WRD_EMB)

['lazy', 'the', 'quick', 'jumped', 'brown', 'over', 'fox', 'dog']