## Introduction

The goal of word2vec and most NLP embedding schemes is to translate text into vectors so that they can then be processed using operations from linear algebra. Vectorizing text data allows us to then create predictive models that use these vectors as input to then perform something useful.

Word2vec is actually a collection of two different methods: continuous bag-of-words (CBOW) and skip-gram.

In [51]:

#--- IMPORT DEPENDENCIES ------------------------------------------------------+

import numpy as np
import re
from collections import defaultdict

#--- CONSTANTS ----------------------------------------------------------------+


class Word2Vec():
    def __init__ (self):
        self.n = settings['n']
        self.eta = settings['learning_rate']
        self.epochs = settings['epochs']
        self.window = settings['window_size']
        pass
    
    
    # GENERATE TRAINING DATA
    def generate_training_data(self, settings, corpus):

        # GENERATE WORD COUNTS
        word_counts = defaultdict(int)
        for row in corpus:
            for word in row:
                word_counts[word] += 1

        self.v_count = len(word_counts.keys())

        # GENERATE LOOKUP DICTIONARIES
        self.words_list = sorted(list(word_counts.keys()),reverse=False)
        self.word_index = dict((word, i) for i, word in enumerate(self.words_list))
        self.index_word = dict((i, word) for i, word in enumerate(self.words_list))

        training_data = []
        # CYCLE THROUGH EACH SENTENCE IN CORPUS
        for sentence in corpus:
            sent_len = len(sentence)

            # CYCLE THROUGH EACH WORD IN SENTENCE
            for i, word in enumerate(sentence):
                
                #w_target  = sentence[i]
                w_target = self.word2onehot(sentence[i])

                # CYCLE THROUGH CONTEXT WINDOW
                w_context = []
                for j in range(i-self.window, i+self.window+1):
                    if j!=i and j<=sent_len-1 and j>=0:
                        w_context.append(self.word2onehot(sentence[j]))
                training_data.append([w_target, w_context])
        return np.array(training_data)


    # SOFTMAX ACTIVATION FUNCTION
    def softmax(self, x):
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum(axis=0)


    # CONVERT WORD TO ONE HOT ENCODING
    def word2onehot(self, word):
        word_vec = [0 for i in range(0, self.v_count)]
        word_index = self.word_index[word]
        word_vec[word_index] = 1
        return word_vec


    # FORWARD PASS
    def forward_pass(self, x):
        h = np.dot(self.w1.T, x)
        u = np.dot(self.w2.T, h)
        y_c = self.softmax(u)
        return y_c, h, u
                

    # BACKPROPAGATION
    def backprop(self, e, h, x):
        dl_dw2 = np.outer(h, e)  
        dl_dw1 = np.outer(x, np.dot(self.w2, e.T))

        # UPDATE WEIGHTS
        self.w1 = self.w1 - (self.eta * dl_dw1)
        self.w2 = self.w2 - (self.eta * dl_dw2)
        pass


    # TRAIN W2V model
    def train(self, training_data):
        # INITIALIZE WEIGHT MATRICES
        self.w1 = np.random.uniform(-0.8, 0.8, (self.v_count, self.n))     # context matrix
        self.w2 = np.random.uniform(-0.8, 0.8, (self.n, self.v_count))     # embedding matrix
        
        # CYCLE THROUGH EACH EPOCH
        for i in range(0, self.epochs):

            self.loss = 0

            # CYCLE THROUGH EACH TRAINING SAMPLE
            for w_t, w_c in training_data:

                # FORWARD PASS
                y_pred, h, u = self.forward_pass(w_t)
                
                # CALCULATE ERROR
                EI = np.sum([np.subtract(y_pred, word) for word in w_c], axis=0)

                # BACKPROPAGATION
                self.backprop(EI, h, w_t)

                # CALCULATE LOSS
                self.loss += -np.sum([u[word.index(1)] for word in w_c]) + len(w_c) * np.log(np.sum(np.exp(u)))
                self.loss += -2*np.log(len(w_c)) -np.sum([u[word.index(1)] for word in w_c]) + (len(w_c) * np.log(np.sum(np.exp(u))))
                
            print ('EPOCH:',i, 'LOSS:', self.loss)
        pass


    # input a word, returns a vector (if available)
    def word_vec(self, word):
        w_index = self.word_index[word]
        v_w = self.w1[w_index]
        return v_w


    # input a vector, returns nearest word(s)
    def vec_sim(self, vec, top_n):

        # CYCLE THROUGH VOCAB
        word_sim = {}
        for i in range(self.v_count):
            v_w2 = self.w1[i]
            theta_num = np.dot(vec, v_w2)
            theta_den = np.linalg.norm(vec) * np.linalg.norm(v_w2)
            theta = theta_num / theta_den

            word = self.index_word[i]
            word_sim[word] = theta

        words_sorted = sorted(word_sim.items(),key=lambda x:x[1],reverse=True)

        for word, sim in words_sorted[:top_n]:
            print (word, sim)

        pass

    # input word, returns top [n] most similar words
    def word_sim(self, word, top_n):

        w1_index = self.word_index[word]
        v_w1 = self.w1[w1_index]

        # CYCLE THROUGH VOCAB
        word_sim = {}
        for i in range(self.v_count):
            v_w2 = self.w1[i]
            theta_num = np.dot(v_w1, v_w2)
            theta_den = np.linalg.norm(v_w1) * np.linalg.norm(v_w2)
            theta = theta_num / theta_den

            word = self.index_word[i]
            word_sim[word] = theta

        words_sorted = sorted(word_sim.items(), key=lambda x:x[1], reverse=True)

        for word, sim in words_sorted[:top_n]:
            print (word, sim)

        pass

In [60]:
#--- EXAMPLE RUN --------------------------------------------------------------+

settings = {}
settings['n'] = 5                   # dimension of word embeddings
settings['window_size'] = 2         # context window +/- center word
settings['min_count'] = 0           # minimum word count
settings['epochs'] = 4000           # number of training epochs
settings['neg_samp'] = 10           # number of negative words to use during training
settings['learning_rate'] = 0.01    # learning rate
np.random.seed(0)                   # set the seed for reproducibility

#corpus = [['the','quick','brown','fox','jumped','over','the','lazy','dog'], 
#          ['dog', 'and', 'cat', 'are', 'doministic', 'animals'],
#          ['cat', 'and', 'fox', 'are', 'similar']]

corpus = [['he', 'is', 'the', 'king'], 
          ['the', 'king', 'is', 'royal'], 
          ['she', 'is', 'the', 'royal', 'queen']]

# INITIALIZE W2V MODEL
w2v = Word2Vec()

# generate training data
training_data = w2v.generate_training_data(settings, corpus)

# train word2vec model
w2v.train(training_data)


#--- END ----------------------------------------------------------------------+

EPOCH: 0 LOSS: 113.15350137522731
EPOCH: 1 LOSS: 111.37612994879629
EPOCH: 2 LOSS: 109.7663314860884
EPOCH: 3 LOSS: 108.29598932991907
EPOCH: 4 LOSS: 106.94268282290022
EPOCH: 5 LOSS: 105.68836205586292
EPOCH: 6 LOSS: 104.51837378546573
EPOCH: 7 LOSS: 103.4207353208215
EPOCH: 8 LOSS: 102.38558633227478
EPOCH: 9 LOSS: 101.40477016127396
EPOCH: 10 LOSS: 100.47151059692719
EPOCH: 11 LOSS: 99.58015982714466
EPOCH: 12 LOSS: 98.72599997745613
EPOCH: 13 LOSS: 97.90508533561527
EPOCH: 14 LOSS: 97.11411568107742
EPOCH: 15 LOSS: 96.35033352619195
EPOCH: 16 LOSS: 95.61143981783238
EPOCH: 17 LOSS: 94.89552393846598
EPOCH: 18 LOSS: 94.20100481710328
EPOCH: 19 LOSS: 93.52658070455792
EPOCH: 20 LOSS: 92.87118574706916
EPOCH: 21 LOSS: 92.23395195097464
EPOCH: 22 LOSS: 91.61417549829306
EPOCH: 23 LOSS: 91.01128666850389
EPOCH: 24 LOSS: 90.42482285835436
EPOCH: 25 LOSS: 89.85440437741146
EPOCH: 26 LOSS: 89.29971283763561
EPOCH: 27 LOSS: 88.76047205431715
EPOCH: 28 LOSS: 88.23643143669331
EPOCH: 29 LOSS:

EPOCH: 268 LOSS: 67.16013004945681
EPOCH: 269 LOSS: 67.153187818425
EPOCH: 270 LOSS: 67.14632697569827
EPOCH: 271 LOSS: 67.13954627663763
EPOCH: 272 LOSS: 67.13284449907772
EPOCH: 273 LOSS: 67.1262204428903
EPOCH: 274 LOSS: 67.11967292955562
EPOCH: 275 LOSS: 67.11320080174167
EPOCH: 276 LOSS: 67.10680292289156
EPOCH: 277 LOSS: 67.10047817681851
EPOCH: 278 LOSS: 67.09422546730843
EPOCH: 279 LOSS: 67.08804371773019
EPOCH: 280 LOSS: 67.0819318706532
EPOCH: 281 LOSS: 67.07588888747237
EPOCH: 282 LOSS: 67.06991374804035
EPOCH: 283 LOSS: 67.06400545030672
EPOCH: 284 LOSS: 67.05816300996428
EPOCH: 285 LOSS: 67.05238546010223
EPOCH: 286 LOSS: 67.04667185086603
EPOCH: 287 LOSS: 67.04102124912413
EPOCH: 288 LOSS: 67.03543273814091
EPOCH: 289 LOSS: 67.02990541725659
EPOCH: 290 LOSS: 67.0244384015727
EPOCH: 291 LOSS: 67.01903082164478
EPOCH: 292 LOSS: 67.01368182318022
EPOCH: 293 LOSS: 67.00839056674283
EPOCH: 294 LOSS: 67.00315622746302
EPOCH: 295 LOSS: 66.99797799475374
EPOCH: 296 LOSS: 66.99285

EPOCH: 579 LOSS: 66.45300274134314
EPOCH: 580 LOSS: 66.45235870340647
EPOCH: 581 LOSS: 66.45171794734094
EPOCH: 582 LOSS: 66.45108044991677
EPOCH: 583 LOSS: 66.45044618810914
EPOCH: 584 LOSS: 66.4498151390957
EPOCH: 585 LOSS: 66.44918728025466
EPOCH: 586 LOSS: 66.44856258916249
EPOCH: 587 LOSS: 66.44794104359197
EPOCH: 588 LOSS: 66.4473226215101
EPOCH: 589 LOSS: 66.44670730107603
EPOCH: 590 LOSS: 66.44609506063907
EPOCH: 591 LOSS: 66.44548587873669
EPOCH: 592 LOSS: 66.44487973409254
EPOCH: 593 LOSS: 66.44427660561462
EPOCH: 594 LOSS: 66.44367647239318
EPOCH: 595 LOSS: 66.44307931369902
EPOCH: 596 LOSS: 66.44248510898146
EPOCH: 597 LOSS: 66.44189383786659
EPOCH: 598 LOSS: 66.44130548015542
EPOCH: 599 LOSS: 66.44072001582207
EPOCH: 600 LOSS: 66.44013742501201
EPOCH: 601 LOSS: 66.43955768804025
EPOCH: 602 LOSS: 66.43898078538967
EPOCH: 603 LOSS: 66.43840669770927
EPOCH: 604 LOSS: 66.43783540581245
EPOCH: 605 LOSS: 66.43726689067539
EPOCH: 606 LOSS: 66.43670113343534
EPOCH: 607 LOSS: 66.43

EPOCH: 852 LOSS: 66.3505280330098
EPOCH: 853 LOSS: 66.35031234354798
EPOCH: 854 LOSS: 66.35009732404698
EPOCH: 855 LOSS: 66.34988297127681
EPOCH: 856 LOSS: 66.3496692820268
EPOCH: 857 LOSS: 66.34945625310547
EPOCH: 858 LOSS: 66.34924388134033
EPOCH: 859 LOSS: 66.3490321635777
EPOCH: 860 LOSS: 66.34882109668273
EPOCH: 861 LOSS: 66.34861067753911
EPOCH: 862 LOSS: 66.34840090304908
EPOCH: 863 LOSS: 66.3481917701331
EPOCH: 864 LOSS: 66.34798327572997
EPOCH: 865 LOSS: 66.34777541679657
EPOCH: 866 LOSS: 66.34756819030764
EPOCH: 867 LOSS: 66.34736159325583
EPOCH: 868 LOSS: 66.3471556226515
EPOCH: 869 LOSS: 66.34695027552254
EPOCH: 870 LOSS: 66.34674554891438
EPOCH: 871 LOSS: 66.34654143988972
EPOCH: 872 LOSS: 66.34633794552857
EPOCH: 873 LOSS: 66.34613506292789
EPOCH: 874 LOSS: 66.34593278920178
EPOCH: 875 LOSS: 66.34573112148112
EPOCH: 876 LOSS: 66.34553005691349
EPOCH: 877 LOSS: 66.34532959266322
EPOCH: 878 LOSS: 66.34512972591102
EPOCH: 879 LOSS: 66.34493045385413
EPOCH: 880 LOSS: 66.34473

EPOCH: 1162 LOSS: 66.30492434938822
EPOCH: 1163 LOSS: 66.30482236498608
EPOCH: 1164 LOSS: 66.30472056740882
EPOCH: 1165 LOSS: 66.30461895600104
EPOCH: 1166 LOSS: 66.30451753011012
EPOCH: 1167 LOSS: 66.30441628908649
EPOCH: 1168 LOSS: 66.30431523228339
EPOCH: 1169 LOSS: 66.3042143590569
EPOCH: 1170 LOSS: 66.30411366876608
EPOCH: 1171 LOSS: 66.30401316077277
EPOCH: 1172 LOSS: 66.30391283444163
EPOCH: 1173 LOSS: 66.30381268914014
EPOCH: 1174 LOSS: 66.30371272423864
EPOCH: 1175 LOSS: 66.30361293911014
EPOCH: 1176 LOSS: 66.30351333313055
EPOCH: 1177 LOSS: 66.30341390567847
EPOCH: 1178 LOSS: 66.30331465613524
EPOCH: 1179 LOSS: 66.30321558388498
EPOCH: 1180 LOSS: 66.30311668831446
EPOCH: 1181 LOSS: 66.30301796881321
EPOCH: 1182 LOSS: 66.30291942477342
EPOCH: 1183 LOSS: 66.30282105558997
EPOCH: 1184 LOSS: 66.30272286066034
EPOCH: 1185 LOSS: 66.30262483938476
EPOCH: 1186 LOSS: 66.30252699116603
EPOCH: 1187 LOSS: 66.30242931540955
EPOCH: 1188 LOSS: 66.30233181152339
EPOCH: 1189 LOSS: 66.30223447

EPOCH: 1434 LOSS: 66.28240390492716
EPOCH: 1435 LOSS: 66.2823357037437
EPOCH: 1436 LOSS: 66.28226758303892
EPOCH: 1437 LOSS: 66.28219954259545
EPOCH: 1438 LOSS: 66.2821315821968
EPOCH: 1439 LOSS: 66.28206370162707
EPOCH: 1440 LOSS: 66.28199590067142
EPOCH: 1441 LOSS: 66.2819281791156
EPOCH: 1442 LOSS: 66.2818605367462
EPOCH: 1443 LOSS: 66.28179297335065
EPOCH: 1444 LOSS: 66.28172548871716
EPOCH: 1445 LOSS: 66.28165808263458
EPOCH: 1446 LOSS: 66.28159075489273
EPOCH: 1447 LOSS: 66.28152350528208
EPOCH: 1448 LOSS: 66.28145633359391
EPOCH: 1449 LOSS: 66.28138923962022
EPOCH: 1450 LOSS: 66.28132222315386
EPOCH: 1451 LOSS: 66.28125528398836
EPOCH: 1452 LOSS: 66.28118842191802
EPOCH: 1453 LOSS: 66.2811216367379
EPOCH: 1454 LOSS: 66.2810549282438
EPOCH: 1455 LOSS: 66.28098829623235
EPOCH: 1456 LOSS: 66.28092174050077
EPOCH: 1457 LOSS: 66.2808552608471
EPOCH: 1458 LOSS: 66.28078885707018
EPOCH: 1459 LOSS: 66.28072252896945
EPOCH: 1460 LOSS: 66.28065627634517
EPOCH: 1461 LOSS: 66.2805900989983


EPOCH: 1709 LOSS: 66.26608666147416
EPOCH: 1710 LOSS: 66.266034518091
EPOCH: 1711 LOSS: 66.26598241680932
EPOCH: 1712 LOSS: 66.26593035754397
EPOCH: 1713 LOSS: 66.2658783402101
EPOCH: 1714 LOSS: 66.26582636472305
EPOCH: 1715 LOSS: 66.26577443099849
EPOCH: 1716 LOSS: 66.26572253895235
EPOCH: 1717 LOSS: 66.26567068850075
EPOCH: 1718 LOSS: 66.26561887956021
EPOCH: 1719 LOSS: 66.26556711204736
EPOCH: 1720 LOSS: 66.26551538587917
EPOCH: 1721 LOSS: 66.26546370097287
EPOCH: 1722 LOSS: 66.26541205724597
EPOCH: 1723 LOSS: 66.26536045461613
EPOCH: 1724 LOSS: 66.26530889300142
EPOCH: 1725 LOSS: 66.26525737231998
EPOCH: 1726 LOSS: 66.2652058924904
EPOCH: 1727 LOSS: 66.26515445343139
EPOCH: 1728 LOSS: 66.26510305506199
EPOCH: 1729 LOSS: 66.26505169730137
EPOCH: 1730 LOSS: 66.26500038006915
EPOCH: 1731 LOSS: 66.26494910328505
EPOCH: 1732 LOSS: 66.26489786686903
EPOCH: 1733 LOSS: 66.26484667074133
EPOCH: 1734 LOSS: 66.26479551482254
EPOCH: 1735 LOSS: 66.26474439903333
EPOCH: 1736 LOSS: 66.26469332329

EPOCH: 2011 LOSS: 66.2519399886515
EPOCH: 2012 LOSS: 66.2518976344085
EPOCH: 2013 LOSS: 66.25185530527845
EPOCH: 2014 LOSS: 66.25181300122568
EPOCH: 2015 LOSS: 66.25177072221453
EPOCH: 2016 LOSS: 66.25172846820949
EPOCH: 2017 LOSS: 66.25168623917513
EPOCH: 2018 LOSS: 66.2516440350761
EPOCH: 2019 LOSS: 66.25160185587714
EPOCH: 2020 LOSS: 66.25155970154307
EPOCH: 2021 LOSS: 66.2515175720389
EPOCH: 2022 LOSS: 66.25147546732958
EPOCH: 2023 LOSS: 66.25143338738025
EPOCH: 2024 LOSS: 66.25139133215612
EPOCH: 2025 LOSS: 66.2513493016225
EPOCH: 2026 LOSS: 66.25130729574474
EPOCH: 2027 LOSS: 66.25126531448831
EPOCH: 2028 LOSS: 66.25122335781882
EPOCH: 2029 LOSS: 66.25118142570189
EPOCH: 2030 LOSS: 66.25113951810333
EPOCH: 2031 LOSS: 66.25109763498888
EPOCH: 2032 LOSS: 66.25105577632449
EPOCH: 2033 LOSS: 66.25101394207624
EPOCH: 2034 LOSS: 66.25097213221015
EPOCH: 2035 LOSS: 66.25093034669243
EPOCH: 2036 LOSS: 66.25088858548936
EPOCH: 2037 LOSS: 66.25084684856729
EPOCH: 2038 LOSS: 66.250805135892

EPOCH: 2310 LOSS: 66.24026291626433
EPOCH: 2311 LOSS: 66.24022681071176
EPOCH: 2312 LOSS: 66.24019072271068
EPOCH: 2313 LOSS: 66.24015465224363
EPOCH: 2314 LOSS: 66.24011859929311
EPOCH: 2315 LOSS: 66.24008256384155
EPOCH: 2316 LOSS: 66.24004654587159
EPOCH: 2317 LOSS: 66.24001054536579
EPOCH: 2318 LOSS: 66.23997456230678
EPOCH: 2319 LOSS: 66.23993859667728
EPOCH: 2320 LOSS: 66.23990264845997
EPOCH: 2321 LOSS: 66.2398667176376
EPOCH: 2322 LOSS: 66.23983080419293
EPOCH: 2323 LOSS: 66.23979490810882
EPOCH: 2324 LOSS: 66.2397590293681
EPOCH: 2325 LOSS: 66.23972316795366
EPOCH: 2326 LOSS: 66.23968732384843
EPOCH: 2327 LOSS: 66.23965149703542
EPOCH: 2328 LOSS: 66.23961568749758
EPOCH: 2329 LOSS: 66.23957989521793
EPOCH: 2330 LOSS: 66.23954412017957
EPOCH: 2331 LOSS: 66.23950836236561
EPOCH: 2332 LOSS: 66.2394726217592
EPOCH: 2333 LOSS: 66.23943689834351
EPOCH: 2334 LOSS: 66.23940119210172
EPOCH: 2335 LOSS: 66.2393655030171
EPOCH: 2336 LOSS: 66.23932983107298
EPOCH: 2337 LOSS: 66.23929417625

EPOCH: 2550 LOSS: 66.23206546133896
EPOCH: 2551 LOSS: 66.23203313727753
EPOCH: 2552 LOSS: 66.2320008274225
EPOCH: 2553 LOSS: 66.23196853176286
EPOCH: 2554 LOSS: 66.23193625028765
EPOCH: 2555 LOSS: 66.23190398298583
EPOCH: 2556 LOSS: 66.23187172984645
EPOCH: 2557 LOSS: 66.23183949085858
EPOCH: 2558 LOSS: 66.2318072660113
EPOCH: 2559 LOSS: 66.23177505529372
EPOCH: 2560 LOSS: 66.23174285869491
EPOCH: 2561 LOSS: 66.23171067620407
EPOCH: 2562 LOSS: 66.2316785078103
EPOCH: 2563 LOSS: 66.23164635350281
EPOCH: 2564 LOSS: 66.23161421327077
EPOCH: 2565 LOSS: 66.23158208710339
EPOCH: 2566 LOSS: 66.23154997498993
EPOCH: 2567 LOSS: 66.23151787691958
EPOCH: 2568 LOSS: 66.23148579288166
EPOCH: 2569 LOSS: 66.23145372286542
EPOCH: 2570 LOSS: 66.23142166686021
EPOCH: 2571 LOSS: 66.2313896248553
EPOCH: 2572 LOSS: 66.23135759684004
EPOCH: 2573 LOSS: 66.23132558280378
EPOCH: 2574 LOSS: 66.23129358273594
EPOCH: 2575 LOSS: 66.23126159662588
EPOCH: 2576 LOSS: 66.23122962446303
EPOCH: 2577 LOSS: 66.23119766623

EPOCH: 2826 LOSS: 66.22364894888614
EPOCH: 2827 LOSS: 66.22362018416672
EPOCH: 2828 LOSS: 66.22359143119891
EPOCH: 2829 LOSS: 66.22356268997551
EPOCH: 2830 LOSS: 66.22353396048936
EPOCH: 2831 LOSS: 66.22350524273324
EPOCH: 2832 LOSS: 66.22347653670003
EPOCH: 2833 LOSS: 66.22344784238255
EPOCH: 2834 LOSS: 66.22341915977367
EPOCH: 2835 LOSS: 66.22339048886626
EPOCH: 2836 LOSS: 66.22336182965316
EPOCH: 2837 LOSS: 66.2233331821273
EPOCH: 2838 LOSS: 66.22330454628157
EPOCH: 2839 LOSS: 66.22327592210881
EPOCH: 2840 LOSS: 66.22324730960202
EPOCH: 2841 LOSS: 66.22321870875408
EPOCH: 2842 LOSS: 66.22319011955793
EPOCH: 2843 LOSS: 66.2231615420065
EPOCH: 2844 LOSS: 66.22313297609276
EPOCH: 2845 LOSS: 66.22310442180965
EPOCH: 2846 LOSS: 66.22307587915012
EPOCH: 2847 LOSS: 66.22304734810726
EPOCH: 2848 LOSS: 66.22301882867393
EPOCH: 2849 LOSS: 66.22299032084315
EPOCH: 2850 LOSS: 66.22296182460796
EPOCH: 2851 LOSS: 66.2229333399614
EPOCH: 2852 LOSS: 66.22290486689643
EPOCH: 2853 LOSS: 66.2228764054

EPOCH: 3139 LOSS: 66.2151863195621
EPOCH: 3140 LOSS: 66.21516092346525
EPOCH: 3141 LOSS: 66.21513553724883
EPOCH: 3142 LOSS: 66.21511016090784
EPOCH: 3143 LOSS: 66.21508479443725
EPOCH: 3144 LOSS: 66.21505943783207
EPOCH: 3145 LOSS: 66.21503409108735
EPOCH: 3146 LOSS: 66.21500875419805
EPOCH: 3147 LOSS: 66.21498342715924
EPOCH: 3148 LOSS: 66.21495810996593
EPOCH: 3149 LOSS: 66.21493280261312
EPOCH: 3150 LOSS: 66.21490750509591
EPOCH: 3151 LOSS: 66.21488221740927
EPOCH: 3152 LOSS: 66.21485693954833
EPOCH: 3153 LOSS: 66.21483167150807
EPOCH: 3154 LOSS: 66.21480641328355
EPOCH: 3155 LOSS: 66.21478116486986
EPOCH: 3156 LOSS: 66.21475592626206
EPOCH: 3157 LOSS: 66.21473069745522
EPOCH: 3158 LOSS: 66.21470547844439
EPOCH: 3159 LOSS: 66.21468026922467
EPOCH: 3160 LOSS: 66.21465506979115
EPOCH: 3161 LOSS: 66.21462988013892
EPOCH: 3162 LOSS: 66.21460470026305
EPOCH: 3163 LOSS: 66.21457953015869
EPOCH: 3164 LOSS: 66.21455436982087
EPOCH: 3165 LOSS: 66.21452921924475
EPOCH: 3166 LOSS: 66.21450407

EPOCH: 3438 LOSS: 66.20801244706063
EPOCH: 3439 LOSS: 66.20798980132518
EPOCH: 3440 LOSS: 66.20796716415812
EPOCH: 3441 LOSS: 66.20794453555551
EPOCH: 3442 LOSS: 66.20792191551351
EPOCH: 3443 LOSS: 66.20789930402822
EPOCH: 3444 LOSS: 66.20787670109581
EPOCH: 3445 LOSS: 66.20785410671243
EPOCH: 3446 LOSS: 66.20783152087418
EPOCH: 3447 LOSS: 66.20780894357725
EPOCH: 3448 LOSS: 66.20778637481776
EPOCH: 3449 LOSS: 66.20776381459187
EPOCH: 3450 LOSS: 66.20774126289574
EPOCH: 3451 LOSS: 66.20771871972552
EPOCH: 3452 LOSS: 66.20769618507735
EPOCH: 3453 LOSS: 66.20767365894741
EPOCH: 3454 LOSS: 66.20765114133192
EPOCH: 3455 LOSS: 66.20762863222697
EPOCH: 3456 LOSS: 66.20760613162874
EPOCH: 3457 LOSS: 66.20758363953344
EPOCH: 3458 LOSS: 66.20756115593726
EPOCH: 3459 LOSS: 66.20753868083631
EPOCH: 3460 LOSS: 66.20751621422683
EPOCH: 3461 LOSS: 66.20749375610501
EPOCH: 3462 LOSS: 66.207471306467
EPOCH: 3463 LOSS: 66.20744886530902
EPOCH: 3464 LOSS: 66.20742643262723
EPOCH: 3465 LOSS: 66.207404008

EPOCH: 3698 LOSS: 66.20240233701541
EPOCH: 3699 LOSS: 66.20238179625969
EPOCH: 3700 LOSS: 66.20236126315092
EPOCH: 3701 LOSS: 66.20234073768592
EPOCH: 3702 LOSS: 66.2023202198613
EPOCH: 3703 LOSS: 66.20229970967395
EPOCH: 3704 LOSS: 66.20227920712053
EPOCH: 3705 LOSS: 66.20225871219785
EPOCH: 3706 LOSS: 66.20223822490262
EPOCH: 3707 LOSS: 66.20221774523162
EPOCH: 3708 LOSS: 66.20219727318161
EPOCH: 3709 LOSS: 66.20217680874937
EPOCH: 3710 LOSS: 66.20215635193165
EPOCH: 3711 LOSS: 66.20213590272517
EPOCH: 3712 LOSS: 66.20211546112677
EPOCH: 3713 LOSS: 66.20209502713318
EPOCH: 3714 LOSS: 66.20207460074118
EPOCH: 3715 LOSS: 66.20205418194759
EPOCH: 3716 LOSS: 66.20203377074911
EPOCH: 3717 LOSS: 66.20201336714253
EPOCH: 3718 LOSS: 66.20199297112467
EPOCH: 3719 LOSS: 66.20197258269229
EPOCH: 3720 LOSS: 66.20195220184216
EPOCH: 3721 LOSS: 66.2019318285711
EPOCH: 3722 LOSS: 66.20191146287586
EPOCH: 3723 LOSS: 66.20189110475329
EPOCH: 3724 LOSS: 66.20187075420007
EPOCH: 3725 LOSS: 66.201850411

EPOCH: 3981 LOSS: 66.19688275827681
EPOCH: 3982 LOSS: 66.19686425850878
EPOCH: 3983 LOSS: 66.1968457655368
EPOCH: 3984 LOSS: 66.19682727935805
EPOCH: 3985 LOSS: 66.19680879996973
EPOCH: 3986 LOSS: 66.19679032736907
EPOCH: 3987 LOSS: 66.19677186155332
EPOCH: 3988 LOSS: 66.19675340251965
EPOCH: 3989 LOSS: 66.1967349502653
EPOCH: 3990 LOSS: 66.19671650478749
EPOCH: 3991 LOSS: 66.19669806608348
EPOCH: 3992 LOSS: 66.19667963415047
EPOCH: 3993 LOSS: 66.19666120898566
EPOCH: 3994 LOSS: 66.19664279058635
EPOCH: 3995 LOSS: 66.19662437894972
EPOCH: 3996 LOSS: 66.19660597407298
EPOCH: 3997 LOSS: 66.19658757595343
EPOCH: 3998 LOSS: 66.19656918458826
EPOCH: 3999 LOSS: 66.19655079997466


In [61]:
w2v.word_sim('queen', 3)

queen 1.0
king 0.6787159441840385
is 0.5014178080332233


References:

https://nathanrooy.github.io/posts/2018-03-22/word2vec-from-scratch-with-python-and-numpy/