# Learn W2Vec 
https://towardsdatascience.com/learn-word2vec-by-implementing-it-in-tensorflow-45641adaf2ac

For an efficient implementation of word2vec try gensim with some corpus like text8.
https://radimrehurek.com/gensim/models/word2vec.html

other resources
http://colah.github.io/posts/2014-07-NLP-RNNs-Representations/


In [1]:
import numpy as np
import tensorflow as tf
corpus_raw = 'He is the king . The king is royal . She is the royal  queen '
# convert to lower case
corpus_raw = corpus_raw.lower()

In [3]:
# words to integers
words = []
for word in corpus_raw.split():
    if word != '.': # because we don't want to treat . as a word
        words.append(word)
words = set(words) # so that all duplicate words are removed
word2int = {}
int2word = {}
vocab_size = len(words) # gives the total number of unique words
for i,word in enumerate(words):
    word2int[word] = i
    int2word[i] = word

In [10]:
# raw sentences is a list of sentences.
raw_sentences = corpus_raw.split('.')
sentences = []
for sentence in raw_sentences:
    sentences.append(sentence.split())

In [12]:
data = []
WINDOW_SIZE = 2
for sentence in sentences:
    for word_index, word in enumerate(sentence):
        for nb_word in sentence[max(word_index - WINDOW_SIZE, 0) : min(word_index + WINDOW_SIZE, len(sentence)) + 1] : 
            if nb_word != word:
                data.append([word, nb_word])

In [15]:
# one hot vectors 
# i.e., 
# say we have a vocabulary of 3 words : pen, pineapple, apple
# where 
# word2int['pen'] -> 0 -> [1 0 0]
# word2int['pineapple'] -> 1 -> [0 1 0]
# word2int['apple'] -> 2 -> [0 0 1]
# function to convert numbers to one hot vectors
def to_one_hot(data_point_index, vocab_size):
    temp = np.zeros(vocab_size)
    temp[data_point_index] = 1
    return temp
x_train = [] # input word
y_train = [] # output word
for data_word in data:
    x_train.append(to_one_hot(word2int[ data_word[0] ], vocab_size))
    y_train.append(to_one_hot(word2int[ data_word[1] ], vocab_size))
    
# convert them to numpy arrays
x_train = np.asarray(x_train)
y_train = np.asarray(y_train)

In [24]:
print(x_train.shape, y_train.shape)  # (34, 7) meaning 34 training points, where each point has 7 dimensions

(34, 7) (34, 7)


In [23]:
print(corpus_raw)
print(words)
print(word2int)
print(word2int['queen'])
print(int2word[4])
print(sentences)
print('\ndata')
print(data)
print('\nonehotvectors')
# print(x_train)
for i in range(len(x_train)):
    print(str(x_train[i]) + "         " + str(y_train[i]))

he is the king . the king is royal . she is the royal  queen 
{'she', 'is', 'king', 'he', 'queen', 'the', 'royal'}
{'is': 1, 'king': 2, 'he': 3, 'queen': 4, 'royal': 6, 'she': 0, 'the': 5}
4
queen
[['he', 'is', 'the', 'king'], ['the', 'king', 'is', 'royal'], ['she', 'is', 'the', 'royal', 'queen']]

data
[['he', 'is'], ['he', 'the'], ['is', 'he'], ['is', 'the'], ['is', 'king'], ['the', 'he'], ['the', 'is'], ['the', 'king'], ['king', 'is'], ['king', 'the'], ['the', 'king'], ['the', 'is'], ['king', 'the'], ['king', 'is'], ['king', 'royal'], ['is', 'the'], ['is', 'king'], ['is', 'royal'], ['royal', 'king'], ['royal', 'is'], ['she', 'is'], ['she', 'the'], ['is', 'she'], ['is', 'the'], ['is', 'royal'], ['the', 'she'], ['the', 'is'], ['the', 'royal'], ['the', 'queen'], ['royal', 'is'], ['royal', 'the'], ['royal', 'queen'], ['queen', 'the'], ['queen', 'royal']]

onehotvectors
[0. 0. 0. 1. 0. 0. 0.]         [0. 1. 0. 0. 0. 0. 0.]
[0. 0. 0. 1. 0. 0. 0.]         [0. 0. 0. 0. 0. 1. 0.]
[0. 1. 0. 0

## model

In [25]:
# making placeholders for x_train and y_train
x = tf.placeholder(tf.float32, shape=(None, vocab_size))
y_label = tf.placeholder(tf.float32, shape=(None, vocab_size))

In [26]:
EMBEDDING_DIM = 5 # you can choose your own number
W1 = tf.Variable(tf.random_normal([vocab_size, EMBEDDING_DIM]))
b1 = tf.Variable(tf.random_normal([EMBEDDING_DIM])) #bias
hidden_representation = tf.add(tf.matmul(x,W1), b1)

In [27]:
W2 = tf.Variable(tf.random_normal([EMBEDDING_DIM, vocab_size]))
b2 = tf.Variable(tf.random_normal([vocab_size]))
prediction = tf.nn.softmax(tf.add( tf.matmul(hidden_representation, W2), b2))

In [28]:
# input_one_hot  --->  embedded repr. ---> predicted_neighbour_prob
# predicted_prob will be compared against a one hot vector to correct it.

In [29]:
#train
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init) #make sure you do this!
# define the loss function:
cross_entropy_loss = tf.reduce_mean(-tf.reduce_sum(y_label * tf.log(prediction), reduction_indices=[1]))
# define the training step:
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(cross_entropy_loss)
n_iters = 10000
# train for n_iter iterations
for _ in range(n_iters):
    sess.run(train_step, feed_dict={x: x_train, y_label: y_train})
    print('loss is : ', sess.run(cross_entropy_loss, feed_dict={x: x_train, y_label: y_train}))

loss is :  3.2796154
loss is :  3.0701673
loss is :  2.8952258
loss is :  2.750759
loss is :  2.6327817
loss is :  2.5369833
loss is :  2.4589076
loss is :  2.3944123
loss is :  2.3400483
loss is :  2.2931902
loss is :  2.2519584
loss is :  2.2150528
loss is :  2.1815836
loss is :  2.1509376
loss is :  2.1226811
loss is :  2.0965009
loss is :  2.0721586
loss is :  2.0494685
loss is :  2.0282788
loss is :  2.0084617
loss is :  1.9899073
loss is :  1.9725171
loss is :  1.956203
loss is :  1.9408839
loss is :  1.926485
loss is :  1.9129372
loss is :  1.9001752
loss is :  1.8881391
loss is :  1.8767728
loss is :  1.8660231
loss is :  1.8558416
loss is :  1.8461835
loss is :  1.8370069
loss is :  1.8282734
loss is :  1.8199481
loss is :  1.8119985
loss is :  1.8043957
loss is :  1.7971127
loss is :  1.7901255
loss is :  1.783412
loss is :  1.7769524
loss is :  1.7707287
loss is :  1.7647244
loss is :  1.7589248
loss is :  1.7533168
loss is :  1.7478878
loss is :  1.7426271
loss is :  1.7375

loss is :  1.3822054
loss is :  1.3819495
loss is :  1.3816948
loss is :  1.3814411
loss is :  1.3811886
loss is :  1.3809375
loss is :  1.3806872
loss is :  1.3804384
loss is :  1.3801906
loss is :  1.3799438
loss is :  1.3796984
loss is :  1.379454
loss is :  1.3792107
loss is :  1.3789685
loss is :  1.3787276
loss is :  1.3784876
loss is :  1.3782489
loss is :  1.3780111
loss is :  1.3777747
loss is :  1.3775392
loss is :  1.3773048
loss is :  1.3770715
loss is :  1.3768394
loss is :  1.3766083
loss is :  1.3763782
loss is :  1.3761492
loss is :  1.3759215
loss is :  1.3756945
loss is :  1.3754689
loss is :  1.3752443
loss is :  1.3750205
loss is :  1.3747979
loss is :  1.3745764
loss is :  1.374356
loss is :  1.3741366
loss is :  1.3739182
loss is :  1.3737007
loss is :  1.3734845
loss is :  1.3732692
loss is :  1.3730549
loss is :  1.3728417
loss is :  1.3726293
loss is :  1.3724182
loss is :  1.3722079
loss is :  1.3719987
loss is :  1.3717904
loss is :  1.371583
loss is :  1.371

loss is :  1.3348898
loss is :  1.3348576
loss is :  1.3348256
loss is :  1.3347938
loss is :  1.3347621
loss is :  1.3347303
loss is :  1.3346987
loss is :  1.3346673
loss is :  1.334636
loss is :  1.334605
loss is :  1.334574
loss is :  1.3345429
loss is :  1.334512
loss is :  1.3344812
loss is :  1.3344507
loss is :  1.3344202
loss is :  1.3343897
loss is :  1.3343595
loss is :  1.3343295
loss is :  1.3342993
loss is :  1.3342695
loss is :  1.3342395
loss is :  1.3342098
loss is :  1.3341802
loss is :  1.3341508
loss is :  1.3341211
loss is :  1.334092
loss is :  1.3340627
loss is :  1.3340336
loss is :  1.3340046
loss is :  1.3339758
loss is :  1.3339471
loss is :  1.3339183
loss is :  1.3338897
loss is :  1.3338612
loss is :  1.3338327
loss is :  1.3338046
loss is :  1.3337764
loss is :  1.3337483
loss is :  1.3337202
loss is :  1.3336926
loss is :  1.3336648
loss is :  1.3336371
loss is :  1.3336095
loss is :  1.3335818
loss is :  1.3335545
loss is :  1.3335273
loss is :  1.33349

loss is :  1.3268409
loss is :  1.3268329
loss is :  1.3268251
loss is :  1.3268173
loss is :  1.3268094
loss is :  1.3268014
loss is :  1.3267938
loss is :  1.3267858
loss is :  1.326778
loss is :  1.3267703
loss is :  1.3267626
loss is :  1.3267548
loss is :  1.3267471
loss is :  1.3267394
loss is :  1.3267317
loss is :  1.326724
loss is :  1.3267164
loss is :  1.3267088
loss is :  1.3267013
loss is :  1.3266935
loss is :  1.3266859
loss is :  1.3266784
loss is :  1.3266709
loss is :  1.3266633
loss is :  1.3266559
loss is :  1.3266484
loss is :  1.3266408
loss is :  1.3266333
loss is :  1.326626
loss is :  1.3266184
loss is :  1.326611
loss is :  1.3266035
loss is :  1.3265963
loss is :  1.3265887
loss is :  1.3265815
loss is :  1.3265741
loss is :  1.3265668
loss is :  1.3265594
loss is :  1.3265522
loss is :  1.3265449
loss is :  1.3265376
loss is :  1.3265305
loss is :  1.3265232
loss is :  1.3265159
loss is :  1.3265086
loss is :  1.3265015
loss is :  1.3264942
loss is :  1.3264

loss is :  1.3243275
loss is :  1.3243243
loss is :  1.324321
loss is :  1.3243177
loss is :  1.3243145
loss is :  1.3243113
loss is :  1.324308
loss is :  1.3243048
loss is :  1.3243016
loss is :  1.3242984
loss is :  1.3242952
loss is :  1.324292
loss is :  1.3242886
loss is :  1.3242855
loss is :  1.3242824
loss is :  1.3242791
loss is :  1.3242759
loss is :  1.3242728
loss is :  1.3242694
loss is :  1.3242663
loss is :  1.3242633
loss is :  1.32426
loss is :  1.3242569
loss is :  1.3242539
loss is :  1.3242506
loss is :  1.3242472
loss is :  1.3242444
loss is :  1.3242412
loss is :  1.3242381
loss is :  1.3242348
loss is :  1.3242319
loss is :  1.3242288
loss is :  1.3242254
loss is :  1.3242224
loss is :  1.3242193
loss is :  1.3242162
loss is :  1.3242131
loss is :  1.3242102
loss is :  1.3242068
loss is :  1.3242038
loss is :  1.3242009
loss is :  1.3241978
loss is :  1.3241947
loss is :  1.3241916
loss is :  1.3241886
loss is :  1.3241855
loss is :  1.3241825
loss is :  1.32417

loss is :  1.3232193
loss is :  1.3232175
loss is :  1.3232157
loss is :  1.3232139
loss is :  1.3232121
loss is :  1.3232102
loss is :  1.3232087
loss is :  1.3232068
loss is :  1.323205
loss is :  1.3232032
loss is :  1.3232014
loss is :  1.3231997
loss is :  1.323198
loss is :  1.3231963
loss is :  1.3231944
loss is :  1.3231927
loss is :  1.3231909
loss is :  1.323189
loss is :  1.3231875
loss is :  1.3231857
loss is :  1.3231839
loss is :  1.3231821
loss is :  1.3231803
loss is :  1.3231786
loss is :  1.323177
loss is :  1.3231752
loss is :  1.3231736
loss is :  1.3231717
loss is :  1.3231701
loss is :  1.3231683
loss is :  1.3231666
loss is :  1.3231648
loss is :  1.323163
loss is :  1.3231614
loss is :  1.3231597
loss is :  1.3231579
loss is :  1.3231562
loss is :  1.3231546
loss is :  1.3231528
loss is :  1.3231511
loss is :  1.3231496
loss is :  1.3231477
loss is :  1.3231459
loss is :  1.3231443
loss is :  1.3231426
loss is :  1.3231409
loss is :  1.3231392
loss is :  1.32313

loss is :  1.3224914
loss is :  1.3224901
loss is :  1.3224891
loss is :  1.3224881
loss is :  1.3224871
loss is :  1.3224862
loss is :  1.322485
loss is :  1.322484
loss is :  1.322483
loss is :  1.322482
loss is :  1.322481
loss is :  1.32248
loss is :  1.3224789
loss is :  1.322478
loss is :  1.3224769
loss is :  1.3224759
loss is :  1.3224748
loss is :  1.3224738
loss is :  1.3224728
loss is :  1.3224719
loss is :  1.3224707
loss is :  1.3224697
loss is :  1.3224688
loss is :  1.3224678
loss is :  1.3224667
loss is :  1.3224658
loss is :  1.3224647
loss is :  1.3224636
loss is :  1.3224628
loss is :  1.3224616
loss is :  1.3224608
loss is :  1.3224597
loss is :  1.3224587
loss is :  1.3224576
loss is :  1.3224567
loss is :  1.3224556
loss is :  1.3224546
loss is :  1.3224536
loss is :  1.3224527
loss is :  1.3224516
loss is :  1.3224508
loss is :  1.3224497
loss is :  1.3224486
loss is :  1.3224478
loss is :  1.3224467
loss is :  1.3224458
loss is :  1.3224447
loss is :  1.3224437


loss is :  1.3220403
loss is :  1.3220396
loss is :  1.3220388
loss is :  1.3220382
loss is :  1.3220376
loss is :  1.3220371
loss is :  1.3220361
loss is :  1.3220357
loss is :  1.322035
loss is :  1.3220342
loss is :  1.3220336
loss is :  1.322033
loss is :  1.3220323
loss is :  1.3220316
loss is :  1.322031
loss is :  1.3220303
loss is :  1.3220298
loss is :  1.322029
loss is :  1.3220283
loss is :  1.3220278
loss is :  1.3220271
loss is :  1.3220264
loss is :  1.3220258
loss is :  1.3220252
loss is :  1.3220246
loss is :  1.3220239
loss is :  1.3220232
loss is :  1.3220226
loss is :  1.322022
loss is :  1.3220212
loss is :  1.3220205
loss is :  1.32202
loss is :  1.3220195
loss is :  1.3220186
loss is :  1.322018
loss is :  1.3220174
loss is :  1.3220167
loss is :  1.3220161
loss is :  1.3220154
loss is :  1.3220148
loss is :  1.3220142
loss is :  1.3220135
loss is :  1.322013
loss is :  1.3220124
loss is :  1.3220116
loss is :  1.322011
loss is :  1.3220104
loss is :  1.3220098
lo

loss is :  1.3217717
loss is :  1.3217714
loss is :  1.3217709
loss is :  1.3217705
loss is :  1.32177
loss is :  1.3217695
loss is :  1.321769
loss is :  1.3217688
loss is :  1.3217683
loss is :  1.3217677
loss is :  1.3217672
loss is :  1.3217667
loss is :  1.3217663
loss is :  1.3217658
loss is :  1.3217654
loss is :  1.321765
loss is :  1.3217645
loss is :  1.3217639
loss is :  1.3217633
loss is :  1.3217629
loss is :  1.3217627
loss is :  1.321762
loss is :  1.3217615
loss is :  1.3217611
loss is :  1.3217608
loss is :  1.3217602
loss is :  1.3217597
loss is :  1.3217592
loss is :  1.3217587
loss is :  1.3217584
loss is :  1.3217579
loss is :  1.3217573
loss is :  1.321757
loss is :  1.3217565
loss is :  1.321756
loss is :  1.3217555
loss is :  1.3217552
loss is :  1.3217546
loss is :  1.3217542
loss is :  1.3217536
loss is :  1.3217533
loss is :  1.3217528
loss is :  1.3217523
loss is :  1.3217518
loss is :  1.3217514
loss is :  1.3217509
loss is :  1.3217504
loss is :  1.3217499

loss is :  1.3215734
loss is :  1.321573
loss is :  1.3215728
loss is :  1.3215723
loss is :  1.3215722
loss is :  1.3215717
loss is :  1.3215714
loss is :  1.3215709
loss is :  1.3215705
loss is :  1.3215703
loss is :  1.32157
loss is :  1.3215697
loss is :  1.3215691
loss is :  1.3215688
loss is :  1.3215685
loss is :  1.3215681
loss is :  1.3215679
loss is :  1.3215674
loss is :  1.3215672
loss is :  1.3215666
loss is :  1.3215665
loss is :  1.321566
loss is :  1.3215657
loss is :  1.3215653
loss is :  1.321565
loss is :  1.3215647
loss is :  1.3215643
loss is :  1.321564
loss is :  1.3215636
loss is :  1.3215632
loss is :  1.3215629
loss is :  1.3215626
loss is :  1.3215622
loss is :  1.3215617
loss is :  1.3215615
loss is :  1.3215612
loss is :  1.3215609
loss is :  1.3215606
loss is :  1.3215601
loss is :  1.3215598
loss is :  1.3215593
loss is :  1.3215591
loss is :  1.3215588
loss is :  1.3215585
loss is :  1.3215581
loss is :  1.3215578
loss is :  1.3215573
loss is :  1.321557

loss is :  1.3214388
loss is :  1.3214384
loss is :  1.3214382
loss is :  1.3214378
loss is :  1.3214375
loss is :  1.3214372
loss is :  1.321437
loss is :  1.3214369
loss is :  1.3214363
loss is :  1.3214362
loss is :  1.3214358
loss is :  1.3214356
loss is :  1.3214352
loss is :  1.3214351
loss is :  1.3214347
loss is :  1.3214346
loss is :  1.3214343
loss is :  1.3214339
loss is :  1.3214337
loss is :  1.3214333
loss is :  1.3214331
loss is :  1.321433
loss is :  1.3214326
loss is :  1.3214322
loss is :  1.321432
loss is :  1.3214318
loss is :  1.3214314
loss is :  1.3214312
loss is :  1.3214309
loss is :  1.3214307
loss is :  1.3214304
loss is :  1.3214301
loss is :  1.3214297
loss is :  1.3214294
loss is :  1.3214293
loss is :  1.321429
loss is :  1.3214285
loss is :  1.3214284
loss is :  1.3214281
loss is :  1.3214278
loss is :  1.3214276
loss is :  1.3214271
loss is :  1.3214271
loss is :  1.3214267
loss is :  1.3214264
loss is :  1.3214262
loss is :  1.3214259
loss is :  1.3214

loss is :  1.3213097
loss is :  1.3213096
loss is :  1.3213091
loss is :  1.3213091
loss is :  1.3213089
loss is :  1.3213085
loss is :  1.3213084
loss is :  1.3213081
loss is :  1.3213079
loss is :  1.3213077
loss is :  1.3213075
loss is :  1.3213072
loss is :  1.3213072
loss is :  1.3213071
loss is :  1.3213066
loss is :  1.3213063
loss is :  1.3213061
loss is :  1.3213059
loss is :  1.3213058
loss is :  1.3213055
loss is :  1.3213053
loss is :  1.3213052
loss is :  1.3213049
loss is :  1.3213048
loss is :  1.3213043
loss is :  1.3213042
loss is :  1.321304
loss is :  1.3213037
loss is :  1.3213035
loss is :  1.3213034
loss is :  1.3213032
loss is :  1.321303
loss is :  1.3213028
loss is :  1.3213025
loss is :  1.3213022
loss is :  1.3213019
loss is :  1.3213018
loss is :  1.3213015
loss is :  1.3213013
loss is :  1.3213011
loss is :  1.3213011
loss is :  1.3213007
loss is :  1.3213006
loss is :  1.3213004
loss is :  1.3213
loss is :  1.3212998
loss is :  1.3212996
loss is :  1.32129

loss is :  1.3212297
loss is :  1.3212293
loss is :  1.3212292
loss is :  1.3212291
loss is :  1.321229
loss is :  1.3212287
loss is :  1.3212285
loss is :  1.3212283
loss is :  1.3212283
loss is :  1.3212279
loss is :  1.3212278
loss is :  1.3212276
loss is :  1.3212273
loss is :  1.3212273
loss is :  1.3212271
loss is :  1.321227
loss is :  1.3212267
loss is :  1.3212265
loss is :  1.3212262
loss is :  1.3212261
loss is :  1.3212259
loss is :  1.3212256
loss is :  1.3212255
loss is :  1.3212254
loss is :  1.3212252
loss is :  1.321225
loss is :  1.3212249
loss is :  1.3212248
loss is :  1.3212246
loss is :  1.3212242
loss is :  1.321224
loss is :  1.3212237
loss is :  1.3212237
loss is :  1.3212235
loss is :  1.3212233
loss is :  1.3212231
loss is :  1.3212231
loss is :  1.3212229
loss is :  1.3212227
loss is :  1.3212225
loss is :  1.3212224
loss is :  1.3212221
loss is :  1.3212218
loss is :  1.3212216
loss is :  1.3212215
loss is :  1.3212214
loss is :  1.3212212
loss is :  1.3212

loss is :  1.3211384
loss is :  1.3211383
loss is :  1.3211381
loss is :  1.3211379
loss is :  1.3211379
loss is :  1.3211378
loss is :  1.3211375
loss is :  1.3211374
loss is :  1.3211374
loss is :  1.3211373
loss is :  1.3211371
loss is :  1.321137
loss is :  1.3211367
loss is :  1.3211365
loss is :  1.3211364
loss is :  1.3211362
loss is :  1.321136
loss is :  1.3211359
loss is :  1.3211358
loss is :  1.3211356
loss is :  1.3211355
loss is :  1.3211354
loss is :  1.3211353
loss is :  1.321135
loss is :  1.3211349
loss is :  1.3211349
loss is :  1.3211347
loss is :  1.3211344
loss is :  1.3211342
loss is :  1.3211341
loss is :  1.321134
loss is :  1.3211339
loss is :  1.3211337
loss is :  1.3211336
loss is :  1.3211335
loss is :  1.3211334
loss is :  1.3211331
loss is :  1.321133
loss is :  1.321133
loss is :  1.3211328
loss is :  1.3211325
loss is :  1.3211324
loss is :  1.3211323
loss is :  1.3211321
loss is :  1.321132
loss is :  1.3211318
loss is :  1.3211317
loss is :  1.3211317

loss is :  1.3210723
loss is :  1.3210723
loss is :  1.321072
loss is :  1.3210719
loss is :  1.3210719
loss is :  1.3210717
loss is :  1.3210715
loss is :  1.3210714
loss is :  1.3210714
loss is :  1.3210711
loss is :  1.321071
loss is :  1.3210709
loss is :  1.3210708
loss is :  1.3210707
loss is :  1.3210706
loss is :  1.3210706
loss is :  1.3210704
loss is :  1.3210703
loss is :  1.3210701
loss is :  1.32107
loss is :  1.3210698
loss is :  1.3210697
loss is :  1.3210695
loss is :  1.3210695
loss is :  1.3210692
loss is :  1.3210692
loss is :  1.321069
loss is :  1.321069
loss is :  1.3210689
loss is :  1.3210688
loss is :  1.3210686
loss is :  1.3210685
loss is :  1.3210684
loss is :  1.3210684
loss is :  1.3210682
loss is :  1.321068
loss is :  1.3210678
loss is :  1.3210678
loss is :  1.3210676
loss is :  1.3210673
loss is :  1.3210673
loss is :  1.3210672
loss is :  1.3210671
loss is :  1.321067
loss is :  1.3210669
loss is :  1.3210669
loss is :  1.3210667
loss is :  1.3210665


loss is :  1.3210177
loss is :  1.3210176
loss is :  1.3210175
loss is :  1.3210174
loss is :  1.3210173
loss is :  1.3210171
loss is :  1.3210171
loss is :  1.3210169
loss is :  1.3210169
loss is :  1.3210168
loss is :  1.3210167
loss is :  1.3210164
loss is :  1.3210164
loss is :  1.3210164
loss is :  1.3210162
loss is :  1.3210162
loss is :  1.321016
loss is :  1.321016
loss is :  1.3210157
loss is :  1.3210157
loss is :  1.3210156
loss is :  1.3210155
loss is :  1.3210155
loss is :  1.3210154
loss is :  1.3210151
loss is :  1.321015
loss is :  1.321015
loss is :  1.3210148
loss is :  1.3210148
loss is :  1.3210148
loss is :  1.3210145
loss is :  1.3210145
loss is :  1.3210143
loss is :  1.3210143
loss is :  1.3210142
loss is :  1.321014
loss is :  1.3210139
loss is :  1.3210138
loss is :  1.3210138
loss is :  1.3210136
loss is :  1.3210136
loss is :  1.3210135
loss is :  1.3210133
loss is :  1.3210133
loss is :  1.3210132
loss is :  1.321013
loss is :  1.3210129
loss is :  1.321012

loss is :  1.3209691
loss is :  1.320969
loss is :  1.320969
loss is :  1.3209689
loss is :  1.3209687
loss is :  1.3209686
loss is :  1.3209685
loss is :  1.3209685
loss is :  1.3209684
loss is :  1.3209683
loss is :  1.3209683
loss is :  1.320968
loss is :  1.320968
loss is :  1.3209679
loss is :  1.3209678
loss is :  1.3209678
loss is :  1.3209677
loss is :  1.3209677
loss is :  1.3209677
loss is :  1.3209676
loss is :  1.3209673
loss is :  1.3209672
loss is :  1.3209671
loss is :  1.3209671
loss is :  1.3209668
loss is :  1.3209668
loss is :  1.3209667
loss is :  1.3209667
loss is :  1.3209666
loss is :  1.3209665
loss is :  1.3209664
loss is :  1.3209664
loss is :  1.3209661
loss is :  1.3209661
loss is :  1.320966
loss is :  1.320966
loss is :  1.3209659
loss is :  1.3209658
loss is :  1.3209658
loss is :  1.3209656
loss is :  1.3209655
loss is :  1.3209655
loss is :  1.3209655
loss is :  1.3209652
loss is :  1.3209652
loss is :  1.320965
loss is :  1.320965
loss is :  1.3209648


loss is :  1.3209276
loss is :  1.3209276
loss is :  1.3209276
loss is :  1.3209276
loss is :  1.3209275
loss is :  1.3209274
loss is :  1.3209273
loss is :  1.3209273
loss is :  1.3209273
loss is :  1.320927
loss is :  1.320927
loss is :  1.3209269
loss is :  1.3209268
loss is :  1.3209268
loss is :  1.3209267
loss is :  1.3209265
loss is :  1.3209265
loss is :  1.3209264
loss is :  1.3209264
loss is :  1.3209262
loss is :  1.3209261
loss is :  1.3209261
loss is :  1.320926
loss is :  1.320926
loss is :  1.3209257
loss is :  1.3209257
loss is :  1.3209257
loss is :  1.3209256
loss is :  1.3209256
loss is :  1.3209255
loss is :  1.3209254
loss is :  1.3209254
loss is :  1.3209252
loss is :  1.3209251
loss is :  1.3209252
loss is :  1.320925
loss is :  1.3209249
loss is :  1.3209249
loss is :  1.3209249
loss is :  1.3209246
loss is :  1.3209246
loss is :  1.3209245
loss is :  1.3209244
loss is :  1.3209244
loss is :  1.3209244
loss is :  1.320924
loss is :  1.320924
loss is :  1.320924


loss is :  1.3208911
loss is :  1.3208911
loss is :  1.3208911
loss is :  1.3208911
loss is :  1.320891
loss is :  1.3208909
loss is :  1.3208909
loss is :  1.3208908
loss is :  1.3208907
loss is :  1.3208907
loss is :  1.3208905
loss is :  1.3208904
loss is :  1.3208904
loss is :  1.3208904
loss is :  1.3208903
loss is :  1.3208902
loss is :  1.3208902
loss is :  1.3208901
loss is :  1.32089
loss is :  1.32089
loss is :  1.3208898
loss is :  1.3208896
loss is :  1.3208896
loss is :  1.3208896
loss is :  1.3208895
loss is :  1.3208896
loss is :  1.3208895
loss is :  1.3208895
loss is :  1.3208894
loss is :  1.3208892
loss is :  1.3208892
loss is :  1.3208891
loss is :  1.320889
loss is :  1.320889
loss is :  1.320889
loss is :  1.3208889
loss is :  1.3208888
loss is :  1.3208888
loss is :  1.3208886
loss is :  1.3208885
loss is :  1.3208885
loss is :  1.3208885
loss is :  1.3208884
loss is :  1.3208883
loss is :  1.3208883
loss is :  1.3208882
loss is :  1.320888
loss is :  1.320888
lo

loss is :  1.3208607
loss is :  1.3208607
loss is :  1.3208607
loss is :  1.3208607
loss is :  1.3208605
loss is :  1.3208606
loss is :  1.3208605
loss is :  1.3208604
loss is :  1.3208604
loss is :  1.3208603
loss is :  1.3208603
loss is :  1.3208603
loss is :  1.3208601
loss is :  1.3208601
loss is :  1.32086
loss is :  1.3208598
loss is :  1.3208598
loss is :  1.3208597
loss is :  1.3208597
loss is :  1.3208596
loss is :  1.3208594
loss is :  1.3208594
loss is :  1.3208593
loss is :  1.3208593
loss is :  1.3208593
loss is :  1.3208592
loss is :  1.3208591
loss is :  1.3208591
loss is :  1.320859
loss is :  1.320859
loss is :  1.320859
loss is :  1.320859
loss is :  1.320859
loss is :  1.3208588
loss is :  1.3208588
loss is :  1.3208586
loss is :  1.3208586
loss is :  1.3208586
loss is :  1.3208586
loss is :  1.3208586
loss is :  1.3208584
loss is :  1.3208584
loss is :  1.3208584
loss is :  1.3208582
loss is :  1.3208581
loss is :  1.3208581
loss is :  1.320858
loss is :  1.3208579


loss is :  1.3208346
loss is :  1.3208345
loss is :  1.3208345
loss is :  1.3208345
loss is :  1.3208344
loss is :  1.3208343
loss is :  1.3208343
loss is :  1.3208343
loss is :  1.3208342
loss is :  1.320834
loss is :  1.320834
loss is :  1.320834
loss is :  1.320834
loss is :  1.3208339
loss is :  1.3208338
loss is :  1.3208338
loss is :  1.3208338
loss is :  1.3208338
loss is :  1.3208336
loss is :  1.3208336
loss is :  1.3208336
loss is :  1.3208336
loss is :  1.3208334
loss is :  1.3208333
loss is :  1.3208333
loss is :  1.3208333
loss is :  1.3208332
loss is :  1.3208331
loss is :  1.3208331
loss is :  1.3208331
loss is :  1.320833
loss is :  1.3208328
loss is :  1.3208328
loss is :  1.3208328
loss is :  1.3208328
loss is :  1.3208327
loss is :  1.3208326
loss is :  1.3208326
loss is :  1.3208326
loss is :  1.3208326
loss is :  1.3208325
loss is :  1.3208324
loss is :  1.3208324
loss is :  1.3208323
loss is :  1.3208324
loss is :  1.3208321
loss is :  1.3208321
loss is :  1.32083

In [30]:
print(sess.run(W1))
print('----------')
print(sess.run(b1))
print('----------')

[[-0.26188803 -0.79485154 -0.36377084  1.5716108  -0.41500798]
 [-1.2713832   2.455417    0.08861642 -0.585614    0.34666535]
 [ 0.3222762  -0.6533719  -1.4473431  -0.21603893  0.79306275]
 [-0.2947628  -1.1241205  -0.6775655   1.164433   -0.21536666]
 [-1.4992398   0.49101156 -2.1106145  -1.7184694   1.5599501 ]
 [ 0.9660012   0.71526957  2.1871178  -0.66519123  0.62819755]
 [-1.4551554  -0.95423204  1.5955362   0.8744619  -1.0043633 ]]
----------
[-0.31918272 -0.77230597 -0.8086878   0.6549498   0.4585308 ]
----------


In [31]:
vectors = sess.run(W1 + b1)
# if you work it out, you will see that it has the same effect as running the node hidden representation
print(vectors)

[[-0.5810708  -1.5671575  -1.1724586   2.2265606   0.04352283]
 [-1.5905659   1.683111   -0.7200714   0.06933576  0.80519617]
 [ 0.00309348 -1.4256778  -2.256031    0.43891084  1.2515936 ]
 [-0.6139455  -1.8964264  -1.4862533   1.8193828   0.24316415]
 [-1.8184226  -0.2812944  -2.9193025  -1.0635196   2.018481  ]
 [ 0.6468185  -0.0570364   1.37843    -0.01024145  1.0867283 ]
 [-1.7743381  -1.726538    0.7868484   1.5294117  -0.5458325 ]]


In [32]:
print(vectors[ word2int['queen'] ])

[-1.8184226 -0.2812944 -2.9193025 -1.0635196  2.018481 ]


## Applications

In [34]:
def euclidean_dist(vec1, vec2):
    return np.sqrt(np.sum((vec1-vec2)**2))

def find_closest(word_index, vectors):
    min_dist = 10000 # to act like positive infinity
    min_index = -1
    query_vector = vectors[word_index]
    for index, vector in enumerate(vectors):
        if euclidean_dist(vector, query_vector) < min_dist and not np.array_equal(vector, query_vector):
            min_dist = euclidean_dist(vector, query_vector)
            min_index = index
    return min_index

In [35]:
print(int2word[find_closest(word2int['king'], vectors)])
print(int2word[find_closest(word2int['queen'], vectors)])
print(int2word[find_closest(word2int['royal'], vectors)])

he
king
she


## VIS 

In [None]:
from sklearn.manifold import TSNE
model = TSNE(n_components=2, random_state=0)
np.set_printoptions(suppress=True)
vectors = model.fit_transform(vectors)

In [None]:
from sklearn import preprocessing
normalizer = preprocessing.Normalizer()
vectors =  normalizer.fit_transform(vectors, 'l2')

In [None]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
for word in words:
    print(word, vectors[word2int[word]][1])
    ax.annotate(word, (vectors[word2int[word]][0],vectors[word2int[word]][1] ))
plt.show()