**Problem statement:** 

The continuous bag-of-words (CBOW) variant of word2vec learns word embeddings by predicting the probability of a target word given its context. A context may be a single word or a group of words; for simplicity, this notebook uses a single context word to predict a single target word. 

The purpose of this assignment is to create a word embedding for the given data set.

**Data set :** w2v.txt 

### Importing libraries

In [1]:
import numpy as np
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten,Embedding,Dense
from scipy import spatial

### Function Definitions

In [2]:
# Length of the longest encoded sentence, i.e. the width every sentence has to
# be zero-padded to so that all inputs end up the same size.

def get_max_vocabulary_size(encoded_line_words):
    """Return the length of the longest sequence in ``encoded_line_words``.

    Args:
        encoded_line_words: Iterable of sized sequences (for example one list
            of word indices per sentence).

    Returns:
        The largest ``len()`` among the sequences, or 0 if there are none.
    """
    return max((len(line) for line in encoded_line_words), default=0)

In [3]:
# Build the target sequence for a context: the context shifted left by
# window_size positions, zero-padded at the end. Negative windows are rejected.

def map_target(context, window_size):
    """Return ``context`` shifted left by ``window_size`` and padded with zeros.

    Args:
        context: Sequence (list or 1-D NumPy array) of word indices.
        window_size: Non-negative number of positions to shift by.

    Returns:
        A NumPy array with the same number of elements as ``context``, or
        ``None`` (after printing a message) when ``window_size`` is negative.
    """
    if window_size < 0:
        print("Only Positive window size is expected.")
        return None
    target = context[window_size:]
    # Pad back up to the original length. Deriving the count from the lengths
    # keeps the output aligned with the input even when window_size exceeds
    # len(context).
    pad = len(context) - len(target)
    if pad > 0:
        # A single append instead of one array copy per padded zero.
        target = np.append(target, np.zeros(pad, dtype=int))
    return np.array(target)

In [4]:
# Build one target row per context row by delegating to map_target.

def map_all_targets(context_list, window_size):
    """Return a list with the ``map_target`` result of every context row.

    Args:
        context_list: Iterable of context sequences.
        window_size: Shift passed through to ``map_target`` for each row.

    Returns:
        List of NumPy arrays, one per entry of ``context_list``, in order.
    """
    return [np.array(map_target(row, window_size)) for row in context_list]

In [5]:
def pad_sequences_words(file_lines_words, maxlen):
    """Pad every word list with empty strings up to ``maxlen`` entries.

    Each inner list is extended in place. The padded lists are also returned,
    exactly once each and in input order, including lists that already had
    ``maxlen`` or more entries (these are left unchanged).

    Args:
        file_lines_words: List of word lists, one per line.
        maxlen: Minimum length every word list should have afterwards.

    Returns:
        List containing the (now padded) word lists.
    """
    padded_word = []
    for line_word in file_lines_words:
        missing = maxlen - len(line_word)
        if missing > 0:
            line_word.extend([""] * missing)
        # Append once per line, outside any padding loop.
        padded_word.append(line_word)
    return padded_word

In [6]:
import re
def corpus_to_vocab(txt_file):
    """Read a UTF-8 text file and return its lower-cased word tokens.

    Blank lines are skipped. Runs of ``. , " ' ( )`` and whitespace are
    treated as separators. Empty tokens are never returned, so an empty file
    yields ``[]``.

    Args:
        txt_file: Path of the text file to read.

    Returns:
        List of tokens in reading order (duplicates kept).

    Raises:
        OSError: If the file cannot be opened.
    """
    lines = []
    with open(txt_file, 'r', encoding="utf-8") as file:
        try:
            for line in file:
                line = line.strip()
                if line:
                    lines.append(line)
        except (OSError, UnicodeError) as E:
            # Best effort: keep whatever was read before the failure.
            print("got An exception 3: ", E)
    corpus = ' '.join(lines).lower()
    # Raw string so the regex escapes are not read as string escapes.
    corpus = re.sub(r'[.,"\'()\s]+', ' ', corpus)
    # Leading/trailing separators would otherwise leave '' tokens behind.
    return [token for token in corpus.split(' ') if token]

In [7]:
def unique_words_list(word_list):
    """Return the distinct words longer than two characters, first-seen order.

    Args:
        word_list: Iterable of word strings, possibly with duplicates.

    Returns:
        List with each qualifying word once, ordered by first occurrence.
    """
    seen = set()
    unique_words = []
    for w in word_list:
        # Set membership keeps this linear; scanning the result list for each
        # word made it quadratic.
        if len(w) > 2 and w not in seen:
            seen.add(w)
            unique_words.append(w)
    return unique_words

In [8]:
def word_OHE(vocabulary):
    """Map every vocabulary word to an integer index produced by ``one_hot``.

    Each word is stripped and hashed once with ``one_hot(word, len(vocabulary))``;
    the first returned index is used.

    NOTE(review): the indices come from hashing, so distinct words can share
    an index (the context printed later in this notebook contains repeated
    values). When that happens 'OHE_Word' keeps only the last word seen for
    that index.

    Args:
        vocabulary: List of word strings.

    Returns:
        Dict with keys 'Word' (the input list), 'OHE' (list of indices in
        vocabulary order), 'Word_OHE' (stripped word -> index) and
        'OHE_Word' (index -> stripped word).
    """
    vocab_size = len(vocabulary)
    stripped = [wd.strip() for wd in vocabulary]
    # Hash each word once and reuse the result for all three views.
    codes = [one_hot(wd, vocab_size)[0] for wd in stripped]
    return {
        'Word': vocabulary,
        'OHE': codes,
        'Word_OHE': dict(zip(stripped, codes)),
        'OHE_Word': dict(zip(codes, stripped)),
    }

In [9]:
# Tokenise the corpus file into a flat list of lower-cased words.
w_list = corpus_to_vocab("w2v.txt")

In [10]:
# Keep each distinct word longer than two characters, in first-seen order.
vocabulary = unique_words_list(w_list)

In [11]:
# Word <-> integer-index lookup tables for the vocabulary.
dictionary = word_OHE(vocabulary)

In [12]:
# Start from a copy of the index list (rebuilt as shifted targets further down).
targets = list(dictionary['OHE'])

In [13]:
# Index assigned to the first vocabulary word.
dictionary['OHE'][0]

26

In [14]:
# Next-word targets: the index sequence shifted left by one position. The last
# word has no successor, so its target is filled with 0.
targets = list(dictionary['OHE'][1:]) + [0]

In [15]:
# Number of distinct words kept in the vocabulary.
len(vocabulary)

255

In [16]:
# Number of targets; there is one per vocabulary word, so the counts should match.
len(targets)

255

In [17]:
# Model input: the word indices as a 1-D NumPy array.
context = np.array(dictionary['OHE'])

In [18]:
# Matching next-word targets as a NumPy array.
targets = np.array(targets)

In [19]:
# All hyper-parameters and constants are defined in this one function.
# Any change to the parameters should be made here to avoid execution errors.

def parameters(context):
    """Return the model and training configuration derived from ``context``.

    Args:
        context: NumPy integer array of word indices, either 1-D
            ``(seq_len,)`` or 2-D ``(n_samples, seq_len)``.

    Returns:
        Tuple ``(INPUT_DIM, OUTPUT_DIM, INPUT_LENGTH, EPOCHS, VERBOSE, LOSS,
        ACTIVATION, OPTIMIZER, MATRIX, BESTMODEL)``.
    """
    # The embedding table needs a row for every index 0..max, hence the +1.
    INPUT_DIM = context.max() + 1
    OUTPUT_DIM = 8
    # Sequence length is the last axis. len(context) would return the number
    # of samples (1) once context has been reshaped to 2-D.
    INPUT_LENGTH = context.shape[-1]
    EPOCHS = 1000
    VERBOSE = 1
    # NOTE(review): categorical_crossentropy expects one-hot / probability
    # targets; confirm this matches the integer targets passed to fit().
    LOSS = 'categorical_crossentropy'
    ACTIVATION = 'softmax'
    OPTIMIZER = 'adam'
    MATRIX = ['accuracy']
    BESTMODEL = 'embeddings.h5'
    return (INPUT_DIM, OUTPUT_DIM, INPUT_LENGTH, EPOCHS, VERBOSE, LOSS, ACTIVATION, OPTIMIZER, MATRIX, BESTMODEL)

In [20]:
# Inspect the input indices.
context

array([ 26, 141, 200, 231, 100, 195, 106,  89,   4,  46,  43, 204, 177,
       238,  77, 187, 227,  88, 112, 149, 163, 136,  46, 250,  27, 233,
       242, 186, 180,  24, 228,  24, 186, 165,  33,   5,  40, 202, 106,
       114, 213,  35, 228, 220,  27, 138,  92,  83,  32,  65, 122, 198,
        87, 171, 226, 243,  79, 248, 173,  71, 239, 123,  52, 245, 227,
       193, 191, 223, 128, 124, 190,  81,  63,   2, 143, 209, 233, 152,
       128,  19,  76, 145, 117, 194, 145,  32, 145,  96, 157,  98, 252,
        23,   5,  54,  54, 144, 215,  66,  73, 227,  46,  23, 113, 196,
       184, 139,  84, 227,  53, 106, 222, 132, 142,  28, 210,  99,  84,
        53, 202,  76, 167,  17,  71, 207, 152, 227, 161, 209, 152, 248,
       134, 142, 198, 107,  63, 240, 148, 230,  14, 196,  18,  88, 233,
        86, 218,  11, 224,  58,  34, 105, 138,  89, 156,  64, 233,  20,
       109,  72,  35, 110, 105,  67, 200, 106, 211,   7,  81, 192, 146,
       160,  16, 164,  50, 207, 103,  20, 207, 101, 178, 239,  1

In [21]:
# Confirm that context and targets have the same shape.

context.shape, targets.shape

((255,), (255,))

In [22]:
# Add a leading batch axis: a single sample holding the whole index sequence.
# Using -1 for the width avoids hard-coding the vocabulary size.
context = context.reshape(1, -1)
targets = targets.reshape(1, -1)

In [23]:
# Verify the shapes after adding the batch axis.
context.shape, targets.shape

((1, 255), (1, 255))

In [37]:
# Spot-check a context row against its relevant target row. After the reshape
# both arrays have shape (1, 255), so 0 is the only valid row index.

index = 0
context[index], targets[index]

(array([ 26, 141, 200, 231, 100, 195, 106,  89,   4,  46,  43, 204, 177,
        238,  77, 187, 227,  88, 112, 149, 163, 136,  46, 250,  27, 233,
        242, 186, 180,  24, 228,  24, 186, 165,  33,   5,  40, 202, 106,
        114, 213,  35, 228, 220,  27, 138,  92,  83,  32,  65, 122, 198,
         87, 171, 226, 243,  79, 248, 173,  71, 239, 123,  52, 245, 227,
        193, 191, 223, 128, 124, 190,  81,  63,   2, 143, 209, 233, 152,
        128,  19,  76, 145, 117, 194, 145,  32, 145,  96, 157,  98, 252,
         23,   5,  54,  54, 144, 215,  66,  73, 227,  46,  23, 113, 196,
        184, 139,  84, 227,  53, 106, 222, 132, 142,  28, 210,  99,  84,
         53, 202,  76, 167,  17,  71, 207, 152, 227, 161, 209, 152, 248,
        134, 142, 198, 107,  63, 240, 148, 230,  14, 196,  18,  88, 233,
         86, 218,  11, 224,  58,  34, 105, 138,  89, 156,  64, 233,  20,
        109,  72,  35, 110, 105,  67, 200, 106, 211,   7,  81, 192, 146,
        160,  16, 164,  50, 207, 103,  20, 207, 101

In [25]:
# Unpack the configuration tuple returned by parameters() into module-level names.
INPUT_DIM, OUTPUT_DIM, INPUT_LENGTH, EPOCHS, VERBOSE, LOSS, ACTIVATION, OPTIMIZER, MATRIX, BESTMODEL = \
parameters(context)

In [26]:
# Callback list that saves the best model, judged by the first metric (accuracy).
# ModelCheckpoint is imported here because no cell in this notebook binds `tf`.
from tensorflow.keras.callbacks import ModelCheckpoint

callback_list = [ModelCheckpoint(filepath=BESTMODEL, monitor=MATRIX[0],
                                 save_best_only=True)]

<IPython.core.display.Javascript object>

In [27]:
# Size of the embedding table (largest word index + 1).
INPUT_DIM

253

In [28]:
# Sequential model: Embedding -> Flatten -> Dense(softmax).
# The Embedding layer has an output dimension of OUTPUT_DIM; its weights are
# used as the word embeddings and later to measure the distance between words.
# The least distant words are the most related, and vice versa.

# Derive the layer sizes from the data instead of hard-coding 255.
seq_len = context.shape[1]

model = Sequential()
embedding_layer = Embedding(input_dim=INPUT_DIM, output_dim=OUTPUT_DIM, input_length=seq_len)
model.add(embedding_layer)
model.add(Flatten())

# Softmax output with one unit per target column.

model.add(Dense(targets.shape[1], activation = ACTIVATION))

model.compile(optimizer = OPTIMIZER, loss=LOSS, metrics = MATRIX)

2022-04-04 18:23:26.524892: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [29]:
# Summary of the model. summary() prints the table itself and returns None,
# so wrapping it in print() only adds a stray "None" line.

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 255, 8)            2024      
_________________________________________________________________
flatten (Flatten)            (None, 2040)              0         
_________________________________________________________________
dense (Dense)                (None, 255)               520455    
Total params: 522,479
Trainable params: 522,479
Non-trainable params: 0
_________________________________________________________________
None


In [30]:
# Inspect the 2-D context that is passed to fit().
context

array([[ 26, 141, 200, 231, 100, 195, 106,  89,   4,  46,  43, 204, 177,
        238,  77, 187, 227,  88, 112, 149, 163, 136,  46, 250,  27, 233,
        242, 186, 180,  24, 228,  24, 186, 165,  33,   5,  40, 202, 106,
        114, 213,  35, 228, 220,  27, 138,  92,  83,  32,  65, 122, 198,
         87, 171, 226, 243,  79, 248, 173,  71, 239, 123,  52, 245, 227,
        193, 191, 223, 128, 124, 190,  81,  63,   2, 143, 209, 233, 152,
        128,  19,  76, 145, 117, 194, 145,  32, 145,  96, 157,  98, 252,
         23,   5,  54,  54, 144, 215,  66,  73, 227,  46,  23, 113, 196,
        184, 139,  84, 227,  53, 106, 222, 132, 142,  28, 210,  99,  84,
         53, 202,  76, 167,  17,  71, 207, 152, 227, 161, 209, 152, 248,
        134, 142, 198, 107,  63, 240, 148, 230,  14, 196,  18,  88, 233,
         86, 218,  11, 224,  58,  34, 105, 138,  89, 156,  64, 233,  20,
        109,  72,  35, 110, 105,  67, 200, 106, 211,   7,  81, 192, 146,
        160,  16, 164,  50, 207, 103,  20, 207, 101

In [31]:
# Inspect the 2-D targets that are passed to fit().
targets

array([[141, 200, 231, 100, 195, 106,  89,   4,  46,  43, 204, 177, 238,
         77, 187, 227,  88, 112, 149, 163, 136,  46, 250,  27, 233, 242,
        186, 180,  24, 228,  24, 186, 165,  33,   5,  40, 202, 106, 114,
        213,  35, 228, 220,  27, 138,  92,  83,  32,  65, 122, 198,  87,
        171, 226, 243,  79, 248, 173,  71, 239, 123,  52, 245, 227, 193,
        191, 223, 128, 124, 190,  81,  63,   2, 143, 209, 233, 152, 128,
         19,  76, 145, 117, 194, 145,  32, 145,  96, 157,  98, 252,  23,
          5,  54,  54, 144, 215,  66,  73, 227,  46,  23, 113, 196, 184,
        139,  84, 227,  53, 106, 222, 132, 142,  28, 210,  99,  84,  53,
        202,  76, 167,  17,  71, 207, 152, 227, 161, 209, 152, 248, 134,
        142, 198, 107,  63, 240, 148, 230,  14, 196,  18,  88, 233,  86,
        218,  11, 224,  58,  34, 105, 138,  89, 156,  64, 233,  20, 109,
         72,  35, 110, 105,  67, 200, 106, 211,   7,  81, 192, 146, 160,
         16, 164,  50, 207, 103,  20, 207, 101, 178

In [32]:
# Training cell. `history` holds what fit() returns (per-epoch loss and accuracy).
# context and targets each contain a single row, so every epoch sees one sample.

history = model.fit(context, targets, epochs = EPOCHS, verbose=VERBOSE, callbacks=callback_list)

2022-04-04 18:23:26.805219: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

Epoch 77/1000
Epoch 78/1000
Epoch 79/1000
Epoch 80/1000
Epoch 81/1000
Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000
Epoch 91/1000
Epoch 92/1000
Epoch 93/1000
Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000
Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
Epoch 109/1000
Epoch 110/1000
Epoch 111/1000
Epoch 112/1000
Epoch 113/1000
Epoch 114/1000
Epoch 115/1000
Epoch 116/1000
Epoch 117/1000
Epoch 118/1000
Epoch 119/1000
Epoch 120/1000
Epoch 121/1000
Epoch 122/1000
Epoch 123/1000
Epoch 124/1000
Epoch 125/1000
Epoch 126/1000
Epoch 127/1000
Epoch 128/1000
Epoch 129/1000
Epoch 130/1000
Epoch 131/1000
Epoch 132/1000
Epoch 133/1000
Epoch 134/1000
Epoch 135/1000
Epoch 136/1000
Epoch 137/1000
Epoch 138/1000
Epoch 139/1000
Epoch 140/1000
Epoch 141/1000
Epoch 142/1000
Epoch 143/1000
Epoch 144/1000
Epo

Epoch 152/1000
Epoch 153/1000
Epoch 154/1000
Epoch 155/1000
Epoch 156/1000
Epoch 157/1000
Epoch 158/1000
Epoch 159/1000
Epoch 160/1000
Epoch 161/1000
Epoch 162/1000
Epoch 163/1000
Epoch 164/1000
Epoch 165/1000
Epoch 166/1000
Epoch 167/1000
Epoch 168/1000
Epoch 169/1000
Epoch 170/1000
Epoch 171/1000
Epoch 172/1000
Epoch 173/1000
Epoch 174/1000
Epoch 175/1000
Epoch 176/1000
Epoch 177/1000
Epoch 178/1000
Epoch 179/1000
Epoch 180/1000
Epoch 181/1000
Epoch 182/1000
Epoch 183/1000
Epoch 184/1000
Epoch 185/1000
Epoch 186/1000
Epoch 187/1000
Epoch 188/1000
Epoch 189/1000
Epoch 190/1000
Epoch 191/1000
Epoch 192/1000
Epoch 193/1000
Epoch 194/1000
Epoch 195/1000
Epoch 196/1000
Epoch 197/1000
Epoch 198/1000
Epoch 199/1000
Epoch 200/1000
Epoch 201/1000
Epoch 202/1000
Epoch 203/1000
Epoch 204/1000
Epoch 205/1000
Epoch 206/1000
Epoch 207/1000
Epoch 208/1000
Epoch 209/1000
Epoch 210/1000
Epoch 211/1000
Epoch 212/1000
Epoch 213/1000
Epoch 214/1000
Epoch 215/1000
Epoch 216/1000
Epoch 217/1000
Epoch 218/

Epoch 227/1000
Epoch 228/1000
Epoch 229/1000
Epoch 230/1000
Epoch 231/1000
Epoch 232/1000
Epoch 233/1000
Epoch 234/1000
Epoch 235/1000
Epoch 236/1000
Epoch 237/1000
Epoch 238/1000
Epoch 239/1000
Epoch 240/1000
Epoch 241/1000
Epoch 242/1000
Epoch 243/1000
Epoch 244/1000
Epoch 245/1000
Epoch 246/1000
Epoch 247/1000
Epoch 248/1000
Epoch 249/1000
Epoch 250/1000
Epoch 251/1000
Epoch 252/1000
Epoch 253/1000
Epoch 254/1000
Epoch 255/1000
Epoch 256/1000
Epoch 257/1000
Epoch 258/1000
Epoch 259/1000
Epoch 260/1000
Epoch 261/1000
Epoch 262/1000
Epoch 263/1000
Epoch 264/1000
Epoch 265/1000
Epoch 266/1000
Epoch 267/1000
Epoch 268/1000
Epoch 269/1000
Epoch 270/1000
Epoch 271/1000
Epoch 272/1000
Epoch 273/1000
Epoch 274/1000
Epoch 275/1000
Epoch 276/1000
Epoch 277/1000
Epoch 278/1000
Epoch 279/1000
Epoch 280/1000
Epoch 281/1000
Epoch 282/1000
Epoch 283/1000
Epoch 284/1000
Epoch 285/1000
Epoch 286/1000
Epoch 287/1000
Epoch 288/1000
Epoch 289/1000
Epoch 290/1000
Epoch 291/1000
Epoch 292/1000
Epoch 293/

Epoch 302/1000
Epoch 303/1000
Epoch 304/1000
Epoch 305/1000
Epoch 306/1000
Epoch 307/1000
Epoch 308/1000
Epoch 309/1000
Epoch 310/1000
Epoch 311/1000
Epoch 312/1000
Epoch 313/1000
Epoch 314/1000
Epoch 315/1000
Epoch 316/1000
Epoch 317/1000
Epoch 318/1000
Epoch 319/1000
Epoch 320/1000
Epoch 321/1000
Epoch 322/1000
Epoch 323/1000
Epoch 324/1000
Epoch 325/1000
Epoch 326/1000
Epoch 327/1000
Epoch 328/1000
Epoch 329/1000
Epoch 330/1000
Epoch 331/1000
Epoch 332/1000
Epoch 333/1000
Epoch 334/1000
Epoch 335/1000
Epoch 336/1000
Epoch 337/1000
Epoch 338/1000
Epoch 339/1000
Epoch 340/1000
Epoch 341/1000
Epoch 342/1000
Epoch 343/1000
Epoch 344/1000
Epoch 345/1000
Epoch 346/1000
Epoch 347/1000
Epoch 348/1000
Epoch 349/1000
Epoch 350/1000
Epoch 351/1000
Epoch 352/1000
Epoch 353/1000
Epoch 354/1000
Epoch 355/1000
Epoch 356/1000
Epoch 357/1000
Epoch 358/1000
Epoch 359/1000
Epoch 360/1000
Epoch 361/1000
Epoch 362/1000
Epoch 363/1000
Epoch 364/1000
Epoch 365/1000
Epoch 366/1000
Epoch 367/1000
Epoch 368/

Epoch 376/1000
Epoch 377/1000
Epoch 378/1000
Epoch 379/1000
Epoch 380/1000
Epoch 381/1000
Epoch 382/1000
Epoch 383/1000
Epoch 384/1000
Epoch 385/1000
Epoch 386/1000
Epoch 387/1000
Epoch 388/1000
Epoch 389/1000
Epoch 390/1000
Epoch 391/1000
Epoch 392/1000
Epoch 393/1000
Epoch 394/1000
Epoch 395/1000
Epoch 396/1000
Epoch 397/1000
Epoch 398/1000
Epoch 399/1000
Epoch 400/1000
Epoch 401/1000
Epoch 402/1000
Epoch 403/1000
Epoch 404/1000
Epoch 405/1000
Epoch 406/1000
Epoch 407/1000
Epoch 408/1000
Epoch 409/1000
Epoch 410/1000
Epoch 411/1000
Epoch 412/1000
Epoch 413/1000
Epoch 414/1000
Epoch 415/1000
Epoch 416/1000
Epoch 417/1000
Epoch 418/1000
Epoch 419/1000
Epoch 420/1000
Epoch 421/1000
Epoch 422/1000
Epoch 423/1000
Epoch 424/1000
Epoch 425/1000
Epoch 426/1000
Epoch 427/1000
Epoch 428/1000
Epoch 429/1000
Epoch 430/1000
Epoch 431/1000
Epoch 432/1000
Epoch 433/1000
Epoch 434/1000
Epoch 435/1000
Epoch 436/1000
Epoch 437/1000
Epoch 438/1000
Epoch 439/1000
Epoch 440/1000
Epoch 441/1000
Epoch 442/

Epoch 451/1000
Epoch 452/1000
Epoch 453/1000
Epoch 454/1000
Epoch 455/1000
Epoch 456/1000
Epoch 457/1000
Epoch 458/1000
Epoch 459/1000
Epoch 460/1000
Epoch 461/1000
Epoch 462/1000
Epoch 463/1000
Epoch 464/1000
Epoch 465/1000
Epoch 466/1000
Epoch 467/1000
Epoch 468/1000
Epoch 469/1000
Epoch 470/1000
Epoch 471/1000
Epoch 472/1000
Epoch 473/1000
Epoch 474/1000
Epoch 475/1000
Epoch 476/1000
Epoch 477/1000
Epoch 478/1000
Epoch 479/1000
Epoch 480/1000
Epoch 481/1000
Epoch 482/1000
Epoch 483/1000
Epoch 484/1000
Epoch 485/1000
Epoch 486/1000
Epoch 487/1000
Epoch 488/1000
Epoch 489/1000
Epoch 490/1000
Epoch 491/1000
Epoch 492/1000
Epoch 493/1000
Epoch 494/1000
Epoch 495/1000
Epoch 496/1000
Epoch 497/1000
Epoch 498/1000
Epoch 499/1000
Epoch 500/1000
Epoch 501/1000
Epoch 502/1000
Epoch 503/1000
Epoch 504/1000
Epoch 505/1000
Epoch 506/1000
Epoch 507/1000
Epoch 508/1000
Epoch 509/1000
Epoch 510/1000
Epoch 511/1000
Epoch 512/1000
Epoch 513/1000
Epoch 514/1000
Epoch 515/1000
Epoch 516/1000
Epoch 517/

Epoch 526/1000
Epoch 527/1000
Epoch 528/1000
Epoch 529/1000
Epoch 530/1000
Epoch 531/1000
Epoch 532/1000
Epoch 533/1000
Epoch 534/1000
Epoch 535/1000
Epoch 536/1000
Epoch 537/1000
Epoch 538/1000
Epoch 539/1000
Epoch 540/1000
Epoch 541/1000
Epoch 542/1000
Epoch 543/1000
Epoch 544/1000
Epoch 545/1000
Epoch 546/1000
Epoch 547/1000
Epoch 548/1000
Epoch 549/1000
Epoch 550/1000
Epoch 551/1000
Epoch 552/1000
Epoch 553/1000
Epoch 554/1000
Epoch 555/1000
Epoch 556/1000
Epoch 557/1000
Epoch 558/1000
Epoch 559/1000
Epoch 560/1000
Epoch 561/1000
Epoch 562/1000
Epoch 563/1000
Epoch 564/1000
Epoch 565/1000
Epoch 566/1000
Epoch 567/1000
Epoch 568/1000
Epoch 569/1000
Epoch 570/1000
Epoch 571/1000
Epoch 572/1000
Epoch 573/1000
Epoch 574/1000
Epoch 575/1000
Epoch 576/1000
Epoch 577/1000
Epoch 578/1000
Epoch 579/1000
Epoch 580/1000
Epoch 581/1000
Epoch 582/1000
Epoch 583/1000
Epoch 584/1000
Epoch 585/1000
Epoch 586/1000
Epoch 587/1000
Epoch 588/1000
Epoch 589/1000
Epoch 590/1000
Epoch 591/1000
Epoch 592/

Epoch 600/1000
Epoch 601/1000
Epoch 602/1000
Epoch 603/1000
Epoch 604/1000
Epoch 605/1000
Epoch 606/1000
Epoch 607/1000
Epoch 608/1000
Epoch 609/1000
Epoch 610/1000
Epoch 611/1000
Epoch 612/1000
Epoch 613/1000
Epoch 614/1000
Epoch 615/1000
Epoch 616/1000
Epoch 617/1000
Epoch 618/1000
Epoch 619/1000
Epoch 620/1000
Epoch 621/1000
Epoch 622/1000
Epoch 623/1000
Epoch 624/1000
Epoch 625/1000
Epoch 626/1000
Epoch 627/1000
Epoch 628/1000
Epoch 629/1000
Epoch 630/1000
Epoch 631/1000
Epoch 632/1000
Epoch 633/1000
Epoch 634/1000
Epoch 635/1000
Epoch 636/1000
Epoch 637/1000
Epoch 638/1000
Epoch 639/1000
Epoch 640/1000
Epoch 641/1000
Epoch 642/1000
Epoch 643/1000
Epoch 644/1000
Epoch 645/1000
Epoch 646/1000
Epoch 647/1000
Epoch 648/1000
Epoch 649/1000
Epoch 650/1000
Epoch 651/1000
Epoch 652/1000
Epoch 653/1000
Epoch 654/1000
Epoch 655/1000
Epoch 656/1000
Epoch 657/1000
Epoch 658/1000
Epoch 659/1000
Epoch 660/1000
Epoch 661/1000
Epoch 662/1000
Epoch 663/1000
Epoch 664/1000
Epoch 665/1000
Epoch 666/

Epoch 674/1000
Epoch 675/1000
Epoch 676/1000
Epoch 677/1000
Epoch 678/1000
Epoch 679/1000
Epoch 680/1000
Epoch 681/1000
Epoch 682/1000
Epoch 683/1000
Epoch 684/1000
Epoch 685/1000
Epoch 686/1000
Epoch 687/1000
Epoch 688/1000
Epoch 689/1000
Epoch 690/1000
Epoch 691/1000
Epoch 692/1000
Epoch 693/1000
Epoch 694/1000
Epoch 695/1000
Epoch 696/1000
Epoch 697/1000
Epoch 698/1000
Epoch 699/1000
Epoch 700/1000
Epoch 701/1000
Epoch 702/1000
Epoch 703/1000
Epoch 704/1000
Epoch 705/1000
Epoch 706/1000
Epoch 707/1000
Epoch 708/1000
Epoch 709/1000
Epoch 710/1000
Epoch 711/1000
Epoch 712/1000
Epoch 713/1000
Epoch 714/1000
Epoch 715/1000
Epoch 716/1000
Epoch 717/1000
Epoch 718/1000
Epoch 719/1000
Epoch 720/1000
Epoch 721/1000
Epoch 722/1000
Epoch 723/1000
Epoch 724/1000
Epoch 725/1000
Epoch 726/1000
Epoch 727/1000
Epoch 728/1000
Epoch 729/1000
Epoch 730/1000
Epoch 731/1000
Epoch 732/1000
Epoch 733/1000
Epoch 734/1000
Epoch 735/1000
Epoch 736/1000
Epoch 737/1000
Epoch 738/1000
Epoch 739/1000
Epoch 740/

Epoch 749/1000
Epoch 750/1000
Epoch 751/1000
Epoch 752/1000
Epoch 753/1000
Epoch 754/1000
Epoch 755/1000
Epoch 756/1000
Epoch 757/1000
Epoch 758/1000
Epoch 759/1000
Epoch 760/1000
Epoch 761/1000
Epoch 762/1000
Epoch 763/1000
Epoch 764/1000
Epoch 765/1000
Epoch 766/1000
Epoch 767/1000
Epoch 768/1000
Epoch 769/1000
Epoch 770/1000
Epoch 771/1000
Epoch 772/1000
Epoch 773/1000
Epoch 774/1000
Epoch 775/1000
Epoch 776/1000
Epoch 777/1000
Epoch 778/1000
Epoch 779/1000
Epoch 780/1000
Epoch 781/1000
Epoch 782/1000
Epoch 783/1000
Epoch 784/1000
Epoch 785/1000
Epoch 786/1000
Epoch 787/1000
Epoch 788/1000
Epoch 789/1000
Epoch 790/1000
Epoch 791/1000
Epoch 792/1000
Epoch 793/1000
Epoch 794/1000
Epoch 795/1000
Epoch 796/1000
Epoch 797/1000
Epoch 798/1000
Epoch 799/1000
Epoch 800/1000
Epoch 801/1000
Epoch 802/1000
Epoch 803/1000
Epoch 804/1000
Epoch 805/1000
Epoch 806/1000
Epoch 807/1000
Epoch 808/1000
Epoch 809/1000
Epoch 810/1000
Epoch 811/1000
Epoch 812/1000
Epoch 813/1000
Epoch 814/1000
Epoch 815/

Epoch 823/1000
Epoch 824/1000
Epoch 825/1000
Epoch 826/1000
Epoch 827/1000
Epoch 828/1000
Epoch 829/1000
Epoch 830/1000
Epoch 831/1000
Epoch 832/1000
Epoch 833/1000
Epoch 834/1000
Epoch 835/1000
Epoch 836/1000
Epoch 837/1000
Epoch 838/1000
Epoch 839/1000
Epoch 840/1000
Epoch 841/1000
Epoch 842/1000
Epoch 843/1000
Epoch 844/1000
Epoch 845/1000
Epoch 846/1000
Epoch 847/1000
Epoch 848/1000
Epoch 849/1000
Epoch 850/1000
Epoch 851/1000
Epoch 852/1000
Epoch 853/1000
Epoch 854/1000
Epoch 855/1000
Epoch 856/1000
Epoch 857/1000
Epoch 858/1000
Epoch 859/1000
Epoch 860/1000
Epoch 861/1000
Epoch 862/1000
Epoch 863/1000
Epoch 864/1000
Epoch 865/1000
Epoch 866/1000
Epoch 867/1000
Epoch 868/1000
Epoch 869/1000
Epoch 870/1000
Epoch 871/1000
Epoch 872/1000
Epoch 873/1000
Epoch 874/1000
Epoch 875/1000
Epoch 876/1000
Epoch 877/1000
Epoch 878/1000
Epoch 879/1000
Epoch 880/1000
Epoch 881/1000
Epoch 882/1000
Epoch 883/1000
Epoch 884/1000
Epoch 885/1000
Epoch 886/1000
Epoch 887/1000
Epoch 888/1000
Epoch 889/

Epoch 897/1000
Epoch 898/1000
Epoch 899/1000
Epoch 900/1000
Epoch 901/1000
Epoch 902/1000
Epoch 903/1000
Epoch 904/1000
Epoch 905/1000
Epoch 906/1000
Epoch 907/1000
Epoch 908/1000
Epoch 909/1000
Epoch 910/1000
Epoch 911/1000
Epoch 912/1000
Epoch 913/1000
Epoch 914/1000
Epoch 915/1000
Epoch 916/1000
Epoch 917/1000
Epoch 918/1000
Epoch 919/1000
Epoch 920/1000
Epoch 921/1000
Epoch 922/1000
Epoch 923/1000
Epoch 924/1000
Epoch 925/1000
Epoch 926/1000
Epoch 927/1000
Epoch 928/1000
Epoch 929/1000
Epoch 930/1000
Epoch 931/1000
Epoch 932/1000
Epoch 933/1000
Epoch 934/1000
Epoch 935/1000
Epoch 936/1000
Epoch 937/1000
Epoch 938/1000
Epoch 939/1000
Epoch 940/1000
Epoch 941/1000
Epoch 942/1000
Epoch 943/1000
Epoch 944/1000
Epoch 945/1000
Epoch 946/1000
Epoch 947/1000
Epoch 948/1000
Epoch 949/1000
Epoch 950/1000
Epoch 951/1000
Epoch 952/1000
Epoch 953/1000
Epoch 954/1000
Epoch 955/1000
Epoch 956/1000
Epoch 957/1000
Epoch 958/1000
Epoch 959/1000
Epoch 960/1000
Epoch 961/1000
Epoch 962/1000
Epoch 963/

Epoch 971/1000
Epoch 972/1000
Epoch 973/1000
Epoch 974/1000
Epoch 975/1000
Epoch 976/1000
Epoch 977/1000
Epoch 978/1000
Epoch 979/1000
Epoch 980/1000
Epoch 981/1000
Epoch 982/1000
Epoch 983/1000
Epoch 984/1000
Epoch 985/1000
Epoch 986/1000
Epoch 987/1000
Epoch 988/1000
Epoch 989/1000
Epoch 990/1000
Epoch 991/1000
Epoch 992/1000
Epoch 993/1000
Epoch 994/1000
Epoch 995/1000
Epoch 996/1000
Epoch 997/1000
Epoch 998/1000
Epoch 999/1000
Epoch 1000/1000


In [33]:
# word_embeddings is the Embedding layer's weight matrix: one row per word
# index (INPUT_DIM rows), each row holding OUTPUT_DIM values.

word_embeddings = embedding_layer.get_weights()[0]

In [34]:
# Embedding row for index 0 (the value used as the filler target).
word_embeddings[0]

array([-0.02066271, -0.04141219, -0.0393253 ,  0.01212696, -0.03306778,
        0.00560141,  0.03302142, -0.03327538], dtype=float32)

In [35]:
# Flatten the per-line word lists into one list.
# NOTE(review): `file_details` is not defined in any visible cell.
words = [wd for line in file_details['line_words'] for wd in line]

NameError: name 'file_details' is not defined

In [None]:
# First padded context row.
# NOTE(review): `file_details` is not defined in any visible cell.
file_details['padded_context'][0]

In [None]:
# Inspect the flattened word list.
words

In [None]:
# Locate a word: which line it is on and how many words precede it.
def get_indices(word):
    """Return ``(line_index, word_index)`` of the first occurrence of ``word``.

    Scans ``file_details['line_words']`` line by line. ``word_index`` counts
    every word visited before the match and is not reset at line boundaries.
    NOTE(review): confirm a running (flat) index is intended rather than the
    position within the line.

    Returns:
        The index pair, or ``(-1, -1)`` when the word does not occur.
    """
    running_index = 0
    for line_no, line in enumerate(file_details['line_words']):
        for token in line:
            if token == word:
                return (line_no, running_index)
            running_index += 1
    return (-1,-1)

In [None]:
# Fetch one row of the embedding matrix.

def fetch_embedding(line_index, word_index, word_embeddings):
    """Return ``word_embeddings[line_index]``.

    ``word_index`` is accepted but not used.
    NOTE(review): the lookup uses the line index; confirm that this, rather
    than the word's own index, is the intended row of the embedding matrix.
    """
    return word_embeddings[line_index]

In [None]:
# Intended to find the closest word to `word` using the learned embeddings.
# At present it only prints the cosine similarity between the word's embedding
# and every embedding row; evaluation_method (cosine or euclidean) is not read.

def closest_word(word, word_embeddings, evaluation_method):
    """Print cosine similarities of ``word`` against all embedding rows.

    Args:
        word: Word to look up through ``get_indices``.
        word_embeddings: 2-D array of embedding rows.
        evaluation_method: Currently unused.

    Returns:
        Always the literal string ``"word"``.
    """
    line_index, word_index = get_indices(word)
    print(word_embeddings.shape)
    if line_index == -1 or word_index == -1:
        print("searched word not found in the given context!")
        return "word"
    query_vec = fetch_embedding(line_index, word_index, word_embeddings)
    for candidate in word_embeddings:
        # Cosine similarity = 1 - cosine distance.
        similarity = 1 - spatial.distance.cosine(query_vec, candidate)
        print(similarity, line_index, word_index)
    return "word"

In [None]:
# Try the lookup for the token "2000" using cosine similarity.
closest_word("2000", word_embeddings, 'cosine')

In [None]:
# Inspect the full embedding matrix.
word_embeddings

In [None]:
# Word at position 177 of the first line.
# NOTE(review): `file_details` is not defined in any visible cell.
file_details['line_words'][0][177]

In [None]:
# First entry of the first padded context row.
# NOTE(review): `file_details` is not defined in any visible cell.
file_details['padded_context'][0][0]

In [None]:
# Inspect the whole structure.
# NOTE(review): `file_details` is not defined in any visible cell.
file_details

In [None]:
# predict() needs the input samples; keep the result in `pred`, which the
# following cell reads.
pred = model.predict(context)
pred

In [None]:
# Pair every context index with the arg-max of the prediction row it selects.
# NOTE(review): `pred` is not assigned in any visible cell, and it is indexed
# by word index here; confirm that matches the shape predict() returns.
cont = [cx for ctx in context for cx in ctx]
targ = [np.argmax(pred[cx]) for cx in cont]
df = pd.DataFrame({'Word':cont, 'Next Predicted Word':targ})

In [None]:
# Display the word / predicted-next-word table.
df