In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.optim import SGD
from torch.nn.functional import cosine_similarity
from early_stopping_pytorch.pytorchtools import EarlyStopping
import numpy as np
torch.manual_seed(1)

<torch._C.Generator at 0x18b64954190>

# Generate raw corpus for various models

In [2]:
CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

# vocab set and vocab size
vocab = set(raw_text)
vocab_size = len(vocab)

# construct dictionary to lookup 
word_to_ix = {word: i for i, word in enumerate(vocab)}
ix_to_word = {ix: word for word, ix in word_to_ix.items()}
# construct training data: (context, target) pair
raw_data = []
for i in range(2, len(raw_text) - 2):
    context = [raw_text[i - 2], raw_text[i - 1],
               raw_text[i + 1], raw_text[i + 2]]
    target = raw_text[i]
    raw_data.append((context, target))
print(raw_data[:5])

[(['We', 'are', 'to', 'study'], 'about'), (['are', 'about', 'study', 'the'], 'to'), (['about', 'to', 'the', 'idea'], 'study'), (['to', 'study', 'idea', 'of'], 'the'), (['study', 'the', 'of', 'a'], 'idea')]


In [3]:
context, target = raw_data[0]
context
[word_to_ix[word] for word in context]

[48, 13, 30, 11]

# 1.  CBOW

## CBOW data loader

In [42]:
class cbow_dataset(Dataset):
    def __init__(self, raw_dataset, transform=None):
        # raw_dataset is a list of (context, target) pair
        self.dataset = raw_dataset
        self.transform = transform
    
    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, idx):
        context, target = self.dataset[idx]
        return {"data":torch.tensor([word_to_ix[word] for word in context]), "target":torch.tensor(word_to_ix[target])}

In [43]:
dataset = cbow_dataset(raw_data)
dataloader = DataLoader(dataset,batch_size=4)

## CBOW model

In [5]:
class CBOW(nn.Module):
    def __init__(self):
        super(CBOW, self).__init__()
        # parameter of shape (vocab_size, 3)
        self.embedding = nn.Embedding(vocab_size, 3)
        # matrix of shape (3, vocab_size)
        self.linear = nn.Linear(3, vocab_size, bias=False)
    def forward(self, x):
        # for batch this would be (B, 3)
        context_embed = self.embedding(x).sum(1)
        x = self.linear(context_embed)
        return x

In [6]:
model = CBOW()

In [7]:
criterion = nn.CrossEntropyLoss()

In [8]:
optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)

In [44]:
def train(model, dataloader, epochs=1, early_stopping=False, retrain=False):
    # if retraining
    if retrain:
        for parameter in model.parameters():
            nn.init.normal_(parameter)
    else:
        pass
    
    # if early stopping
    if early_stopping:
        early_stopper=EarlyStopping()
    else:
        pass
    
    for epoch in range(epochs):
        average_loss = 0
        losses = []
        for i, data in enumerate(dataloader, 0):
            context = data["data"]
            target = data["target"]
            optimizer.zero_grad()
            outputs = model(context)
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()
            losses.append(loss.detach().numpy())
        # average loss of this epoch
        average_loss = np.average(losses)
        print("average loss of epoch", epoch, ":", average_loss)
        
        early_stopper(average_loss, model)
        if early_stopper.early_stop:
            print("early stopping")
            break
    
    model.load_state_dict(torch.load('checkpoint.pt'))

    return model

In [45]:
model = train(model, dataloader,epochs=500, early_stopping=True)

average loss of epoch 0 : 0.0071551003
average loss of epoch 1 : 0.0071500777
average loss of epoch 2 : 0.0071450234
average loss of epoch 3 : 0.007140128
average loss of epoch 4 : 0.007135423
average loss of epoch 5 : 0.0071307183
average loss of epoch 6 : 0.0071251234
average loss of epoch 7 : 0.007120228
average loss of epoch 8 : 0.0071159047
average loss of epoch 9 : 0.007110564
average loss of epoch 10 : 0.007105732
average loss of epoch 11 : 0.007100741
average loss of epoch 12 : 0.0070961635
average loss of epoch 13 : 0.007091268
average loss of epoch 14 : 0.007086118
average loss of epoch 15 : 0.0070815403
average loss of epoch 16 : 0.0070766765
average loss of epoch 17 : 0.007071972
average loss of epoch 18 : 0.007066695
average loss of epoch 19 : 0.0070616724
average loss of epoch 20 : 0.007057667
average loss of epoch 21 : 0.0070526125
average loss of epoch 22 : 0.0070475894
average loss of epoch 23 : 0.007042694
average loss of epoch 24 : 0.0070379255
average loss of epoch 

average loss of epoch 206 : 0.0062523526
average loss of epoch 207 : 0.006248506
average loss of epoch 208 : 0.006244564
average loss of epoch 209 : 0.006240781
average loss of epoch 210 : 0.006237189
average loss of epoch 211 : 0.0062329927
average loss of epoch 212 : 0.0062292735
average loss of epoch 213 : 0.006225459
average loss of epoch 214 : 0.0062212306
average loss of epoch 215 : 0.0062177023
average loss of epoch 216 : 0.006213951
average loss of epoch 217 : 0.006210041
average loss of epoch 218 : 0.0062060994
average loss of epoch 219 : 0.0062023164
average loss of epoch 220 : 0.006198756
average loss of epoch 221 : 0.006194846
average loss of epoch 222 : 0.0061908723
average loss of epoch 223 : 0.006187439
average loss of epoch 224 : 0.0061834017
average loss of epoch 225 : 0.006179587
average loss of epoch 226 : 0.006176249
average loss of epoch 227 : 0.00617218
average loss of epoch 228 : 0.006168143
average loss of epoch 229 : 0.0061647096
average loss of epoch 230 : 0.0

average loss of epoch 418 : 0.005520153
average loss of epoch 419 : 0.005516847
average loss of epoch 420 : 0.0055143037
average loss of epoch 421 : 0.0055110296
average loss of epoch 422 : 0.0055080415
average loss of epoch 423 : 0.0055049895
average loss of epoch 424 : 0.0055015883
average loss of epoch 425 : 0.0054987273
average loss of epoch 426 : 0.005495294
average loss of epoch 427 : 0.005492274
average loss of epoch 428 : 0.0054894765
average loss of epoch 429 : 0.0054866155
average loss of epoch 430 : 0.00548385
average loss of epoch 431 : 0.0054802895
average loss of epoch 432 : 0.005477174
average loss of epoch 433 : 0.005474599
average loss of epoch 434 : 0.0054712933
average loss of epoch 435 : 0.0054679234
average loss of epoch 436 : 0.0054649035
average loss of epoch 437 : 0.005462424
average loss of epoch 438 : 0.005459277
average loss of epoch 439 : 0.0054559708
average loss of epoch 440 : 0.0054531097
average loss of epoch 441 : 0.00545009
average loss of epoch 442 : 

In [30]:
word_embedding = None
for submodule in model.children():
    if type(submodule)== nn.Linear:
        print(submodule.parameters())
        word_embedding = submodule.weight

<generator object Module.parameters at 0x000001F303A6A728>


In [31]:
word_embedding = word_embedding.data

In [63]:
word_embedding;

In [32]:
def similarity(word1, word2):
    return cosine_similarity(word_embedding[word_to_ix[word1]], word_embedding[word_to_ix[word2]], dim=0).numpy()

In [33]:
def similarity_topn(word, n):
    words = list(vocab)
    words.sort(key=lambda w: similarity(w, word), reverse=True)
    return words[0:n]

In [41]:
similarity_topn("idea", 10)

['idea',
 'computational',
 'evolution',
 'pattern',
 'is',
 'by',
 'As',
 'the',
 'process.',
 'computers.']

# 2. Skip-gram

In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.optim import SGD
from torch.nn import Sequential
from early_stopping_pytorch.pytorchtools import EarlyStopping
from torch.nn.functional import cosine_similarity
import numpy as np
import random
torch.manual_seed(1)

<torch._C.Generator at 0x19e286b0070>

## Form Skip-gram dataset

In [14]:
CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

# vocab set and vocab size
vocab = set(raw_text)
vocab_size = len(vocab)

# construct dictionary to lookup 
word_to_ix = {word: i for i, word in enumerate(vocab)}
ix_to_word = {ix: word for word, ix in word_to_ix.items()}

raw_skip_gram_data = []
for i in range(2, len(raw_text) - 2):
    context = [raw_text[i - 2], raw_text[i - 1],
               raw_text[i + 1], raw_text[i + 2]]
    center = raw_text[i]
    raw_skip_gram_data.append((center,context))
print(raw_skip_gram_data[:5])

[('about', ['We', 'are', 'to', 'study']), ('to', ['are', 'about', 'study', 'the']), ('study', ['about', 'to', 'the', 'idea']), ('the', ['to', 'study', 'idea', 'of']), ('idea', ['study', 'the', 'of', 'a'])]


In [15]:
class skipgram_dataset(Dataset):
    def __init__(self, raw_dataset, transform=None):
        # raw_dataset is a list of (context, target) pair
        self.dataset = raw_dataset
        self.transform = transform
    
    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, idx):
        center, context = self.dataset[idx]
        return {"data":torch.tensor(word_to_ix[center]), 
                "target":torch.tensor([word_to_ix[context_word] for context_word in context])}

In [16]:
dataset_skipgram = skipgram_dataset(raw_skip_gram_data)
dataloader_skipgram = DataLoader(dataset_skipgram, batch_size=4)

In [21]:
list(dataset_skipgram.__getitem__(0)["target"].numpy())

[42, 10, 31, 21]

In [6]:
model = Sequential(nn.Embedding(vocab_size, 3),
                  nn.Linear(3,vocab_size))

In [58]:
# skipgram with negative sampling
class skipgram_neg(nn.Module):
    def __init__(self, embedding_dim=3, neg_size=5):
        super(skipgram_neg, self).__init__()
        self.input = nn.Embedding(vocab_size, embedding_dim)
        self.output = nn.Embedding(vocab_size,embedding_dim)
        self.neg_size = neg_size
        
    def forward(self, x):
        data = x["data"]
        target = x["target"]
        # get input and out put embedding of data and target
        # shape: (B, embedding_dim)
        data_embedding = self.input(data)
        positive_samples = list(target.numpy())
        # shape: (B, pos_size, embeding_dim)
        positive_embedding = self.output(target)
        
        # negative sample that are not in the context
        negative_samples = []
        # generate negative samples and embedding of negative samples
        # using a uniform distribution
        while len(negative_samples) < self.neg_size:
            s = random.randint(0,vocab_size)
            if s not in positive_samples:
                negative_samples.append(s)
        negative_samples = torch.tensor(negative_samples)
        # shape: (B, neg_size, embedding_dim)
        negative_embedding = self.output(negative_samples)
        
        
        # positive part
        positive = F.logsigmoid(torch.matmul(positive_embedding, data_embedding.view(-1,self.embedding_dim,1))).sum()
        # negative part
        negative = 1/vocab_size * F.logsigmoid(-torch.matmul(negative_embedding, data_embedding.view(-1,self.embedding_dim,1))).sum()
        
        return postivie+negative

In [59]:
model = skipgram_neg()

In [60]:
model(dataset_skipgram.__getitem__(0))

NameError: name 'embedding_dim' is not defined

In [51]:
m = nn.LogSigmoid()
input = torch.tensor([1,0]).float()
output = m(input)

In [56]:
output.sum()

tensor(4)

In [49]:
input

tensor([-0.5247, -0.9767])

In [44]:
F.logsigmoid(output)

RuntimeError: _thnn_log_sigmoid_forward not supported on CPUType for Long

In [42]:
F.logsigmoid(torch.matmul(output, input))

RuntimeError: _thnn_log_sigmoid_forward not supported on CPUType for Long

In [7]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.01)

In [8]:
def train(model, dataloader, epochs=1, early_stopping=False, retrain=False):
    # if retraining
    if retrain:
        for parameter in model.parameters():
            nn.init.normal_(parameter)
    else:
        pass
    
    # if early stopping
    if early_stopping:
        early_stopper=EarlyStopping()
    else:
        pass
    
    for epoch in range(epochs):
        average_loss = 0
        losses = []
        for i, data in enumerate(dataloader, 0):
            context = data["data"]
            target = data["target"]
            optimizer.zero_grad()
            outputs = model(context)
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()
            losses.append(loss.detach().numpy())
        # average loss of this epoch
        average_loss = np.average(losses)
        print("average loss of epoch", epoch, ":", average_loss)
        
        early_stopper(average_loss, model)
        if early_stopper.early_stop:
            print("early stopping")
            break
    
    model.load_state_dict(torch.load('checkpoint.pt'))

    return model

In [11]:
model = train(model,epochs=10000, dataloader=dataloader_skipgram, early_stopping=True)

average loss of epoch 0 : 4.0475607
average loss of epoch 1 : 4.030809
average loss of epoch 2 : 4.014647
average loss of epoch 3 : 3.9990537
average loss of epoch 4 : 3.9840071
average loss of epoch 5 : 3.9694893
average loss of epoch 6 : 3.9554796
average loss of epoch 7 : 3.9419599
average loss of epoch 8 : 3.9289124
average loss of epoch 9 : 3.9163206
average loss of epoch 10 : 3.9041667
average loss of epoch 11 : 3.892434
average loss of epoch 12 : 3.881107
average loss of epoch 13 : 3.8701699
average loss of epoch 14 : 3.8596065
average loss of epoch 15 : 3.8494031
average loss of epoch 16 : 3.8395438
average loss of epoch 17 : 3.8300147
average loss of epoch 18 : 3.8208025
average loss of epoch 19 : 3.811893
average loss of epoch 20 : 3.8032742
average loss of epoch 21 : 3.7949336
average loss of epoch 22 : 3.786858
average loss of epoch 23 : 3.7790377
average loss of epoch 24 : 3.7714612
average loss of epoch 25 : 3.7641177
average loss of epoch 26 : 3.7569978
average loss of e

average loss of epoch 221 : 3.339574
average loss of epoch 222 : 3.338413
average loss of epoch 223 : 3.3372533
average loss of epoch 224 : 3.3360958
average loss of epoch 225 : 3.3349397
average loss of epoch 226 : 3.3337853
average loss of epoch 227 : 3.3326323
average loss of epoch 228 : 3.3314812
average loss of epoch 229 : 3.3303318
average loss of epoch 230 : 3.3291836
average loss of epoch 231 : 3.3280375
average loss of epoch 232 : 3.3268926
average loss of epoch 233 : 3.3257496
average loss of epoch 234 : 3.3246076
average loss of epoch 235 : 3.3234677
average loss of epoch 236 : 3.3223293
average loss of epoch 237 : 3.321192
average loss of epoch 238 : 3.3200564
average loss of epoch 239 : 3.3189225
average loss of epoch 240 : 3.3177896
average loss of epoch 241 : 3.3166585
average loss of epoch 242 : 3.3155289
average loss of epoch 243 : 3.3144011
average loss of epoch 244 : 3.313274
average loss of epoch 245 : 3.3121483
average loss of epoch 246 : 3.3110247
average loss of 

average loss of epoch 441 : 3.1037974
average loss of epoch 442 : 3.1027012
average loss of epoch 443 : 3.1016035
average loss of epoch 444 : 3.1005046
average loss of epoch 445 : 3.0994043
average loss of epoch 446 : 3.0983028
average loss of epoch 447 : 3.0972
average loss of epoch 448 : 3.0960956
average loss of epoch 449 : 3.0949903
average loss of epoch 450 : 3.093883
average loss of epoch 451 : 3.0927744
average loss of epoch 452 : 3.091665
average loss of epoch 453 : 3.090554
average loss of epoch 454 : 3.0894415
average loss of epoch 455 : 3.0883276
average loss of epoch 456 : 3.0872123
average loss of epoch 457 : 3.0860956
average loss of epoch 458 : 3.0849776
average loss of epoch 459 : 3.0838578
average loss of epoch 460 : 3.0827365
average loss of epoch 461 : 3.0816143
average loss of epoch 462 : 3.0804904
average loss of epoch 463 : 3.0793653
average loss of epoch 464 : 3.0782387
average loss of epoch 465 : 3.0771103
average loss of epoch 466 : 3.0759802
average loss of ep

average loss of epoch 662 : 2.8385558
average loss of epoch 663 : 2.8373568
average loss of epoch 664 : 2.8361585
average loss of epoch 665 : 2.8349614
average loss of epoch 666 : 2.8337655
average loss of epoch 667 : 2.8325706
average loss of epoch 668 : 2.831377
average loss of epoch 669 : 2.8301845
average loss of epoch 670 : 2.8289933
average loss of epoch 671 : 2.8278031
average loss of epoch 672 : 2.8266144
average loss of epoch 673 : 2.8254268
average loss of epoch 674 : 2.8242407
average loss of epoch 675 : 2.8230557
average loss of epoch 676 : 2.8218718
average loss of epoch 677 : 2.8206894
average loss of epoch 678 : 2.8195083
average loss of epoch 679 : 2.8183286
average loss of epoch 680 : 2.8171499
average loss of epoch 681 : 2.8159738
average loss of epoch 682 : 2.814798
average loss of epoch 683 : 2.8136237
average loss of epoch 684 : 2.8124514
average loss of epoch 685 : 2.81128
average loss of epoch 686 : 2.81011
average loss of epoch 687 : 2.8089426
average loss of ep

average loss of epoch 885 : 2.6249082
average loss of epoch 886 : 2.6242173
average loss of epoch 887 : 2.6235282
average loss of epoch 888 : 2.6228411
average loss of epoch 889 : 2.622156
average loss of epoch 890 : 2.6214726
average loss of epoch 891 : 2.6207907
average loss of epoch 892 : 2.6201112
average loss of epoch 893 : 2.6194327
average loss of epoch 894 : 2.6187565
average loss of epoch 895 : 2.6180818
average loss of epoch 896 : 2.6174088
average loss of epoch 897 : 2.6167371
average loss of epoch 898 : 2.6160681
average loss of epoch 899 : 2.6154
average loss of epoch 900 : 2.614734
average loss of epoch 901 : 2.6140697
average loss of epoch 902 : 2.613407
average loss of epoch 903 : 2.6127458
average loss of epoch 904 : 2.6120863
average loss of epoch 905 : 2.6114287
average loss of epoch 906 : 2.6107721
average loss of epoch 907 : 2.6101177
average loss of epoch 908 : 2.609465
average loss of epoch 909 : 2.6088135
average loss of epoch 910 : 2.608164
average loss of epoc

average loss of epoch 1103 : 2.5045753
average loss of epoch 1104 : 2.5041192
average loss of epoch 1105 : 2.5036638
average loss of epoch 1106 : 2.5032089
average loss of epoch 1107 : 2.5027542
average loss of epoch 1108 : 2.5023
average loss of epoch 1109 : 2.5018473
average loss of epoch 1110 : 2.5013945
average loss of epoch 1111 : 2.5009425
average loss of epoch 1112 : 2.500491
average loss of epoch 1113 : 2.50004
average loss of epoch 1114 : 2.49959
average loss of epoch 1115 : 2.4991398
average loss of epoch 1116 : 2.498691
average loss of epoch 1117 : 2.4982421
average loss of epoch 1118 : 2.4977944
average loss of epoch 1119 : 2.4973466
average loss of epoch 1120 : 2.4969
average loss of epoch 1121 : 2.4964533
average loss of epoch 1122 : 2.4960077
average loss of epoch 1123 : 2.4955626
average loss of epoch 1124 : 2.4951177
average loss of epoch 1125 : 2.4946733
average loss of epoch 1126 : 2.4942303
average loss of epoch 1127 : 2.4937868
average loss of epoch 1128 : 2.493344

average loss of epoch 1320 : 2.4179423
average loss of epoch 1321 : 2.4175985
average loss of epoch 1322 : 2.417255
average loss of epoch 1323 : 2.4169126
average loss of epoch 1324 : 2.4165702
average loss of epoch 1325 : 2.4162283
average loss of epoch 1326 : 2.4158869
average loss of epoch 1327 : 2.4155457
average loss of epoch 1328 : 2.415206
average loss of epoch 1329 : 2.4148657
average loss of epoch 1330 : 2.414527
average loss of epoch 1331 : 2.4141877
average loss of epoch 1332 : 2.413849
average loss of epoch 1333 : 2.4135115
average loss of epoch 1334 : 2.4131742
average loss of epoch 1335 : 2.4128373
average loss of epoch 1336 : 2.412501
average loss of epoch 1337 : 2.4121652
average loss of epoch 1338 : 2.4118302
average loss of epoch 1339 : 2.411495
average loss of epoch 1340 : 2.4111607
average loss of epoch 1341 : 2.4108272
average loss of epoch 1342 : 2.4104936
average loss of epoch 1343 : 2.4101608
average loss of epoch 1344 : 2.4098282
average loss of epoch 1345 : 2.

average loss of epoch 1536 : 2.3545508
average loss of epoch 1537 : 2.3543034
average loss of epoch 1538 : 2.3540556
average loss of epoch 1539 : 2.3538089
average loss of epoch 1540 : 2.3535624
average loss of epoch 1541 : 2.3533165
average loss of epoch 1542 : 2.3530705
average loss of epoch 1543 : 2.3528254
average loss of epoch 1544 : 2.35258
average loss of epoch 1545 : 2.3523352
average loss of epoch 1546 : 2.352091
average loss of epoch 1547 : 2.351847
average loss of epoch 1548 : 2.3516037
average loss of epoch 1549 : 2.3513598
average loss of epoch 1550 : 2.3511174
average loss of epoch 1551 : 2.350875
average loss of epoch 1552 : 2.3506324
average loss of epoch 1553 : 2.3503904
average loss of epoch 1554 : 2.3501492
average loss of epoch 1555 : 2.3499079
average loss of epoch 1556 : 2.349667
average loss of epoch 1557 : 2.3494267
average loss of epoch 1558 : 2.3491867
average loss of epoch 1559 : 2.348947
average loss of epoch 1560 : 2.3487074
average loss of epoch 1561 : 2.3

average loss of epoch 1752 : 2.3082678
average loss of epoch 1753 : 2.3080812
average loss of epoch 1754 : 2.3078947
average loss of epoch 1755 : 2.307709
average loss of epoch 1756 : 2.3075228
average loss of epoch 1757 : 2.3073373
average loss of epoch 1758 : 2.3071518
average loss of epoch 1759 : 2.306966
average loss of epoch 1760 : 2.306781
average loss of epoch 1761 : 2.3065965
average loss of epoch 1762 : 2.3064117
average loss of epoch 1763 : 2.306227
average loss of epoch 1764 : 2.3060427
average loss of epoch 1765 : 2.305859
average loss of epoch 1766 : 2.3056746
average loss of epoch 1767 : 2.305491
average loss of epoch 1768 : 2.3053079
average loss of epoch 1769 : 2.3051243
average loss of epoch 1770 : 2.3049412
average loss of epoch 1771 : 2.3047583
average loss of epoch 1772 : 2.3045754
average loss of epoch 1773 : 2.3043928
average loss of epoch 1774 : 2.3042102
average loss of epoch 1775 : 2.304028
average loss of epoch 1776 : 2.3038464
average loss of epoch 1777 : 2.3

average loss of epoch 1967 : 2.271985
average loss of epoch 1968 : 2.2718306
average loss of epoch 1969 : 2.2716768
average loss of epoch 1970 : 2.271523
average loss of epoch 1971 : 2.2713692
average loss of epoch 1972 : 2.2712154
average loss of epoch 1973 : 2.271062
average loss of epoch 1974 : 2.2709088
average loss of epoch 1975 : 2.270755
average loss of epoch 1976 : 2.270602
average loss of epoch 1977 : 2.2704487
average loss of epoch 1978 : 2.2702959
average loss of epoch 1979 : 2.2701426
average loss of epoch 1980 : 2.2699897
average loss of epoch 1981 : 2.2698374
average loss of epoch 1982 : 2.2696846
average loss of epoch 1983 : 2.2695322
average loss of epoch 1984 : 2.26938
average loss of epoch 1985 : 2.269228
average loss of epoch 1986 : 2.2690759
average loss of epoch 1987 : 2.2689235
average loss of epoch 1988 : 2.268772
average loss of epoch 1989 : 2.2686198
average loss of epoch 1990 : 2.2684681
average loss of epoch 1991 : 2.2683165
average loss of epoch 1992 : 2.268

average loss of epoch 2182 : 2.241189
average loss of epoch 2183 : 2.241056
average loss of epoch 2184 : 2.2409227
average loss of epoch 2185 : 2.2407897
average loss of epoch 2186 : 2.240657
average loss of epoch 2187 : 2.2405243
average loss of epoch 2188 : 2.2403917
average loss of epoch 2189 : 2.2402592
average loss of epoch 2190 : 2.2401268
average loss of epoch 2191 : 2.2399938
average loss of epoch 2192 : 2.2398615
average loss of epoch 2193 : 2.2397294
average loss of epoch 2194 : 2.239597
average loss of epoch 2195 : 2.2394655
average loss of epoch 2196 : 2.2393334
average loss of epoch 2197 : 2.2392015
average loss of epoch 2198 : 2.2390695
average loss of epoch 2199 : 2.2389379
average loss of epoch 2200 : 2.238806
average loss of epoch 2201 : 2.2386746
average loss of epoch 2202 : 2.238543
average loss of epoch 2203 : 2.2384117
average loss of epoch 2204 : 2.2382805
average loss of epoch 2205 : 2.2381494
average loss of epoch 2206 : 2.2380185
average loss of epoch 2207 : 2.

average loss of epoch 2397 : 2.214465
average loss of epoch 2398 : 2.2143488
average loss of epoch 2399 : 2.2142332
average loss of epoch 2400 : 2.2141173
average loss of epoch 2401 : 2.2140014
average loss of epoch 2402 : 2.2138858
average loss of epoch 2403 : 2.2137702
average loss of epoch 2404 : 2.2136545
average loss of epoch 2405 : 2.213539
average loss of epoch 2406 : 2.2134237
average loss of epoch 2407 : 2.2133083
average loss of epoch 2408 : 2.213193
average loss of epoch 2409 : 2.213078
average loss of epoch 2410 : 2.2129629
average loss of epoch 2411 : 2.2128477
average loss of epoch 2412 : 2.2127326
average loss of epoch 2413 : 2.2126179
average loss of epoch 2414 : 2.212503
average loss of epoch 2415 : 2.212388
average loss of epoch 2416 : 2.2122734
average loss of epoch 2417 : 2.2121587
average loss of epoch 2418 : 2.212044
average loss of epoch 2419 : 2.2119298
average loss of epoch 2420 : 2.211815
average loss of epoch 2421 : 2.211701
average loss of epoch 2422 : 2.211

average loss of epoch 2611 : 2.1911626
average loss of epoch 2612 : 2.19106
average loss of epoch 2613 : 2.1909583
average loss of epoch 2614 : 2.1908562
average loss of epoch 2615 : 2.1907544
average loss of epoch 2616 : 2.1906524
average loss of epoch 2617 : 2.1905508
average loss of epoch 2618 : 2.1904488
average loss of epoch 2619 : 2.1903474
average loss of epoch 2620 : 2.1902456
average loss of epoch 2621 : 2.190144
average loss of epoch 2622 : 2.1900425
average loss of epoch 2623 : 2.189941
average loss of epoch 2624 : 2.1898398
average loss of epoch 2625 : 2.1897383
average loss of epoch 2626 : 2.1896374
average loss of epoch 2627 : 2.189536
average loss of epoch 2628 : 2.1894348
average loss of epoch 2629 : 2.1893337
average loss of epoch 2630 : 2.1892328
average loss of epoch 2631 : 2.1891317
average loss of epoch 2632 : 2.1890311
average loss of epoch 2633 : 2.1889303
average loss of epoch 2634 : 2.1888294
average loss of epoch 2635 : 2.1887288
average loss of epoch 2636 : 2

average loss of epoch 2827 : 2.170545
average loss of epoch 2828 : 2.1704562
average loss of epoch 2829 : 2.170368
average loss of epoch 2830 : 2.1702795
average loss of epoch 2831 : 2.1701908
average loss of epoch 2832 : 2.1701026
average loss of epoch 2833 : 2.1700144
average loss of epoch 2834 : 2.169926
average loss of epoch 2835 : 2.1698375
average loss of epoch 2836 : 2.16975
average loss of epoch 2837 : 2.1696615
average loss of epoch 2838 : 2.1695738
average loss of epoch 2839 : 2.1694858
average loss of epoch 2840 : 2.1693978
average loss of epoch 2841 : 2.16931
average loss of epoch 2842 : 2.1692224
average loss of epoch 2843 : 2.1691349
average loss of epoch 2844 : 2.169047
average loss of epoch 2845 : 2.1689596
average loss of epoch 2846 : 2.168872
average loss of epoch 2847 : 2.1687849
average loss of epoch 2848 : 2.1686976
average loss of epoch 2849 : 2.1686103
average loss of epoch 2850 : 2.1685228
average loss of epoch 2851 : 2.1684358
average loss of epoch 2852 : 2.168

average loss of epoch 3041 : 2.1530397
average loss of epoch 3042 : 2.1529644
average loss of epoch 3043 : 2.1528895
average loss of epoch 3044 : 2.1528146
average loss of epoch 3045 : 2.1527398
average loss of epoch 3046 : 2.152665
average loss of epoch 3047 : 2.1525903
average loss of epoch 3048 : 2.1525156
average loss of epoch 3049 : 2.152441
average loss of epoch 3050 : 2.1523664
average loss of epoch 3051 : 2.152292
average loss of epoch 3052 : 2.1522176
average loss of epoch 3053 : 2.1521432
average loss of epoch 3054 : 2.152069
average loss of epoch 3055 : 2.1519947
average loss of epoch 3056 : 2.1519208
average loss of epoch 3057 : 2.1518464
average loss of epoch 3058 : 2.1517725
average loss of epoch 3059 : 2.1516986
average loss of epoch 3060 : 2.1516247
average loss of epoch 3061 : 2.151551
average loss of epoch 3062 : 2.1514773
average loss of epoch 3063 : 2.1514034
average loss of epoch 3064 : 2.1513298
average loss of epoch 3065 : 2.151256
average loss of epoch 3066 : 2.

average loss of epoch 3257 : 2.1382048
average loss of epoch 3258 : 2.1381423
average loss of epoch 3259 : 2.1380796
average loss of epoch 3260 : 2.1380172
average loss of epoch 3261 : 2.1379547
average loss of epoch 3262 : 2.137892
average loss of epoch 3263 : 2.1378298
average loss of epoch 3264 : 2.1377673
average loss of epoch 3265 : 2.137705
average loss of epoch 3266 : 2.1376429
average loss of epoch 3267 : 2.1375809
average loss of epoch 3268 : 2.1375184
average loss of epoch 3269 : 2.1374564
average loss of epoch 3270 : 2.1373944
average loss of epoch 3271 : 2.1373324
average loss of epoch 3272 : 2.1372707
average loss of epoch 3273 : 2.137209
average loss of epoch 3274 : 2.137147
average loss of epoch 3275 : 2.1370852
average loss of epoch 3276 : 2.1370234
average loss of epoch 3277 : 2.136962
average loss of epoch 3278 : 2.1369002
average loss of epoch 3279 : 2.1368387
average loss of epoch 3280 : 2.136777
average loss of epoch 3281 : 2.1367157
average loss of epoch 3282 : 2.

average loss of epoch 3473 : 2.125791
average loss of epoch 3474 : 2.1257384
average loss of epoch 3475 : 2.1256857
average loss of epoch 3476 : 2.1256328
average loss of epoch 3477 : 2.1255805
average loss of epoch 3478 : 2.1255279
average loss of epoch 3479 : 2.1254754
average loss of epoch 3480 : 2.125423
average loss of epoch 3481 : 2.1253705
average loss of epoch 3482 : 2.1253183
average loss of epoch 3483 : 2.1252658
average loss of epoch 3484 : 2.1252134
average loss of epoch 3485 : 2.1251612
average loss of epoch 3486 : 2.1251092
average loss of epoch 3487 : 2.125057
average loss of epoch 3488 : 2.1250048
average loss of epoch 3489 : 2.1249526
average loss of epoch 3490 : 2.1249003
average loss of epoch 3491 : 2.1248484
average loss of epoch 3492 : 2.1247966
average loss of epoch 3493 : 2.1247447
average loss of epoch 3494 : 2.1246927
average loss of epoch 3495 : 2.1246407
average loss of epoch 3496 : 2.1245892
average loss of epoch 3497 : 2.1245372
average loss of epoch 3498 :

average loss of epoch 3688 : 2.1152914
average loss of epoch 3689 : 2.1152463
average loss of epoch 3690 : 2.115201
average loss of epoch 3691 : 2.1151557
average loss of epoch 3692 : 2.1151104
average loss of epoch 3693 : 2.115065
average loss of epoch 3694 : 2.1150203
average loss of epoch 3695 : 2.1149747
average loss of epoch 3696 : 2.1149297
average loss of epoch 3697 : 2.1148846
average loss of epoch 3698 : 2.1148396
average loss of epoch 3699 : 2.1147945
average loss of epoch 3700 : 2.1147494
average loss of epoch 3701 : 2.1147046
average loss of epoch 3702 : 2.1146598
average loss of epoch 3703 : 2.1146147
average loss of epoch 3704 : 2.1145697
average loss of epoch 3705 : 2.1145248
average loss of epoch 3706 : 2.11448
average loss of epoch 3707 : 2.1144352
average loss of epoch 3708 : 2.1143906
average loss of epoch 3709 : 2.1143458
average loss of epoch 3710 : 2.114301
average loss of epoch 3711 : 2.1142561
average loss of epoch 3712 : 2.1142116
average loss of epoch 3713 : 2

average loss of epoch 3901 : 2.1062298
average loss of epoch 3902 : 2.10619
average loss of epoch 3903 : 2.10615
average loss of epoch 3904 : 2.10611
average loss of epoch 3905 : 2.10607
average loss of epoch 3906 : 2.1060305
average loss of epoch 3907 : 2.1059904
average loss of epoch 3908 : 2.1059506
average loss of epoch 3909 : 2.1059105
average loss of epoch 3910 : 2.1058707
average loss of epoch 3911 : 2.105831
average loss of epoch 3912 : 2.1057913
average loss of epoch 3913 : 2.1057513
average loss of epoch 3914 : 2.1057117
average loss of epoch 3915 : 2.1056721
average loss of epoch 3916 : 2.105632
average loss of epoch 3917 : 2.1055925
average loss of epoch 3918 : 2.105553
average loss of epoch 3919 : 2.105513
average loss of epoch 3920 : 2.1054735
average loss of epoch 3921 : 2.105434
average loss of epoch 3922 : 2.1053944
average loss of epoch 3923 : 2.1053548
average loss of epoch 3924 : 2.1053154
average loss of epoch 3925 : 2.1052756
average loss of epoch 3926 : 2.1052363

average loss of epoch 4114 : 2.0981512
average loss of epoch 4115 : 2.0981154
average loss of epoch 4116 : 2.0980794
average loss of epoch 4117 : 2.0980432
average loss of epoch 4118 : 2.0980074
average loss of epoch 4119 : 2.0979714
average loss of epoch 4120 : 2.0979357
average loss of epoch 4121 : 2.0978997
average loss of epoch 4122 : 2.0978637
average loss of epoch 4123 : 2.097828
average loss of epoch 4124 : 2.097792
average loss of epoch 4125 : 2.0977564
average loss of epoch 4126 : 2.0977204
average loss of epoch 4127 : 2.0976849
average loss of epoch 4128 : 2.0976489
average loss of epoch 4129 : 2.097613
average loss of epoch 4130 : 2.097577
average loss of epoch 4131 : 2.0975413
average loss of epoch 4132 : 2.0975058
average loss of epoch 4133 : 2.09747
average loss of epoch 4134 : 2.0974343
average loss of epoch 4135 : 2.0973985
average loss of epoch 4136 : 2.0973632
average loss of epoch 4137 : 2.0973275
average loss of epoch 4138 : 2.0972917
average loss of epoch 4139 : 2.

average loss of epoch 4326 : 2.0908604
average loss of epoch 4327 : 2.0908275
average loss of epoch 4328 : 2.0907946
average loss of epoch 4329 : 2.0907617
average loss of epoch 4330 : 2.090729
average loss of epoch 4331 : 2.0906963
average loss of epoch 4332 : 2.0906634
average loss of epoch 4333 : 2.0906305
average loss of epoch 4334 : 2.0905976
average loss of epoch 4335 : 2.0905652
average loss of epoch 4336 : 2.0905323
average loss of epoch 4337 : 2.0904996
average loss of epoch 4338 : 2.0904665
average loss of epoch 4339 : 2.090434
average loss of epoch 4340 : 2.0904014
average loss of epoch 4341 : 2.0903685
average loss of epoch 4342 : 2.090336
average loss of epoch 4343 : 2.0903034
average loss of epoch 4344 : 2.0902708
average loss of epoch 4345 : 2.090238
average loss of epoch 4346 : 2.0902052
average loss of epoch 4347 : 2.0901728
average loss of epoch 4348 : 2.0901403
average loss of epoch 4349 : 2.0901077
average loss of epoch 4350 : 2.0900753
average loss of epoch 4351 : 

average loss of epoch 4543 : 2.0840213
average loss of epoch 4544 : 2.083991
average loss of epoch 4545 : 2.0839608
average loss of epoch 4546 : 2.0839305
average loss of epoch 4547 : 2.0839005
average loss of epoch 4548 : 2.0838702
average loss of epoch 4549 : 2.08384
average loss of epoch 4550 : 2.0838099
average loss of epoch 4551 : 2.0837798
average loss of epoch 4552 : 2.0837498
average loss of epoch 4553 : 2.0837195
average loss of epoch 4554 : 2.0836895
average loss of epoch 4555 : 2.0836594
average loss of epoch 4556 : 2.0836291
average loss of epoch 4557 : 2.083599
average loss of epoch 4558 : 2.0835688
average loss of epoch 4559 : 2.083539
average loss of epoch 4560 : 2.083509
average loss of epoch 4561 : 2.083479
average loss of epoch 4562 : 2.0834486
average loss of epoch 4563 : 2.0834188
average loss of epoch 4564 : 2.0833888
average loss of epoch 4565 : 2.083359
average loss of epoch 4566 : 2.0833287
average loss of epoch 4567 : 2.083299
average loss of epoch 4568 : 2.083

average loss of epoch 4757 : 2.0777965
average loss of epoch 4758 : 2.0777686
average loss of epoch 4759 : 2.0777404
average loss of epoch 4760 : 2.0777125
average loss of epoch 4761 : 2.0776846
average loss of epoch 4762 : 2.0776567
average loss of epoch 4763 : 2.0776289
average loss of epoch 4764 : 2.0776007
average loss of epoch 4765 : 2.077573
average loss of epoch 4766 : 2.077545
average loss of epoch 4767 : 2.0775168
average loss of epoch 4768 : 2.0774891
average loss of epoch 4769 : 2.0774615
average loss of epoch 4770 : 2.0774336
average loss of epoch 4771 : 2.0774057
average loss of epoch 4772 : 2.0773778
average loss of epoch 4773 : 2.0773501
average loss of epoch 4774 : 2.0773222
average loss of epoch 4775 : 2.077294
average loss of epoch 4776 : 2.0772665
average loss of epoch 4777 : 2.0772386
average loss of epoch 4778 : 2.077211
average loss of epoch 4779 : 2.0771832
average loss of epoch 4780 : 2.0771554
average loss of epoch 4781 : 2.0771275
average loss of epoch 4782 : 

average loss of epoch 4970 : 2.0720518
average loss of epoch 4971 : 2.0720258
average loss of epoch 4972 : 2.0719998
average loss of epoch 4973 : 2.0719738
average loss of epoch 4974 : 2.071948
average loss of epoch 4975 : 2.071922
average loss of epoch 4976 : 2.071896
average loss of epoch 4977 : 2.0718703
average loss of epoch 4978 : 2.071844
average loss of epoch 4979 : 2.0718184
average loss of epoch 4980 : 2.0717924
average loss of epoch 4981 : 2.0717666
average loss of epoch 4982 : 2.0717409
average loss of epoch 4983 : 2.0717146
average loss of epoch 4984 : 2.071689
average loss of epoch 4985 : 2.0716634
average loss of epoch 4986 : 2.0716372
average loss of epoch 4987 : 2.0716116
average loss of epoch 4988 : 2.0715857
average loss of epoch 4989 : 2.07156
average loss of epoch 4990 : 2.071534
average loss of epoch 4991 : 2.0715082
average loss of epoch 4992 : 2.0714824
average loss of epoch 4993 : 2.0714567
average loss of epoch 4994 : 2.071431
average loss of epoch 4995 : 2.071

average loss of epoch 5183 : 2.066721
average loss of epoch 5184 : 2.0666966
average loss of epoch 5185 : 2.0666723
average loss of epoch 5186 : 2.0666485
average loss of epoch 5187 : 2.0666244
average loss of epoch 5188 : 2.0666003
average loss of epoch 5189 : 2.0665765
average loss of epoch 5190 : 2.0665522
average loss of epoch 5191 : 2.066528
average loss of epoch 5192 : 2.0665042
average loss of epoch 5193 : 2.0664802
average loss of epoch 5194 : 2.066456
average loss of epoch 5195 : 2.0664322
average loss of epoch 5196 : 2.0664082
average loss of epoch 5197 : 2.0663843
average loss of epoch 5198 : 2.0663602
average loss of epoch 5199 : 2.0663364
average loss of epoch 5200 : 2.0663123
average loss of epoch 5201 : 2.0662885
average loss of epoch 5202 : 2.0662646
average loss of epoch 5203 : 2.0662405
average loss of epoch 5204 : 2.0662167
average loss of epoch 5205 : 2.0661929
average loss of epoch 5206 : 2.066169
average loss of epoch 5207 : 2.066145
average loss of epoch 5208 : 2

average loss of epoch 5396 : 2.0617802
average loss of epoch 5397 : 2.0617578
average loss of epoch 5398 : 2.0617356
average loss of epoch 5399 : 2.0617135
average loss of epoch 5400 : 2.061691
average loss of epoch 5401 : 2.0616689
average loss of epoch 5402 : 2.061647
average loss of epoch 5403 : 2.0616243
average loss of epoch 5404 : 2.061602
average loss of epoch 5405 : 2.0615797
average loss of epoch 5406 : 2.0615575
average loss of epoch 5407 : 2.0615354
average loss of epoch 5408 : 2.061513
average loss of epoch 5409 : 2.0614908
average loss of epoch 5410 : 2.0614686
average loss of epoch 5411 : 2.0614464
average loss of epoch 5412 : 2.0614245
average loss of epoch 5413 : 2.0614023
average loss of epoch 5414 : 2.0613801
average loss of epoch 5415 : 2.0613577
average loss of epoch 5416 : 2.0613358
average loss of epoch 5417 : 2.0613134
average loss of epoch 5418 : 2.0612915
average loss of epoch 5419 : 2.0612693
average loss of epoch 5420 : 2.061247
average loss of epoch 5421 : 2

average loss of epoch 5608 : 2.0572314
average loss of epoch 5609 : 2.0572107
average loss of epoch 5610 : 2.0571904
average loss of epoch 5611 : 2.05717
average loss of epoch 5612 : 2.0571494
average loss of epoch 5613 : 2.0571287
average loss of epoch 5614 : 2.057108
average loss of epoch 5615 : 2.0570874
average loss of epoch 5616 : 2.057067
average loss of epoch 5617 : 2.0570464
average loss of epoch 5618 : 2.0570257
average loss of epoch 5619 : 2.0570052
average loss of epoch 5620 : 2.056985
average loss of epoch 5621 : 2.0569642
average loss of epoch 5622 : 2.056944
average loss of epoch 5623 : 2.0569232
average loss of epoch 5624 : 2.0569026
average loss of epoch 5625 : 2.0568821
average loss of epoch 5626 : 2.0568616
average loss of epoch 5627 : 2.0568411
average loss of epoch 5628 : 2.0568206
average loss of epoch 5629 : 2.0568004
average loss of epoch 5630 : 2.0567799
average loss of epoch 5631 : 2.0567594
average loss of epoch 5632 : 2.0567389
average loss of epoch 5633 : 2.

average loss of epoch 5820 : 2.0530286
average loss of epoch 5821 : 2.0530095
average loss of epoch 5822 : 2.0529904
average loss of epoch 5823 : 2.0529714
average loss of epoch 5824 : 2.0529523
average loss of epoch 5825 : 2.0529332
average loss of epoch 5826 : 2.0529144
average loss of epoch 5827 : 2.0528953
average loss of epoch 5828 : 2.0528762
average loss of epoch 5829 : 2.0528572
average loss of epoch 5830 : 2.0528383
average loss of epoch 5831 : 2.0528193
average loss of epoch 5832 : 2.0528002
average loss of epoch 5833 : 2.0527816
average loss of epoch 5834 : 2.0527625
average loss of epoch 5835 : 2.0527434
average loss of epoch 5836 : 2.0527244
average loss of epoch 5837 : 2.0527055
average loss of epoch 5838 : 2.0526867
average loss of epoch 5839 : 2.0526676
average loss of epoch 5840 : 2.0526488
average loss of epoch 5841 : 2.0526297
average loss of epoch 5842 : 2.0526114
average loss of epoch 5843 : 2.0525923
average loss of epoch 5844 : 2.0525732
average loss of epoch 584

average loss of epoch 6034 : 2.049108
average loss of epoch 6035 : 2.0490904
average loss of epoch 6036 : 2.0490727
average loss of epoch 6037 : 2.049055
average loss of epoch 6038 : 2.0490372
average loss of epoch 6039 : 2.0490198
average loss of epoch 6040 : 2.0490022
average loss of epoch 6041 : 2.048985
average loss of epoch 6042 : 2.0489671
average loss of epoch 6043 : 2.0489495
average loss of epoch 6044 : 2.048932
average loss of epoch 6045 : 2.0489144
average loss of epoch 6046 : 2.048897
average loss of epoch 6047 : 2.0488794
average loss of epoch 6048 : 2.048862
average loss of epoch 6049 : 2.0488446
average loss of epoch 6050 : 2.048827
average loss of epoch 6051 : 2.0488095
average loss of epoch 6052 : 2.048792
average loss of epoch 6053 : 2.0487742
average loss of epoch 6054 : 2.0487566
average loss of epoch 6055 : 2.0487394
average loss of epoch 6056 : 2.0487218
average loss of epoch 6057 : 2.0487044
average loss of epoch 6058 : 2.048687
average loss of epoch 6059 : 2.048

average loss of epoch 6245 : 2.0455272
average loss of epoch 6246 : 2.045511
average loss of epoch 6247 : 2.0454946
average loss of epoch 6248 : 2.045478
average loss of epoch 6249 : 2.045462
average loss of epoch 6250 : 2.0454454
average loss of epoch 6251 : 2.0454292
average loss of epoch 6252 : 2.045413
average loss of epoch 6253 : 2.0453966
average loss of epoch 6254 : 2.0453804
average loss of epoch 6255 : 2.045364
average loss of epoch 6256 : 2.0453477
average loss of epoch 6257 : 2.0453315
average loss of epoch 6258 : 2.0453153
average loss of epoch 6259 : 2.0452988
average loss of epoch 6260 : 2.0452828
average loss of epoch 6261 : 2.0452664
average loss of epoch 6262 : 2.0452502
average loss of epoch 6263 : 2.045234
average loss of epoch 6264 : 2.0452175
average loss of epoch 6265 : 2.0452013
average loss of epoch 6266 : 2.0451853
average loss of epoch 6267 : 2.0451689
average loss of epoch 6268 : 2.0451527
average loss of epoch 6269 : 2.0451365
average loss of epoch 6270 : 2.

average loss of epoch 6463 : 2.042091
average loss of epoch 6464 : 2.0420756
average loss of epoch 6465 : 2.0420606
average loss of epoch 6466 : 2.0420449
average loss of epoch 6467 : 2.04203
average loss of epoch 6468 : 2.0420148
average loss of epoch 6469 : 2.0419996
average loss of epoch 6470 : 2.0419843
average loss of epoch 6471 : 2.0419695
average loss of epoch 6472 : 2.041954
average loss of epoch 6473 : 2.041939
average loss of epoch 6474 : 2.041924
average loss of epoch 6475 : 2.041909
average loss of epoch 6476 : 2.0418937
average loss of epoch 6477 : 2.0418785
average loss of epoch 6478 : 2.0418634
average loss of epoch 6479 : 2.041848
average loss of epoch 6480 : 2.041833
average loss of epoch 6481 : 2.041818
average loss of epoch 6482 : 2.0418026
average loss of epoch 6483 : 2.0417879
average loss of epoch 6484 : 2.0417726
average loss of epoch 6485 : 2.0417576
average loss of epoch 6486 : 2.0417423
average loss of epoch 6487 : 2.0417275
average loss of epoch 6488 : 2.0417

average loss of epoch 6679 : 2.0389183
average loss of epoch 6680 : 2.0389044
average loss of epoch 6681 : 2.0388901
average loss of epoch 6682 : 2.038876
average loss of epoch 6683 : 2.0388618
average loss of epoch 6684 : 2.0388477
average loss of epoch 6685 : 2.0388334
average loss of epoch 6686 : 2.0388193
average loss of epoch 6687 : 2.038805
average loss of epoch 6688 : 2.0387912
average loss of epoch 6689 : 2.0387769
average loss of epoch 6690 : 2.0387626
average loss of epoch 6691 : 2.0387483
average loss of epoch 6692 : 2.0387344
average loss of epoch 6693 : 2.0387201
average loss of epoch 6694 : 2.038706
average loss of epoch 6695 : 2.0386918
average loss of epoch 6696 : 2.038678
average loss of epoch 6697 : 2.0386636
average loss of epoch 6698 : 2.0386498
average loss of epoch 6699 : 2.0386355
average loss of epoch 6700 : 2.0386214
average loss of epoch 6701 : 2.0386074
average loss of epoch 6702 : 2.038593
average loss of epoch 6703 : 2.038579
average loss of epoch 6704 : 2.

average loss of epoch 6894 : 2.0359633
average loss of epoch 6895 : 2.03595
average loss of epoch 6896 : 2.0359366
average loss of epoch 6897 : 2.0359235
average loss of epoch 6898 : 2.0359101
average loss of epoch 6899 : 2.0358968
average loss of epoch 6900 : 2.0358837
average loss of epoch 6901 : 2.0358703
average loss of epoch 6902 : 2.035857
average loss of epoch 6903 : 2.0358436
average loss of epoch 6904 : 2.0358305
average loss of epoch 6905 : 2.0358171
average loss of epoch 6906 : 2.0358038
average loss of epoch 6907 : 2.0357907
average loss of epoch 6908 : 2.0357773
average loss of epoch 6909 : 2.0357642
average loss of epoch 6910 : 2.0357509
average loss of epoch 6911 : 2.0357378
average loss of epoch 6912 : 2.0357244
average loss of epoch 6913 : 2.035711
average loss of epoch 6914 : 2.0356977
average loss of epoch 6915 : 2.0356846
average loss of epoch 6916 : 2.0356715
average loss of epoch 6917 : 2.035658
average loss of epoch 6918 : 2.035645
average loss of epoch 6919 : 2.

average loss of epoch 7109 : 2.0331864
average loss of epoch 7110 : 2.0331738
average loss of epoch 7111 : 2.0331614
average loss of epoch 7112 : 2.033149
average loss of epoch 7113 : 2.0331366
average loss of epoch 7114 : 2.0331237
average loss of epoch 7115 : 2.033111
average loss of epoch 7116 : 2.0330987
average loss of epoch 7117 : 2.0330863
average loss of epoch 7118 : 2.0330737
average loss of epoch 7119 : 2.0330613
average loss of epoch 7120 : 2.0330489
average loss of epoch 7121 : 2.0330365
average loss of epoch 7122 : 2.033024
average loss of epoch 7123 : 2.0330114
average loss of epoch 7124 : 2.0329988
average loss of epoch 7125 : 2.0329864
average loss of epoch 7126 : 2.032974
average loss of epoch 7127 : 2.0329611
average loss of epoch 7128 : 2.032949
average loss of epoch 7129 : 2.0329363
average loss of epoch 7130 : 2.032924
average loss of epoch 7131 : 2.0329113
average loss of epoch 7132 : 2.032899
average loss of epoch 7133 : 2.0328863
average loss of epoch 7134 : 2.0

average loss of epoch 7325 : 2.0305555
average loss of epoch 7326 : 2.0305436
average loss of epoch 7327 : 2.030532
average loss of epoch 7328 : 2.0305197
average loss of epoch 7329 : 2.030508
average loss of epoch 7330 : 2.0304961
average loss of epoch 7331 : 2.0304847
average loss of epoch 7332 : 2.0304725
average loss of epoch 7333 : 2.0304606
average loss of epoch 7334 : 2.0304492
average loss of epoch 7335 : 2.030437
average loss of epoch 7336 : 2.0304253
average loss of epoch 7337 : 2.0304136
average loss of epoch 7338 : 2.0304017
average loss of epoch 7339 : 2.03039
average loss of epoch 7340 : 2.030378
average loss of epoch 7341 : 2.0303662
average loss of epoch 7342 : 2.0303545
average loss of epoch 7343 : 2.0303426
average loss of epoch 7344 : 2.0303311
average loss of epoch 7345 : 2.0303192
average loss of epoch 7346 : 2.0303073
average loss of epoch 7347 : 2.0302956
average loss of epoch 7348 : 2.0302835
average loss of epoch 7349 : 2.030272
average loss of epoch 7350 : 2.0

average loss of epoch 7539 : 2.0280876
average loss of epoch 7540 : 2.0280764
average loss of epoch 7541 : 2.0280652
average loss of epoch 7542 : 2.0280538
average loss of epoch 7543 : 2.0280426
average loss of epoch 7544 : 2.0280313
average loss of epoch 7545 : 2.0280204
average loss of epoch 7546 : 2.0280092
average loss of epoch 7547 : 2.0279977
average loss of epoch 7548 : 2.0279865
average loss of epoch 7549 : 2.0279753
average loss of epoch 7550 : 2.027964
average loss of epoch 7551 : 2.0279531
average loss of epoch 7552 : 2.0279417
average loss of epoch 7553 : 2.0279305
average loss of epoch 7554 : 2.0279193
average loss of epoch 7555 : 2.027908
average loss of epoch 7556 : 2.027897
average loss of epoch 7557 : 2.0278857
average loss of epoch 7558 : 2.0278747
average loss of epoch 7559 : 2.0278637
average loss of epoch 7560 : 2.027852
average loss of epoch 7561 : 2.027841
average loss of epoch 7562 : 2.02783
average loss of epoch 7563 : 2.0278187
average loss of epoch 7564 : 2.0

average loss of epoch 7750 : 2.0257738
average loss of epoch 7751 : 2.0257628
average loss of epoch 7752 : 2.0257523
average loss of epoch 7753 : 2.0257416
average loss of epoch 7754 : 2.0257308
average loss of epoch 7755 : 2.02572
average loss of epoch 7756 : 2.0257094
average loss of epoch 7757 : 2.0256987
average loss of epoch 7758 : 2.025688
average loss of epoch 7759 : 2.0256772
average loss of epoch 7760 : 2.0256667
average loss of epoch 7761 : 2.0256557
average loss of epoch 7762 : 2.0256453
average loss of epoch 7763 : 2.0256345
average loss of epoch 7764 : 2.025624
average loss of epoch 7765 : 2.025613
average loss of epoch 7766 : 2.0256026
average loss of epoch 7767 : 2.025592
average loss of epoch 7768 : 2.0255811
average loss of epoch 7769 : 2.0255706
average loss of epoch 7770 : 2.0255601
average loss of epoch 7771 : 2.0255492
average loss of epoch 7772 : 2.0255384
average loss of epoch 7773 : 2.0255277
average loss of epoch 7774 : 2.0255172
average loss of epoch 7775 : 2.

average loss of epoch 7965 : 2.0235243
average loss of epoch 7966 : 2.023514
average loss of epoch 7967 : 2.023504
average loss of epoch 7968 : 2.0234938
average loss of epoch 7969 : 2.0234835
average loss of epoch 7970 : 2.0234733
average loss of epoch 7971 : 2.0234632
average loss of epoch 7972 : 2.023453
average loss of epoch 7973 : 2.0234425
average loss of epoch 7974 : 2.0234325
average loss of epoch 7975 : 2.0234225
average loss of epoch 7976 : 2.0234122
average loss of epoch 7977 : 2.0234017
average loss of epoch 7978 : 2.0233917
average loss of epoch 7979 : 2.0233815
average loss of epoch 7980 : 2.0233712
average loss of epoch 7981 : 2.0233612
average loss of epoch 7982 : 2.0233512
average loss of epoch 7983 : 2.0233407
average loss of epoch 7984 : 2.0233307
average loss of epoch 7985 : 2.0233202
average loss of epoch 7986 : 2.0233102
average loss of epoch 7987 : 2.0233
average loss of epoch 7988 : 2.0232897
average loss of epoch 7989 : 2.0232797
average loss of epoch 7990 : 2.

average loss of epoch 8178 : 2.0213943
average loss of epoch 8179 : 2.0213845
average loss of epoch 8180 : 2.0213745
average loss of epoch 8181 : 2.0213652
average loss of epoch 8182 : 2.0213552
average loss of epoch 8183 : 2.0213454
average loss of epoch 8184 : 2.0213356
average loss of epoch 8185 : 2.0213258
average loss of epoch 8186 : 2.021316
average loss of epoch 8187 : 2.0213063
average loss of epoch 8188 : 2.0212965
average loss of epoch 8189 : 2.0212865
average loss of epoch 8190 : 2.0212772
average loss of epoch 8191 : 2.0212674
average loss of epoch 8192 : 2.0212574
average loss of epoch 8193 : 2.0212476
average loss of epoch 8194 : 2.021238
average loss of epoch 8195 : 2.0212283
average loss of epoch 8196 : 2.0212183
average loss of epoch 8197 : 2.0212088
average loss of epoch 8198 : 2.021199
average loss of epoch 8199 : 2.0211892
average loss of epoch 8200 : 2.0211797
average loss of epoch 8201 : 2.02117
average loss of epoch 8202 : 2.0211601
average loss of epoch 8203 : 2

average loss of epoch 8395 : 2.0193183
average loss of epoch 8396 : 2.019309
average loss of epoch 8397 : 2.0192995
average loss of epoch 8398 : 2.0192902
average loss of epoch 8399 : 2.0192811
average loss of epoch 8400 : 2.0192716
average loss of epoch 8401 : 2.0192623
average loss of epoch 8402 : 2.0192528
average loss of epoch 8403 : 2.0192435
average loss of epoch 8404 : 2.0192342
average loss of epoch 8405 : 2.019225
average loss of epoch 8406 : 2.0192156
average loss of epoch 8407 : 2.019206
average loss of epoch 8408 : 2.019197
average loss of epoch 8409 : 2.0191875
average loss of epoch 8410 : 2.0191782
average loss of epoch 8411 : 2.019169
average loss of epoch 8412 : 2.0191596
average loss of epoch 8413 : 2.0191505
average loss of epoch 8414 : 2.019141
average loss of epoch 8415 : 2.0191314
average loss of epoch 8416 : 2.0191224
average loss of epoch 8417 : 2.019113
average loss of epoch 8418 : 2.0191038
average loss of epoch 8419 : 2.0190945
average loss of epoch 8420 : 2.0

average loss of epoch 8607 : 2.0173788
average loss of epoch 8608 : 2.01737
average loss of epoch 8609 : 2.0173612
average loss of epoch 8610 : 2.0173519
average loss of epoch 8611 : 2.017343
average loss of epoch 8612 : 2.0173342
average loss of epoch 8613 : 2.0173252
average loss of epoch 8614 : 2.0173163
average loss of epoch 8615 : 2.0173075
average loss of epoch 8616 : 2.0172985
average loss of epoch 8617 : 2.0172896
average loss of epoch 8618 : 2.0172806
average loss of epoch 8619 : 2.0172715
average loss of epoch 8620 : 2.0172627
average loss of epoch 8621 : 2.017254
average loss of epoch 8622 : 2.017245
average loss of epoch 8623 : 2.017236
average loss of epoch 8624 : 2.017227
average loss of epoch 8625 : 2.0172184
average loss of epoch 8626 : 2.017209
average loss of epoch 8627 : 2.0172005
average loss of epoch 8628 : 2.0171916
average loss of epoch 8629 : 2.0171824
average loss of epoch 8630 : 2.0171735
average loss of epoch 8631 : 2.017165
average loss of epoch 8632 : 2.017

average loss of epoch 8822 : 2.0155015
average loss of epoch 8823 : 2.0154932
average loss of epoch 8824 : 2.0154846
average loss of epoch 8825 : 2.015476
average loss of epoch 8826 : 2.0154676
average loss of epoch 8827 : 2.015459
average loss of epoch 8828 : 2.0154505
average loss of epoch 8829 : 2.0154421
average loss of epoch 8830 : 2.0154335
average loss of epoch 8831 : 2.015425
average loss of epoch 8832 : 2.0154166
average loss of epoch 8833 : 2.015408
average loss of epoch 8834 : 2.0153995
average loss of epoch 8835 : 2.0153909
average loss of epoch 8836 : 2.0153828
average loss of epoch 8837 : 2.0153742
average loss of epoch 8838 : 2.0153656
average loss of epoch 8839 : 2.015357
average loss of epoch 8840 : 2.0153487
average loss of epoch 8841 : 2.01534
average loss of epoch 8842 : 2.0153317
average loss of epoch 8843 : 2.0153232
average loss of epoch 8844 : 2.0153146
average loss of epoch 8845 : 2.0153062
average loss of epoch 8846 : 2.0152977
average loss of epoch 8847 : 2.0

average loss of epoch 9034 : 2.013739
average loss of epoch 9035 : 2.013731
average loss of epoch 9036 : 2.0137231
average loss of epoch 9037 : 2.0137148
average loss of epoch 9038 : 2.0137067
average loss of epoch 9039 : 2.0136988
average loss of epoch 9040 : 2.0136905
average loss of epoch 9041 : 2.0136824
average loss of epoch 9042 : 2.0136743
average loss of epoch 9043 : 2.0136664
average loss of epoch 9044 : 2.0136583
average loss of epoch 9045 : 2.01365
average loss of epoch 9046 : 2.013642
average loss of epoch 9047 : 2.013634
average loss of epoch 9048 : 2.0136256
average loss of epoch 9049 : 2.0136178
average loss of epoch 9050 : 2.0136096
average loss of epoch 9051 : 2.0136015
average loss of epoch 9052 : 2.013594
average loss of epoch 9053 : 2.0135856
average loss of epoch 9054 : 2.0135775
average loss of epoch 9055 : 2.0135694
average loss of epoch 9056 : 2.0135612
average loss of epoch 9057 : 2.0135531
average loss of epoch 9058 : 2.013545
average loss of epoch 9059 : 2.01

average loss of epoch 9248 : 2.0120466
average loss of epoch 9249 : 2.0120387
average loss of epoch 9250 : 2.0120313
average loss of epoch 9251 : 2.0120237
average loss of epoch 9252 : 2.0120158
average loss of epoch 9253 : 2.0120082
average loss of epoch 9254 : 2.0120006
average loss of epoch 9255 : 2.011993
average loss of epoch 9256 : 2.011985
average loss of epoch 9257 : 2.0119772
average loss of epoch 9258 : 2.0119698
average loss of epoch 9259 : 2.011962
average loss of epoch 9260 : 2.0119543
average loss of epoch 9261 : 2.0119467
average loss of epoch 9262 : 2.011939
average loss of epoch 9263 : 2.0119314
average loss of epoch 9264 : 2.0119238
average loss of epoch 9265 : 2.0119157
average loss of epoch 9266 : 2.0119083
average loss of epoch 9267 : 2.0119004
average loss of epoch 9268 : 2.0118928
average loss of epoch 9269 : 2.0118852
average loss of epoch 9270 : 2.0118775
average loss of epoch 9271 : 2.01187
average loss of epoch 9272 : 2.011862
average loss of epoch 9273 : 2.0

average loss of epoch 9460 : 2.0104501
average loss of epoch 9461 : 2.0104427
average loss of epoch 9462 : 2.0104353
average loss of epoch 9463 : 2.0104282
average loss of epoch 9464 : 2.0104208
average loss of epoch 9465 : 2.0104134
average loss of epoch 9466 : 2.010406
average loss of epoch 9467 : 2.0103989
average loss of epoch 9468 : 2.0103915
average loss of epoch 9469 : 2.0103843
average loss of epoch 9470 : 2.0103767
average loss of epoch 9471 : 2.0103693
average loss of epoch 9472 : 2.0103624
average loss of epoch 9473 : 2.0103548
average loss of epoch 9474 : 2.0103476
average loss of epoch 9475 : 2.01034
average loss of epoch 9476 : 2.0103328
average loss of epoch 9477 : 2.0103254
average loss of epoch 9478 : 2.010318
average loss of epoch 9479 : 2.0103106
average loss of epoch 9480 : 2.0103035
average loss of epoch 9481 : 2.010296
average loss of epoch 9482 : 2.0102887
average loss of epoch 9483 : 2.0102813
average loss of epoch 9484 : 2.0102744
average loss of epoch 9485 : 2

average loss of epoch 9672 : 2.008926
average loss of epoch 9673 : 2.008919
average loss of epoch 9674 : 2.0089118
average loss of epoch 9675 : 2.008905
average loss of epoch 9676 : 2.0088978
average loss of epoch 9677 : 2.0088906
average loss of epoch 9678 : 2.0088837
average loss of epoch 9679 : 2.0088766
average loss of epoch 9680 : 2.0088696
average loss of epoch 9681 : 2.0088627
average loss of epoch 9682 : 2.0088556
average loss of epoch 9683 : 2.0088487
average loss of epoch 9684 : 2.0088418
average loss of epoch 9685 : 2.0088346
average loss of epoch 9686 : 2.0088277
average loss of epoch 9687 : 2.0088205
average loss of epoch 9688 : 2.0088136
average loss of epoch 9689 : 2.0088065
average loss of epoch 9690 : 2.0087996
average loss of epoch 9691 : 2.0087924
average loss of epoch 9692 : 2.0087855
average loss of epoch 9693 : 2.0087788
average loss of epoch 9694 : 2.0087717
average loss of epoch 9695 : 2.0087645
average loss of epoch 9696 : 2.0087576
average loss of epoch 9697 :

average loss of epoch 9889 : 2.0074317
average loss of epoch 9890 : 2.007425
average loss of epoch 9891 : 2.0074184
average loss of epoch 9892 : 2.0074115
average loss of epoch 9893 : 2.0074048
average loss of epoch 9894 : 2.0073981
average loss of epoch 9895 : 2.0073912
average loss of epoch 9896 : 2.0073845
average loss of epoch 9897 : 2.007378
average loss of epoch 9898 : 2.0073712
average loss of epoch 9899 : 2.0073643
average loss of epoch 9900 : 2.0073576
average loss of epoch 9901 : 2.0073507
average loss of epoch 9902 : 2.007344
average loss of epoch 9903 : 2.0073376
average loss of epoch 9904 : 2.0073307
average loss of epoch 9905 : 2.007324
average loss of epoch 9906 : 2.0073173
average loss of epoch 9907 : 2.0073106
average loss of epoch 9908 : 2.0073037
average loss of epoch 9909 : 2.007297
average loss of epoch 9910 : 2.0072901
average loss of epoch 9911 : 2.0072837
average loss of epoch 9912 : 2.007277
average loss of epoch 9913 : 2.0072703
average loss of epoch 9914 : 2.

In [39]:
word_embedding = model[0].weight.data

In [40]:
def similarity(word1, word2):
    return cosine_similarity(word_embedding[word_to_ix[word1]], word_embedding[word_to_ix[word2]], dim=0).numpy()

In [41]:
def similarity_topn(word, n):
    words = list(vocab)
    words.sort(key=lambda w: similarity(w, word), reverse=True)
    return words[0:n]

In [44]:
similarity_topn("We", 10)

['We',
 'is',
 'directed',
 'by',
 'process',
 'a',
 'are',
 'process.',
 'inhabit',
 'pattern']

## Skipgram with hierarchical softmax