In [279]:
import json
import torch
from torch.autograd import Variable
from torch.nn.functional import log_softmax
from torch.nn import BCEWithLogitsLoss
from torch.optim import Adam
import numpy as np


# Name of the preferred device: the first CUDA GPU when one is available,
# otherwise the CPU.
dev = "cuda:0" if torch.cuda.is_available() else "cpu"

# NOTE(review): `dev` is not consulted here -- tensors created with
# `device=device` always land on the CPU, whatever was detected above.
device = torch.device('cpu')


def read_file(file: str):
    """Parse the JSON document stored at path ``file`` and return it.

    Args:
        file: Path of the JSON file to open for reading in text mode.

    Returns:
        The decoded JSON value (whatever ``json`` produces for the content).
    """
    with open(file, 'r') as handle:
        return json.load(handle)


def apply_importance_weighting(vocab_freq):
    """Replace each entry's frequency with a normalised 3/4-power weight.

    Every value of ``vocab_freq`` is indexed at ``[0]`` and ``[1]``; element
    ``[1]`` is raised to the power 3/4 and divided by the sum of those powers
    over all entries, so the resulting weights add up to 1. Element ``[0]``
    is carried over unchanged.

    Args:
        vocab_freq: Mapping whose values are indexable pairs; it is modified
            in place (each value becomes a 2-tuple).

    Returns:
        The same ``vocab_freq`` object that was passed in.
    """
    exponent = 3 / 4
    total = sum([entry[1] ** exponent for entry in vocab_freq.values()])

    # Only existing keys are reassigned, so the dict's size (and therefore
    # the iteration) is unaffected by the writes.
    for word in vocab_freq:
        entry = vocab_freq[word]
        vocab_freq[word] = (entry[0], entry[1] ** exponent / total)
    return vocab_freq

In [280]:
# Word -> integer id mapping (ids are later used to index embedding rows).
vocab = read_file('../data/vocab.json')

# Per-word entries whose second element becomes a sampling weight.
vocab_freq = apply_importance_weighting(
    read_file('../data/vocab_freq.json')
)

# Training samples; each one is read through its 'word' and 'context' keys.
train = read_file('../data/train.json')
# NOTE(review): `val` is loaded but not used by any cell visible here.
val = read_file('../data/val.json')

# Reverse lookup (id -> word) and the number of distinct words.
inv_vocab = dict((idx, word) for word, idx in vocab.items())
VOCAB_SIZE = len(vocab)


def generate_sample_words(size):
    """Draw random word ids according to the weights in ``vocab_freq``.

    Ids are sampled from ``0 .. len(vocab) - 1`` with ``np.random.choice``;
    the probability of each id is element ``[1]`` of the ``vocab_freq`` value
    at the same iteration position.
    NOTE(review): this assumes ``vocab_freq`` iterates in id order and has
    exactly ``len(vocab)`` entries -- confirm against the data files.

    Args:
        size: Output shape passed straight to ``np.random.choice``.

    Returns:
        A tensor of sampled ids placed on ``device``.
    """
    probabilities = [entry[1] for entry in vocab_freq.values()]
    word_ids = np.random.choice(
        range(0, len(vocab)),
        size=size,
        p=probabilities,
    )
    return torch.from_numpy(word_ids).to(device)

In [281]:
class SamplesDataset(torch.utils.data.Dataset):
    """Dataset that turns (word, context) samples into id tensors.

    Wraps an indexable collection ``ds`` whose items expose a ``'word'`` key
    and a ``'context'`` key (an iterable of words). Words are translated to
    integer ids through the module-level ``vocab`` mapping.
    """

    def __init__(self, ds):
        """Store the underlying sample collection ``ds``."""
        self.ds = ds

    def __len__(self):
        """Return the number of samples in the wrapped collection."""
        return len(self.ds)

    def __getitem__(self, idx):
        """Return ``(target, context)`` id tensors for sample ``idx``.

        ``target`` is a one-element long tensor holding the id of the
        sample's word; ``context`` is a long tensor with one id per context
        word. Both are created on ``device``.
        """
        record = self.ds[idx]
        target_id = vocab[record['word']]
        context_ids = [vocab[token] for token in record['context']]

        return (
            torch.tensor([target_id], dtype=torch.long, device=device),
            torch.tensor(context_ids, dtype=torch.long, device=device),
        )

        
def one_hot_id_tens(word_idx):
    """Build a vocabulary-sized indicator vector for ``word_idx``.

    Args:
        word_idx: Index (or collection/tensor of indices) to mark. When
            several indices are given, every one of them is set.

    Returns:
        A float tensor of length ``len(vocab)`` on ``device`` holding 1.0 at
        the position(s) selected by ``word_idx`` and 0.0 everywhere else.
    """
    indicator = torch.zeros(len(vocab), dtype=torch.float, device=device)
    indicator[word_idx] = 1.0
    return indicator

In [393]:
EMB_SIZE: int = 25       # width of each embedding vector
N_EPOCHS: int = 3        # full passes over the training samples
N_NEG_SAMPLES: int = 5   # negative words drawn per training sample
LOG_EVERY: int = 100     # print the running mean loss every this many steps
train_ds = SamplesDataset(train)

# Create both matrices directly on `device` as leaf tensors. Building them on
# the CPU and calling `.to(device)` afterwards would return a non-leaf copy on
# any other device, and its `.grad` would stay None after `backward()`.
target_embs = torch.randn(
    VOCAB_SIZE, EMB_SIZE, dtype=torch.float, device=device, requires_grad=True
)
context_embs = torch.randn(
    EMB_SIZE, VOCAB_SIZE, dtype=torch.float, device=device, requires_grad=True
)
loss_fn = BCEWithLogitsLoss()

learning_rate = 1


for epoch in range(0, N_EPOCHS):
    print(f'EPOCH: {epoch}')
    loss = 0
    # Plain Python float: summing the loss tensors themselves would keep every
    # step's autograd history reachable for the whole epoch.
    _cum_loss = 0.0
    for t, data in enumerate(train_ds):
        target, context = data

        # Forward pass, defined inline in the training loop.
        # Row lookup of the target word's embedding; selects the same row a
        # one-hot vector times `target_embs` would, without building the vector.
        l0_true = target_embs[target].squeeze(0)
        # One logit per vocabulary word.
        l1_true = torch.matmul(l0_true, context_embs)

        neg_sample_ctx = generate_sample_words(N_NEG_SAMPLES)

        # Logits of the true context words followed by the negative samples,
        # labelled 1 and 0 respectively. Label counts follow the actual number
        # of ids so a sample with a different context length still lines up.
        outputs = l1_true[torch.cat([context, neg_sample_ctx])]
        targets = torch.cat([
            torch.ones(len(context), device=device),
            torch.zeros(len(neg_sample_ctx), device=device),
        ])

        loss = loss_fn(outputs, targets)
        _cum_loss += loss.item()

        loss.backward()

        # Manual SGD step; no_grad keeps the in-place updates out of autograd.
        with torch.no_grad():
            target_embs -= learning_rate * target_embs.grad
            context_embs -= learning_rate * context_embs.grad

            target_embs.grad.zero_()
            context_embs.grad.zero_()

        if t % LOG_EVERY == 0:
            # t is zero-based, so t + 1 losses have been accumulated so far.
            mean_loss = _cum_loss / (t + 1)
            print(f'Iter: {t}, Loss: {mean_loss}', end='\r')


    
    
    


EPOCH: 0
EPOCH: 16200, Loss: 0.6551833152770996
EPOCH: 26200, Loss: 0.5983014702796936
Iter: 676200, Loss: 0.5889051556587219

In [394]:
# Keep a handle on the target-embedding values through `.data`.
# NOTE(review): `deet` is not referenced by any other cell visible here.
deet = target_embs.data

In [414]:
from torch.nn import CosineSimilarity

# Cosine similarity computed along the last axis of its inputs.
cos = CosineSimilarity(dim=-1, eps=1e-6)
# Id of the query word whose nearest neighbours are looked up.
w_id = 4

# Inference only: no autograd bookkeeping is needed for the comparison.
with torch.no_grad():
    # Compare the query row against every row of the target embeddings.
    sims = cos(target_embs[w_id], target_embs)
    # Ten highest similarities together with their row indices.
    ms = sims.topk(10)


In [415]:
# Display the top-10 similarity values and the matching row indices.
ms

torch.return_types.topk(
values=tensor([1.0000, 0.7292, 0.7183, 0.7156, 0.7007, 0.6981, 0.6926, 0.6809, 0.6765,
        0.6724]),
indices=tensor([   4,  828, 5845, 1295, 2726, 1506, 1296, 2038,  169,  551]))

In [423]:
# Translate id 2726 (one of the neighbour indices in the top-k result) back to its word.
inv_vocab[2726]

'come!'

In [419]:
# Look up the id of 'god'; the similarity query above uses w_id = 4.
vocab['god']

4

In [178]:
 # NOTE(review): this cell's execution count (178) is lower than every other
 # cell's, so the value shown below comes from an earlier run. It evaluates
 # the loss on whatever `outputs`/`targets` the training loop last assigned.
 loss_fn(outputs, targets)

tensor(0.7933, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)