In [1]:
import os

import numpy as np
import torch
import torch.nn as nn
from torch import optim

from encoder import Encoder

In [2]:
# Location of the GloVe word-vector file handed to the encoder below.
# The default is the original author's machine-specific path; set the
# W2V_PATH environment variable to point at a local copy instead of editing
# this cell.
W2V_PATH = os.environ.get(
    "W2V_PATH",
    "/home/jingjing/Desktop/InferSent-master/dataset/GloVe/glove.840B.300d.txt",
)

In [4]:
# Instantiate the project-local Encoder and point it at the word-vector file.
# `f` is the encoder object that the evaluation and training cells below use.
f = Encoder()
f.set_w2v_path(W2V_PATH)

In [8]:
def make_target(context_size, dim):
    """Build the row-normalised neighbour-target matrix for `dim` sentences.

    Entry (i, j) is non-zero when position j lies within `context_size`
    steps of position i (and j != i). Each row is scaled so that its
    non-zero entries sum to 1.

    Args:
        context_size: number of neighbouring positions considered on each side.
        dim: side length of the square matrix (number of sentences).

    Returns:
        A (dim, dim) torch.float64 tensor. Rows that have no neighbours at
        all (e.g. dim == 1 or context_size == 0) are left as zeros instead
        of turning into NaN through a 0/0 division.
    """
    targets = np.zeros((dim, dim))
    for offset in range(-context_size, context_size + 1):
        if offset == 0:
            # A sentence is never its own context.
            continue
        # Use `dim` here (not a fixed 3) so the off-diagonal band matches
        # the requested matrix size.
        targets += np.eye(dim, k=offset)
    row_sums = targets.sum(axis=1, keepdims=True)
    # Guard against division by zero for rows without any neighbour.
    row_sums[row_sums == 0] = 1.0
    return torch.from_numpy(targets / row_sums)

In [9]:
def make_senmat(enc, sentences, bsize, tokenize, verbose):
    """Encode `sentences` and return their row-normalised similarity matrix.

    The encoder's vocabulary is (re)built from `sentences`, the sentences are
    encoded, and the matrix of pairwise dot products is divided row-wise by
    its row sums.

    Args:
        enc: encoder object exposing `build_vocab(sentences, tokenize)` and
            `encode(sentences, bsize, tokenize, verbose)`.
        sentences: list of sentences to encode.
        bsize: batch size forwarded to `enc.encode`.
        tokenize: tokenisation flag forwarded to `enc.encode`.
        verbose: verbosity flag forwarded to `enc.encode`.

    Returns:
        A square torch tensor with one row/column per encoded sentence.
        When `enc.encode` returns a NumPy array the result is a fresh leaf
        tensor with `requires_grad=True` (as before). When it returns a
        torch tensor, the computation stays in torch so the autograd graph
        back to the encoder is preserved.
    """
    # NOTE(review): the vocabulary is always built with tokenize=True, even
    # when `tokenize` is False for encoding — confirm this is intended.
    enc.build_vocab(sentences, True)
    embeddings = enc.encode(sentences, bsize, tokenize, verbose)

    if torch.is_tensor(embeddings):
        # Stay in torch: converting to NumPy would detach the scores from the
        # encoder parameters and make loss.backward() useless for training.
        scores = torch.matmul(embeddings, embeddings.t())
        scores = scores / scores.sum(dim=1, keepdim=True)
    else:
        scores = np.matmul(embeddings, np.transpose(embeddings))
        scores = scores / np.sum(scores, axis=1, keepdims=True)
        scores = torch.from_numpy(scores)

    # requires_grad may only be assigned on leaf tensors; a non-leaf result
    # already carries gradient information from the encoder.
    if scores.is_leaf:
        scores.requires_grad = True
    return scores

In [10]:
def xentropy_cost(pred, target):
    """Return the summed cross-entropy  -sum(target * log(pred)).

    Args:
        pred: tensor of predicted probabilities.
        target: tensor of target weights, broadcastable against `pred`;
            it is cast to float32 before use.

    Returns:
        A scalar tensor with the total cost. Entries whose target weight is
        exactly zero contribute nothing, even when the matching prediction
        is 0 (where a naive 0 * log(0) would yield NaN).
    """
    weights = target.float()
    # Replace predictions that carry zero weight by 1 so that log() is finite
    # there; both the value and the gradient of those entries become 0.
    active = weights != 0
    safe_pred = torch.where(active, pred, torch.ones_like(pred))
    return -torch.sum(weights * torch.log(safe_pred))

In [11]:
# Toy training set: two documents, each an ordered list of three sentences.
# The training loop below iterates over these documents one at a time.
data = [
    [
        'Memories of childhood are unforgettable.',
        'I was four years old when my grandfather died.',
        'I clearly remember how everybody in the house was weeping.',
    ],
    [
        ' The Moon is filled wit craters.',
        'It has no light of its own.',
        'It gets its light from the Sun.',
    ],
]

In [13]:
# Held-out document (three ordered sentences) used by the evaluation cells.
test = [
    'Doctors lead a hard life.',
    'Their life is very busy.',
    'They get up early in the morning and go to the hospital.',
]

In [14]:
# Baseline evaluation on the held-out `test` document. Gradient tracking is
# disabled because nothing is optimised in this cell.
with torch.no_grad():
    scores = make_senmat(f, test, 400, False, True)
    print(scores)
    # Size the target matrix from the data rather than hard-coding 3, so the
    # cell keeps working if `test` changes length.
    targets = make_target(1, len(test))
    print(targets)
    loss = xentropy_cost(scores, targets)
    print(loss)

Found 23(/23) words with w2v vectors
Vocab size : 23
Nb words kept : 25/28 (89.3%)
torch.Size([13, 3, 300])
Speed : 103.0 sentences/s (cpu mode, bsize=400)
tensor([[ 0.3544,  0.3280,  0.3176],
        [ 0.3219,  0.3569,  0.3212],
        [ 0.3197,  0.3294,  0.3508]])
tensor([[ 0.0000,  1.0000,  0.0000],
        [ 0.5000,  0.0000,  0.5000],
        [ 0.0000,  1.0000,  0.0000]], dtype=torch.float64)
tensor(3.3598)


In [19]:
# Adam optimiser over the encoder's parameters.
# NOTE(review): lr=0.1 is far above Adam's documented default of 1e-3 —
# confirm this value is intended.
optimizer = optim.Adam(f.parameters(), lr=0.1)

In [20]:
# Training loop: 20 passes over `data`, one optimiser step per document.
# NOTE(review): the recorded output shows exactly the same loss for each
# document in every epoch, i.e. the optimiser steps are not changing the
# encoder. Check that make_senmat's scores stay attached to the encoder's
# autograd graph (a tensor rebuilt from a NumPy array is detached from it).
for epoch in range(20):
    for instance in data:
        # Clear gradients accumulated on the encoder by the previous step.
        f.zero_grad()
        scores = make_senmat(f, instance, 400, False, False)
        # Target size follows the document length instead of a fixed 3.
        targets = make_target(1, len(instance))
        loss = xentropy_cost(scores, targets)
        print(epoch, loss)
        loss.backward()
        optimizer.step()

Found 25(/25) words with w2v vectors
Vocab size : 25
torch.Size([11, 3, 300])
0 tensor(3.3736)
Found 20(/20) words with w2v vectors
Vocab size : 20
torch.Size([8, 3, 300])
0 tensor(3.3448)
Found 25(/25) words with w2v vectors
Vocab size : 25
torch.Size([11, 3, 300])
1 tensor(3.3736)
Found 20(/20) words with w2v vectors
Vocab size : 20
torch.Size([8, 3, 300])
1 tensor(3.3448)
Found 25(/25) words with w2v vectors
Vocab size : 25
torch.Size([11, 3, 300])
2 tensor(3.3736)
Found 20(/20) words with w2v vectors
Vocab size : 20
torch.Size([8, 3, 300])
2 tensor(3.3448)
Found 25(/25) words with w2v vectors
Vocab size : 25
torch.Size([11, 3, 300])
3 tensor(3.3736)
Found 20(/20) words with w2v vectors
Vocab size : 20
torch.Size([8, 3, 300])
3 tensor(3.3448)
Found 25(/25) words with w2v vectors
Vocab size : 25
torch.Size([11, 3, 300])
4 tensor(3.3736)
Found 20(/20) words with w2v vectors
Vocab size : 20
torch.Size([8, 3, 300])
4 tensor(3.3448)
Found 25(/25) words with w2v vectors
Vocab size : 25
to

KeyboardInterrupt: 

In [18]:
# Re-evaluate on the held-out `test` document.
# NOTE(review): this cell's execution count (18) is lower than the optimiser
# and training cells (19, 20), so the recorded output predates training;
# re-run it after the training loop to get a meaningful comparison.
with torch.no_grad():
    scores = make_senmat(f, test, 400, False, True)
    print(scores)
    # Size the target matrix from the data rather than hard-coding 3.
    targets = make_target(1, len(test))
    print(targets)
    loss = xentropy_cost(scores, targets)
    print(loss)

Found 23(/23) words with w2v vectors
Vocab size : 23
Nb words kept : 25/28 (89.3%)
torch.Size([13, 3, 300])
Speed : 90.1 sentences/s (cpu mode, bsize=400)
tensor([[ 0.3544,  0.3280,  0.3176],
        [ 0.3219,  0.3569,  0.3212],
        [ 0.3197,  0.3294,  0.3508]])
tensor([[ 0.0000,  1.0000,  0.0000],
        [ 0.5000,  0.0000,  0.5000],
        [ 0.0000,  1.0000,  0.0000]], dtype=torch.float64)
tensor(3.3598)
