In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
from torch.autograd import Variable
torch.manual_seed(1)

<torch._C.Generator at 0x1ce0f800130>

In [2]:
inp = 17
emb = 16
hid = 16

In [3]:
embeds = nn.Embedding(inp, emb)
lstm = nn.LSTM(emb, hid)
linear = nn.Linear(hid, inp)
dropout = nn.Dropout(0.1)
softmax = nn.LogSoftmax(dim=1)

In [4]:
class LSTMPredictor(nn.Module):
    def __init__(self, input_dim, embed_dim, hidden_dim):
        super(LSTMPredictor, self).__init__()
        self.hidden_dim = hidden_dim
        self.embeds = nn.Embedding(input_dim, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim)
        self.linear = nn.Linear(hidden_dim, input_dim)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)
        self.hidden = self.initHidden()

    def forward(self, input, hidden):
        embeds = self.embeds(input)
        lstm_out, hidden = self.lstm(embeds.view(len(input), 1, -1), hidden)
        output = self.linear(lstm_out.view(len(input), -1))
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return (autograd.Variable(torch.zeros(1, 1, self.hidden_dim)).cuda(),
                autograd.Variable(torch.zeros(1, 1, self.hidden_dim)).cuda())

In [38]:
class BDLSTMPredictor(nn.Module):
    def __init__(self, input_dim, embed_dim, hidden_dim):
        super(BDLSTMPredictor, self).__init__()
        self.hidden_dim = hidden_dim
        self.embeds = nn.Embedding(input_dim, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, bidirectional=True)
        self.linear = nn.Linear(hidden_dim * 2, input_dim)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)
        self.hidden = self.initHidden()

    def forward(self, input, hidden):
        embeds = self.embeds(input)
        lstm_out, hidden = self.lstm(embeds.view(len(input), 1, -1), hidden)
        cbow = self.make_cbow(lstm_out, hidden)
        output = self.linear(cbow)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden
    
    def make_cbow(self, lstm_out, hidden):
        lstm_out = lstm_out.squeeze()
        forward, reverse = torch.chunk(lstm_out,2,dim=1)
        output = []
        for n in range(1, len(forward)-1):
            tmp = torch.cat([forward[n-1,:], reverse[n+1,:]], dim=0)
            output.append(tmp)
        output = torch.stack(output, dim=0)
        return output
    
    def initHidden(self):
        return (autograd.Variable(torch.zeros(2, 1, self.hidden_dim)).cuda(),
                autograd.Variable(torch.zeros(2, 1, self.hidden_dim)).cuda())

In [39]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
inputs = [torch.tensor([1,2,1,2,1,3,0,4,0,5,1,3,0,4,0,5,1], device=device) for _ in range(100)]
targets = [torch.tensor([2,1,2,1,3,0,4,0,5,1,3,0,4,0,5,1,6], device=device) for _ in range(100)]

In [44]:

lstm = LSTMPredictor(inp, emb, hid).to(device)
bilstm = BDLSTMPredictor(inp, emb, hid).to(device)
opt_lstm = optim.SGD(lstm.parameters(), lr=0.01)
opt_bilstm = optim.SGD(bilstm.parameters(), lr=0.01)
criterion = nn.NLLLoss()

'''
for i, t in zip(inputs, targets):
    hidden = lstm.initHidden()
    out, hidden = lstm(i, hidden)
    loss = criterion(out, t)
    loss.backward()
    opt_lstm.step()
'''
for i, t in zip(inputs, targets):
    hidden = bilstm.initHidden()
    out, hidden = bilstm(i, hidden)
    t = t[1:-1]
    loss = criterion(out, t)
    loss.backward()
    opt_bilstm.step()
out.shape

torch.Size([15, 17])

In [45]:
bilstm.eval()
with torch.no_grad():
    hidden = bilstm.initHidden()
    out, hidden = bilstm(inputs[0], hidden)
    print(out.shape)
    #print(out)
    topv, topi = out.topk(1)
    for n,i in enumerate(topi):
        print(targets[0][n+1], i)

torch.Size([15, 17])
tensor(1, device='cuda:0') tensor([1], device='cuda:0')
tensor(2, device='cuda:0') tensor([2], device='cuda:0')
tensor(1, device='cuda:0') tensor([1], device='cuda:0')
tensor(3, device='cuda:0') tensor([3], device='cuda:0')
tensor(0, device='cuda:0') tensor([0], device='cuda:0')
tensor(4, device='cuda:0') tensor([4], device='cuda:0')
tensor(0, device='cuda:0') tensor([0], device='cuda:0')
tensor(5, device='cuda:0') tensor([5], device='cuda:0')
tensor(1, device='cuda:0') tensor([1], device='cuda:0')
tensor(3, device='cuda:0') tensor([3], device='cuda:0')
tensor(0, device='cuda:0') tensor([0], device='cuda:0')
tensor(4, device='cuda:0') tensor([4], device='cuda:0')
tensor(0, device='cuda:0') tensor([0], device='cuda:0')
tensor(5, device='cuda:0') tensor([5], device='cuda:0')
tensor(1, device='cuda:0') tensor([1], device='cuda:0')


In [46]:
lstm.eval()
with torch.no_grad():
    hidden = lstm.initHidden()
    out, hidden = lstm(inputs[0], hidden)
    print(out.shape)
    #print(out)
    topv, topi = out.topk(1)
    for n,i in enumerate(topi):
        print(targets[0][n], i)

torch.Size([17, 17])
tensor(2, device='cuda:0') tensor([16], device='cuda:0')
tensor(1, device='cuda:0') tensor([16], device='cuda:0')
tensor(2, device='cuda:0') tensor([16], device='cuda:0')
tensor(1, device='cuda:0') tensor([16], device='cuda:0')
tensor(3, device='cuda:0') tensor([16], device='cuda:0')
tensor(0, device='cuda:0') tensor([16], device='cuda:0')
tensor(4, device='cuda:0') tensor([16], device='cuda:0')
tensor(0, device='cuda:0') tensor([16], device='cuda:0')
tensor(5, device='cuda:0') tensor([5], device='cuda:0')
tensor(1, device='cuda:0') tensor([5], device='cuda:0')
tensor(3, device='cuda:0') tensor([16], device='cuda:0')
tensor(0, device='cuda:0') tensor([16], device='cuda:0')
tensor(4, device='cuda:0') tensor([16], device='cuda:0')
tensor(0, device='cuda:0') tensor([16], device='cuda:0')
tensor(5, device='cuda:0') tensor([5], device='cuda:0')
tensor(1, device='cuda:0') tensor([5], device='cuda:0')
tensor(6, device='cuda:0') tensor([16], device='cuda:0')


In [12]:
emba = nn.Embedding(inp, emb).to(device)
lma = nn.LSTM(emb, hid).to(device)
lina = nn.Linear(hid, inp).to(device)
dropa = nn.Dropout(0.1).to(device)
softa = nn.LogSoftmax(dim=1).to(device)
hida = (autograd.Variable(torch.zeros(1, 1, hid)).cuda(),
            autograd.Variable(torch.zeros(1, 1, hid)).cuda())

embb = nn.Embedding(inp, emb).to(device)
lmb = nn.LSTM(emb, hid, bidirectional=True).to(device)
linb = nn.Linear(hid * 2, inp).to(device)
dropb = nn.Dropout(0.1).to(device)
softb = nn.LogSoftmax(dim=1).to(device)
hidb = (autograd.Variable(torch.zeros(2, 1, hid)).cuda(),
            autograd.Variable(torch.zeros(2, 1, hid)).cuda())

In [None]:
a_out = emba(inputs[0])
lstm_out, hidden = lma(a_out.view(len(inputs[0]), 1, -1), hida)
print('lstm:',lstm_out.shape, hidden[0].shape)
a_out = lina(a_out.view(len(inputs[0]), -1))
print('linear:', a_out.shape)
a_out = dropa(a_out)
a_out = softa(a_out)

In [21]:
b_out = embb(inputs[0])
b_out, b_hidden = lmb(b_out.view(len(inputs[0]), 1, -1), hidb)
print('bilstm:', b_out.shape, b_hidden[0].shape)
print(b_out.view(len(inputs[0]), -1).shape)
#b_out = linb(b_out.view(len(inputs[0]), -1))
#print('linear:', b_out.shape)
#b_out = dropb(b_out)
#b_out = softb(b_out)

bilstm: torch.Size([17, 1, 32]) torch.Size([2, 1, 16])
torch.Size([17, 32])


In [37]:
lstm_out = b_out.squeeze()
#17,32
print('sq:', lstm_out.shape)
forward, reverse = torch.chunk(lstm_out,2,dim=1)
print('chunk:',forward.shape)
#output = torch.zeros(len(lstm_out)-2, hidden).to(device)
# 15,16
output = []
for n in range(1, len(forward)-1):

    tmp = torch.cat([forward[n-1,:], reverse[n+1,:]], dim=0)

    output.append(tmp)
output = torch.stack(output, dim=0)
print('out:',output.shape)
#out:17-2, 16


sq: torch.Size([17, 32])
chunk: torch.Size([17, 16])
out: torch.Size([15, 32])


SyntaxError: 'return' outside function (<ipython-input-37-db926d2b42dc>, line 17)