In [155]:
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

In [51]:
seq_len = 21
# vocab_size = 1000
embd_size = 200
kernel = (5, embd_size)
out_chs = 64
batch_size = 11
ans_size = 100

In [76]:
def to_var(x):
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x)

In [145]:
def read_words(fpath, seq_len, filter_h):
    words = []
    with open(fpath, 'r') as f:
        lines = f.readlines()
        for line in lines[:5000]:
            tokens = line.split()
            # TODO only choose specified length sentence
            if len(tokens) == seq_len - 2:
                words.extend((['<pad>']*int(filter_h/2)) + ['<s>'] + tokens + ['</s>'])

    return words

words = read_words('./data/news.en-00001-of-00100', seq_len, kernel[0])
# words[:50]
vocab = []
for w in words:
    if w not in vocab:
        vocab.append(w)
vocab_size = len(vocab)
w2i = {'<unk>': 0}
w2i = dict((w, i) for i, w in enumerate(vocab, 1))
print('vocab_size', len(vocab))
data = [w2i[w] for w in words]

vocab_size 1504


In [149]:
def create_batches(data, batch_size, seq_len):
#     num_batches = int(len(data) / (batch_size*seq_len))
    ret_data = []
    X, Y = [], []
    for i in range(0, len(data)-(seq_len+1), seq_len):
        X.append(data[i:i+seq_len])
        Y.append(data[i+seq_len])
    for i in range(0, len(X)-batch_size, batch_size):
        ret_data.append((X[i:i+batch_size], Y[i:i+batch_size]))
    return ret_data
training_data = create_batches(data, batch_size, seq_len)

In [154]:
len(training_data[0])
training_data[0][1]

[20, 37, 50, 13, 73, 87, 50, 51, 48, 131, 1]

In [162]:
# In : (N, sentence_len)
# Out: (N, sentence_len, embd_size)
class GatedCNN(nn.Module):
    def __init__(self, seq_len, vocab_size, embd_size, kernel, out_chs, ans_size):
        super(GatedCNN, self).__init__()
        self.embd_size = embd_size
        self.embedding = nn.Embedding(vocab_size, embd_size)
        # nn.Conv1d(in_channels, out_channels, kernel_size, stride=1, padding=0, ...
        self.conv      = nn.Conv2d(1, out_chs, kernel, padding=(2, 0)) # )2, 99
        self.conv_gate = nn.Conv2d(1, out_chs, kernel, padding=(2, 0)) # )2, 99
        # todo bias
        
        self.conv2      = nn.Conv2d(out_chs, out_chs, (kernel[0], 1), padding=(2, 0)) # )2, 99
        self.conv_gate2 = nn.Conv2d(out_chs, out_chs, (kernel[0], 1), padding=(2, 0)) # )2, 99
        
        self.fc = nn.Linear(out_chs*seq_len, ans_size)

    def forward(self, x):
        # x: (N, seq_len)
        # Embedding
        bs = x.size(0) # batch size
        seq_len = x.size(1) # number of words in a sentence
        x = self.embedding(x) # (bs, word_len, embd_size)

        # CNN
        x = x.unsqueeze(1) # (bs, Cin, seq_len, embd_size), insert Channnel-In dim
        # Conv2d
        #    Input : (bs, Cin, Hin, Win )
        #    Output: (bs, Cout,Hout,Wout) 
        A = self.conv(x) # (bs, Cout, seq_len, 1?)
        B = self.conv_gate(x) # (bs, Cout, seq_len, 1?)
        h0 = A * F.sigmoid(B) # (bs, Cout, seq_len, 1?)
        
        A2 = self.conv2(h0)
        B2 = self.conv_gate2(h0)
        h1 = A2 * F.sigmoid(B2) # (bs, Cout, seq_len, 1?)
        
        # todo residual
        
        hL = h1 # (bs, Cout, seq_len, 1?)
        hL = hL.view(bs, -1) # (bs, Cout*seq_len)
        out = self.fc(hL) # (bs, ans_size)
        
        out = F.log_softmax(out)
        return out

model = GatedCNN(seq_len, vocab_size, embd_size, kernel, out_chs, vocab_size)
if torch.cuda.is_available():
    model.cuda()

In [163]:
def train(model, data, optimizer, loss_fn, n_epoch=10):
    for epoch in range(n_epoch):
        print('epoch', epoch)
        random.shuffle(data)
        for batch_ct, (X, Y) in enumerate(data):
            X = to_var(torch.LongTensor(X)) # (bs, seq_len)
            Y = to_var(torch.LongTensor(Y)) # (bs,)
            pred = model(X) # (bs, ans_size)
            loss = loss_fn(pred, Y)
            if batch_ct % 10 == 0:
                print('loss: {:.4f}'.format(loss.data[0]))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

optimizer = torch.optim.Adadelta(model.parameters())
loss_fn = nn.NLLLoss()
train(model, training_data, optimizer, loss_fn)

epoch 0
loss: 7.3353
loss: 5.5012
epoch 1
loss: 3.4595
loss: 4.3060
epoch 2
loss: 1.9070
loss: 0.2825
epoch 3
loss: 0.0219
loss: 0.0005
epoch 4
loss: 0.0012
loss: 0.0029
epoch 5
loss: 0.0014
loss: 0.0013
epoch 6
loss: 0.0009
loss: 0.0016
epoch 7
loss: 0.0005
loss: 0.0005
epoch 8
loss: 0.0005
loss: 0.0003
epoch 9
loss: 0.0004
loss: 0.0002
