In [2]:
import numpy as np
import collections

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

In [51]:
seq_len = 21
# vocab_size = 1000
embd_size = 200
kernel = (5, embd_size)
out_chs = 64
batch_size = 11
ans_size = 100

In [76]:
def to_var(x):
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x)

In [98]:
def read_words(fpath, seq_len, filter_h):
    words = []
    with open(fpath, 'r') as f:
        lines = f.readlines()
        for line in lines[:5000]:
            tokens = line.split()
            # TODO only choose specified length sentence
            if len(tokens) == seq_len - 2:
                words.extend((['<pad>']*int(filter_h/2)) + ['<s>'] + tokens + ['</s>'])

    return words

words = read_words('./data/news.en-00001-of-00100', seq_len, kernel[0])
# words[:50]
vocab = []
for w in words:
    if w not in vocab:
        vocab.append(w)
vocab_size = len(vocab)
w2i = {'<unk>': 0}
w2i = dict((w, i) for i, w in enumerate(vocab, 1))
print('vocab_size', len(vocab))
data = [w2i[w] for w in words]

vocab_size 1504


In [104]:
def create_batches(data, batch_size, seq_len):
    num_batches = int(len(data) / (batch_size*seq_len))
    x_batches, y_batches = [], []
    X, Y = [], []
    for i in range(0, len(data)-(seq_len+1), seq_len):
        X.append(data[i:i+seq_len])
        Y.append(data[i+seq_len])
    for i in range(0, len(X)-batch_size, batch_size):
        x_batches.append(X[i:i+batch_size])
        y_batches.append(Y[i:i+batch_size])
    return x_batches, y_batches
x_batches, y_batches = create_batches(data, batch_size, seq_len)

In [116]:
# In : (N, sentence_len)
# Out: (N, sentence_len, embd_size)
class GatedCNN(nn.Module):
    def __init__(self, seq_len, vocab_size, embd_size, kernel, out_chs, ans_size):
        super(GatedCNN, self).__init__()
        self.embd_size = embd_size
        self.embedding = nn.Embedding(vocab_size, embd_size)
        # nn.Conv1d(in_channels, out_channels, kernel_size, stride=1, padding=0, ...
        self.conv      = nn.Conv2d(1, out_chs, kernel, padding=(2, 0)) # )2, 99
        self.conv_gate = nn.Conv2d(1, out_chs, kernel, padding=(2, 0)) # )2, 99
        # todo bias
        
        self.conv2      = nn.Conv2d(out_chs, out_chs, (kernel[0], 1), padding=(2, 0)) # )2, 99
        self.conv_gate2 = nn.Conv2d(out_chs, out_chs, (kernel[0], 1), padding=(2, 0)) # )2, 99
        
        self.fc = nn.Linear(out_chs*seq_len, ans_size)

    def forward(self, x):
        # x: (N, seq_len)
        # Embedding
        bs = x.size(0) # batch size
        seq_len = x.size(1) # number of words in a sentence
        x = self.embedding(x) # (bs, word_len, embd_size)

        # CNN
        x = x.unsqueeze(1) # (bs, Cin, seq_len, embd_size), insert Channnel-In dim
        # Conv2d
        #    Input : (bs, Cin, Hin, Win )
        #    Output: (bs, Cout,Hout,Wout) 
        A = self.conv(x) # (bs, Cout, seq_len, 1?)
        B = self.conv_gate(x) # (bs, Cout, seq_len, 1?)
        h0 = A * F.sigmoid(B) # (bs, Cout, seq_len, 1?)
        
        A2 = self.conv2(h0)
        B2 = self.conv_gate2(h0)
        h1 = A2 * F.sigmoid(B2) # (bs, Cout, seq_len, 1?)
        
        # todo residual
        
        hL = h1 # (bs, Cout, seq_len, 1?)
        hL = hL.view(bs, -1) # (bs, Cout*seq_len)
        out = self.fc(hL) # (bs, ans_size)
        
        out = F.log_softmax(out)
        return out

model = GatedCNN(seq_len, vocab_size, embd_size, kernel, out_chs, vocab_size)
if torch.cuda.is_available():
    model.cuda()

In [120]:
def train(model, x_batches, y_batches, optimizer, loss_fn, n_epoch=10):
    for i in range(n_epoch):
        for X, Y in zip(x_batches, y_batches):
            X = to_var(torch.LongTensor(X)) # (bs, seq_len)
            Y = to_var(torch.LongTensor(Y)) # (bs,)
            pred = model(X) # (bs, ans_size)
            loss = loss_fn(pred, Y)
            print(loss.data[0])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

optimizer = torch.optim.Adadelta(model.parameters())
loss_fn = nn.NLLLoss()
train(model, x_batches, y_batches, optimizer, loss_fn)

2.9580647945404053
4.094936847686768
3.82731556892395
3.6298820972442627
2.8523738384246826
2.9451749324798584
5.288064956665039
4.630376815795898
3.8837196826934814
4.588630199432373
3.4167747497558594
4.588595390319824
3.753164052963257
3.649923801422119
4.475085258483887
4.1205244064331055
0.298143595457077
1.0722720623016357
0.6575918793678284
0.34592583775520325
0.6243817806243896
0.25904762744903564
0.5418158769607544
0.7264187932014465
0.19047273695468903
0.5962465405464172
0.38761937618255615
0.7734092473983765
0.2996833622455597
0.2599756419658661
0.0948779359459877
0.4141019880771637
0.0033458361867815256
0.020814938470721245
0.005576220341026783
0.017768166959285736
0.012891336344182491
0.013592460192739964
0.009189345873892307
0.0055628689005970955
0.00047267565969377756
0.1872633546590805
0.0017988898325711489
0.01662115566432476
0.002414876827970147
0.0017759149195626378
0.003003467107191682
0.0009398894035257399
0.0006100047612562776
0.002473224187269807
0.00117293267976

In [88]:
x_batches[1]

[]