In [122]:
import os
import torch
from torch.autograd import Variable
import torch.nn.functional as F
from torch import LongTensor as LT
from torch import FloatTensor as FT

In [140]:
# Root directory of the dataset; load_data() joins '<mode>/dialogues_*_<mode>.txt' onto it.
dpath = 'ijcnlp_dailydialog/'
# Separator token: load_data() splits every dialogue line into turns on this string.
STOKEN = '__eou__'
def _read_label_rows(path, offset=0):
    """Read a label file: one dialogue per line, whitespace-separated integers.

    Returns a list with one list of ``int(token) + offset`` per line. A blank
    line yields an empty list so rows stay aligned with the dialogue file.
    """
    rows = []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            # split() with no argument tolerates repeated/trailing spaces,
            # where split(' ') would hand int() an empty string.
            rows.append([int(tok) + offset for tok in line.split()])
    return rows


def load_data(dpath, mode):
    """Load the dialogues, act labels and emotion labels of one data split.

    Args:
        dpath: dataset root; files are read from ``<dpath>/<mode>/``.
        mode: one of 'train', 'test' or 'validation'.

    Returns:
        A tuple ``(dlg_data, act_data, emo_data)`` of equal-length-by-file lists:
        dlg_data[k] is a list of turns, each turn a list of word tokens;
        act_data[k] is a list of act labels shifted down by one;
        emo_data[k] is a list of emotion labels as stored in the file.

    Raises:
        AssertionError: if ``mode`` is not one of the three split names.
    """
    assert mode == 'train' or mode == 'test' or mode == 'validation'
    dial_f = os.path.join(dpath, '{}/dialogues_{}.txt'.format(mode, mode))
    act_f = os.path.join(dpath, '{}/dialogues_act_{}.txt'.format(mode, mode))
    emo_f = os.path.join(dpath, '{}/dialogues_emotion_{}.txt'.format(mode, mode))

    dlg_data = []
    # Explicit encoding: the platform default is not guaranteed to be UTF-8.
    with open(dial_f, 'r', encoding='utf-8') as f:
        for line in f:
            # Whitespace split avoids '' tokens when words are separated by
            # more than one space.
            turns = [t.split() for t in line.split(STOKEN)]
            # Each line ends with STOKEN, which leaves one empty trailing turn.
            if turns and not turns[-1]:
                turns = turns[:-1]
            dlg_data.append(turns)

    act_data = _read_label_rows(act_f, offset=-1)  # -1 for range 0 - 3
    emo_data = _read_label_rows(emo_f)
    return dlg_data, act_data, emo_data
        
# Load each split; every call returns (dialogues, act labels, emotion labels).
train_data, train_act_data, train_emo_data = load_data(dpath, 'train')
test_data, test_act_data, test_emo_data  = load_data(dpath, 'test')
val_data, val_act_data, val_emo_data  = load_data(dpath, 'validation')
# Pool the dialogues of all three splits; the vocabulary cell iterates over `data`,
# so test/validation words end up in the vocabulary as well.
data = train_data + test_data + val_data

In [141]:
# Vocabulary over every word of every turn in `data`. Index 0 is reserved for
# the unknown-word token; note that pad() also defaults to pad_token=0, so
# padding and unknown words share the same index.
vocab = ['_UNK_'] + sorted({w for d in data for s in d for w in s})
print(len(vocab))
# word -> index
w2i = {w: i for i, w in enumerate(vocab)}
# index -> word: invert w2i via items(). Enumerating the dict instead yields
# (position, word) pairs, which would produce a second word -> index map.
i2w = {i: w for w, i in w2i.items()}

26987


In [142]:
def to_var(x, var_type=None):
    """Wrap ``x`` in an autograd ``Variable``, on the GPU when one is available.

    Args:
        x: a tensor, or raw data accepted by ``var_type``.
        var_type: optional tensor constructor (e.g. ``LongTensor``) applied to
            ``x`` first; when ``None``, ``x`` is used as given.

    Returns:
        ``Variable`` holding the (possibly converted, possibly CUDA) tensor.
    """
    tensor = x if var_type is None else var_type(x)
    on_gpu = torch.cuda.is_available()
    return Variable(tensor.cuda() if on_gpu else tensor)


def pad(vec, sent_len, pad_token=0):
    """Return a new list of length ``sent_len`` built from ``vec``.

    Shorter inputs are right-padded with ``pad_token``; longer inputs are
    truncated. The input sequence is never modified.

    Args:
        vec: sequence of items (typically word indices).
        sent_len: target length.
        pad_token: filler value appended when ``vec`` is too short.

    Returns:
        A fresh list; ``vec`` itself is left untouched.
    """
    pad_len = max(0, sent_len - len(vec))
    # Build a copy instead of `vec += ...`, which would extend the caller's list.
    return (list(vec) + [pad_token] * pad_len)[:sent_len]

def vectorize(d, a, e, verbose=True):
    """Convert one dialogue into per-turn model inputs and labels.

    For turn ``i`` the input stacks the current utterance first, followed by
    the earlier utterances from most recent to oldest, followed by all-padding
    rows, so every turn of the dialogue has the same
    ``(len(d), longest utterance)`` shape.

    Args:
        d: dialogue as a list of turns, each a list of word strings.
        a: per-turn act labels, indexed like ``d``.
        e: per-turn emotion labels, indexed like ``d``.
        verbose: when true (default) print the two padding lengths.

    Returns:
        A list with one ``(x, act, emotion)`` tuple per turn: ``x`` built via
        ``to_var(..., LT)``, ``act`` via ``to_var([a[i]], LT)`` and ``emotion``
        via ``to_var([e[i]], FT)``. An empty dialogue returns ``[]``.
    """
    if not d:
        # max() over no utterances would raise ValueError.
        return []

    sent_max_len = max(len(s) for s in d)
    hist_max_len = len(d)
    if verbose:
        print('sent_max_len', sent_max_len)
        print('hist_max_len', hist_max_len)

    # Words missing from the vocabulary map to the reserved unknown index.
    unk = w2i['_UNK_']
    # Encode each utterance once; every later turn reuses it as history.
    encoded = [pad([w2i.get(w, unk) for w in u], sent_max_len) for u in d]
    blank = pad([], sent_max_len)

    ret_data = []
    for i in range(len(d)):
        rows = encoded[i::-1]  # current utterance, then history in reverse order
        rows = rows + [blank] * (hist_max_len - len(rows))  # history padding
        x_v = to_var(rows, LT)
        ret_data.append((x_v, to_var([a[i]], LT), to_var([e[i]], FT)))
    return ret_data

# Smoke test: vectorize the first training dialogue together with its labels.
i = 0
batch = vectorize(train_data[i], train_act_data[i], train_emo_data[i])

sent_max_len 27
hist_max_len 10


RuntimeError: cuda runtime error (59) : device-side assert triggered at /home/jonki/work/pytorch/aten/src/THC/generic/THCTensorCopy.c:20

In [143]:
import torch.nn as nn
import torch
import torch.nn.functional as F


class WordEmbedding(nn.Module):
    """Embedding lookup with optional pre-trained weights.

    In : (N, sentence_len)
    Out: (N, sentence_len, embd_size)

    Args:
        vocab_size: number of rows in the embedding table.
        embd_size: embedding dimension.
        pre_embd_w: optional tensor that replaces the initial table.
        is_train_embd: whether ``pre_embd_w`` receives gradients; only
            consulted when ``pre_embd_w`` is given.
    """

    def __init__(self, vocab_size, embd_size, pre_embd_w=None, is_train_embd=False):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embd_size)
        if pre_embd_w is None:
            return
        # Swap the freshly initialised table for the supplied weights.
        print('pre embedding weight is set')
        pretrained = nn.Parameter(pre_embd_w, requires_grad=is_train_embd)
        self.embedding.weight = pretrained

    def forward(self, x):
        embedded = self.embedding(x)
        return embedded


class SimpleLSTM(nn.Module):
    """Sequence classifier: embedding -> recurrent layer -> sum over time -> linear.

    Despite the class name, the recurrent layer is an ``nn.GRU``.

    Args:
        vocab_size: number of rows in the embedding table.
        embd_size: embedding dimension (GRU input size).
        hidden_size: GRU hidden size.
        class_size: number of output classes.
        pre_embd_w: optional pre-trained embedding weights, forwarded to
            ``WordEmbedding``.
    """

    def __init__(self, vocab_size, embd_size, hidden_size, class_size, pre_embd_w=None):
        super(SimpleLSTM, self).__init__()
        self.embd_size = embd_size
        self.hidden_size = hidden_size
        self.embedding = WordEmbedding(vocab_size, embd_size, pre_embd_w)
        self.rnn = nn.GRU(embd_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, class_size)

    def forward(self, x):
        '''
        x: (bs, hist_len+1, sent_len)

        Returns log-probabilities of shape (bs, class_size).
        '''
        bs = x.size(0)

        # Flatten history and sentence axes into one token sequence per item.
        x = self.embedding(x.view(bs, -1)) # (bs, -1, E)
        x, _ = self.rnn(x) # (bs, -1, H)
        x = torch.sum(x, 1) # (bs, H)
        # torch.tanh replaces the deprecated F.tanh.
        y = self.fc(torch.tanh(x)) # (bs, class_size)
        y = F.log_softmax(y, -1) # (bs, class_size)
        return y

In [145]:
# Hyper-parameters.
embd_size = 128
hidden_size = 128
# load_data() shifts act labels into 0-3, so 5 outputs leave at least one unused.
class_size = 5
model = SimpleLSTM(len(w2i), embd_size, hidden_size, class_size)
# Move the model to the GPU *before* building the optimizer, as the torch.optim
# documentation asks, so the optimizer is created over the final parameters.
if torch.cuda.is_available():
    model.cuda()
optimizer = torch.optim.Adadelta([p for p in model.parameters() if p.requires_grad])

loss_fn = F.nll_loss
# One optimisation step per dialogue: every turn of the dialogue is one batch row.
for d, a, e in zip(train_data, train_act_data, train_emo_data):
    batch = vectorize(d, a, e)
    x = torch.stack([turn[0] for turn in batch], 0)  # (turns, hist_len, sent_len)
    # Each label is a 1-element tensor; stack to (turns, 1) and drop the extra axis.
    act_labels = torch.stack([turn[1] for turn in batch], 0).squeeze(1)
    preds = model(x)
    loss = loss_fn(preds, act_labels)
    # .item() reads the scalar; indexing a 0-dim tensor with [0] is an error
    # in current PyTorch.
    print(loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

RuntimeError: cuda runtime error (59) : device-side assert triggered at /home/jonki/work/pytorch/aten/src/THC/generic/THCTensorCopy.c:20

In [91]:
# Move the model to the GPU; as the cell's last expression, the returned module is displayed.
# NOTE(review): unguarded, unlike the `torch.cuda.is_available()` check in the training cell.
# The recorded output shows out_features=4 while class_size is 5 there, so this output
# looks stale from an earlier run -- confirm with Restart & Run All.
model.cuda()

SimpleLSTM(
  (embedding): WordEmbedding(
    (embedding): Embedding(26987, 128)
  )
  (rnn): GRU(128, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=4, bias=True)
)

In [92]:
# Display the model architecture.
# NOTE(review): the recorded output shows out_features=4, which does not match
# class_size = 5 in the training cell -- presumably stale; re-run to refresh.
model

SimpleLSTM(
  (embedding): WordEmbedding(
    (embedding): Embedding(26987, 128)
  )
  (rnn): GRU(128, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=4, bias=True)
)