In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
from itertools import chain

In [2]:
class AttnEncoder(nn.Module):
    def __init__(self, loc_num, embedding_dim, hidden_dim, n_layers=2):
        super(AttnEncoder, self).__init__()
        self.embedding = nn.Embedding(loc_num, embedding_dim)
        self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=n_layers, batch_first=True)
    
    def forward(self, x):
        embedded = self.embedding(x)
        output, hidden = self.gru(embedded, None)
        return output, hidden

In [3]:
class AttnDecoder(nn.Module):
    def __init__(self, loc_num, embedding_dim, hidden_dim, length, n_layers=2):
        super(AttnDecoder, self).__init__()
        self.embedding = nn.Embedding(loc_num, embedding_dim)
        self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=n_layers, batch_first=True)
        self.attn = nn.Linear(hidden_dim, hidden_dim)
        self.out = nn.Linear(hidden_dim * 2, loc_num - 2)
        
    def forward(self, encoder_outputs, x, hidden):
        embedded = self.embedding(x)
        output, _ = self.gru(embedded, hidden)
        weights = self.score(encoder_outputs, output)
        context = torch.bmm(weights, encoder_outputs) # B, de_s, hidden_dim
        return self.out(torch.cat([context, output], dim=2)), weights
        
    def score(self, encoder_h, decoder_h):
        return nn.functional.softmax(torch.bmm(decoder_h, encoder_h.permute(0, 2, 1)), dim=2) # B, de_s, en_s

In [4]:
LOC_NUM = 1443
EMBEDDING_DIM = 64
HIDDEN_DIM = 256
N_LAYERS = 2

encoder = AttnEncoder(LOC_NUM, EMBEDDING_DIM, HIDDEN_DIM, N_LAYERS).cuda()
decoder = AttnDecoder(LOC_NUM, EMBEDDING_DIM, HIDDEN_DIM, 54).cuda()

In [5]:
optimizer = torch.optim.SGD(chain(encoder.parameters(), decoder.parameters()), lr=1e-2, momentum=0.9)
optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2, 4, 6, 8], gamma=0.1)

In [6]:
criteria = nn.CrossEntropyLoss(ignore_index=-1)

In [None]:
data = np.concatenate([np.genfromtxt('../data/dis_forautoencoder_2012_dec_tokyo/day_{}.csv'.format(d), delimiter=',', dtype=np.int32)
                       for d in range(1, 32)], axis=0)
data = data[:, 1:]
pivot = 90
data_in = data[:, :pivot]
data_out = data[:, pivot:]
data_size = data.shape[0]

In [None]:
batch_size = 64

for epoch in range(1, 11):
    avg_loss = 0.0
    cnt = 0
    np.random.shuffle(data)
    optim_scheduler.step()
    for i in range(0, data_size - batch_size, batch_size):
        cnt += 1
        x_batch = data_in[i: i + batch_size]
        y_batch = data_out[i: i + batch_size]
        tf_batch = np.ones([batch_size, 54], dtype=np.int32)
        tf_batch[:, 1:] = y_batch[:, :-1]

        x_batch = Variable(torch.LongTensor(x_batch)).cuda()
        y_batch = Variable(torch.LongTensor(y_batch)).cuda()
        tf_batch = Variable(torch.LongTensor(tf_batch)).cuda()

        encoder_outputs, hidden = encoder(x_batch)
        pred, _ = decoder(encoder_outputs, tf_batch, hidden)
        loss = criteria(pred.view(-1, LOC_NUM - 2), torch.clamp(y_batch - 2, min=-1).view(-1))

        optimizer.zero_grad()
        loss.backward()
        avg_loss += float(loss.data[0])
        optimizer.step()
        print('Epoch {:04d}, {:.1f}%, avg_loss={:.4f}'.format(epoch, i * 100 / data_size, avg_loss / cnt), end='\r')
    print('')
    torch.save(encoder, '../results/sadAttenSeq2Seq/attn_encoder')
    torch.save(decoder, '../results/sadAttenSeq2Seq/attn_decoder')

Epoch 0001, 100.0%, avg_loss=2.6157


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


Epoch 0002, 100.0%, avg_loss=1.8613
Epoch 0003, 100.0%, avg_loss=1.7548
Epoch 0004, 93.7%, avg_loss=1.7433