# >>> Seq2Seq on timeseries data <<<

### 0. packages

In [2]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import pandas as pd
import numpy as np

# Prefer the GPU when CUDA is usable; otherwise keep everything on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

### 1. hyperparameters

In [3]:
# Number of consecutive dates fed to the encoder for one sample.
input_len = 20
# Number of dates immediately after the input window that are predicted.
target_len = 5
# Default for the `max_length` argument of train() and evaluate(); derived
# from input_len so the two values cannot drift apart.
MAX_LENGTH = input_len
# Leading fraction of the sample windows used for training; the remainder
# becomes the test set.
train_ratio = 0.7

### 2. prepare train/test data

In [4]:
# Load the source workbook into a DataFrame; the path is relative to the
# current working directory.
# NOTE(review): presumably the price history of ticker 0050 — confirm the
# column layout, since later cells rely on the '日期' column and on columns 4..7.
stock = pd.read_excel('./0050.xlsx')

In [5]:
# Map every value of the '日期' (date) column to its row position in `stock`.
# If a date occurs more than once, the last occurrence wins.
date2idx = dict(zip(stock['日期'], range(len(stock))))

In [105]:
# Feature table: one float32 row per row of `stock`, holding DataFrame
# columns 4..7. The columns are sliced in a single step instead of building a
# Series for every row; np.array always returns a fresh C-contiguous copy.
pretrain_embed = np.array(stock.iloc[:, 4:8], dtype='float32', order='C')

In [106]:
# Frozen lookup table built from the precomputed features: row i holds the
# feature vector stored in pretrain_embed[i].
# freeze=True keeps the weights out of training, and .to(device) places the
# table on the same device as the index tensors created by
# tensorFromSentence; without it every lookup fails once CUDA is selected.

stock_embedding = nn.Embedding.from_pretrained(
    torch.from_numpy(pretrain_embed), freeze=True
).to(device)

In [8]:
# Sliding windows over the date sequence. Every sample is
# [input_len consecutive dates, the target_len dates that follow them].

tmp = list(date2idx)
# The last valid start index is len(tmp) - input_len - target_len, so the
# range needs "+ 1"; without it the window whose target ends on the final
# date is silently dropped. A negative count simply yields no windows.
data_pair = [[tmp[i:i + input_len], tmp[i + input_len:i + input_len + target_len]]
             for i in range(len(tmp) - input_len - target_len + 1)]
# The leading train_ratio share of the windows is used for training and the
# rest for testing. NOTE(review): neighbouring windows overlap, so test
# inputs near the split share dates with training samples.
ts_split = int(len(data_pair) * train_ratio)
train_pair = data_pair[:ts_split]
test_pair = data_pair[ts_split:]

In [9]:
# convert dates to index tensors for the embedding lookup

def indexesFromSentence(lang, sentence):
    """Translate each item of ``sentence`` through the ``lang`` lookup.

    Args:
        lang: Subscriptable mapping from item to index (e.g. ``date2idx``).
        sentence: Iterable of items to look up.

    Returns:
        A list with ``lang[item]`` for every item, in the original order.

    Raises:
        KeyError: If an item is missing from a dict-like ``lang``.
    """
    indexes = []
    for token in sentence:
        indexes.append(lang[token])
    return indexes

def tensorFromSentence(lang, sentence):
    """Build a column tensor of lookup indices for ``sentence``.

    Args:
        lang: Mapping from item to integer index.
        sentence: Iterable of items to convert.

    Returns:
        A ``torch.long`` tensor of shape ``(len(sentence), 1)`` placed on the
        module-level ``device``.
    """
    flat_indices = torch.tensor(
        indexesFromSentence(lang, sentence),
        dtype=torch.long,
        device=device,
    )
    # One index per row, so that tensor[i] selects a single step.
    return flat_indices.view(-1, 1)

def tensorsFromPair(pair):
    """Convert one ``[input_dates, target_dates]`` sample into index tensors.

    Args:
        pair: Two-element sequence; ``pair[0]`` holds the input dates and
            ``pair[1]`` the target dates, all of them keys of ``date2idx``.

    Returns:
        Tuple ``(input_tensor, target_tensor)`` as produced by
        ``tensorFromSentence``.
    """
    source_dates = pair[0]
    target_dates = pair[1]
    return (
        tensorFromSentence(date2idx, source_dates),
        tensorFromSentence(date2idx, target_dates),
    )

In [85]:
# utilities

import time
import math

def asMinutes(s):
    """Format a duration given in seconds as ``'<minutes>m <seconds>s'``.

    Args:
        s: Duration in seconds (int or float).

    Returns:
        The formatted string; both parts are rendered with ``%d``, so any
        fractional second is truncated.
    """
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Report the elapsed time and a linear estimate of the time remaining.

    Args:
        since: Start timestamp as returned by ``time.time()``.
        percent: Fraction of the work already done; must be non-zero.

    Returns:
        ``'<elapsed> (- <remaining>)'`` with both parts formatted by
        ``asMinutes``.
    """
    elapsed = time.time() - since
    # Extrapolate the total run time from the progress made so far.
    projected_total = elapsed / (percent)
    remaining = projected_total - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

In [227]:
def normFloat(val):
    """Snap ``val`` to the nearest multiple of 0.05.

    The value is first rounded to two decimals, then the last digit decides
    the direction::

        last digit : 0 1 2 | 3 4 5 6 7 | 8 9
        result     :  .x0  |    .x5    | next .x0

    The arithmetic is done on integer hundredths. Adding 0.05 to a float and
    rounding to one decimal lands on a .x5 tie, which rounds down for many
    inputs (e.g. 0.28 would become 0.2 instead of 0.3). Negative values are
    snapped symmetrically (by magnitude).

    Args:
        val: Finite number to snap.

    Returns:
        The snapped value as a float.

    Raises:
        ValueError: If ``val`` is NaN or infinite.
    """
    text = "%.2f" % val
    try:
        hundredths = int(round(float(text) * 100))
    except (ValueError, OverflowError):
        raise ValueError("normFloat requires a finite number, got %r" % (val,))

    # With integer hundredths there is never an exact tie between two
    # multiples of 5, so "+ 2 then floor" is an unambiguous round-to-nearest.
    snapped = (abs(hundredths) + 2) // 5 * 5
    if hundredths < 0:
        snapped = -snapped
    return snapped / 100.0

### 3. seq2seq model

In [138]:
class EncoderRNN(nn.Module):
    """GRU encoder that consumes one embedded step at a time.

    Args:
        hidden_size: Size of the GRU hidden state.
        input_size: Width of one embedded feature vector. Defaults to 4, the
            value that used to be hard-coded; it has to match the embedding
            dimension of the module-level ``stock_embedding``.
    """

    def __init__(self, hidden_size, input_size=4):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.gru = nn.GRU(input_size, hidden_size)

    def forward(self, input, hidden):
        """Encode a single step.

        Args:
            input: Index tensor for one step; it is looked up in the
                module-level ``stock_embedding``.
            hidden: Previous hidden state of shape ``(1, 1, hidden_size)``.

        Returns:
            Tuple ``(output, hidden)`` as returned by the GRU.
        """
        # Reshape the looked-up features to (seq_len=1, batch=1, features).
        embedded = stock_embedding(input).view(1, 1, -1)
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def initHidden(self):
        """Return an all-zero initial hidden state on the global ``device``."""
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [136]:
class DecoderRNN(nn.Module):
    """GRU decoder that emits one scalar prediction per step.

    The GRU output is projected ``hidden_size -> 16 -> 1`` by two linear
    layers.
    NOTE(review): there is no non-linearity between the two linear layers.

    Args:
        hidden_size: Size of the GRU input and hidden state.
    """

    def __init__(self, hidden_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.gru = nn.GRU(hidden_size, hidden_size)

        self.linear1 = nn.Linear(hidden_size, 16)
        self.linear2 = nn.Linear(16, 1)

    def forward(self, input, hidden):
        """Decode a single step.

        Args:
            input: Step input accepted by the GRU (width ``hidden_size``).
            hidden: Previous hidden state.

        Returns:
            Tuple ``(output, hidden, nxt_input)``: the scalar projection, the
            updated hidden state, and the raw GRU output that the caller
            feeds back in as the next step's input.
        """
        gru_out, new_hidden = self.gru(input, hidden)
        prediction = self.linear2(self.linear1(gru_out))
        return prediction, new_hidden, gru_out

    def initHidden(self):
        """Return an all-zero hidden state on the global ``device``."""
        state_shape = (1, 1, self.hidden_size)
        return torch.zeros(*state_shape, device=device)

In [225]:
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """Run one optimisation step on a single (input, target) window.

    The encoder reads the input one step at a time. The decoder starts from
    the encoder's last output and final hidden state and is then unrolled
    for ``target_tensor.size(0)`` steps, feeding its own GRU output back in.

    Args:
        input_tensor: Index tensor of shape ``(input_length, 1)``.
        target_tensor: Index tensor of shape ``(target_length, 1)``.
        encoder: Module exposing ``initHidden()`` and
            ``__call__(step, hidden) -> (output, hidden)``.
        decoder: Module whose call returns ``(output, hidden, next_input)``.
        encoder_optimizer: Optimizer over the encoder parameters.
        decoder_optimizer: Optimizer over the decoder parameters.
        criterion: Loss callable taking ``(prediction, target)``.
        max_length: Unused; kept so existing callers keep working. The
            decoder is seeded from the last real encoder output, so inputs
            need not have exactly ``max_length`` steps.

    Returns:
        The summed loss divided by the number of target steps, as a float.

    Raises:
        ValueError: If the input or the target sequence is empty.
    """
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    if input_length == 0 or target_length == 0:
        raise ValueError("train() needs a non-empty input and target sequence")

    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    encoder_output = None
    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)

    # Seed the decoder with the output of the last step actually encoded.
    # Reading row -1 of a max_length-sized buffer yields a zero row whenever
    # the input is shorter than max_length.
    decoder_input = encoder_output[0, 0].view(1, 1, -1)
    decoder_hidden = encoder_hidden

    loss = 0

    for di in range(target_length):
        decoder_output, decoder_hidden, decoder_input = decoder(
            decoder_input, decoder_hidden)

        # Ground truth: last feature column of the target date's table row.
        y = stock_embedding(target_tensor[di])[:, -1].view(1, 1, -1)

        origin_output = decoder_output.detach().item()
        modify_output = normFloat(origin_output)
        # The target is shifted by the rounding error of the prediction.
        # NOTE(review): with this sign the loss equals
        # |2*output - y - normFloat(output)|; if the intent was to score the
        # rounded prediction, the correction should be subtracted — confirm.
        # Created on the prediction's device/dtype so the criterion never
        # mixes CPU and CUDA tensors.
        modify_y = torch.tensor(
            [y.detach().item() + (modify_output - origin_output)],
            dtype=decoder_output.dtype,
            device=decoder_output.device,
        ).view(1, 1, -1)

        loss += criterion(decoder_output, modify_y)

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [151]:
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.0001):
    """Train the encoder/decoder pair on randomly drawn training windows.

    Each iteration draws one sample from the module-level ``train_pair``,
    converts it to tensors and performs a single ``train`` step with plain
    SGD and an L1 loss. Samples are drawn one at a time inside the loop
    (same draw order as before) instead of materialising ``n_iters`` tensor
    pairs before the first step.

    Args:
        encoder: Encoder module to optimise.
        decoder: Decoder module to optimise.
        n_iters: Number of training steps.
        print_every: Report the mean loss every this many steps.
        plot_every: Unused; kept for interface compatibility.
        learning_rate: Learning rate for both SGD optimizers.
    """
    start = time.time()
    print_loss_total = 0  # running sum, reset after every report

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    criterion = nn.L1Loss()

    for step in range(1, n_iters + 1):
        input_tensor, target_tensor = tensorsFromPair(random.choice(train_pair))

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)

        print_loss_total += loss
        if step % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, step / n_iters),
                                         step, step / n_iters * 100, print_loss_avg))

### 4. Evaluation

In [229]:
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    """Predict the ``target_len`` values that follow ``sentence``.

    Args:
        encoder: Trained encoder module.
        decoder: Trained decoder module.
        sentence: Sequence of dates (keys of ``date2idx``) used as input.
        max_length: Unused; kept so existing callers keep working.

    Returns:
        A list of ``target_len`` floats, each passed through ``normFloat``.

    Raises:
        ValueError: If ``sentence`` is empty.
    """
    with torch.no_grad():
        input_tensor = tensorFromSentence(date2idx, sentence)
        # Use the real input length rather than the global input_len, so a
        # window of a different size neither overruns the tensor nor gets
        # partly ignored.
        input_length = input_tensor.size(0)
        if input_length == 0:
            raise ValueError("evaluate() needs a non-empty input sequence")

        encoder_hidden = encoder.initHidden()

        encoder_output = None
        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)

        # Seed the decoder with the last step actually encoded, mirroring
        # what train() feeds the decoder for a full-length window.
        decoder_input = encoder_output[0, 0].view(1, 1, -1)
        decoder_hidden = encoder_hidden

        decoded_words = []

        for di in range(target_len):
            decoder_output, decoder_hidden, decoder_input = decoder(
                decoder_input, decoder_hidden)

            decoded_words.append(normFloat(decoder_output.detach().item()))

        return decoded_words

In [169]:
def evaluateRandomly(encoder, decoder, n=10):
    """Print predictions next to the ground truth for random test windows.

    For each of ``n`` samples drawn from the module-level ``test_pair`` this
    prints the input dates (``>``), the true values read from the last
    column of ``stock_embedding`` (``=``) and the model output (``<``).

    Args:
        encoder: Trained encoder module.
        decoder: Trained decoder module.
        n: Number of random samples to show.
    """
    for _ in range(n):
        pair = random.choice(test_pair)
        source_dates = pair[0]
        target_dates = pair[1]

        target_tensor = tensorFromSentence(date2idx, target_dates)
        ans = []
        for step_index in target_tensor:
            true_value = stock_embedding(step_index)[:, -1]
            ans.append(true_value.detach().item())

        print('>', source_dates)
        print('=', ans)
        output_words = evaluate(encoder, decoder, source_dates)
        print('<', output_words)
        print('')

In [244]:
# Spot-check a single random test window.
# NOTE: encoder1 and decoder1 are created in section 5 below, so that
# section has to be executed before this line.
evaluateRandomly(encoder1, decoder1, n=1)

> ['107/04/20', '107/04/23', '107/04/24', '107/04/25', '107/04/26', '107/04/27', '107/04/30', '107/05/02', '107/05/03', '107/05/04', '107/05/07', '107/05/08', '107/05/09', '107/05/10', '107/05/11', '107/05/14', '107/05/15', '107/05/16', '107/05/17', '107/05/18']
= [82.05000305175781, 81.75, 81.3499984741211, 81.55000305175781, 81.8499984741211]
< [64.85, 66.8, 66.85, 66.35, 65.8]



### 5. one click and GO

In [228]:
# Build a fresh encoder/decoder pair that shares one hidden size and move
# both to the selected device.
hidden_size = 256
encoder1 = EncoderRNN(hidden_size).to(device)
decoder1 = DecoderRNN(hidden_size).to(device)

# Train for 75000 single-sample steps, reporting the mean loss every 100 steps.
trainIters(encoder1, decoder1, 75000, print_every=100)

0m 7s (- 91m 14s) (100 0%) 68.7088
0m 12s (- 78m 32s) (200 0%) 66.7544
0m 18s (- 74m 48s) (300 0%) 65.5690
0m 23s (- 73m 6s) (400 0%) 56.1789
0m 28s (- 71m 31s) (500 0%) 23.9911
0m 34s (- 70m 25s) (600 0%) 5.8339
0m 39s (- 69m 29s) (700 0%) 5.0664
0m 44s (- 68m 58s) (800 1%) 5.2246
0m 49s (- 68m 33s) (900 1%) 5.2235
0m 55s (- 68m 1s) (1000 1%) 4.7214
1m 0s (- 67m 41s) (1100 1%) 4.9088
1m 5s (- 67m 37s) (1200 1%) 5.6655


KeyboardInterrupt: 