In [None]:
import numpy as np
from sklearn.metrics import r2_score
from pandas_datareader import data as pdr
import yfinance as yf
yf.pdr_override()
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
import matplotlib.pyplot as plt
plt.style.use("fivethirtyeight")
%matplotlib inline

In [None]:
import torch

import copy
import numpy as np
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.model_selection import train_test_split

from torch import nn, optim

import torch.nn.functional as F
#from arff2pandas import a2p


%matplotlib inline
%config InlineBackend.figure_format='retina'

# Seaborn theme applied to every figure drawn after this cell.
sns.set(style='whitegrid', palette='muted', font_scale=1.2)

# Custom colour cycle; installed below, replacing the 'muted' palette chosen above.
HAPPY_COLORS_PALETTE = ["#01BEFE", "#FFDD00", "#FF7D00", "#FF006D", "#ADFF02", "#8F00FF"]

sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))

# Default figure size as (width, height).
rcParams['figure.figsize'] = 12, 8

# Seed NumPy and PyTorch so their random number generation is repeatable.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

In [None]:
# Ticker symbol to download.
s_target = 'GOOG'
# Fetch the price history from 2014-01-01 up to the moment this cell runs.
# NOTE(review): end=datetime.now() makes the dataset depend on the run date, so
# results are not reproducible across days — consider pinning an end date.
df = pdr.get_data_yahoo(s_target, start='2014-01-01', end=datetime.now())
df.head()

In [None]:
# List the column labels of the downloaded frame.
df.columns

In [None]:
# Line chart of the 'Close' column over the whole downloaded period.
fig, ax = plt.subplots(figsize=(16, 6))
ax.plot(df['Close'])
ax.set_title(s_target + ' Close Price History')
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('Close Price USD ($)', fontsize=14)
plt.show()

In [None]:
# Use every column of the downloaded frame (not only Close) as model features.
data = df
dataset = data.values

# Scale each column to the 0-1 range, fitting the scaler on the training
# portion only. Fitting on the full history would leak the test period's
# min/max into training; as a consequence, test-period values may fall
# outside [0, 1] after transformation.
# The 0.8 fraction matches the one used for `training_data_len` further down.
n_fit_rows = int(np.ceil(len(dataset) * .8))
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset[:n_fit_rows])
scaled_data = scaler.transform(dataset)
scaled_data

In [None]:
# Peek at the first scaled row (the slice keeps it two-dimensional).
scaled_data[0:1]

In [None]:
# Treat the first 80% of the rows (rounded up) as training data.
training_data_len = int(np.ceil( len(dataset) * .8 ))

In [None]:
# Number of consecutive rows that make up one input window.
window_size = 60

# Training rows: the first `training_data_len` scaled rows, all columns.
train_data = scaled_data[:training_data_len]

In [None]:
# Slide a `window_size`-row window over train_data: each x_train item is one
# window (all columns); each y_train item is column 0 of the row that
# immediately follows that window.
window_starts = range(len(train_data) - window_size)
x_train = [train_data[start:start + window_size] for start in window_starts]
y_train = [train_data[start + window_size, 0] for start in window_starts]

In [None]:
# Stack the Python lists into NumPy arrays.
x_train = np.array(x_train)
y_train = np.array(y_train)

In [None]:
# One float32 tensor per training window. Note: this rebinds `train_data`,
# which until here held the scaled training rows.
train_data = [torch.tensor(window, dtype=torch.float32) for window in x_train]

In [None]:
# Build the test windows.
# Start `window_size` rows before the split so the first test window ends
# exactly at the train/test boundary.
test_data = scaled_data[training_data_len - window_size:]

# Unscaled rows of the test period, all columns.
y_test = dataset[training_data_len:]

# One window of `window_size` rows per test row, stacked into a NumPy array.
x_test = np.array([
    test_data[end - window_size:end]
    for end in range(window_size, len(test_data))
])

In [None]:
# Inspect the first scaled row of the test slice.
test_data[0]

In [None]:
# One float32 tensor per test window. Note: this rebinds `test_data`,
# which until here held the scaled test rows.
test_data = [torch.tensor(window, dtype=torch.float32) for window in x_test]

In [None]:
# Unpack (number of windows, rows per window, columns per row) from the
# stacked training windows, then display the shape.
n_seq, seq_len, n_features = x_train.shape
x_train.shape

In [None]:
# Shape of the stacked test windows.
x_test.shape

In [None]:
class Encoder(nn.Module):
  """Two stacked LSTMs that compress one sequence into one embedding vector.

  Args:
    seq_len: Length of the input sequences (stored; not used by forward).
    n_features: Number of values per time step.
    embedding_dim: Size of the returned embedding. The first LSTM uses twice
      this many hidden units.
  """

  def __init__(self, seq_len, n_features, embedding_dim=64):
    super().__init__()
    self.seq_len = seq_len
    self.n_features = n_features
    self.embedding_dim = embedding_dim
    self.hidden_dim = 2 * embedding_dim

    # Wide layer first, then narrow down to the embedding size.
    self.rnn1 = nn.LSTM(n_features, self.hidden_dim, num_layers=1, batch_first=True)
    self.rnn2 = nn.LSTM(self.hidden_dim, embedding_dim, num_layers=1, batch_first=True)

  def forward(self, x):
    """Encode a single sequence.

    Args:
      x: Tensor whose elements can be arranged as (time steps, n_features).

    Returns:
      Tensor of shape (1, embedding_dim): the final hidden state of the
      second LSTM.
    """
    # Treat the input as a batch of one sequence; the time axis is inferred.
    batch = x.reshape(1, -1, self.n_features)
    features, _ = self.rnn1(batch)
    _, (last_hidden, _) = self.rnn2(features)
    return last_hidden.reshape(-1, self.embedding_dim)

In [None]:
class Decoder(nn.Module):
    """Two stacked LSTMs plus a linear head that expand an embedding into a sequence.

    Args:
        seq_len: Number of time steps to generate.
        input_dim: Size of the embedding vector to decode. The second LSTM
            uses twice this many hidden units.
        n_features: Number of values produced per time step.
    """

    def __init__(self, seq_len, input_dim=64, n_features=1):
        super().__init__()

        self.seq_len, self.input_dim = seq_len, input_dim
        self.hidden_dim, self.n_features = 2 * input_dim, n_features

        self.rnn1 = nn.LSTM(
            input_size=input_dim,
            hidden_size=input_dim,
            num_layers=1,
            batch_first=True
        )

        self.rnn2 = nn.LSTM(
            input_size=input_dim,
            hidden_size=self.hidden_dim,
            num_layers=1,
            batch_first=True
        )

        self.output_layer = nn.Linear(self.hidden_dim, n_features)

    def forward(self, x):
        """Decode one embedding into a sequence.

        Args:
            x: Tensor holding exactly `input_dim` values (e.g. shape (1, input_dim)).

        Returns:
            Tensor of shape (1, seq_len, n_features).
        """
        # Present the same embedding at every time step, as a batch of ONE
        # sequence. Using n_features as the batch size (as a univariate-only
        # layout would) yields n_features identical copies and an output of
        # shape (n_features, seq_len, n_features) whenever n_features > 1.
        x = x.reshape(1, 1, self.input_dim).repeat(1, self.seq_len, 1)

        x, _ = self.rnn1(x)
        x, _ = self.rnn2(x)

        # The linear head acts on the last axis, one time step at a time.
        return self.output_layer(x)


In [None]:
class RecurrentAutoencoder(nn.Module):
  """Encoder followed by Decoder: maps a sequence to an embedding and back.

  Both sub-modules are moved to the module-level `device` on construction, so
  `device` must already be defined when this class is instantiated.

  Args:
    seq_len: Sequence length passed to both sub-modules.
    n_features: Values per time step, for both the input and the output.
    embedding_dim: Size of the intermediate embedding.
  """

  def __init__(self, seq_len, n_features, embedding_dim=64):
    super().__init__()

    self.encoder = Encoder(seq_len, n_features, embedding_dim).to(device)
    self.decoder = Decoder(seq_len, input_dim=embedding_dim, n_features=n_features).to(device)

  def forward(self, x):
    """Return the decoder's reconstruction of `x`."""
    return self.decoder(self.encoder(x))

In [None]:
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Autoencoder sized from the training windows, with a 128-value embedding.
model = RecurrentAutoencoder(seq_len, n_features, embedding_dim=128).to(device)

In [None]:
def _reconstruction_loss(model, criterion, seq_true):
  """Return criterion(model(seq_true), seq_true) for one sequence."""
  seq_pred = model(seq_true)
  # If prediction and target differ only by singleton axes (e.g. a leading
  # batch axis of 1), view the prediction in the target's shape so the loss
  # does not depend on broadcasting.
  if seq_pred.shape != seq_true.shape and seq_pred.squeeze().shape == seq_true.squeeze().shape:
    seq_pred = seq_pred.reshape(seq_true.shape)
  return criterion(seq_pred, seq_true)


def train_model(model,train_dataset,val_dataset,n_epochs):
  """Train `model` to reconstruct its input, one sequence per optimiser step.

  Uses Adam (lr=0.001) and a summed L1 loss between each sequence and the
  model's output for it.

  Args:
    model: Module whose output is compared against its own input.
    train_dataset: Iterable of tensors used for gradient updates.
    val_dataset: Iterable of tensors used only to score each epoch.
    n_epochs: Number of passes over `train_dataset`.

  Returns:
    (model, history): the model in eval mode, loaded with the weights of the
    epoch that had the lowest mean validation loss, and a dict with the
    per-epoch mean losses under the keys 'train' and 'val'.
  """
  # Work on whatever device the model already lives on rather than relying on
  # a notebook-level global.
  first_param = next(model.parameters(), None)
  device = first_param.device if first_param is not None else torch.device("cpu")

  optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
  criterion = nn.L1Loss(reduction='sum').to(device)
  history = dict(train=[], val=[])

  best_model_wts = copy.deepcopy(model.state_dict())
  # Infinity guarantees the first finite score is accepted, whatever its scale.
  best_loss = float("inf")

  for epoch in range(1, n_epochs + 1):
    model.train()
    train_losses = []
    for seq_true in train_dataset:
      optimizer.zero_grad()
      loss = _reconstruction_loss(model, criterion, seq_true.to(device))
      loss.backward()
      optimizer.step()
      train_losses.append(loss.item())

    model.eval()
    val_losses = []
    with torch.no_grad():
      for seq_true in val_dataset:
        loss = _reconstruction_loss(model, criterion, seq_true.to(device))
        val_losses.append(loss.item())

    train_loss = np.mean(train_losses) if train_losses else float("nan")
    val_loss = np.mean(val_losses) if val_losses else float("nan")

    history['train'].append(train_loss)
    history['val'].append(val_loss)

    # Checkpoint on validation loss. Without any validation data, fall back to
    # the training loss so the trained weights are not discarded at the end.
    selection_loss = val_loss if val_losses else train_loss
    if selection_loss < best_loss:
      best_loss = selection_loss
      best_model_wts = copy.deepcopy(model.state_dict())

    print(f'Epoch {epoch}: train loss {train_loss} val loss {val_loss}')

  model.load_state_dict(best_model_wts)

  return model.eval(),history

In [None]:
# Train for 30 epochs, scoring every epoch on the test windows.
# NOTE(review): test_data doubles as the validation set here, so the returned
# "best" weights are selected using the test period — consider holding out
# part of train_data for validation instead.
model, history = train_model(model, train_dataset=train_data, val_dataset=test_data, n_epochs=30)

In [None]:
# Reconstruct the first training window with the trained model.
seq_true = train_data[0].to(device)
# Inference only: disable autograd so no computation graph is built or kept.
with torch.no_grad():
    seq_pred = model(seq_true)
seq_true