In [1]:
import pandas_datareader.data as web

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import datetime

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

# S&P500 Dataset 만들기

In [2]:
class SP500(Dataset):
    def __init__(self, x_windows, y_windows, start, end):
        #self.symbol = symbol
        self.x_windows = x_windows
        self.y_windows = y_windows
        self.start = datetime.datetime(*start)
        self.end = datetime.datetime(*end)
        
        sp500 = web.DataReader('^GSPC', data_source='yahoo', start=self.start, end=self.end)
        
        # Normalization
        df_columns = sp500.columns
        scalar = StandardScaler()
        scalar.fit(sp500)

        df = scalar.transform(sp500)
        df = pd.DataFrame(df, columns=df_columns)
        df_label = np.array(df.copy())
        
        # Make None value
        ix = [(row, col) for row in range(df.shape[0]) for col in range(df.shape[1])]
        for row, col in random.sample(ix, int(round(.3*len(ix)))): # 30% 을 missing value로 만든다.
            df.iat[row, col] = None
            
        T = df.index.tolist()
        row = df.shape[0]
        col = df.shape[1]

        # Make mask Matrix (1==None value, 0==real value)
        M = np.ones([row, col])

        M = np.array(df.isnull(), dtype=int)
        # 우리는 missing을 0, real을 1로 해야되니까 1-M을 한다.
        M = 1 - M 
        
        df = np.array(df)
        # Make lag matrix
        lag_M = np.zeros([row, col])
        for i in range(1, row):
            for j in range(col):
                if M[i-1][j] == 1:
                    lag_M[i][j] = T[i] - T[i-1]

                elif M[i-1][j] == 0 and i>0:
                    lag_M[i][j] = lag_M[i-1][j] + T[i] - T[i-1]
        
        X, lag_M, df_label = torch.tensor(df), torch.tensor(lag_M), torch.tensor(df_label)
        
        # Make Slide Window data
        X = torch.tensor(np.nan_to_num(X))
        df_label = torch.tensor(np.nan_to_num(df_label))
        X_list = []
        y_list = []
        for i in range(len(X)-(self.x_windows + self.y_windows)):
            X_list.append(X[i : i+self.x_windows])
            y_list.append(df_label[i+self.x_windows : i+self.x_windows+self.y_windows])
        self.X = torch.stack(X_list)
        self.y = torch.stack(y_list)


        lag_list = []
        for i in range(len(lag_M)-(self.x_windows + self.y_windows)):
            lag_list.append(lag_M[i : i+self.x_windows])
        self.lag_M = torch.stack(lag_list)

            
    def __getitem__(self, index):
        return self.X[index].float(), self.lag_M[index].float(), self.y[index].float()

    def __len__(self):
        return len(self.X)

# DataLoader 만들기

- train data: 1999/01/01 ~ 2020/07/31 
- validation data: 2020/08/01 ~ 2020/09/15
- test data: 2020/09/16 ~ 2020/10/03

In [3]:
train_dataset = SP500(x_windows=10, y_windows=2, start=(1999,1,1), end=(2020,7,31))
val_dataset = SP500(x_windows=10, y_windows=2, start=(2020,8,1), end=(2020,9,15))
test_dataset = SP500(x_windows=10, y_windows=2, start=(2020, 9, 16), end=(2020, 10, 3))

train_loader = torch.utils.data.DataLoader(train_dataset, 
                                           batch_size=512, 
                                           drop_last=True)

val_loader = torch.utils.data.DataLoader(val_dataset, 
                                         batch_size=512, 
                                         drop_last=False)

test_loader = torch.utils.data.DataLoader(test_dataset, 
                                          batch_size=512, 
                                         drop_last=False)

  return self.partial_fit(X, y)
  app.launch_new_instance()
  return self.partial_fit(X, y)
  app.launch_new_instance()
  return self.partial_fit(X, y)
  app.launch_new_instance()


# Decay vector 구현

In [5]:
class TemporalDecay(nn.Module):
    def __init__(self, input_size, output_size, device):
        super(TemporalDecay, self).__init__()
        self.input_size=input_size
        self.output_size=output_size
        self.build(self.input_size, self.output_size) # nn.Linear()와 동일, 그저 파라미터를 개별적으로 구현하려고
        self.device = device
        
    def build(self, input_size, output_size):
        self.W = Parameter(torch.Tensor(self.output_size, self.input_size))
        self.b = Parameter(torch.Tensor(self.output_size))

        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.W.size(0))
        self.W.data.uniform_(-stdv, stdv)
        if self.b is not None:
            self.b.data.uniform_(-stdv, stdv)

    def forward(self,lag_matrix):
       
        beta = F.relu(F.linear(lag_matrix,self.W, self.b))
        beta = 1/torch.exp(beta).to(device)
        
        return beta

# GRU-I 구현

In [6]:
class Model(nn.Module):
    def __init__(self, input_dim, window_size, device):
        super(Model, self).__init__()
        self.window_size = window_size
        # 일반 gru로는 decay vector를 연산할 수 없어서 하나만 뽑아서 decay를 연산하는 작업을 해야함.
        self.grui_cell = nn.GRUCell(input_dim, 256) # GRUCell -> 하나의 셀만 만든다. 여기서 나온 hidden state에 decay vector 곱해서 for문을 돌림.
        self.time_decay = TemporalDecay(input_size = input_dim, output_size=1, device=device) # decay vector
        self.dropout = nn.Dropout(p=0.5)
        self.linear = nn.Linear(256, input_dim)
        self.device = device
    def forward(self, x, lag_matrix):
        result = []
        h = torch.zeros(x.size(0), 256).to(device)
        #c = torch.zeros(x.size(0), self.hidden_size).cuda()
        
        for t in range(self.window_size): # window_size = 10 (seq 개수)
            f = lag_matrix[:,t,:] # lage_matrix == [batch, seq, feature] -> seq만 돌린다.
            
            xinput = x[:,t,:] # x == [batch, seq, feature]
         
            beta = self.time_decay(f)
            beta = beta
            h = h * beta # 새로운 hidden vector (decay vector가 곱해진 놈)
            h = self.grui_cell(xinput, h)
            result.append(h)
            result_tensor = torch.stack(result, dim=1)
        # print(result_tensor.shape)

        output = self.linear(result_tensor[:,-1,:])
        return output

In [7]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = Model(input_dim=6, window_size=10, device=device).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 100
best_mae = 1000

for epoch in range(epochs):
    loss_list = []
    y_list = []
    output_list = []
    for i, batch in enumerate(train_loader):
        x = batch[0].to(device)
        lag_matrix = batch[1].to(device)
        label = batch[2][:,-1,:].to(device)

        output = model(x, lag_matrix)

        loss = criterion(output, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_list.append(loss.item())

    loss_list = sum(loss_list) / len(loss_list)
    
    val_loss_list = []
    with torch.no_grad():
        for i, batch in enumerate(val_loader):
            x = batch[0].to(device)
            lag_matrix = batch[1].to(device)
            label = batch[2][:,-1,:]

            output = model(x, lag_matrix)

            y_list.append(label.cpu().detach().numpy())
            output_list.append(output.cpu().detach().numpy())
        
        y_list = np.squeeze(y_list)
        output_list = np.squeeze(output_list)
        mae = mean_absolute_error(y_list, output_list)
        rmse = mean_squared_error(y_list, output_list)**0.5

        print(f'Epoch[{epoch}/{epochs}] | Training loss:{loss_list:.3f} | MAE:{mae:.3f} | RMSE:{rmse:.3f}')
        
        if mae < best_mae:
            best_mae = mae
            torch.save(model.state_dict(), 'grui.pth')
print(f'Best MAE:{best_mae:.3f}') 

Epoch[0/100] | Training loss:0.689 | MAE:0.758 | RMSE:0.978
Epoch[1/100] | Training loss:0.367 | MAE:1.125 | RMSE:1.249


KeyboardInterrupt: 

In [8]:
model = Model(input_dim=6, window_size=10, device=device).to(device)
model.load_state_dict(torch.load('grui.pth'))
model.eval()
y_list = []
output_list = []
with torch.no_grad():
    for i, batch in enumerate(test_loader):
        x = batch[0].to(device)
        lag_matrix = batch[1].to(device)
        label = batch[2][:,-1,:]

        output = model(x, lag_matrix)

        y_list.append(label.cpu().detach().numpy())
        output_list.append(output.cpu().detach().numpy())
        
    y_list = np.squeeze(y_list)
    output_list = np.squeeze(output_list)
    mae = mean_absolute_error(y_list, output_list)
    rmse = mean_squared_error(y_list, output_list)**0.5
    r2 = r2_score(y_list, output_list)

    print(f'MAE:{mae:.3f} | RMSE:{rmse:.3f}')


MAE:0.974 | RMSE:1.028


# Vanila GRU

In [8]:
class SP500(Dataset):
    def __init__(self, x_windows, y_windows, start, end):
        #self.symbol = symbol
        self.x_windows = x_windows
        self.y_windows = y_windows
        self.start = datetime.datetime(*start)
        self.end = datetime.datetime(*end)
        
        sp500 = web.DataReader('^GSPC', data_source='yahoo', start=self.start, end=self.end)
        
        # Normalization
        df_columns = sp500.columns
        scalar = StandardScaler()
        scalar.fit(sp500)

        df = scalar.transform(sp500)
        df = pd.DataFrame(df, columns=df_columns)
        df_label = np.array(df.copy())
        
        # Make None value
        ix = [(row, col) for row in range(df.shape[0]) for col in range(df.shape[1])]
        for row, col in random.sample(ix, int(round(.3*len(ix)))):
            df.iat[row, col] = None
        df = df.fillna(0)
        #df = df.fillna(df.mean())
        df = np.array(df)

        X, df_label = torch.tensor(df), torch.tensor(df_label)
        
        # Make Slide Window data
        X = torch.tensor(np.nan_to_num(X))
        df_label = torch.tensor(np.nan_to_num(df_label))
        X_list = []
        y_list = []
        for i in range(len(X)-(self.x_windows + self.y_windows)):
            X_list.append(X[i : i+self.x_windows])
            y_list.append(df_label[i+self.x_windows : i+self.x_windows+self.y_windows])
        self.X = torch.stack(X_list)
        self.y = torch.stack(y_list)
            
    def __getitem__(self, index):
        return self.X[index].float(), self.y[index].float()

    def __len__(self):
        return len(self.X)



In [9]:
train_dataset = SP500(x_windows=10, y_windows=2, start=(1999,1,1), end=(2020,7,31))
val_dataset = SP500(x_windows=10, y_windows=2, start=(2020,8,1), end=(2020,9,15))
test_dataset = SP500(x_windows=10, y_windows=2, start=(2020, 9, 16), end=(2020, 10, 3))

train_loader = torch.utils.data.DataLoader(train_dataset, 
                                           batch_size=512, 
                                           drop_last=True)

val_loader = torch.utils.data.DataLoader(val_dataset, 
                                         batch_size=512, 
                                         drop_last=False)

test_loader = torch.utils.data.DataLoader(test_dataset, 
                                          batch_size=512, 
                                         drop_last=False)

  return self.partial_fit(X, y)
  app.launch_new_instance()
  return self.partial_fit(X, y)
  app.launch_new_instance()
  return self.partial_fit(X, y)
  app.launch_new_instance()


In [10]:
class GRU(nn.Module):
    def __init__(self, device):
        super(GRU, self).__init__()
        self.num_layers = 1
        self.gru = nn.GRU(6, 256, num_layers=self.num_layers)
        self.linear = nn.Linear(256, 6)
        self.device = device
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(1), 256).to(device)
        
        out, _ = self.gru(x, h0)
        
        out = self.linear(out[:, -1, :])
        return out

In [None]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = GRU(device).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


epochs = 100
best_mae = 10000

for epoch in range(epochs):
    loss_list = []
    y_list = []
    output_list = []
    model.train()
    for i, batch in enumerate(train_loader):
        x = batch[0].to(device)
        label = batch[1][:,-1,:].to(device)

        output = model(x)
        loss = criterion(output, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_list.append(loss.item())

    loss_list = sum(loss_list) / len(loss_list)
    model.eval()
    with torch.no_grad():
        for i, batch in enumerate(val_loader):
            x = batch[0].to(device)
            label = batch[1][:,-1,:].to(device)
            output = model(x)
            y_list.append(label.cpu().detach().numpy())
            output_list.append(output.cpu().detach().numpy())
            
        y_list = np.squeeze(y_list)
        output_list = np.squeeze(output_list)
        mae = mean_absolute_error(y_list, output_list)
        rmse = mean_squared_error(y_list, output_list)**0.5

        print(f'Epoch[{epoch}/{epochs}] Training loss:{loss_list:.3f} | MAE:{mae:.3f} | RMSE:{rmse:.3f}')
        
        if mae < best_mae:
            best_mae = mae
            torch.save(model.state_dict(), 'gru_imp.pth')
print(f'Best MAE:{best_mae:.3f}') 

In [4]:
model.load_state_dict(torch.load('gru_imp.pth'))
model.eval()
y_list = []
output_list = []
with torch.no_grad():
    for i, batch in enumerate(test_loader):
        x = batch[0].to(device)
        label = batch[1][:,-1,:].to(device)
        output = model(x)
        y_list.append(label.cpu().detach().numpy())
        output_list.append(output.cpu().detach().numpy())

    y_list = np.squeeze(y_list)
    output_list = np.squeeze(output_list)
    mae = mean_absolute_error(y_list, output_list)
    rmse = mean_squared_error(y_list, output_list)**0.5
    r2 = r2_score(y_list, output_list)

    print(f'MAE:{mae:.3f} | RMSE:{rmse:.3f}')


NameError: name 'model' is not defined