In [1]:
from sklearn.preprocessing import MinMaxScaler

from torch.utils.data import TensorDataset
from torch.utils.data import ConcatDataset
from torch.utils.data import DataLoader

import torch.optim as optim
import torch.nn as nn
import torch

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np 

In [2]:
device = torch.device('mps')

BATCH_SIZE = 20
EPOCH = 3000
LEARNING_RATE = 1e-3

# Read Data

In [3]:
df = pd.read_csv('./data/kospi.csv')
df.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,431.0,431.0,431.0,431.0,431.0,431.0
mean,2122.449765,2134.756032,2106.976497,2121.195427,2121.195427,648225.7
std,159.523899,155.808986,161.926432,159.038127,159.038127,259363.7
min,1474.449951,1516.75,1439.430054,1457.640015,1457.640015,0.0
25%,2057.589966,2065.619995,2041.244995,2055.299927,2055.299927,440650.0
50%,2132.379883,2141.27002,2119.070068,2131.23999,2131.23999,608700.0
75%,2211.449951,2220.595092,2196.599976,2209.244995,2209.244995,810950.0
max,2455.280029,2458.169922,2429.939941,2443.580078,2443.580078,1984200.0


In [4]:
scaler = MinMaxScaler()
df[['Open', 'High', 'Low', 'Close', 'Volume']] = scaler.fit_transform(df[['Open', 'High', 'Low', 'Close', 'Volume']])
df.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,431.0,431.0,431.0,431.0,431.0,431.0
mean,0.660665,0.656462,0.673942,0.673018,2121.195427,0.326694
std,0.162642,0.165504,0.163478,0.161306,159.038127,0.130715
min,0.0,0.0,0.0,0.0,1457.640015,0.0
25%,0.594537,0.583024,0.607581,0.606183,2055.299927,0.222079
50%,0.670789,0.663381,0.686152,0.683206,2131.23999,0.306774
75%,0.751404,0.747642,0.764424,0.762323,2209.244995,0.408704
max,1.0,1.0,1.0,1.0,2443.580078,1.0


In [5]:
X = df[['Open', 'Low', 'Volume']].values
y = df['Close'].values

In [6]:
def seq_data(x, y, sequence_length):
    x_seq = []
    y_seq = []
    for idx in range(len(x) - sequence_length):
        x_seq.append(x[idx : idx + sequence_length])
        y_seq.append(y[idx + sequence_length])

    return torch.FloatTensor(x_seq).to(device), torch.FloatTensor(y_seq).to(device).view(-1, 1)

In [7]:
split = 200
sequence_length = 5

x_seq, y_seq = seq_data(X, y, sequence_length)

  return torch.FloatTensor(x_seq).to(device), torch.FloatTensor(y_seq).to(device).view(-1, 1)


In [8]:
X_train_seq = x_seq[:split]
y_train_seq = y_seq[:split]

X_test_seq = x_seq[split:]
y_test_seq = y_seq[split:]

print(X_train_seq.size(), y_train_seq.size())
print(X_test_seq.size(), y_test_seq.size())

torch.Size([200, 5, 3]) torch.Size([200, 1])
torch.Size([226, 5, 3]) torch.Size([226, 1])


In [9]:
train = TensorDataset(X_train_seq, y_train_seq)
test  = TensorDataset(X_test_seq, y_test_seq)

train_loader = DataLoader(dataset=train, batch_size=BATCH_SIZE, shuffle=True)
test_loader  = DataLoader(dataset=test, batch_size=BATCH_SIZE)

# Modeling

In [10]:
input_size = x_seq.size(2)
num_layers = 2
hidden_size = 8

In [11]:
class VanillaRNN(nn.Module):
    def __init__(self, input_size, hidden_size, sequence_length, num_layers, device):
        super(VanillaRNN, self).__init__()
        self.device = device
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc  = nn.Sequential(nn.Linear(hidden_size * sequence_length, 1), nn.Sigmoid())

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size()[0], self.hidden_size).to(self.device)
        out, _ = self.rnn(x, h0)
        out = out.reshape(out.shape[0], -1)
        out = self.fc(out)
        return out

In [12]:
model = VanillaRNN(
    input_size=input_size,
    hidden_size=hidden_size,
    sequence_length=sequence_length,
    num_layers=num_layers,
    device=device).to(device)

In [13]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [14]:
loss_graph = list()
val_loss_graph = list()
n = len(train_loader)

for epoch in range(EPOCH + 1):
    running_loss = 0.0
    for data in train_loader:
        seq, target = data
        out = model(seq)
        loss = criterion(out, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    loss_graph.append(running_loss / n)

    with torch.no_grad():
        val_running_loss = 0.0
        for data in test_loader:
            seq, target = data
            out = model(seq)
            loss = criterion(out, target)
            val_running_loss += loss.item()
        val_loss_graph.append(val_running_loss / len(test_loader))

    if epoch % 100 == 0:
        print('[Epoch : %d] Train Loss : %.4f\tTest Loss : %.4f' % (epoch, running_loss / n, val_running_loss / len(test_loader)))

[Epoch : 0] Train Loss : 0.0121	Test Loss : 0.0465
[Epoch : 100] Train Loss : 0.0010	Test Loss : 0.0104
[Epoch : 200] Train Loss : 0.0006	Test Loss : 0.0063
[Epoch : 300] Train Loss : 0.0004	Test Loss : 0.0047
[Epoch : 400] Train Loss : 0.0004	Test Loss : 0.0034
[Epoch : 500] Train Loss : 0.0004	Test Loss : 0.0039
[Epoch : 600] Train Loss : 0.0003	Test Loss : 0.0036
[Epoch : 700] Train Loss : 0.0003	Test Loss : 0.0036
[Epoch : 800] Train Loss : 0.0003	Test Loss : 0.0035
[Epoch : 900] Train Loss : 0.0004	Test Loss : 0.0035
[Epoch : 1000] Train Loss : 0.0003	Test Loss : 0.0035
[Epoch : 1100] Train Loss : 0.0003	Test Loss : 0.0041
[Epoch : 1200] Train Loss : 0.0003	Test Loss : 0.0038
[Epoch : 1300] Train Loss : 0.0003	Test Loss : 0.0042
[Epoch : 1400] Train Loss : 0.0003	Test Loss : 0.0039
[Epoch : 1500] Train Loss : 0.0003	Test Loss : 0.0045


In [None]:
plt.figure(figsize=(20, 10))
plt.plot(val_loss_graph)
plt.plot(loss_graph)
plt.show()

# Prediction

In [None]:
concatdata = ConcatDataset([train, test])
data_loader = DataLoader(dataset=concatdata, batch_size=100)

with torch.no_grad():
    pred = list()
    model.eval()

    for data in data_loader:
        seq, target = data
        out = model(seq)
        pred += out.tolist()

In [None]:
plt.plot(np.ones(100) * len(train), np.linspace(0, 1, 100), '--', linewidth=0.6)
plt.plot(df['Close'][sequence_length:].values, '--')
plt.plot(pred, 'b', linewidth = 0.6)
plt.show()