In [45]:
#파이토치 인스톨
!pip install torch torchvision
!pip install -U finance-datareader
import FinanceDataReader as fdr
import datetime
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import numpy as np
import argparse
import time
from copy import deepcopy # Add Deepcopy for args
from sklearn.metrics import mean_absolute_error

Requirement already up-to-date: finance-datareader in /usr/local/lib/python3.6/dist-packages (0.9.6)


In [46]:
%matplotlib inline
#데이터 생성과 화면 표시를 위한 라이브러리 호출
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d

# Fix the random seeds so that results can be compared between experiment runs.
seed = 444
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7f39681dd9f0>

In [0]:
class stockDataset(Dataset):
    """Sliding-window dataset over the most recent rows of one symbol's price history.

    The constructor downloads data with ``fdr.DataReader`` and keeps only the last
    ``x_frames + y_frames`` rows, so at most one (X, y) window is available.

    Args:
        symbol: Ticker code passed straight to ``fdr.DataReader``.
        x_frames: Number of consecutive rows returned as the input window ``X``.
        y_frames: Number of rows directly after ``X`` returned as the target ``y``.
    """

    def __init__(self, symbol, x_frames, y_frames):

        self.symbol = symbol
        self.x_frames = x_frames
        self.y_frames = y_frames

        # Request about twice as many calendar days as rows needed (plus 10);
        # presumably headroom for non-trading days -- TODO confirm.
        self.start = datetime.datetime.now() - datetime.timedelta(days=(x_frames+y_frames)*2+10)
        self.end = datetime.date.today()
        self.data = fdr.DataReader(self.symbol, self.start, self.end)
        self.data = self.data.tail(x_frames+y_frames)
        # Print the per-column NaN count as a quick data-quality check.
        print(self.data.isna().sum())

    def __len__(self):
        """Return the number of complete windows (never negative)."""
        # Clamp at 0: a negative value would make the built-in len() raise.
        return max(0, len(self.data) - (self.x_frames + self.y_frames) + 1)

    def __getitem__(self, idx):
        """Return the ``idx``-th window as ``(X, y)`` NumPy arrays.

        Every column is rescaled as ``(value + 1) / (anchor + 1)`` where the anchor
        is that column's value in the last row of ``X``. Columns are ordered
        High, Low, Open, Close, Change, Volume.

        Raises:
            IndexError: If ``idx`` is outside ``[0, len(self))``.
        """
        if not 0 <= idx < len(self):
            # Without this check an out-of-range index yields a silently truncated window.
            raise IndexError('window index {} out of range'.format(idx))

        window = self.data.iloc[idx:idx + self.x_frames + self.y_frames]
        window = window[['High', 'Low', 'Open', 'Close', 'Change', 'Volume']]
        # Use .iloc for the anchor row: the frame is indexed by date, and plain
        # integer [] lookups on such a Series are deprecated positional fallbacks.
        anchor = window.iloc[self.x_frames - 1]
        scaled = ((window + 1) / (anchor + 1)).values
        X = scaled[:self.x_frames]
        y = scaled[self.x_frames:]

        return X, y

In [0]:
class StockDataset(Dataset):
    """Sliding-window dataset over one symbol's price history for a fixed date range.

    Args:
        symbol: Ticker code passed straight to ``fdr.DataReader``.
        x_frames: Number of consecutive rows returned as the input window ``X``.
        y_frames: Number of rows directly after ``X`` returned as the target ``y``.
        start: Sequence of ``datetime.datetime`` constructor arguments, e.g. ``(2018, 5, 1)``.
        end: Sequence of ``datetime.datetime`` constructor arguments, e.g. ``(2020, 5, 20)``.
    """

    def __init__(self, symbol, x_frames, y_frames, start, end):

        self.symbol = symbol
        self.x_frames = x_frames
        self.y_frames = y_frames

        self.start = datetime.datetime(*start)
        self.end = datetime.datetime(*end)

        self.data = fdr.DataReader(self.symbol, self.start, self.end)
        # Print the per-column NaN count as a quick data-quality check.
        print(self.data.isna().sum())

    def __len__(self):
        """Return the number of complete windows (never negative)."""
        # Clamp at 0: a negative value would make the built-in len() raise.
        return max(0, len(self.data) - (self.x_frames + self.y_frames) + 1)

    def __getitem__(self, idx):
        """Return the ``idx``-th window as ``(X, y)`` NumPy arrays.

        Columns are ordered High, Low, Open, Close, Change, Volume. When
        ``x_frames > 1`` every column is rescaled as ``(value + 1) / (anchor + 1)``
        where the anchor is that column's value in the last row of ``X``; with
        ``x_frames <= 1`` the raw values are returned unchanged.

        Raises:
            IndexError: If ``idx`` is outside ``[0, len(self))``.
        """
        if not 0 <= idx < len(self):
            # Without this check an out-of-range index yields a silently truncated window.
            raise IndexError('window index {} out of range'.format(idx))

        window = self.data.iloc[idx:idx + self.x_frames + self.y_frames]
        window = window[['High', 'Low', 'Open', 'Close', 'Change', 'Volume']]
        if self.x_frames > 1:
            # Use .iloc for the anchor row: the frame is indexed by date, and plain
            # integer [] lookups on such a Series are deprecated positional fallbacks.
            anchor = window.iloc[self.x_frames - 1]
            window = (window + 1) / (anchor + 1)
        values = window.values
        X = values[:self.x_frames]
        y = values[self.x_frames:]

        return X, y

In [0]:
# 모델 정의
    
class MLPModel(nn.Module):
    """Fully connected network with a ReLU after every layer except the last.

    Args:
        _input: Number of input features.
        _output: Number of output values.
        _hidden_layers: Iterable of hidden-layer widths (list, tuple, ...); may be empty.
    """

    def __init__(self, _input, _output, _hidden_layers):
        super(MLPModel, self).__init__()
        self.inputv = _input
        self.outputv = _output
        # list(...) lets callers pass a tuple or any other iterable of widths,
        # not only a list.
        nodes = [_input] + list(_hidden_layers) + [_output]
        self.depth = len(nodes)
        # One Linear per consecutive pair of widths, created in order.
        linears = [nn.Linear(n_in, n_out) for n_in, n_out in zip(nodes[:-1], nodes[1:])]
        self.linears = nn.ModuleList(linears)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the model's prediction for input ``x``."""
        for linear in self.linears[:-1]:
            x = self.relu(linear(x))
        # No activation after the final layer.
        return self.linears[-1](x)

In [0]:

#trainloader = torch.cat(trainloader).view(len(trainloader), batch_size, -1)
def train(model, trainset, optimizer, loss_fn, device, batch_size=1):
    """Train ``model`` for one pass over ``trainset``.

    Args:
        model: Module to optimise; it is put into training mode.
        trainset: Dataset yielding ``(X, y)`` pairs; index 3 of ``y``'s last axis is
            used as the target (the 'Close' column in the dataset classes' column order).
        optimizer: Optimizer that updates ``model``'s parameters.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        device: Device the batches are moved to.
        batch_size: Samples per batch (default 1, the previous hard-coded behaviour).
            A trailing incomplete batch is dropped.

    Returns:
        Tuple ``(model, train_loss)`` where ``train_loss`` is the mean per-batch loss.

    Raises:
        ValueError: If ``trainset`` yields no complete batch.
    """
    trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, drop_last=True)
    if len(trainloader) == 0:
        # Fail with a clear message instead of dividing by zero below.
        raise ValueError('trainset yields no complete batch for batch_size={}'.format(batch_size))

    model.train()

    train_loss = 0.0
    for X, y in trainloader:

        X = X.float().to(device)
        y_true = y[:, :, 3].float().to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        y_pred = model(X)
        loss = loss_fn(y_pred.reshape(-1), y_true.reshape(-1))

        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_loss = train_loss / len(trainloader)
    return model, train_loss

In [0]:
def validate(model, valset, loss_fn, device, batch_size=1):
    """Evaluate ``model`` on ``valset`` without updating any parameters.

    Args:
        model: Module to evaluate; it is put into evaluation mode.
        valset: Dataset yielding ``(X, y)`` pairs; index 3 of ``y``'s last axis is
            used as the target (the 'Close' column in the dataset classes' column order).
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        device: Device the batches are moved to.
        batch_size: Samples per batch (default 1, the previous hard-coded behaviour).
            A trailing incomplete batch is dropped.

    Returns:
        The mean per-batch loss as a Python float.

    Raises:
        ValueError: If ``valset`` yields no complete batch.
    """
    valloader = DataLoader(valset, batch_size=batch_size, shuffle=False, drop_last=True)
    if len(valloader) == 0:
        # Fail with a clear message instead of dividing by zero below.
        raise ValueError('valset yields no complete batch for batch_size={}'.format(batch_size))

    model.eval()

    val_loss = 0.0
    with torch.no_grad():
        for X, y in valloader:

            X = X.float().to(device)
            y_true = y[:, :, 3].float().to(device)

            y_pred = model(X)
            loss = loss_fn(y_pred.reshape(-1), y_true.reshape(-1))

            val_loss += loss.item()

    val_loss = val_loss / len(valloader)
    return val_loss


In [68]:
# Training windows: symbol '001040', 1 input row -> 1 target row, requested range 2018-05-01 to 2020-05-20.
trainset = StockDataset('001040',1,1,(2018,5,1),(2020,5,20))
# Validation windows use a different symbol ('005930') over the same requested range.
# NOTE(review): with x_frames=1 StockDataset skips its rescaling step, so both sets carry
# raw values; the two symbols may be on very different scales -- confirm this is intended.
valset = StockDataset('005930',1,1,(2018,5,1),(2020,5,20))

Open      0
High      0
Low       0
Close     0
Volume    0
Change    0
dtype: int64
Open      0
High      0
Low       0
Close     0
Volume    0
Change    0
dtype: int64


In [52]:
# Hyperparameters for the MLP experiment.
# NOTE(review): this `batch_size` global is not read anywhere in the visible cells.
batch_size = 1
# Six features per row, matching the six columns selected by StockDataset.
input_dim = 6
# NOTE(review): `hidden_dim` is not read anywhere in the visible cells; `layers` sets the widths.
hidden_dim = 50
output_dim = 1
# Hidden-layer widths, from first to last hidden layer.
layers = [200,100,50,25,15,10,5]
# Number of passes over the training set.
epoch = 5

model = MLPModel(input_dim,output_dim,layers)
loss_fn = torch.nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Last expression of the cell: moves the model and displays its structure.
model.to(device)

MLPModel(
  (linears): ModuleList(
    (0): Linear(in_features=6, out_features=200, bias=True)
    (1): Linear(in_features=200, out_features=100, bias=True)
    (2): Linear(in_features=100, out_features=50, bias=True)
    (3): Linear(in_features=50, out_features=25, bias=True)
    (4): Linear(in_features=25, out_features=15, bias=True)
    (5): Linear(in_features=15, out_features=10, bias=True)
    (6): Linear(in_features=10, out_features=5, bias=True)
    (7): Linear(in_features=5, out_features=1, bias=True)
  )
  (relu): ReLU()
)

In [72]:
# Each pass trains on `trainset`, evaluates on `valset`, and reports both losses
# together with the wall-clock time the pass took.
for e in range(epoch):  # loop over the dataset multiple times
        ts = time.time()
        model, train_loss = train(model, trainset ,optimizer, loss_fn,device)
        val_loss = validate(model, valset, loss_fn,device)
        te = time.time()

        print('Epoch {}, Loss(train/val) {:2.5f}/{:2.5f}. Took {:2.2f} sec'.format(e, train_loss, val_loss, te-ts))

Epoch 0, Loss(train/val) 29566945.87945/97775849349.36526. Took 5.29 sec
Epoch 1, Loss(train/val) 41457081.24269/86541063448.01596. Took 5.28 sec
Epoch 2, Loss(train/val) 42276866.41932/183062439799.56885. Took 5.35 sec
Epoch 3, Loss(train/val) 25325832.06746/128557767240.55888. Took 5.35 sec
Epoch 4, Loss(train/val) 28590185.51858/108065834625.27745. Took 5.34 sec


In [0]:
class epochClass():
    """Full-batch training driver that records validation loss and a periodic test MAE.

    Args:
        _model: Module to train.
        _optimizer: Optimizer that updates ``_model``'s parameters.
        _device: Device all tensors are moved to before use.
    """

    def __init__(self,_model,_optimizer,_device):
        self.model = _model
        self.optimizer = _optimizer
        self.reg_loss = nn.MSELoss()
        # Mean absolute error, used for the periodic test metric stored in list_mae.
        self.mae_loss = nn.L1Loss()
        self.device = _device
        self.list_epoch = []
        self.list_val_loss = []
        self.list_mae = []
        self.list_mae_epoch = []

    def getModelOptim(self):
        """Return the ``(model, optimizer)`` pair held by this driver."""
        return self.model, self.optimizer

    def getEpochList(self):
        """Return ``(list_epoch, list_val_loss, list_mae, list_mae_epoch)``."""
        return self.list_epoch, self.list_val_loss, self.list_mae, self.list_mae_epoch

    def _to_device(self, values):
        """Convert ``values`` to a float tensor on this driver's device."""
        # Uses self.device rather than a module-level ``device`` so the class
        # does not depend on notebook-global state.
        return torch.Tensor(values).to(self.device)

    def startEpoch(self,epoch,interval,train_X,train_y,val_X,val_y,test_X,test_y):
        """Train for ``epoch`` full-batch steps, validating after every step.

        Every step appends its index to ``list_epoch`` and the validation MSE to
        ``list_val_loss``. Every ``interval`` steps (starting at step 0) the mean
        absolute error on the test data is appended to ``list_mae`` as a float,
        the step index to ``list_mae_epoch``, and both are printed.

        Args:
            epoch: Number of optimisation steps to run.
            interval: Spacing, in steps, between test evaluations.
            train_X, train_y: Training inputs and targets.
            val_X, val_y: Validation inputs and targets.
            test_X, test_y: Test inputs and targets.
        """
        # The data does not change between steps, so convert and move it once.
        train_in, train_true = self._to_device(train_X), self._to_device(train_y)
        val_in, val_true = self._to_device(val_X), self._to_device(val_y)
        test_in, test_true = self._to_device(test_X), self._to_device(test_y)

        for i in range(epoch):
            # ====== Train ====== #
            self.model.train()
            # Clear any gradients left over from the previous step.
            self.optimizer.zero_grad()

            pred_y = self.model(train_in)
            loss = self.reg_loss(pred_y.squeeze(), train_true)
            loss.backward()
            self.optimizer.step()
            self.list_epoch.append(i)

            # ====== Validation ====== #
            self.model.eval()
            # No gradients are needed for evaluation.
            with torch.no_grad():
                pred_y = self.model(val_in)
                val_loss = self.reg_loss(pred_y.squeeze(), val_true)
            self.list_val_loss.append(val_loss.item())

            # ====== Evaluation ======= #
            if i % interval == 0:

                # ====== Calculate MAE ====== #
                with torch.no_grad():
                    pred_y = self.model(test_in)
                    # Store a plain float so the history keeps no tensors or graphs.
                    mae = self.mae_loss(pred_y.squeeze(), test_true).item()
                self.list_mae.append(mae)
                self.list_mae_epoch.append(i)

                print(i, mae)

In [55]:
'''
import torch.optim as optim
from sklearn.metrics import mean_absolute_error
# 학습에 필요한 파라미터 정의

model = MLPModel(15,1,[200,150,100,50,25,10]) # Model을 생성해줍니다.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
model = model.to(device)
#modelPath = '/content/gdrive/My Drive/model.pt'
#model.load_state_dict(torch.load(modelPath))
# ===== Construct Optimizer ====== #
lr = 0.005 # Learning Rate를 하나 정해줍니다. (원할한 학습을 위해 손을 많이 탑니다)
optimizer = optim.Adam(model.parameters(), lr=lr) # Optimizer를 생성해줍니다.
'''



In [0]:
# 학습 시작
#ep = epochClass(model,optimizer,device)
#ep.startEpoch(4000,200,train_X,train_y,val_X,val_y,test_X,test_y)

In [0]:
# 학습결과 차트로 보여주기
#list_epoch,list_val_loss,list_mae,list_mae_epoch = ep.getEpochList()


In [0]:
class pltClass():
    """Plots the validation-loss curve and the periodic test-metric curve side by side.

    Note the argument order: the MAE epochs come before the MAE values, which is
    the reverse of the last two items returned by ``epochClass.getEpochList()``.

    Args:
        _list_epoch: x-values for the validation curve.
        _list_val_loss: Validation loss for each entry of ``_list_epoch``.
        _list_mae_epoch: x-values for the test-metric curve.
        _list_mae: Metric value for each entry of ``_list_mae_epoch``.
    """

    def __init__(self,_list_epoch, _list_val_loss, _list_mae_epoch, _list_mae):
        self.list_epoch = _list_epoch
        self.list_val_loss = _list_val_loss
        self.list_mae_epoch = _list_mae_epoch
        self.list_mae = _list_mae

    @staticmethod
    def _as_floats(values):
        """Return ``values`` as plain floats (accepts numbers and one-element tensors)."""
        # Tensors that track gradients or live on another device cannot be
        # plotted directly, so reduce every entry to a Python float first.
        return [float(v) for v in values]

    def printFigure(self):
        """Draw both curves in one figure and show it."""
        fig = plt.figure(figsize=(24,5))

        # ====== valid plot ====== #
        ax2 = fig.add_subplot(1, 2, 1)
        ax2.plot(self.list_epoch, self._as_floats(self.list_val_loss), '--', label='val')
        ax2.set_xlabel('epoch')
        ax2.set_ylabel('loss')
        ax2.grid()
        ax2.legend()
        ax2.set_title('epoch vs loss')

        # ====== test plot ====== #
        ax3 = fig.add_subplot(1, 2, 2)
        ax3.plot(self.list_mae_epoch, self._as_floats(self.list_mae), marker='x', label='test')

        ax3.set_xlabel('epoch')
        ax3.set_ylabel('mae')
        ax3.grid()
        ax3.legend()
        # This panel shows the MAE series, so title it accordingly.
        ax3.set_title('epoch vs mae')

        plt.show()



In [0]:

#pltc = pltClass(list_epoch,list_val_loss,list_mae_epoch,list_mae)
#pltc.printFigure()

In [60]:
'''model.to('cpu')
model.eval()

prediction = model(torch.Tensor(test_X))
pred = []
real = test_y
for i in prediction[:].T:
    for j in i:
        pred.append(j.item())

print(real)
print(pred)
'''

"model.to('cpu')\nmodel.eval()\n\nprediction = model(torch.Tensor(test_X))\npred = []\nreal = test_y\nfor i in prediction[:].T:\n    for j in i:\n        pred.append(j.item())\n\nprint(real)\nprint(pred)\n"

In [0]:
#modelPath = '/content/gdrive/My Drive/model.pt'
#torch.save(model.state_dict(), modelPath)

In [62]:
'''fig = plt.figure(figsize=(20,10))
xl = [n for n in range(1,len(pred)+1)]

# ====== valid plot ====== #
ax2 = fig.add_subplot(1, 1, 1)
ax2.plot(xl, pred, 'o', label='pred')
ax2.plot(xl, real, 'x', label='real')
ax2.set_xlabel('len')
ax2.set_ylabel('value')
#ax1.set_ylim(0, 5)
ax2.grid()
ax2.legend()
ax2.set_title('pred vs real')
'''

