In [1]:
import glob
import os
import random
from datetime import datetime, timedelta

import joblib
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from pykrx import stock
from sklearn.preprocessing import MinMaxScaler
from torch import optim
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

In [2]:
## Sliding-window dataset: pairs each input window with the window that follows it
class windowDataset(Dataset):
    """Sliding-window (input, target) samples cut from a time-major array.

    Sample ``i`` pairs the ``input_window`` rows of ``y`` starting at row
    ``stride * i`` with the ``output_window`` rows that immediately follow.
    Each window is flattened row by row, so with ``F`` values per row the
    items returned by ``__getitem__`` have shapes ``(input_window * F, 1)``
    and ``(output_window * F, 1)``.

    ``F`` is taken from ``y`` itself (product of all axes after the first)
    instead of being fixed at 30, so arrays with any number of columns --
    including 1-D series -- are supported.

    Args:
        y: array-like whose first axis is time.
        input_window: number of rows in each input window.
        output_window: number of rows in each target window.
        stride: number of rows between the starts of consecutive samples.

    Raises:
        ValueError: if ``y`` is so short that the sample count is negative.
    """

    def __init__(self, y, input_window, output_window, stride):
        y = np.asarray(y)
        # Total number of time steps.
        L = y.shape[0]
        # Values per time step (1 for a 1-D series).
        n_features = int(np.prod(y.shape[1:]))
        # Number of windows obtained when sliding by `stride`.
        num_samples = (L - input_window - output_window) // stride + 1
        if num_samples < 0:
            raise ValueError(
                "series of length %d is too short for input_window=%d, "
                "output_window=%d, stride=%d"
                % (L, input_window, output_window, stride)
            )

        # Build directly in (sample, flattened window, 1) layout.
        X = np.zeros([num_samples, input_window * n_features, 1])
        Y = np.zeros([num_samples, output_window * n_features, 1])

        for i in range(num_samples):
            start_x = stride * i
            end_x = start_x + input_window
            X[i, :, 0] = y[start_x:end_x].reshape(-1)

            # The target window starts exactly where the input window ends.
            end_y = end_x + output_window
            Y[i, :, 0] = y[end_x:end_y].reshape(-1)

        self.x = X
        self.y = Y
        self.len = len(X)

    def __getitem__(self, i):
        return self.x[i], self.y[i]

    def __len__(self):
        return self.len

In [3]:
## Encoder: feeds the input sequence through an LSTM to produce the state handed to the decoder
class lstm_encoder(nn.Module):
    """LSTM encoder half of the encoder-decoder model.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers (default 1).
    """

    def __init__(self, input_size, hidden_size, num_layers = 1):
        super().__init__()
        self.input_size, self.hidden_size, self.num_layers = input_size, hidden_size, num_layers

        # batch_first=True: inputs are laid out as (batch, seq, feature).
        lstm_config = dict(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.lstm = nn.LSTM(**lstm_config)

    def forward(self, x_input):
        """Encode ``x_input``; return ``(per-step outputs, final LSTM state)``.

        The final state is also kept on the instance as ``self.hidden``.
        """
        encoded = self.lstm(x_input)
        self.hidden = encoded[1]
        return encoded[0], self.hidden

In [4]:
## Decoder: takes the previous value of the sequence and the previous LSTM state, predicts the next value
## A final linear layer maps the hidden state back to the input size
class lstm_decoder(nn.Module):
    """Single-step LSTM decoder.

    Args:
        input_size: number of features per time step (also the output size).
        hidden_size: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers (default 1).
    """

    def __init__(self, input_size, hidden_size, num_layers = 1):
        super(lstm_decoder, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size = input_size, hidden_size = hidden_size, num_layers = num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, input_size)

    def forward(self, x_input, encoder_hidden_states):
        """Advance the decoder by one time step.

        Args:
            x_input: tensor of shape ``(batch, input_size)`` -- one time step.
            encoder_hidden_states: LSTM state to start from.

        Returns:
            ``(output, hidden)`` where ``output`` has shape
            ``(batch, 1, input_size)`` and ``hidden`` is the updated LSTM
            state (also kept as ``self.hidden``).
        """
        # Insert the length-1 time axis at dim 1 (batch_first layout). The
        # previous unsqueeze(-1) appended a feature axis instead, which is the
        # same thing only when input_size == 1 and fails for wider inputs.
        step_input = x_input.unsqueeze(1)
        lstm_out, self.hidden = self.lstm(step_input, encoder_hidden_states)
        output = self.linear(lstm_out)

        return output, self.hidden

In [5]:
## Combine the two models above
## The encoder runs once; its state and the last input value are handed to the decoder to get the next prediction
## That prediction and the updated state are then fed back repeatedly until the requested length is reached
class lstm_encoder_decoder(nn.Module):
    """Sequence-to-sequence forecaster built from ``lstm_encoder`` and ``lstm_decoder``.

    The number of decoder steps generated is ``target_len * STEPS_PER_TARGET``.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the LSTM hidden state (shared by both halves).
    """

    # Decoder steps generated per unit of ``target_len``.
    # NOTE(review): presumably the number of series flattened into each
    # forecast day (the notebook reshapes predictions to 30 columns) -- confirm.
    STEPS_PER_TARGET = 30

    def __init__(self, input_size, hidden_size):
        super(lstm_encoder_decoder, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size

        self.encoder = lstm_encoder(input_size = input_size, hidden_size = hidden_size)
        self.decoder = lstm_decoder(input_size = input_size, hidden_size = hidden_size)

    def _decode(self, inputs, target_len, targets=None, teacher_forcing_ratio=None):
        """Encode ``inputs`` once, then roll the decoder forward step by step.

        When ``teacher_forcing_ratio`` is None the decoder always consumes its
        own previous output and no random number is drawn.
        """
        batch_size = inputs.shape[0]
        input_size = inputs.shape[2]
        n_steps = target_len * self.STEPS_PER_TARGET

        outputs = torch.zeros(batch_size, n_steps, input_size)

        _, hidden = self.encoder(inputs)
        # The first decoder input is the last observed time step.
        decoder_input = inputs[:, -1, :]

        for t in range(n_steps):
            out, hidden = self.decoder(decoder_input, hidden)
            out = out.squeeze(1)
            outputs[:, t, :] = out

            # Teacher forcing: with the given probability the next decoder
            # input is the true value instead of the model's own prediction.
            if teacher_forcing_ratio is not None and random.random() < teacher_forcing_ratio:
                decoder_input = targets[:, t, :]
            else:
                decoder_input = out
        return outputs

    def forward(self, inputs, targets, target_len, teacher_forcing_ratio):
        """Training-time pass with optional teacher forcing.

        Args:
            inputs: tensor indexed as ``(batch, seq, feature)``.
            targets: tensor indexed as ``(batch, step, feature)``; read at
                step ``t`` whenever teacher forcing is applied.
            target_len: forecast length; ``target_len * STEPS_PER_TARGET``
                decoder steps are generated.
            teacher_forcing_ratio: per-step probability of feeding the true
                target instead of the previous prediction.

        Returns:
            Tensor of shape ``(batch, target_len * STEPS_PER_TARGET, feature)``.
        """
        return self._decode(inputs, target_len, targets, teacher_forcing_ratio)

    # Convenience wrapper for inference.
    def predict(self, inputs, target_len):
        """Forecast from a single un-batched sequence.

        Switches the module to eval mode (it is left in eval mode afterwards)
        and runs without gradient tracking.

        Args:
            inputs: tensor indexed as ``(seq, feature)``; a batch axis is added.
            target_len: forecast length, as in ``forward``.

        Returns:
            1-D NumPy array holding feature 0 of every generated step.
        """
        self.eval()
        # Inference only: skip building the autograd graph for every step.
        with torch.no_grad():
            outputs = self._decode(inputs.unsqueeze(0), target_len)
        return outputs.detach().numpy()[0,:,0]

In [6]:
# Read the cp949-encoded CSV, parse its 'Date' column, and use it as the index.
df_stocks = (
    pd.read_csv('./source/30/stocks_clustering30.csv', encoding='cp949')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
)

In [7]:
# Show the loaded frame (last-expression display) as a quick sanity check.
df_stocks

Unnamed: 0_level_0,LG화학,삼천리,한샘,동부건설,이아이디,무학,삼성SDI우,한세실업,CJ씨푸드1우,일진다이아,...,영진약품,넥센,아모레퍼시픽,CJ제일제당,AK홀딩스,세아제강지주,현대차우,고려제강,현대지에프홀딩스,한국화장품
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-02,340000.000000,123500.000000,18350.000000,74686.000000,10869.000000,13034.000000,50000.000000,16750.000000,4970.0,6467.000000,...,1645.000000,7900.000000,121031.0,362000.000000,21565.000000,78861.000000,68200.000000,18435.000000,11032.0,2260.0
2013-01-03,342500.000000,121000.000000,17600.000000,75109.000000,10921.000000,13129.000000,49800.000000,15700.000000,4870.0,6467.000000,...,1675.000000,7730.000000,121331.0,369000.000000,22292.000000,79032.000000,64500.000000,18496.000000,10670.0,2255.0
2013-01-04,331000.000000,123000.000000,18100.000000,77754.000000,10972.000000,13176.000000,50100.000000,15000.000000,4975.0,6371.000000,...,1665.000000,7630.000000,123532.0,375000.000000,22971.000000,78350.000000,66200.000000,18225.000000,10703.0,2265.0
2013-01-05,329166.666667,123500.000000,18183.333333,79235.000000,10750.000000,13128.666667,49916.666667,14850.000000,4925.0,6393.333333,...,1673.333333,7590.000000,123832.0,372666.666667,23003.333333,78321.666667,66333.333333,18285.000000,10703.0,2305.0
2013-01-06,327333.333333,124000.000000,18266.666667,80716.000000,10528.000000,13081.333333,49733.333333,14700.000000,4875.0,6415.666667,...,1681.666667,7550.000000,124132.0,370333.333333,23035.666667,78293.333333,66466.666667,18345.000000,10703.0,2345.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-02-24,669000.000000,494500.000000,45750.000000,7150.000000,840.000000,5550.000000,303500.000000,17330.000000,26900.0,18530.000000,...,3345.000000,4045.000000,144800.0,327500.000000,17770.000000,160800.000000,91800.000000,21750.000000,4775.0,8130.0
2023-02-25,665000.000000,495333.333333,45633.333333,7153.333333,833.666667,5520.000000,302333.333333,17236.666667,26700.0,18436.666667,...,3308.333333,4056.666667,143400.0,325333.333333,17710.000000,158133.333333,91600.000000,21883.333333,4775.0,8130.0
2023-02-26,661000.000000,496166.666667,45516.666667,7156.666667,827.333333,5490.000000,301166.666667,17143.333333,26500.0,18343.333333,...,3271.666667,4068.333333,142000.0,323166.666667,17650.000000,155466.666667,91400.000000,22016.666667,4775.0,8130.0
2023-02-27,657000.000000,497000.000000,45400.000000,7160.000000,821.000000,5460.000000,300000.000000,17050.000000,26300.0,18250.000000,...,3235.000000,4080.000000,140600.0,321000.000000,17590.000000,152800.000000,91200.000000,22150.000000,4775.0,8130.0


In [8]:
## Load every trained model and write its forecast to CSV
df = df_stocks.copy()
scaler = MinMaxScaler()
train_data = np.array(df)
train_np = scaler.fit_transform(train_data)
n_series = train_np.shape[1]

# Everything below is identical for every (iw, ow, st) combination, so it is
# built once instead of on each loop iteration.
device = torch.device('cpu')
# NOTE(review): the whole scaled history is flattened and fed to the encoder;
# `iw` only selects which saved model is loaded. If the models were trained on
# `iw`-row windows, feeding just the last `iw` rows may have been intended -- confirm.
inputs = torch.tensor(train_np).reshape(-1, 1).to(device).float()
# NOTE(review): forecasts are indexed from 2023-01-01 although the frame
# displayed above runs through 2023-02-27 -- confirm the intended start date.
standard = pd.to_datetime('2023-01-01')
# to_csv does not create missing directories.
os.makedirs('./result/final2/clustering30/predict', exist_ok=True)

for iw in [15, 30]:
    for ow in [4, 7, 10, 15]:
        for st in [1, 2, 3]:
            print('iw:', iw, 'ow:', ow, 'st:', st)
            path = f'C:/PythonStudy/Capstone/result/final2/clustering30/model/iw{iw}_ow{ow}_st{st}_model.pkl'
            # SECURITY: joblib.load unpickles arbitrary objects; only load
            # model files from a trusted source.
            model_state = joblib.load(path)
            # Rebuild the architecture and copy the weights from the pickled model.
            model = lstm_encoder_decoder(input_size=1, hidden_size=16)
            model.load_state_dict(model_state.state_dict())

            model.eval()
            with torch.no_grad():
                outputs = model.predict(inputs, ow)

            # The flat prediction is reshaped to one row per forecast day and
            # one column per series, then mapped back to the original scale.
            df_predict_df = pd.DataFrame(
                scaler.inverse_transform(outputs.reshape(ow, n_series)),
                columns=df_stocks.columns,
                index=pd.date_range(standard, periods=ow),
            )
            print(df_predict_df)
            df_predict_df.to_csv(f'./result/final2/clustering30/predict/iw{iw}_ow{ow}_st{st}_predict.csv', encoding='cp949')

iw: 15 ow: 4 st: 1
                   LG화학            삼천리             한샘          동부건설  \
2023-01-01  792026.8125  349063.500000   74863.828125  11850.772461   
2023-01-02  424522.5625  183294.390625  103020.328125  38384.464844   
2023-01-03  418332.1250  191263.156250  107844.937500  36497.792969   
2023-01-04  416364.6250  195520.984375  110115.375000  35645.269531   

                   이아이디            무학         삼성SDI우          한세실업  \
2023-01-01  2587.939209  14069.759766  306857.875000  17837.244141   
2023-01-02  9518.712891  22922.244141  217262.031250  24107.951172   
2023-01-03  9629.283203  23381.238281  215404.781250  24291.591797   
2023-01-04  9687.844727  23677.417969  214184.703125  24483.736328   

                 CJ씨푸드1우         일진다이아  ...         영진약품           넥센  \
2023-01-01  30601.175781  27646.699219  ...  6295.776855  4798.599609   
2023-01-02  19450.925781  26943.214844  ...  6067.459961  4958.352051   
2023-01-03  19717.578125  26730.464844  ...  6016.33935

                    LG화학            삼천리             한샘          동부건설  \
2023-01-01  523825.53125  226900.687500  141885.531250  17339.093750   
2023-01-02  424213.71875  193745.250000  114075.351562  33486.933594   
2023-01-03  437788.50000  200799.906250  118912.484375  34825.613281   
2023-01-04  442895.06250  203368.046875  120615.750000  35281.359375   
2023-01-05  443567.75000  203696.953125  120828.062500  35336.710938   
2023-01-06  443634.50000  203729.968750  120849.664062  35342.429688   
2023-01-07  443650.93750  203738.500000  120855.445312  35344.023438   

                    이아이디            무학         삼성SDI우          한세실업  \
2023-01-01   6338.338379  14622.240234  231535.968750  25676.369141   
2023-01-02  10369.424805  22240.539062  216049.250000  24689.832031   
2023-01-03  10827.046875  23110.037109  223885.578125  25485.466797   
2023-01-04  10977.610352  23386.376953  226290.218750  25721.107422   
2023-01-05  10995.444336  23418.347656  226562.281250  25747.212891 

                    LG화학            삼천리             한샘          동부건설  \
2023-01-01  428362.37500  225098.734375  142809.468750  17789.945312   
2023-01-02  395658.50000  178414.281250  103065.492188  30338.292969   
2023-01-03  396327.78125  178665.625000  103358.007812  30401.867188   
2023-01-04  395723.90625  178371.031250  103108.843750  30330.181641   
2023-01-05  395056.06250  178003.859375  102863.015625  30261.939453   
2023-01-06  394345.75000  177632.296875  102599.914062  30186.755859   
2023-01-07  393561.87500  177213.687500  102306.609375  30103.890625   
2023-01-08  392668.78125  176739.875000  101974.164062  30009.548828   
2023-01-09  391686.78125  176218.625000  101608.906250  29906.208984   
2023-01-10  390641.71875  175664.718750  101221.187500  29796.550781   

                   이아이디            무학         삼성SDI우          한세실업  \
2023-01-01  5028.680664  16765.558594  191124.421875  19047.179688   
2023-01-02  9262.345703  20043.578125  195366.468750  22522.951172 

                    LG화학            삼천리             한샘          동부건설  \
2023-01-01  1.070635e+06  326852.562500  -20589.835938  13410.382812   
2023-01-02  4.344184e+05  124007.367188   41183.207031  21540.238281   
2023-01-03  5.434486e+05  196633.140625   51823.621094  22553.503906   
2023-01-04  3.821938e+05  276380.250000   95249.492188  20226.687500   
2023-01-05  7.140296e+05  181876.250000  166625.187500  17393.310547   
2023-01-06  6.669058e+05  215662.890625  163186.796875  26060.689453   
2023-01-07  6.502048e+05  294537.812500  151353.468750  40230.664062   
2023-01-08  7.841764e+05  333263.406250   79950.539062  47955.101562   
2023-01-09  7.535641e+05  324982.343750  123007.351562  47379.750000   
2023-01-10  7.041168e+05  317977.718750  159119.562500  46291.035156   
2023-01-11  4.309832e+05  383095.093750  211026.593750  23347.771484   
2023-01-12  4.959636e+05  358421.718750  203486.281250  40173.761719   
2023-01-13  6.722525e+05  287466.843750  198190.125000  60197.00

                    LG화학            삼천리             한샘          동부건설  \
2023-01-01  505678.78125 -353411.468750  124875.484375   -307.800354   
2023-01-02  385947.78125  227483.312500  103354.859375  22529.550781   
2023-01-03  434505.71875  187244.187500  104949.390625  32784.031250   
2023-01-04  418206.03125  189915.734375  112357.570312  33033.359375   

                    이아이디            무학         삼성SDI우          한세실업  \
2023-01-01  10771.786133  64256.523438  809761.625000  35001.976562   
2023-01-02   8599.126953  25461.832031  206038.312500  18851.828125   
2023-01-03  10577.028320  21520.582031  201491.203125  23984.601562   
2023-01-04  10093.458008  21600.498047  211289.593750  24227.757812   

                 CJ씨푸드1우         일진다이아  ...         영진약품           넥센  \
2023-01-01  -1960.672241  16308.594727  ...  5013.068848  3670.694092   
2023-01-02  18047.625000  26458.203125  ...  5405.330566  5071.848633   
2023-01-03  21521.146484  23649.509766  ...  6144.330078  5003.8

                    LG화학            삼천리            한샘          동부건설  \
2023-01-01  669439.56250  311065.968750  42960.523438  12120.215820   
2023-01-02  340257.81250  143931.062500  81034.406250  25596.375000   
2023-01-03  319274.09375  122791.585938  67186.062500  23649.525391   
2023-01-04  331994.81250  121337.742188  64745.410156  23877.791016   
2023-01-05  353398.81250  120656.882812  61372.851562  24178.470703   
2023-01-06  392166.43750  115660.437500  59539.046875  25351.964844   
2023-01-07  450681.84375  102963.007812  62991.207031  26027.917969   

                   이아이디            무학         삼성SDI우          한세실업  \
2023-01-01  3628.124756  10892.393555  448502.312500  21660.757812   
2023-01-02  8305.063477  20676.152344  230118.468750  28774.740234   
2023-01-03  7596.726074  19562.115234  227499.953125  30163.812500   
2023-01-04  7453.448242  18414.968750  219292.281250  30285.324219   
2023-01-05  7628.052246  16804.980469  208152.906250  30533.511719   
2023-01-06 

                    LG화학            삼천리             한샘          동부건설  \
2023-01-01  605220.87500  374315.343750   53904.277344  16058.588867   
2023-01-02  424868.12500  194967.953125  115553.335938  34082.343750   
2023-01-03  418069.81250  190681.109375  112070.812500  32961.132812   
2023-01-04  447131.96875  206103.968750  122874.554688  36017.460938   
2023-01-05  476864.28125  221838.843750  133870.828125  39121.878906   
2023-01-06  506085.34375  237286.046875  144654.171875  42162.792969   
2023-01-07  533845.81250  251942.843750  154870.250000  45036.203125   
2023-01-08  511092.71875  239903.468750  146457.937500  42664.117188   
2023-01-09  534409.37500  252123.171875  154915.062500  45028.687500   
2023-01-10  726729.31250  367829.312500  235346.250000  64916.320312   

                    이아이디            무학         삼성SDI우          한세실업  \
2023-01-01   1623.770630  12591.038086  163111.343750  23680.666016   
2023-01-02  10638.470703  22868.650391  222468.562500  25354.7460

                    LG화학            삼천리             한샘          동부건설  \
2023-01-01  1.668732e+06  796152.750000  219253.906250  15524.249023   
2023-01-02  3.490647e+05  127583.195312  -18878.523438 -10906.434570   
2023-01-03 -3.270794e+05 -207786.421875 -169027.843750 -47133.929688   
2023-01-04  5.755632e+04   -2832.857910  -24724.724609  -6012.916992   
2023-01-05 -7.816779e+04  -84187.906250  -88235.734375 -25441.894531   
2023-01-06 -8.554254e+04  -90759.492188  -93941.562500 -27021.859375   
2023-01-07 -5.270531e+04  -71316.648438  -80710.343750 -24134.425781   
2023-01-08 -2.932563e+04  -57701.785156  -70608.250000 -21388.306641   
2023-01-09 -8.144223e+03  -44972.722656  -60519.609375 -18270.179688   
2023-01-10  1.070604e+04  -33991.601562  -51916.066406 -15536.425781   
2023-01-11  2.706486e+04  -24602.679688  -44642.781250 -13213.476562   
2023-01-12  3.248473e+04  -21133.187500  -41760.992188 -12236.724609   
2023-01-13  6.221453e+04   -3875.089111  -28502.175781  -8055.53