In [5]:
import os
import numpy as np
import pandas as pd
from datetime import datetime as time
from functools import reduce
from datetime import timedelta
import datetime

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.utils.data as data


In [7]:
codes = ['065450','013810','005870','010820','003570','119500','103140','012450', \
         '047810', '079550']
names = ['빅텍', '스페코','휴니드', '퍼스텍', 'S&T중공업', '포메탈','풍산','한화에어로스페이스', \
         '한국항공우주', 'LIG넥스원']
start = '2000-01-01'

dfList = []
for code in codes:
    df = pd.read_csv(os.path.join('dataset', code+'_daily.csv'), header=0, usecols=[0,1,4], names = \
                      ['Date','open','close'])
    df['Date_o'] = [datetime.datetime.strptime(str(m),'%Y%m%d')+timedelta(hours=9) for m in df['Date']]
    df['Date_c'] = [datetime.datetime.strptime(str(m),'%Y%m%d')+timedelta(hours=15) for m in df['Date']]
    price_o = pd.DataFrame({'Date': df['Date_o'], code : df['open']})
    price_c = pd.DataFrame({'Date': df['Date_c'], code: df['close']})

    dfList.append(pd.concat([price_o, price_c], axis=0))    

merged = reduce(lambda x, y: pd.merge(x, y, how = 'outer', on = 'Date'),dfList)
merged = merged.set_index('Date')
merged = merged.sort_index()
merged = merged[start:] #원데이터

rtn = np.log(merged/merged.shift(1))*100 #수익률 계산

rtn.shape


(9298, 10)

In [8]:
rtn

Unnamed: 0_level_0,065450,013810,005870,010820,003570,119500,103140,012450,047810,079550
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2000-01-04 09:00:00,,,,,,,,,,
2000-01-04 15:00:00,,0.000000,11.286476,0.000000,25.343811,,,12.185692,,
2000-01-05 09:00:00,,-12.745269,9.720671,13.875616,7.198429,,,-5.044853,,
2000-01-05 15:00:00,,5.979803,4.225255,-8.941564,6.711113,,,2.133035,,
2000-01-06 09:00:00,,6.525849,13.567340,-0.828734,13.355956,,,1.674263,,
2000-01-06 15:00:00,,-17.833113,0.000000,-7.187000,-9.531018,,,-1.674263,,
2000-01-07 09:00:00,,-11.939724,-1.330397,-6.941877,-2.533635,,,0.421201,,
2000-01-07 15:00:00,,-0.838183,4.367506,20.833800,-8.002417,,,12.619280,,
2000-01-10 09:00:00,,4.926844,4.184711,13.902858,9.277258,,,1.471065,,
2000-01-10 15:00:00,,-14.612747,9.754315,0.000000,-14.993099,,,-5.246286,,


In [152]:
#customizing dataset

class my_data():
    
    # 데이터 로딩해서 수익률 형태로 변환
    # 추후에 원데이터 넣는다면 여기서 코드 바꿔야 함
    codes = ['065450','013810','005870','010820','003570','119500','103140','012450', \
             '047810', '079550']
    names = ['빅텍', '스페코','휴니드', '퍼스텍', 'S&T중공업', '포메탈','풍산','한화에어로스페이스', \
             '한국항공우주', 'LIG넥스원']
    start = '2000-01-01'

    dfList = []
    for code in codes:
        df = pd.read_csv(code+'_daily.csv', header=0, usecols=[0,1,4], names = \
                          ['Date','open','close'])
        df['Date_o'] = [datetime.strptime(str(m),'%Y%m%d')+timedelta(hours=9) for m in df['Date']]
        df['Date_c'] = [datetime.strptime(str(m),'%Y%m%d')+timedelta(hours=15) for m in df['Date']]
        price_o = pd.DataFrame({'Date': df['Date_o'], code : df['open']})
        price_c = pd.DataFrame({'Date': df['Date_c'], code: df['close']})

        dfList.append(pd.concat([price_o, price_c], axis=0))    

    merged = reduce(lambda x, y: pd.merge(x, y, how = 'outer', on = 'Date'),dfList)
    merged = merged.set_index('Date')
    merged = merged.sort_index()
    merged = merged[start:] #원데이터

    rtn = np.log(merged/merged.shift(1))*100 #수익률 계산
    
    def __init__(self, train = True, ratio = 0.7, C = 5):
        self.train = train
        
        if self.train:
            
            
            
            self.train_data =[]
            self.train_label =[]
    
    def __getitem__(self, idx):
        if self.train:
            data, target = self.train_data[idx], self.train_label[idx]
        else:
            data, target = self.test_data[idx], self.test_label[idx]  
        return data, target
    
    def __len__(self):
        if self.train:
            return len(self.train_data)
        else:
            return len(self.test_data) 
        


IndentationError: expected an indented block (<ipython-input-152-622aa64b1255>, line 38)

In [13]:
#hyperparameter setting
sequence_length = 10
input_size = 2 
hidden_size = 40
num_layers = 2
num_classes = 4
batch_size = 50
num_epochs = 2
learning_rate = 0.01


In [None]:
#data loader

train_dataset = my_data(train=True)
test_dataset = my_data(train=False)


train_loader = data.DataLoader(dataset=train_dataset, 
                               batch_size = batch_size, shuffle = False)

test_loader = data.DataLoader(dataset=test_dataset, 
                               batch_size = batch_size, shuffle = False)


In [None]:
#model setting: encoder

class encoder(nn.Module):
    def __init__(self, vocab_size):
        super().__init__()

        # architecture
        self.embed = nn.Embedding(vocab_size, EMBED_SIZE, padding_idx = PAD_IDX)
        self.rnn = nn.GRU( # LSTM or GRU
            input_size = EMBED_SIZE,
            hidden_size = HIDDEN_SIZE // NUM_DIRS,
            num_layers = NUM_LAYERS,
            bias = True,
            batch_first = True,
            dropout = DROPOUT,
            bidirectional = BIDIRECTIONAL
        )

        if CUDA:
            self = self.cuda()

    def init_hidden(self, rnn_type): # initialize hidden states
        h = zeros(NUM_LAYERS * NUM_DIRS, BATCH_SIZE, HIDDEN_SIZE // NUM_DIRS) # hidden states
        if rnn_type == "LSTM":
            c = zeros(NUM_LAYERS * NUM_DIRS, BATCH_SIZE, HIDDEN_SIZE // NUM_DIRS) # cell states
            return (h, c)
        return h

    def forward(self, x, mask):
        self.hidden = self.init_hidden("GRU") # LSTM or GRU
        x = self.embed(x)
        x = nn.utils.rnn.pack_padded_sequence(x, mask[1], batch_first = True)
        h, _ = self.rnn(x, self.hidden)
        h, _ = nn.utils.rnn.pad_packed_sequence(h, batch_first = True)
        return h

In [None]:
model = LSTM(input_size, hidden_size, num_layers, num_classes)