<a href="https://colab.research.google.com/github/byeongchan1/Adv-ALSTM/blob/master/practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 0. 모듈 import 및 cwd 설정


In [1]:
# 모듈 import
from datetime import datetime
import numpy as np
import os
import pandas as pd

from google.colab import drive
drive.mount('/content/gdrive')
os.chdir('/content/gdrive/MyDrive/python/python_dong/data_axis_transform1')

Mounted at /content/gdrive


#1. 데이터 전처리 과정

In [2]:
# data path 지정
raw_data_path = './Adv-ALSTM/data/stocknet-dataset/price/raw'

In [3]:
if 'stocknet' in raw_data_path:
    tra_date = '2014-01-02'
    val_date = '2015-08-03'
    tes_date = '2015-10-01'
    end_date = '2015-12-31'
elif 'kdd17' in raw_data_path:
    tra_date = '2007-01-03'
    val_date = '2015-01-02'
    tes_date = '2016-01-04'
    end_date = '2016-12-31'
else:
    print('unexpected path: %s' % raw_data_path)

In [4]:
print(tra_date, val_date, tes_date, end_date)

2014-01-02 2015-08-03 2015-10-01 2015-12-31


In [5]:
# os.path.isfile : 파일이 있는지 없는 지 체크
# os.path.join(data_path, fname) : 폴더 디렉터리와 fname(stockname.csv) 붙임
fnames = [fname for fname in os.listdir(raw_data_path) if
            os.path.isfile(os.path.join(raw_data_path,fname))]
fnames

['SPLP.csv',
 'CELG.csv',
 'WFC.csv',
 'INTC.csv',
 'JNJ.csv',
 'AAPL.csv',
 'GOOG.csv',
 'BP.csv',
 'GE.csv',
 'BABA.csv',
 'PTR.csv',
 'AMZN.csv',
 'SLB.csv',
 'SRE.csv',
 'KO.csv',
 'UTX.csv',
 'BHP.csv',
 'BRK-A.csv',
 'NEE.csv',
 'DHR.csv',
 'BBL.csv',
 'CVX.csv',
 'NVS.csv',
 'CAT.csv',
 'PICO.csv',
 'VZ.csv',
 'AEP.csv',
 'T.csv',
 'HD.csv',
 'PG.csv',
 'BCH.csv',
 'WMT.csv',
 'SNY.csv',
 'HSBC.csv',
 'AMGN.csv',
 'UPS.csv',
 'NGG.csv',
 'BA.csv',
 'MA.csv',
 'IEP.csv',
 'XOM.csv',
 'BSAC.csv',
 'DIS.csv',
 'BUD.csv',
 'PPL.csv',
 'ABB.csv',
 'CHL.csv',
 'AGFS.csv',
 'SO.csv',
 'GD.csv',
 'TSM.csv',
 'PM.csv',
 'FB.csv',
 'DUK.csv',
 'CHTR.csv',
 'MO.csv',
 'PFE.csv',
 'MSFT.csv',
 'D.csv',
 'HON.csv',
 'UNH.csv',
 'TOT.csv',
 'UN.csv',
 'V.csv',
 'HRG.csv',
 'EXC.csv',
 'CSCO.csv',
 'LMT.csv',
 'MCD.csv',
 'BAC.csv',
 'RDS-B.csv',
 'PCG.csv',
 'UL.csv',
 'C.csv',
 'CODI.csv',
 'SNP.csv',
 'ORCL.csv',
 'PEP.csv',
 'PCLN.csv',
 'MDT.csv',
 'TM.csv',
 'ABBV.csv',
 'MMM.csv',
 'JPM

In [6]:
fname = fnames[0]

In [7]:
df = pd.read_csv(os.path.join(raw_data_path,fname))

In [8]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2012-09-04,10.98,11.08,10.98,10.98,10.874151,7400
1,2012-09-05,10.98,10.99,10.95,10.99,10.884055,22600
2,2012-09-06,11.0,11.1,10.92,10.93,10.824634,23600
3,2012-09-07,11.0,11.0,10.95,10.98,10.874151,88500
4,2012-09-10,10.96,11.06,10.96,10.99,10.884055,87000


In [9]:
COLUMNS_FEATURE_DATA_V1 = ['open_close_ratio', 'high_close_ratio', 
                           'low_close_ratio', 'close_lastclose_ratio', 
                           'adjclose_lastadjclose_ratio', 'close_ma5_ratio', 
                           'close_ma10_ratio', 'close_ma15_ratio', 'close_ma20_ratio', 
                           'close_ma25_ratio', 'close_ma30_ratio']

In [10]:
ver = 'v1' # ver in ['v1', 'v2']
if ver == 'v1':
    COLUMNS_FEATURE = COLUMNS_FEATURE_DATA_V1


In [11]:
windows = [5,10,15,20,25,30]

def preprocess(df, windows):
   '''
   전처리 함수 역할 : 전체 feature생성하여 df column에 추가
   '''
   data = df
   data['open_close_ratio'] = data['Open'] / data['Close'] - 1
   data['high_close_ratio'] = data['High'] / data['Close'] - 1
   data['low_close_ratio'] = data['Low'] / data['Close'] - 1

   data['close_lastclose_ratio'] = np.zeros(len(data))
   data.loc[1:, 'close_lastclose_ratio'] = data['Close'][1:].values / data['Close'][:-1].values - 1

   data['adjclose_lastadjclose_ratio'] = np.zeros(len(data))
   data.loc[1:, 'adjclose_lastadjclose_ratio'] = data['Adj Close'][1:].values / data['Adj Close'][:-1].values - 1

   for window in windows:
      data[f'close_ma{window}_ratio'] = data['Adj Close'].rolling(window).mean()/data['Adj Close'] - 1
   
   data['label'] = np.append((data['Close'][1:].values > data['Close'][:-1].values)*1,0)

   return data

In [12]:
df = preprocess(df, windows)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,open_close_ratio,high_close_ratio,low_close_ratio,close_lastclose_ratio,adjclose_lastadjclose_ratio,close_ma5_ratio,close_ma10_ratio,close_ma15_ratio,close_ma20_ratio,close_ma25_ratio,close_ma30_ratio,label
0,2012-09-04,10.98,11.08,10.98,10.98,10.874151,7400,0.0,0.009107,0.0,0.0,0.0,,,,,,,1
1,2012-09-05,10.98,10.99,10.95,10.99,10.884055,22600,-0.00091,0.0,-0.00364,0.000911,0.000911,,,,,,,0
2,2012-09-06,11.0,11.1,10.92,10.93,10.824634,23600,0.006404,0.015554,-0.000915,-0.00546,-0.005459,,,,,,,1
3,2012-09-07,11.0,11.0,10.95,10.98,10.874151,88500,0.001821,0.001821,-0.002732,0.004575,0.004574,,,,,,,1
4,2012-09-10,10.96,11.06,10.96,10.99,10.884055,87000,-0.00273,0.006369,-0.00273,0.000911,0.000911,-0.001456,,,,,,1


In [13]:
data = df

In [14]:
data.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,open_close_ratio,high_close_ratio,low_close_ratio,close_lastclose_ratio,adjclose_lastadjclose_ratio,close_ma5_ratio,close_ma10_ratio,close_ma15_ratio,close_ma20_ratio,close_ma25_ratio,close_ma30_ratio,label
1253,2017-08-28,18.0,18.450001,18.0,18.4,18.4,7000,-0.021739,0.002717,-0.021739,0.016575,0.016575,-0.013587,-0.006359,-0.002246,-0.00087,0.000609,0.001812,0
1254,2017-08-29,18.4,18.4,18.4,18.4,18.4,0,0.0,0.0,0.0,0.0,0.0,-0.011957,-0.007174,-0.002246,-0.001141,0.0005,0.00163,0
1255,2017-08-30,18.15,18.15,18.15,18.15,18.15,700,0.0,0.0,0.0,-0.013587,-0.013587,0.002755,0.004518,0.009881,0.011653,0.013399,0.014747,0
1256,2017-08-31,18.1,18.25,18.1,18.15,18.15,500,-0.002755,0.00551,-0.002755,0.0,0.0,0.004959,0.00314,0.008595,0.010964,0.012408,0.014105,0
1257,2017-09-01,18.049999,18.129999,18.049999,18.08,18.08,700,-0.001659,0.002765,-0.001659,-0.003857,-0.003857,0.008628,0.005808,0.010914,0.014132,0.015398,0.017072,0


# feature 추가한 csv 저장하기

In [15]:
feature_data_path = './Adv-ALSTM/data/stocknet-dataset/price/feature'

In [16]:
fname = fnames[0]
os.path.isfile(os.path.join(feature_data_path,fname))

True

In [17]:
for fname in fnames:
   if not os.path.isfile(os.path.join(feature_data_path,fname)):
      df_raw = pd.read_csv(os.path.join(raw_data_path,fname))
      data = preprocess(df_raw, windows)

      # 폴더 없으면 생성
      try:
         if not os.path.exists(feature_data_path):
            os.makedirs(feature_data_path)
      except OSError:
         print ('Error: Creating directory. ' +  feature_data_path)

      #csv 파일 저장
      data.to_csv(os.path.join(feature_data_path,fname))

# train, validation, test data 나누기

In [18]:
print(tra_date, val_date, tes_date, end_date)

2014-01-02 2015-08-03 2015-10-01 2015-12-31


In [19]:
# learning_data = data[(data['Date'] >= tra_date) & (data['Date'] <= end_date)]
tra_data_X = data[(data['Date'] >= tra_date) & (data['Date'] < val_date)][COLUMNS_FEATURE]
tra_data_Y = data[(data['Date'] >= tra_date) & (data['Date'] < val_date)]['label']

val_data_X = data[(data['Date'] >= val_date) & (data['Date'] < tes_date)][COLUMNS_FEATURE]
val_data_Y = data[(data['Date'] >= val_date) & (data['Date'] < tes_date)]['label']

test_data_X = data[(data['Date'] >= tes_date) & (data['Date'] <= end_date)][COLUMNS_FEATURE]
test_data_Y = data[(data['Date'] >= tes_date) & (data['Date'] <= end_date)]['label']


tra_data_X.head()

Unnamed: 0,open_close_ratio,high_close_ratio,low_close_ratio,close_lastclose_ratio,adjclose_lastadjclose_ratio,close_ma5_ratio,close_ma10_ratio,close_ma15_ratio,close_ma20_ratio,close_ma25_ratio,close_ma30_ratio
333,-0.015367,0.002846,-0.015367,0.01268,0.01268,-0.007513,-0.005521,-0.008234,-0.010074,-0.011793,-0.022387
334,0.003429,0.003429,0.0,-0.003984,-0.003984,-0.004571,-0.001714,-0.003429,-0.005286,-0.007291,-0.015276
335,-0.00578,0.001156,-0.008092,-0.011429,-0.011429,0.006012,0.008671,0.008324,0.006098,0.004092,-0.001522
336,0.0,0.0,0.0,0.0,0.0,0.006012,0.006936,0.008131,0.006069,0.004509,0.000983
337,0.000578,0.000578,0.0,0.0,0.0,0.005434,0.00659,0.007746,0.00604,0.004555,0.002832


In [20]:
test_data_X.tail()

Unnamed: 0,open_close_ratio,high_close_ratio,low_close_ratio,close_lastclose_ratio,adjclose_lastadjclose_ratio,close_ma5_ratio,close_ma10_ratio,close_ma15_ratio,close_ma20_ratio,close_ma25_ratio,close_ma30_ratio
832,0.007898,0.007898,-0.003645,-0.003632,-0.003632,0.012394,0.003159,0.009761,0.016403,0.020462,0.022965
833,0.005505,0.005505,-0.006116,-0.006683,-0.006683,0.01052,0.009847,0.013619,0.020856,0.025615,0.028094
834,0.001833,0.003054,-0.001222,0.001223,0.001223,0.004765,0.009957,0.009733,0.017654,0.022798,0.025779
835,0.004287,0.004287,0.0,-0.002444,-0.002444,0.004654,0.013288,0.010165,0.017942,0.023368,0.026842
836,-0.001831,0.003053,-0.001831,0.003062,0.003062,-0.000122,0.011539,0.005779,0.012637,0.018803,0.022405


# Feature Transformation Layer 만들어야합니다

## 1. input data 3차원으로 쌓기
shape = (stock 종류수, date, feature)

요건
1. 모든 티커 데이터의 date가 맞는지?
2. 결측치는 없는지?
3. 티커와 데이터 메치 가능해야함

In [21]:
raw_data_path = './Adv-ALSTM/data/stocknet-dataset/price/raw'


tra_data_X = []
tra_data_Y = []
val_data_X = []
val_data_Y = []
test_data_X = []
test_data_Y = []
tickers = []

cnt = 0
fail_cnt = 0

fnames = [fname for fname in os.listdir(raw_data_path) if
            os.path.isfile(os.path.join(raw_data_path,fname))]

for fname in fnames:

    df = pd.read_csv(os.path.join(raw_data_path,fname))
    data = preprocess(df, windows)

    learning_data = data[(data['Date'] >= tra_date) & (data['Date'] <= end_date)]['Date']
    tra_data_X_ticker = data[(data['Date'] >= tra_date) & (data['Date'] < val_date)][COLUMNS_FEATURE]
    tra_data_Y_ticker = data[(data['Date'] >= tra_date) & (data['Date'] < val_date)]['label']

    val_data_X_ticker = data[(data['Date'] >= val_date) & (data['Date'] < tes_date)][COLUMNS_FEATURE]
    val_data_Y_ticker = data[(data['Date'] >= val_date) & (data['Date'] < tes_date)]['label']

    test_data_X_ticker = data[(data['Date'] >= tes_date) & (data['Date'] <= end_date)][COLUMNS_FEATURE]
    test_data_Y_ticker = data[(data['Date'] >= tes_date) & (data['Date'] <= end_date)]['label']


    if cnt == 0:
        target_dates = learning_data
    
    print('ticker : {}, date check : {}'.format(fname, np.array_equal(target_dates.values, learning_data.values)))
    if np.array_equal(target_dates.values, learning_data.values): 
        
        tra_data_X.append(tra_data_X_ticker.values)
        tra_data_Y.append(tra_data_Y_ticker.values)

        val_data_X.append(val_data_X_ticker.values)
        val_data_Y.append(val_data_Y_ticker.values)
        
        test_data_X.append(test_data_X_ticker.values)
        test_data_Y.append(test_data_Y_ticker.values)

        tickers.append(fname)
    else : 
        fail_cnt += 1
    
    cnt += 1

print(cnt, len(fnames))
print('fail_cnt :', fail_cnt)

# 마지막에 index 종목 넣기
raw_data_index_path = './Adv-ALSTM/data/stocknet-dataset/price/raw/index'

fname = os.listdir(raw_data_index_path)[0]

df = pd.read_csv(os.path.join(raw_data_index_path,fname))
data = preprocess(df, windows)

learning_data = data[(data['Date'] >= tra_date) & (data['Date'] <= end_date)]['Date']
tra_data_X_ticker = data[(data['Date'] >= tra_date) & (data['Date'] < val_date)][COLUMNS_FEATURE]
tra_data_Y_ticker = data[(data['Date'] >= tra_date) & (data['Date'] < val_date)]['label']

val_data_X_ticker = data[(data['Date'] >= val_date) & (data['Date'] < tes_date)][COLUMNS_FEATURE]
val_data_Y_ticker = data[(data['Date'] >= val_date) & (data['Date'] < tes_date)]['label']

test_data_X_ticker = data[(data['Date'] >= tes_date) & (data['Date'] <= end_date)][COLUMNS_FEATURE]
test_data_Y_ticker = data[(data['Date'] >= tes_date) & (data['Date'] <= end_date)]['label']

print('ticker : {}, date check : {}'.format(fname, np.array_equal(target_dates.values, learning_data.values)))
if np.array_equal(target_dates.values, learning_data.values):
    
    tra_data_X.append(tra_data_X_ticker.values)
    tra_data_Y.append(tra_data_Y_ticker.values)

    val_data_X.append(val_data_X_ticker.values)
    val_data_Y.append(val_data_Y_ticker.values)
    
    test_data_X.append(test_data_X_ticker.values)
    test_data_Y.append(test_data_Y_ticker.values)

    tickers.append(fname)

# tra_data_X

ticker : SPLP.csv, date check : True
ticker : CELG.csv, date check : True
ticker : WFC.csv, date check : True
ticker : INTC.csv, date check : True
ticker : JNJ.csv, date check : True
ticker : AAPL.csv, date check : True
ticker : GOOG.csv, date check : True
ticker : BP.csv, date check : True
ticker : GE.csv, date check : True
ticker : BABA.csv, date check : False
ticker : PTR.csv, date check : True
ticker : AMZN.csv, date check : True
ticker : SLB.csv, date check : True
ticker : SRE.csv, date check : True
ticker : KO.csv, date check : True
ticker : UTX.csv, date check : True
ticker : BHP.csv, date check : True
ticker : BRK-A.csv, date check : True
ticker : NEE.csv, date check : True
ticker : DHR.csv, date check : True
ticker : BBL.csv, date check : True
ticker : CVX.csv, date check : True
ticker : NVS.csv, date check : True
ticker : CAT.csv, date check : True
ticker : PICO.csv, date check : True
ticker : VZ.csv, date check : True
ticker : AEP.csv, date check : True
ticker : T.csv, date 

In [22]:
len(tra_data_X)

86

# layer 생성

## pytorch module import

In [23]:
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from torch.autograd import Variable

import matplotlib.pyplot as plt
import math


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [24]:
device

device(type='cpu')

## Hyperparameter setting

In [25]:
w = 10 # window size w in {10, 15}
beta = 0.01 # market context weight beta in {0.01, 0.1, 1}
h = 64 # hidden layer size h in {64, 128}
learning_rate = 0.001 # in {0.001, 0.0001}
lambda_1 = 1 # selective regularzation lambda = 1
drop_rate = 0.15


In [26]:
def save_model(model, model_path):
    """Save model."""
    torch.save(model.state_dict(), model_path)


def load_model(model, model_path, use_cuda=False):
    """Load model."""
    map_location = 'cpu'
    if use_cuda and torch.cuda.is_available():
        map_location = 'cuda:0'
    model.load_state_dict(torch.load(model_path, map_location))
    return model

## Define model, trainer

In [27]:
#예시
# Size: [batch_size, seq_len, input_size]
input = torch.randn(12, 384, 768)

lstm = nn.LSTM(input_size=768, hidden_size=512, batch_first=True)

output, _ = lstm(input)
output.size()  # => torch.Size([12, 384, 512])

torch.Size([12, 384, 512])

In [28]:
numpy_tra_data_X = np.array(tra_data_X)
tensor_tra_data_X = torch.Tensor(numpy_tra_data_X)

In [29]:
tensor_tra_data_X.size()

torch.Size([86, 398, 11])

In [30]:
feature_size = tensor_tra_data_X.size()[2]
feature_size

11

In [31]:
h

64

### Feature Transformation

In [32]:
feature_transformation_linear = nn.Linear(feature_size, h)
# nn.Linear는 마지막 차원에 feature size만 있으면 그 앞에 어떤 차원이 있던지 마지막 차원 기준으로 linear 층 생성해준다. 너무 좋다

In [33]:
output = torch.tanh(feature_transformation_linear(tensor_tra_data_X))

In [34]:
output.size()

torch.Size([86, 398, 64])

### window size로 input값 제한

In [35]:
time_idx = 0
train_timesteps = int(tensor_tra_data_X.size()[1]) - w # time_idx의 끝을 의미하고 싶다. -1 해야하는지?
Z_tilda = output[:, time_idx:time_idx + w, :]

In [36]:
output[:, train_timesteps+1:train_timesteps + w + 1, :].size() # -1 하면 사이즈 줄어든다. 이전이 마지막이라는 뜻

torch.Size([86, 9, 64])

In [37]:
len(tickers)

86

### LSTM

In [38]:
# Temporal LSTM
# LSTM input size = (batch, seq_len, input_size)
# 일단 batch_size는 1로 해보자.

# 티커별로 lstm 층 개별로 만들기
for i in range(len(tickers)): # 마지막은 index용 lstm
    globals()['lstm_{}'.format(i)] = nn.LSTM(input_size = h, hidden_size=h, batch_first=True)


# 동적변수 추가법
# globals()에는 dict형태로 global 변수가 저장되어 있음
# 이를 이용하여 동적 변수 추가 가능 (https://congcoding.tistory.com/55)

In [39]:
ticker_idx = 0
Z_tilda[ticker_idx,:,:].size() # 사이즈 변형해서 넣어야 한다.
# LSTM input size = (batch, seq_len, input_size) 으로 batch_size 1 만들어야 함

torch.Size([10, 64])

In [40]:
Z_tilda[ticker_idx,:,:].view(1,Z_tilda.size()[1], Z_tilda.size()[2]).size() # shape 변경 완료

torch.Size([1, 10, 64])

In [41]:
Z_tilda[ticker_idx,:,:].view(1,Z_tilda.size()[1], Z_tilda.size()[2])[0,0,0] - Z_tilda[ticker_idx, 0, 0]
# 데이터 검증 완료

tensor(0., grad_fn=<SubBackward0>)

In [42]:
z_tilda = Z_tilda[ticker_idx,:,:].view(1,Z_tilda.size()[1], Z_tilda.size()[2])
z_tilda.size()

torch.Size([1, 10, 64])

In [43]:
torch.Tensor().new_zeros((1,w,h)).size()

torch.Size([1, 10, 64])

In [44]:
# hidden state ticker별 쌓기
H_n = torch.Tensor().new_zeros((len(tickers),w,h)) # +1은 index

for ticker_idx in range(len(tickers)):
    i = 0
    z_tilda = Z_tilda[ticker_idx,:,:].view(1,Z_tilda.size()[1], Z_tilda.size()[2])
    lstm_output, (h_n, c_n) = globals()['lstm_{}'.format(ticker_idx)](z_tilda[:,i,:].view(1,1,Z_tilda.size()[2]))
    H_n[ticker_idx,0,:] = h_n
    for i in range(1,w):
        lstm_output, (h_n, c_n) = globals()['lstm_{}'.format(ticker_idx)](z_tilda[:,i,:].view(1,1,Z_tilda.size()[2]), (h_n, c_n))
        H_n[ticker_idx,i,:] = h_n

In [45]:
H_n.size()

torch.Size([86, 10, 64])

In [46]:
lstm_output.size()

torch.Size([1, 1, 64])

In [47]:
h_n.size() # seq_len = 10이므로, h_n 10개 얻고 싶어요... H_n에 모아둠

torch.Size([1, 1, 64])

In [48]:
torch.dot(H_n[0][0], H_n[0][-1])

tensor(0.1407, grad_fn=<DotBackward>)

In [49]:
torch.matmul(H_n[0], H_n[0][-1])

tensor([0.1407, 0.2134, 0.2514, 0.2711, 0.2815, 0.2868, 0.2896, 0.2911, 0.2915,
        0.2925], grad_fn=<MvBackward>)

In [50]:
H_n_dot = torch.Tensor().new_zeros((len(tickers),w)) # 알파_i 계산하기전 ticker별로 dot(h_i, h_T) 계산
# size (len(tickers), window_size w)
# https://pytorch.org/docs/stable/generated/torch.matmul.html
# torch.mul broadcasting 볼수 있음
for i in range(len(tickers)):
    H_n_dot[i,:] = torch.matmul(H_n[i], H_n[i][-1])

In [51]:
H_n_dot[0] # 데이터 체크 완료

tensor([0.1407, 0.2134, 0.2514, 0.2711, 0.2815, 0.2868, 0.2896, 0.2911, 0.2915,
        0.2925], grad_fn=<SelectBackward>)

In [52]:
H_n_dot.size() # 사이즈 체크 완료

torch.Size([86, 10])

In [53]:
torch.exp(H_n_dot).size()

torch.Size([86, 10])

In [54]:
torch.exp(H_n_dot)[0]

tensor([1.1510, 1.2379, 1.2858, 1.3115, 1.3251, 1.3321, 1.3358, 1.3379, 1.3384,
        1.3398], grad_fn=<SelectBackward>)

In [55]:
torch.exp(H_n_dot[0][0]) # 데이터 체크 완료

tensor(1.1510, grad_fn=<ExpBackward>)

In [56]:
# https://pytorch.org/docs/stable/generated/torch.nn.Softmax.html
Alpha = nn.Softmax(dim=1)(torch.exp(H_n_dot))

In [57]:
H_n[0].size()

torch.Size([10, 64])

In [58]:
H_n[0].transpose(-2,-1).size()

torch.Size([64, 10])

In [59]:
H_n_tilde = torch.Tensor().new_zeros((len(tickers), h))
for i in range(len(tickers)):
    H_n_tilde[i,:] = torch.matmul(H_n[i].transpose(-2,-1), Alpha[i])

In [60]:
H_n_tilde.size()

torch.Size([86, 64])

### Context Normalization

In [61]:
torch.std_mean(H_n_tilde, dim=1, unbiased=False)[0].repeat((h,1)).transpose(-2,-1)

tensor([[0.0603, 0.0603, 0.0603,  ..., 0.0603, 0.0603, 0.0603],
        [0.0811, 0.0811, 0.0811,  ..., 0.0811, 0.0811, 0.0811],
        [0.0716, 0.0716, 0.0716,  ..., 0.0716, 0.0716, 0.0716],
        ...,
        [0.0662, 0.0662, 0.0662,  ..., 0.0662, 0.0662, 0.0662],
        [0.0574, 0.0574, 0.0574,  ..., 0.0574, 0.0574, 0.0574],
        [0.0693, 0.0693, 0.0693,  ..., 0.0693, 0.0693, 0.0693]],
       grad_fn=<TransposeBackward0>)

In [62]:
H_n_tilde - torch.std_mean(H_n_tilde, dim=1, unbiased=False)[1].repeat((h,1)).transpose(-2,-1)

tensor([[-0.0862, -0.1012,  0.0868,  ...,  0.0613, -0.0418, -0.0521],
        [-0.0706,  0.0229,  0.0906,  ...,  0.1488, -0.0964, -0.0043],
        [-0.1369, -0.0038,  0.0513,  ..., -0.0007, -0.0151, -0.0470],
        ...,
        [ 0.0389, -0.0547, -0.0148,  ...,  0.0521,  0.1303, -0.0836],
        [-0.0812,  0.1340,  0.0197,  ...,  0.0328, -0.0388, -0.0739],
        [ 0.0893, -0.1552, -0.1130,  ...,  0.0253,  0.0651,  0.0289]],
       grad_fn=<SubBackward0>)

In [63]:
(H_n_tilde - torch.std_mean(H_n_tilde, dim=1, unbiased=False)[1].repeat((h,1)).transpose(-2,-1)).size()

torch.Size([86, 64])

In [64]:
torch.std_mean(H_n_tilde, dim=1, unbiased=False)[0].size()

torch.Size([86])

In [65]:
torch.std_mean(H_n_tilde, dim=1, unbiased=False)[0].repeat((h,1)).transpose(-2,-1).size()

torch.Size([86, 64])

In [66]:
torch.std_mean(H_n_tilde, dim=1, unbiased=False)[0].repeat((h,1)).transpose(-2,-1)[0]

tensor([0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603,
        0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603,
        0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603,
        0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603,
        0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603,
        0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603,
        0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603, 0.0603,
        0.0603], grad_fn=<SelectBackward>)

In [67]:
(H_n_tilde - torch.std_mean(H_n_tilde, dim=1, unbiased=False)[1].repeat((h,1)).transpose(-2,-1))[0]

tensor([-0.0862, -0.1012,  0.0868, -0.0103,  0.0044,  0.0034, -0.0407, -0.0322,
         0.0024,  0.0856, -0.0944, -0.0330,  0.0052,  0.1654,  0.0771,  0.0179,
        -0.1144, -0.0329,  0.0353, -0.0114,  0.0187, -0.0511, -0.0201,  0.0416,
        -0.0130,  0.0018, -0.0610, -0.0493, -0.0018,  0.0010, -0.0628, -0.0290,
        -0.0625,  0.1016, -0.0940,  0.0363, -0.0358,  0.1271,  0.0417, -0.0523,
        -0.0699, -0.0593,  0.1095,  0.0458, -0.0575,  0.0605,  0.0593, -0.0617,
         0.0876, -0.0478, -0.0752,  0.0315,  0.0075,  0.0181,  0.0135,  0.0274,
         0.0089, -0.0137,  0.0709,  0.0357,  0.0779,  0.0613, -0.0418, -0.0521],
       grad_fn=<SelectBackward>)

In [68]:
(H_n_tilde - torch.std_mean(H_n_tilde, dim=1, unbiased=False)[1].repeat((h,1)).transpose(-2,-1))/torch.std_mean(H_n_tilde, dim=1, unbiased=False)[0].repeat((h,1)).transpose(-2,-1)

tensor([[-1.4307, -1.6795,  1.4394,  ...,  1.0177, -0.6938, -0.8637],
        [-0.8701,  0.2827,  1.1162,  ...,  1.8336, -1.1882, -0.0530],
        [-1.9122, -0.0531,  0.7159,  ..., -0.0093, -0.2106, -0.6568],
        ...,
        [ 0.5876, -0.8262, -0.2238,  ...,  0.7871,  1.9668, -1.2622],
        [-1.4141,  2.3346,  0.3433,  ...,  0.5723, -0.6765, -1.2873],
        [ 1.2889, -2.2396, -1.6302,  ...,  0.3653,  0.9396,  0.4169]],
       grad_fn=<DivBackward0>)

In [69]:
((H_n_tilde - torch.std_mean(H_n_tilde, dim=1, unbiased=False)[1].repeat((h,1)).transpose(-2,-1))/torch.std_mean(H_n_tilde, dim=1, unbiased=False)[0].repeat((h,1)).transpose(-2,-1)).size()

torch.Size([86, 64])

In [70]:
pre_H_c_n = (H_n_tilde - torch.std_mean(H_n_tilde, dim=1, unbiased=False)[1].repeat((h,1)).transpose(-2,-1))/torch.std_mean(H_n_tilde, dim=1, unbiased=False)[0].repeat((h,1)).transpose(-2,-1)

In [71]:
pre_H_c_n.size()

torch.Size([86, 64])

In [72]:
# ticker별로 linear layer 생성
for i in range(len(tickers)):
    globals()['ContextNormalLinearLayer_{}'.format(i)] = nn.Linear(h, h)

In [73]:
H_c_n = torch.Tensor().new_zeros(len(tickers), h)
for i in range(len(tickers)):
    H_c_n[i,:]= globals()['ContextNormalLinearLayer_{}'.format(i)](pre_H_c_n[i])

In [74]:
H_c_n.size()

torch.Size([86, 64])

## 3.3 Multi-Level Context Aggregation

In [75]:
H = H_c_n + beta*H_c_n[-1]
H = H[:-1] # 마지막 index context vector 제외
tickers = tickers[:-1] # 마지막 index ticker 제외

In [76]:
H.size()

torch.Size([85, 64])

In [77]:
len(tickers)

85

## 3.4 Data-Axis Self-Attention

### Self-Attention

In [78]:
query_layer = nn.Linear(h, h)
key_layer = nn.Linear(h, h)
value_layer = nn.Linear(h, h)

In [79]:
Q = query_layer(H)
K = key_layer(H)
V = value_layer(H)

In [80]:
# S 생성
S = torch.matmul(Q, K.transpose(-2,-1))
S = S/math.sqrt(h)
S = nn.Softmax(dim=1)(S)

In [81]:
# H_tilda 생성
H_tilda = torch.matmul(S,V)

### Nonlinear Transformation

In [119]:
mlp1 = nn.Linear(h, 4h)
mlp2 = nn.Linear(4h,h)

H_p = H + H_tilda
H_p = mlp1(H_p)
H_p = nn.ReLU(H_p)



tensor([[0.0129, 0.0126, 0.0122,  ..., 0.0104, 0.0136, 0.0134],
        [0.0123, 0.0125, 0.0121,  ..., 0.0139, 0.0133, 0.0148],
        [0.0134, 0.0155, 0.0122,  ..., 0.0111, 0.0125, 0.0122],
        ...,
        [0.0111, 0.0126, 0.0118,  ..., 0.0105, 0.0114, 0.0109],
        [0.0120, 0.0122, 0.0119,  ..., 0.0113, 0.0114, 0.0110],
        [0.0134, 0.0141, 0.0116,  ..., 0.0109, 0.0127, 0.0128]],
       grad_fn=<SoftmaxBackward>)

In [120]:
S.size()

torch.Size([85, 85])

In [121]:
S[0].sum()

tensor(1., grad_fn=<SumBackward0>)

In [115]:
0.0337/0.0547

0.6160877513711152

In [108]:
a = torch.Tensor([[1,2,3],[4,5,6]])
b = torch.Tensor([[3,3,3],[1,1,1]])

In [109]:
a*b

tensor([[3., 6., 9.],
        [4., 5., 6.]])

In [126]:
3*a

tensor([[ 3.,  6.,  9.],
        [12., 15., 18.]])

In [102]:
a/b

tensor([0.3333, 0.6667, 1.0000])

In [103]:
a*b

tensor([3., 6., 9.])

In [100]:
a

tensor([1., 2., 3.])

In [84]:
(H_n_tilde - torch.std_mean(H_n_tilde, dim=1, unbiased=False)[1].repeat((h,1)).transpose(-2,-1))

RuntimeError: ignored

In [83]:
(H_n_tilde - torch.std_mean(H_n_tilde, dim=1, unbiased=False)[0])/torch.std_mean(H_n_tilde, dim=1, unbiased=False)[1].size()

RuntimeError: ignored

In [None]:
train_timesteps = int(tensor_tra_data_X.size()[1])


n_iter = 0
epoch = 0


input = output[ticker_idx,idx : idx + w, :]
input.size()

torch.Size([10, 64])

In [None]:
output_lstm, (h_n, c_n) = lstm(input)

In [None]:
h_n.size()

torch.Size([1, 84, 64])

In [None]:
sample_x = tra_data_X.values
sample_y = tra_data_Y.values

In [None]:
class Encoder(nn.Module):
    """encoder in DA_RNN."""

    def __init__(self, T,
                 input_size,
                 encoder_num_hidden,
                 parallel=False):
        """Initialize an encoder in DA_RNN."""
        super(Encoder, self).__init__()
        self.encoder_num_hidden = encoder_num_hidden
        self.input_size = input_size
        self.parallel = parallel
        self.T = T

        # Fig 1. Temporal Attention Mechanism: Encoder is LSTM
        self.encoder_lstm = nn.LSTM(
            input_size=self.input_size,
            hidden_size=self.encoder_num_hidden,
            num_layers = 1
        )

        # Construct Input Attention Mechanism via deterministic attention model
        # Eq. 8: W_e[h_{t-1}; s_{t-1}] + U_e * x^k
        self.encoder_attn = nn.Linear(
            in_features=2 * self.encoder_num_hidden + self.T - 1,
            out_features=1
        )

    def forward(self, X):
        """forward.

        Args:
            X: input data

        """
        X_tilde = Variable(X.data.new(
            X.size(0), self.T - 1, self.input_size).zero_())
        X_encoded = Variable(X.data.new(
            X.size(0), self.T - 1, self.encoder_num_hidden).zero_())

        # Eq. 8, parameters not in nn.Linear but to be learnt
        # v_e = torch.nn.Parameter(data=torch.empty(
        #     self.input_size, self.T).uniform_(0, 1), requires_grad=True)
        # U_e = torch.nn.Parameter(data=torch.empty(
        #     self.T, self.T).uniform_(0, 1), requires_grad=True)

        # h_n, s_n: initial states with dimention hidden_size
        h_n = self._init_states(X)
        s_n = self._init_states(X)

        for t in range(self.T - 1):
            # batch_size * input_size * (2 * hidden_size + T - 1)
            x = torch.cat((h_n.repeat(self.input_size, 1, 1).permute(1, 0, 2),
                           s_n.repeat(self.input_size, 1, 1).permute(1, 0, 2),
                           X.permute(0, 2, 1)), dim=2)

            x = self.encoder_attn(
                x.view(-1, self.encoder_num_hidden * 2 + self.T - 1))

            # get weights by softmax
            alpha = F.softmax(x.view(-1, self.input_size))

            # get new input for LSTM
            x_tilde = torch.mul(alpha, X[:, t, :])

            # Fix the warning about non-contiguous memory
            # https://discuss.pytorch.org/t/dataparallel-issue-with-flatten-parameter/8282
            self.encoder_lstm.flatten_parameters()

            # encoder LSTM
            _, final_state = self.encoder_lstm(x_tilde.unsqueeze(0), (h_n, s_n))
            h_n = final_state[0]
            s_n = final_state[1]

            X_tilde[:, t, :] = x_tilde
            X_encoded[:, t, :] = h_n

        return X_tilde, X_encoded

    def _init_states(self, X):
        """Initialize all 0 hidden states and cell states for encoder.

        Args:
            X

        Returns:
            initial_hidden_states
        """
        # https://pytorch.org/docs/master/nn.html?#lstm
        return Variable(X.data.new(1, X.size(0), self.encoder_num_hidden).zero_())


class Decoder(nn.Module):
    """decoder in DA_RNN."""

    def __init__(self, T, decoder_num_hidden, encoder_num_hidden):
        """Initialize a decoder in DA_RNN."""
        super(Decoder, self).__init__()
        self.decoder_num_hidden = decoder_num_hidden
        self.encoder_num_hidden = encoder_num_hidden
        self.T = T

        self.attn_layer = nn.Sequential(
            nn.Linear(2 * decoder_num_hidden + encoder_num_hidden, encoder_num_hidden),
            nn.Tanh(),
            nn.Linear(encoder_num_hidden, 1)
        )
        self.lstm_layer = nn.LSTM(
            input_size=1,
            hidden_size=decoder_num_hidden
        )
        self.fc = nn.Linear(encoder_num_hidden + 1, 1)
        self.fc_final = nn.Linear(decoder_num_hidden + encoder_num_hidden, 1)

        self.fc.weight.data.normal_()

    def forward(self, X_encoded, y_prev):
        """forward."""
        d_n = self._init_states(X_encoded)
        c_n = self._init_states(X_encoded)

        for t in range(self.T - 1):

            x = torch.cat((d_n.repeat(self.T - 1, 1, 1).permute(1, 0, 2),
                           c_n.repeat(self.T - 1, 1, 1).permute(1, 0, 2),
                           X_encoded), dim=2)

            beta = F.softmax(self.attn_layer(
                x.view(-1, 2 * self.decoder_num_hidden + self.encoder_num_hidden)).view(-1, self.T - 1))

            # Eqn. 14: compute context vector
            # batch_size * encoder_hidden_size
            context = torch.bmm(beta.unsqueeze(1), X_encoded)[:, 0, :]
            if t < self.T - 1:
                # Eqn. 15
                # batch_size * 1
                y_tilde = self.fc(
                    torch.cat((context, y_prev[:, t].unsqueeze(1)), dim=1))

                # Eqn. 16: LSTM
                self.lstm_layer.flatten_parameters()
                _, final_states = self.lstm_layer(
                    y_tilde.unsqueeze(0), (d_n, c_n))

                d_n = final_states[0]  # 1 * batch_size * decoder_num_hidden
                c_n = final_states[1]  # 1 * batch_size * decoder_num_hidden

        # Eqn. 22: final output
        y_pred = self.fc_final(torch.cat((d_n[0], context), dim=1))

        return y_pred

    def _init_states(self, X):
        """Initialize all 0 hidden states and cell states for encoder.

        Args:
            X
        Returns:
            initial_hidden_states

        """
        # hidden state and cell state [num_layers*num_directions, batch_size, hidden_size]
        # https://pytorch.org/docs/master/nn.html?#lstm
        return Variable(X.data.new(1, X.size(0), self.decoder_num_hidden).zero_())


class DA_rnn(nn.Module):
    """da_rnn."""

    def __init__(self, X, y, T,
                 encoder_num_hidden,
                 decoder_num_hidden,
                 batch_size,
                 learning_rate,
                 epochs,
                 parallel=False):
        """da_rnn initialization."""
        super(DA_rnn, self).__init__()
        self.encoder_num_hidden = encoder_num_hidden
        self.decoder_num_hidden = decoder_num_hidden
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.parallel = parallel
        self.shuffle = False
        self.epochs = epochs
        self.T = T
        self.X = X
        self.y = y

        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        print("==> Use accelerator: ", self.device)

        self.Encoder = Encoder(input_size=X.shape[1],
                               encoder_num_hidden=encoder_num_hidden,
                               T=T).to(self.device)
        self.Decoder = Decoder(encoder_num_hidden=encoder_num_hidden,
                               decoder_num_hidden=decoder_num_hidden,
                               T=T).to(self.device)

        # Loss function
        self.criterion = nn.CrossEntropyLoss()

        if self.parallel:
            self.encoder = nn.DataParallel(self.encoder)
            self.decoder = nn.DataParallel(self.decoder)

        self.encoder_optimizer = optim.Adam(params=filter(lambda p: p.requires_grad,
                                                          self.Encoder.parameters()),
                                            lr=self.learning_rate)
        self.decoder_optimizer = optim.Adam(params=filter(lambda p: p.requires_grad,
                                                          self.Decoder.parameters()),
                                            lr=self.learning_rate)

        # Training set
        self.train_timesteps = int(self.X.shape[0] * 0.7)
        self.y = self.y - np.mean(self.y[:self.train_timesteps])
        self.input_size = self.X.shape[1]

    def train(self):
        """training process."""
        iter_per_epoch = int(np.ceil(self.train_timesteps * 1. / self.batch_size))
        self.iter_losses = np.zeros(self.epochs * iter_per_epoch)
        self.epoch_losses = np.zeros(self.epochs)

        n_iter = 0

        for epoch in range(self.epochs):
            if self.shuffle:
                ref_idx = np.random.permutation(self.train_timesteps - self.T)
            else:
                ref_idx = np.array(range(self.train_timesteps - self.T))

            idx = 0

            while (idx < self.train_timesteps):
                # get the indices of X_train
                indices = ref_idx[idx:(idx + self.batch_size)]
                # x = np.zeros((self.T - 1, len(indices), self.input_size))
                x = np.zeros((len(indices), self.T - 1, self.input_size))
                y_prev = np.zeros((len(indices), self.T - 1))
                y_gt = self.y[indices + self.T]

                # format x into 3D tensor
                for bs in range(len(indices)):
                    x[bs, :, :] = self.X[indices[bs]:(indices[bs] + self.T - 1), :]
                    y_prev[bs, :] = self.y[indices[bs]: (indices[bs] + self.T - 1)]

                loss = self.train_forward(x, y_prev, y_gt)
                self.iter_losses[int(epoch * iter_per_epoch + idx / self.batch_size)] = loss

                idx += self.batch_size
                n_iter += 1

                if n_iter % 10000 == 0 and n_iter != 0:
                    for param_group in self.encoder_optimizer.param_groups:
                        param_group['lr'] = param_group['lr'] * 0.9
                    for param_group in self.decoder_optimizer.param_groups:
                        param_group['lr'] = param_group['lr'] * 0.9

                self.epoch_losses[epoch] = np.mean(self.iter_losses[range(
                    epoch * iter_per_epoch, (epoch + 1) * iter_per_epoch)])

            if epoch % 10 == 0:
                print("Epochs: ", epoch, " Iterations: ", n_iter,
                      " Loss: ", self.epoch_losses[epoch])

            if epoch % 10 == 0:
                y_train_pred = self.test(on_train=True)
                y_test_pred = self.test(on_train=False)
                y_pred = np.concatenate((y_train_pred, y_test_pred))
                plt.ioff()
                plt.figure()
                plt.plot(range(1, 1 + len(self.y)), self.y, label="True")
                plt.plot(range(self.T, len(y_train_pred) + self.T),
                         y_train_pred, label='Predicted - Train')
                plt.plot(range(self.T + len(y_train_pred), len(self.y) + 1),
                         y_test_pred, label='Predicted - Test')
                plt.legend(loc='upper left')
                plt.show()

            # # Save files in last iterations
            # if epoch == self.epochs - 1:
            #     np.savetxt('../loss.txt', np.array(self.epoch_losses), delimiter=',')
            #     np.savetxt('../y_pred.txt',
            #                np.array(self.y_pred), delimiter=',')
            #     np.savetxt('../y_true.txt',
            #                np.array(self.y_true), delimiter=',')

    def train_forward(self, X, y_prev, y_gt):
        """
        Forward pass.

        Args:
            X:
            y_prev:
            y_gt: Ground truth label

        """
        # zero gradients
        self.encoder_optimizer.zero_grad()
        self.decoder_optimizer.zero_grad()

        input_weighted, input_encoded = self.Encoder(
            Variable(torch.from_numpy(X).type(torch.FloatTensor).to(self.device)))
        y_pred = self.Decoder(input_encoded, Variable(
            torch.from_numpy(y_prev).type(torch.FloatTensor).to(self.device)))

        y_true = Variable(torch.from_numpy(
            y_gt).type(torch.FloatTensor).to(self.device))

        y_true = y_true.view(-1, 1)
        loss = self.criterion(y_pred, y_true)
        loss.backward()

        self.encoder_optimizer.step()
        self.decoder_optimizer.step()

        return loss.item()


    def test(self, on_train=False):
        """test."""

        if on_train:
            y_pred = np.zeros(self.train_timesteps - self.T + 1)
        else:
            y_pred = np.zeros(self.X.shape[0] - self.train_timesteps)

        i = 0
        while i < len(y_pred):
            batch_idx = np.array(range(len(y_pred)))[i: (i + self.batch_size)]
            X = np.zeros((len(batch_idx), self.T - 1, self.X.shape[1]))
            y_history = np.zeros((len(batch_idx), self.T - 1))

            for j in range(len(batch_idx)):
                if on_train:
                    X[j, :, :] = self.X[range(
                        batch_idx[j], batch_idx[j] + self.T - 1), :]
                    y_history[j, :] = self.y[range(
                        batch_idx[j], batch_idx[j] + self.T - 1)]
                else:
                    X[j, :, :] = self.X[range(
                        batch_idx[j] + self.train_timesteps - self.T, batch_idx[j] + self.train_timesteps - 1), :]
                    y_history[j, :] = self.y[range(
                        batch_idx[j] + self.train_timesteps - self.T, batch_idx[j] + self.train_timesteps - 1)]

            y_history = Variable(torch.from_numpy(
                y_history).type(torch.FloatTensor).to(self.device))
            _, input_encoded = self.Encoder(
                Variable(torch.from_numpy(X).type(torch.FloatTensor).to(self.device)))
            y_pred[i:(i + self.batch_size)] = self.Decoder(input_encoded,
                                                           y_history).cpu().data.numpy()[:, 0]
            i += self.batch_size

        return y_pred

In [None]:
X = sample_x
y= sample_y

batchsize = 128
nhidden_encoder = 128
nhidden_decoder = 128
ntimestep = 10
lr = 0.001
epochs = 100

In [None]:
# Initialize model
print("==> Initialize DA-RNN model ...")
model = DA_rnn(
    X,
    y,
    ntimestep,
    nhidden_encoder,
    nhidden_decoder,
    batchsize,
    lr,
    epochs
)

# Train
print("==> Start training ...")
model.train()

# Prediction
y_pred = model.test()

fig1 = plt.figure()
plt.semilogy(range(len(model.iter_losses)), model.iter_losses)
plt.savefig("1.png")
plt.close(fig1)

fig2 = plt.figure()
plt.semilogy(range(len(model.epoch_losses)), model.epoch_losses)
plt.savefig("2.png")
plt.close(fig2)

fig3 = plt.figure()
plt.plot(y_pred, label='Predicted')
plt.plot(model.y[model.train_timesteps:], label="True")
plt.legend(loc='upper left')
plt.savefig("3.png")
plt.close(fig3)
print('Finished Training')

==> Initialize DA-RNN model ...
==> Use accelerator:  cpu
==> Start training ...




RuntimeError: ignored