In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import copy
from time import time
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm import tqdm
from datetime import datetime, timedelta
import gc

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Last expression of the cell: shows whether CUDA was detected.
torch.cuda.is_available()

## Functions

In [None]:
def options_FE(df_day,date):
    """Aggregate the option rows of one date into a flat feature list.

    Parameters
    ----------
    df_day : pandas.DataFrame
        Option rows for a single date. The columns read are TradingVolume,
        WholeDayVolume, Putcall, ImpliedVolatility, BaseVolatility, Dividend,
        InterestRate and DividendRate.
    date : object
        Label stored as the first element of the result.

    Returns
    -------
    list
        ``[date, total_volume, put_ratio, total_volume_day, put_ratio_day,
        call_call_ratio, put_put_ratio, avg_vol, med_vol, std_vol, dividend,
        IR, DR, vol]`` (14 elements).

    Notes
    -----
    ``df_day`` is not modified: the implied/base volatility ratio is kept in a
    local Series instead of being written back as a ``rel_vol`` column.
    Ratios whose denominator is zero come out as inf/NaN; they are not
    special-cased here.
    """
    # Rows with Putcall == 1 are counted as puts; everything else as calls.
    df_put = df_day[df_day.Putcall==1]

    total_volume = df_day.TradingVolume.sum()
    put_volume = df_put.TradingVolume.sum()
    call_volume = total_volume-put_volume
    put_ratio = put_volume/total_volume

    total_volume_day = df_day.WholeDayVolume.sum()
    put_volume_day = df_put.WholeDayVolume.sum()
    call_volume_day = total_volume_day-put_volume_day
    put_ratio_day = put_volume_day/total_volume_day

    # Whole-day volume relative to TradingVolume, separately for calls and puts.
    call_call_ratio = call_volume_day/call_volume
    put_put_ratio = put_volume_day/put_volume

    # Local Series on purpose: assigning a column here would mutate the caller's
    # (groupby) slice and trigger SettingWithCopyWarning.
    rel_vol = df_day['ImpliedVolatility']/df_day['BaseVolatility']
    avg_vol = np.nanmean(rel_vol)
    med_vol = np.nanmedian(rel_vol)
    std_vol = np.nanstd(rel_vol)

    # Column means, unpacked in the order of the column list.
    dividend, IR, DR, vol = df_day[['Dividend','InterestRate','DividendRate','BaseVolatility']].mean()

    return [date, total_volume,
            put_ratio,
            total_volume_day,
            put_ratio_day,
            call_call_ratio,
            put_put_ratio,
            avg_vol,
            med_vol,
            std_vol,
            dividend, IR, DR, vol]

# Column names for the list returned by options_FE, in the same order
# ('date' first, then the 13 aggregated option features).
options_feature_names = ['date','total_volume', 
            'put_ratio',
            'total_volume_day',
            'put_ratio_day',
            'call_call_ratio',
            'put_put_ratio',
            'avg_vol',
            'med_vol',
            'std_vol', 
            'dividend', 'IR', 'DR', 'vol']


In [None]:
def options_feature(options):
    """Build one row of aggregated option features per date.

    Parameters
    ----------
    options : pandas.DataFrame
        Raw option rows with a 'Date' column plus the columns read by
        ``options_FE``.

    Returns
    -------
    pandas.DataFrame
        float32 frame indexed by date (parsed with format ``%Y-%m-%d``) whose
        columns are ``options_feature_names`` without 'date'.
    """
    # One feature list per date; tqdm only displays progress.
    rows = [options_FE(df, date) for date, df in tqdm(options.groupby('Date'))]

    # Build the frame straight from the row lists. Going through np.array first
    # would coerce mixed date/float rows to strings and fails on an empty list.
    df_result = (
        pd.DataFrame(rows, columns=options_feature_names)
        .set_index('date')
        .astype('float32')
    )
    df_result.index = pd.to_datetime(df_result.index, format="%Y-%m-%d")

    return df_result

Trades

In [None]:
def gen_dates(b_date, days):
    """Lazily yield `days` consecutive values starting at `b_date`, one day apart."""
    step = timedelta(days=1)
    yield from (b_date + step*offset for offset in range(days))


def get_date_list(start=None, end=None):
    """Return every day from `start` to `end` (both included) as a list.

    `start` defaults to 2000-01-01 and `end` to the current time. The number of
    entries is ``(end - start).days + 1``; the values come from ``gen_dates``.
    """
    start = datetime.strptime("2000-01-01", "%Y-%m-%d") if start is None else start
    end = datetime.now() if end is None else end
    n_days = (end-start).days+1
    return list(gen_dates(start, n_days))

In [None]:
def fill_date(df):
    """Expand period-level trade totals into one row per calendar day.

    For each of TotalSales, TotalPurchases, TotalTotal and TotalBalance the
    relative change against the previous row is computed
    (``(value[i] - value[i-1]) / value[i-1]``). The change of row ``i`` is then
    repeated for every day between that row's StartDate and EndDate
    (inclusive, via ``get_date_list``).

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a 0..n-1 integer index (rows are addressed with
        ``df.loc[i, ...]``) and the columns StartDate, EndDate and the four
        Total* columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``['Date', 'TotalSales', 'TotalPurchases', 'TotalTotal',
        'TotalBalance']``; when a date occurs more than once only its first
        row is kept. A frame with fewer than two rows yields an empty result
        instead of raising, and the value columns are numeric rather than
        object dtype.
    """
    value_columns = ['TotalSales','TotalPurchases','TotalTotal','TotalBalance']

    # ratios[k][i-1] is the relative change of column k between rows i-1 and i.
    ratios = []
    for column in value_columns:
        change = df[column].diff().dropna().reset_index(drop=True)
        ratios.append(change/df[column][:-1].tolist())

    # Collect plain row lists and build the frame once; stacking arrays inside
    # the loop re-copies everything accumulated so far on each iteration.
    rows = []
    for i in range(1,df.shape[0]):
        period_values = [ratio[i-1] for ratio in ratios]
        for day in get_date_list(df.loc[i,"StartDate"],df.loc[i,"EndDate"]):
            rows.append([day]+period_values)

    date_value_df = pd.DataFrame(rows,columns=['Date']+value_columns)
    return date_value_df.drop_duplicates(subset=['Date'])

In [None]:
def trades_feature(trades):
    """Turn the raw trades table into daily change-ratio frames per market section.

    Rows without a StartDate are dropped, the columns from 'StartDate' through
    'TotalBalance' are kept, StartDate/EndDate are converted to
    ``datetime.date`` and each section is expanded with ``fill_date``.

    Parameters
    ----------
    trades : pandas.DataFrame
        Raw trades table; it must contain 'StartDate', 'EndDate', 'Section' and
        the Total* columns consumed by ``fill_date``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(growth, prime, standard)``, in that order.
    """
    trades_sub = trades[trades["StartDate"].notna()].reset_index(drop=True)

    # Explicit copy: the date columns are overwritten below, and writing into a
    # .loc slice of another frame raises SettingWithCopyWarning.
    trades_subset = trades_sub.loc[:,'StartDate':'TotalBalance'].copy()
    for column in ("StartDate","EndDate"):
        trades_subset[column] = pd.to_datetime(trades_subset[column], format="%Y-%m-%d").dt.date

    def _section_ratios(section_name):
        # fill_date addresses rows by 0..n-1 labels, hence the index reset.
        section = trades_subset[trades_subset["Section"] == section_name].reset_index(drop=True)
        return fill_date(section)

    growth_processed = _section_ratios("Growth Market (Mothers/JASDAQ)")
    prime_processed = _section_ratios("Prime Market (First Section)")
    standard_processed = _section_ratios("Standard Market (Second Section)")

    return growth_processed, prime_processed, standard_processed


Stock features

In [None]:
def RSI(series, period):
    """Relative Strength Index of `series` with exponentially smoothed gains/losses.

    The first averaged gain/loss is the plain mean of the first `period`
    changes; later values are smoothed with ``ewm(com=period-1, adjust=False)``.

    Parameters
    ----------
    series : pandas.Series
        Price series.
    period : int
        Look-back length (>= 1).

    Returns
    -------
    pandas.Series
        RSI values (0..100) labelled with ``series.index[period:]``. If the
        series has fewer than `period` changes, an all-NaN Series over the full
        input index is returned instead of raising.
    """
    delta = series.diff().dropna()
    if len(delta) < period:
        # Not enough changes to seed the first average.
        return pd.Series(np.nan, index=series.index, dtype='float64')

    gains = delta.where(delta > 0, 0.0)
    losses = (-delta).where(delta < 0, 0.0)

    # Seed: the value at position period-1 becomes the simple mean of the first
    # `period` changes; earlier positions are discarded. Positional (.iloc)
    # access keeps this correct for any index type.
    gains.iloc[period-1] = gains.iloc[:period].mean()
    gains = gains.iloc[period-1:]
    losses.iloc[period-1] = losses.iloc[:period].mean()
    losses = losses.iloc[period-1:]

    rs = gains.ewm(com=period-1, adjust=False).mean() / \
         losses.ewm(com=period-1, adjust=False).mean()

    # A zero average loss gives rs = inf, which maps to an RSI of 100.
    return 100 - 100 / (1 + rs)

In [None]:
def MACD_add(data_fct,N = [12,26], M = 9):
    """Return a copy of `data_fct` with DIFF, DEA and MACD columns appended.

    Parameters
    ----------
    data_fct : pandas.DataFrame
        Must contain a 'Close' column. It is not modified.
    N : sequence of two ints
        Spans of the fast (``N[0]``) and slow (``N[1]``) exponential moving
        averages of Close.
    M : int
        Span of the moving average applied to DIFF.

    Returns
    -------
    pandas.DataFrame
        Copy of the input plus
        ``DIFF = EMA(Close, N[0]) - EMA(Close, N[1])``,
        ``DEA = EMA(DIFF, M)`` and ``MACD = 2 * (DIFF - DEA)``.
        Existing columns with these names are overwritten.
    """
    close = data_fct['Close']
    fast_ema = close.ewm(span = N[0], adjust = False).mean()
    slow_ema = close.ewm(span = N[1], adjust = False).mean()
    diff = fast_ema - slow_ema
    dea = diff.ewm(span = M, adjust = False).mean()

    # The indicators share the input's index, so plain column assignment is
    # enough; an index merge would multiply rows on duplicate index labels.
    output = data_fct.copy()
    output['DIFF'] = diff
    output['DEA'] = dea
    output['MACD'] = 2*(diff - dea)

    return output

In [None]:
def KDJ_add(data_fct,N = 9):
    """Return a copy of `data_fct` with KDJ_K, KDJ_D and KDJ_J columns appended.

    ``RSV = (Close - min(Low, N)) / (max(High, N) - min(Low, N)) * 100`` is
    computed over a rolling window of `N` rows and NaN rows are dropped. K is
    RSV smoothed recursively (first value 50, then
    ``2/3 * previous + 1/3 * current``), D is K smoothed the same way, and
    ``J = 3 * D - 2 * K``.

    Parameters
    ----------
    data_fct : pandas.DataFrame
        Must contain 'High', 'Low' and 'Close'. It is not modified.
    N : int
        Rolling window length.

    Returns
    -------
    pandas.DataFrame
        Copy of the input plus the three columns; rows without a defined RSV
        (the first N-1 rows, or rows where RSV is NaN) hold NaN.
    """
    def _smooth(values):
        # Recursive smoothing over a plain array, by position: the first
        # output is fixed at 50, every later one mixes the previous output
        # (2/3) with the current input (1/3).
        smoothed = np.empty(len(values), dtype='float64')
        for i in range(len(values)):
            if i == 0:
                smoothed[i] = 50
            else:
                smoothed[i] = 2/3 * smoothed[i-1] + 1/3 * values[i]
        return smoothed

    lowest = data_fct['Low'].rolling(N).min()
    highest = data_fct['High'].rolling(N).max()
    rsv = ((data_fct['Close'] - lowest)/(highest - lowest)*100).dropna()

    # Work on positional arrays: integer lookups on a Series are label-based
    # for integer indexes (which no longer start at 0 after dropna) and only a
    # deprecated positional fallback for other index types.
    k_values = _smooth(rsv.to_numpy(dtype='float64'))
    d_values = _smooth(k_values)
    # NOTE(review): many references define J as 3*K - 2*D; the formula is kept
    # as it was because changing it would alter the model inputs.
    j_values = 3 * d_values - 2 * k_values

    # Assignment aligns on the index, leaving NaN where RSV is undefined.
    output = data_fct.copy()
    output['KDJ_K'] = pd.Series(k_values, index=rsv.index)
    output['KDJ_D'] = pd.Series(d_values, index=rsv.index)
    output['KDJ_J'] = pd.Series(j_values, index=rsv.index)

    return output

In [None]:
def Boll_add(data_fct,N = 20):
    """Return a copy of `data_fct` with BollUp and BollDown columns appended.

    The bands are the `N`-row rolling mean of Close plus/minus twice the
    `N`-row rolling standard deviation.

    Parameters
    ----------
    data_fct : pandas.DataFrame
        Must contain a 'Close' column. It is not modified.
    N : int
        Rolling window length.

    Returns
    -------
    pandas.DataFrame
        Copy of the input plus 'BollUp' and 'BollDown' (NaN for the first N-1
        rows). Existing columns with these names are overwritten.
    """
    rolling_close = data_fct['Close'].rolling(N)
    moving_average = rolling_close.mean()
    band_width = 2*rolling_close.std()

    # Both bands share the input's index, so direct assignment replaces the
    # index merges (which would multiply rows on duplicate index labels).
    output = data_fct.copy()
    output['BollUp'] = moving_average + band_width
    output['BollDown'] = moving_average - band_width

    return output

Adjust prices

In [None]:
def calculate_adjusted_prices(df_security):
    """Return a forward-filled copy of `df_security` with adjusted OHLC prices.

    Every column is forward-filled first. Open/High/Low/Close are then
    multiplied by the cumulative product of AdjustmentFactor taken from the
    last index label backwards, so each row is scaled by its own factor and by
    the factors of all later rows.

    Parameters
    ----------
    df_security : pandas.DataFrame
        Rows of one security with 'Open', 'High', 'Low', 'Close' and
        'AdjustmentFactor'. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame with the same index and columns.
    """
    # .ffill() replaces the deprecated fillna(method='ffill', inplace=True) and
    # returns a new frame, so the caller's (possibly sliced) frame stays intact.
    adjusted = df_security.ffill()

    price_columns = ['Open', 'High', 'Low', 'Close']
    # Sort descending, accumulate, sort back: a reverse cumulative product
    # aligned on the index.
    cumulative_factor = (
        adjusted.AdjustmentFactor.sort_index(ascending=False)
        .cumprod()
        .sort_index(ascending=True)
    )
    adjusted[price_columns] = adjusted[price_columns].multiply(cumulative_factor, axis=0)
    return adjusted

Padding

In [None]:
def padding(array,max_len):
    """Left-pad a 2-D array to `max_len` rows by repeating its first row.

    Parameters
    ----------
    array : numpy.ndarray
        2-D array with at least one row.
    max_len : int
        Target number of rows.

    Returns
    -------
    numpy.ndarray
        A new array with `max_len` rows (copies of the first row followed by
        `array`) when `array` is shorter than `max_len`; otherwise `array`
        itself, unchanged.
    """
    missing = max_len - array.shape[0]
    if missing <= 0:
        return array

    first_row = array[0,:].reshape(1,-1)
    # No gc.collect() here: this helper runs once per stock inside loops, and a
    # full collection per call costs far more than the tiny temporaries it frees.
    return np.vstack((np.repeat(first_row, missing, axis=0), array))

Split data

In [None]:
def split_data(stock, lookback):
    """Cut `stock` into overlapping windows of `lookback` rows.

    Returns ``[x_train, y_train]``: `x_train` holds the first ``lookback - 1``
    rows of every window (all columns) and `y_train` the last column of each
    window's final row, shaped ``(n_windows, 1)``.
    """
    n_windows = stock.shape[0] - lookback + 1
    windows = np.array([stock[start: start + lookback] for start in range(n_windows)])

    x_train = windows[:,:-1,:]
    y_train = windows[:,-1,-1].reshape(-1,1,)

    del windows
    gc.collect()

    return [x_train, y_train]

In [None]:
def data_processing(stock_prices,options_all,trades_all,stock_info=None):
    """Build one feature DataFrame per security.

    For every SecuritiesCode in `stock_prices` (in order of first appearance)
    the security's rows are price-adjusted, extended with RSI(14), MACD, KDJ
    and Bollinger columns, left-joined on date with the daily option features
    and left-joined with the trade ratios of the market segment the security
    belongs to.

    Parameters
    ----------
    stock_prices : pandas.DataFrame
        Price rows with 'Date', 'SecuritiesCode', 'SupervisionFlag' and the
        columns required by the indicator helpers. It is not modified.
    options_all : pandas.DataFrame
        Raw option rows, passed to ``options_feature``.
    trades_all : pandas.DataFrame
        Raw trades rows, passed to ``trades_feature``.
    stock_info : pandas.DataFrame, optional
        Table with 'SecuritiesCode' and 'NewMarketSegment'. When omitted, the
        notebook-level ``stock_list`` frame is used, as before.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``stocks``: 1-D object array holding one DataFrame per security, and
        ``stock_ids``: the matching security codes.
    """
    if stock_info is None:
        # Existing calls pass three arguments and rely on the global table.
        stock_info = stock_list

    # stock_price
    stock_prices_copy = stock_prices.set_index('Date',inplace = False)
    stock_prices_copy.index = pd.to_datetime(stock_prices_copy.index, format="%Y-%m-%d")
    stock_prices_copy['SupervisionFlag'] = stock_prices_copy['SupervisionFlag'].map({True: 1, False: 0})
    stock_ids = stock_prices_copy.SecuritiesCode.unique()
    # One grouping pass instead of a full-table boolean mask per security.
    by_code = stock_prices_copy.groupby('SecuritiesCode', sort=False)

    # Date-indexed features shared by all securities.
    options_pro = options_feature(options_all)

    def _index_by_date(frame):
        frame = frame.set_index('Date')
        frame.index = pd.to_datetime(frame.index, format="%Y-%m-%d")
        return frame

    growth, prime, standard = trades_feature(trades_all)
    trades_by_market = (
        ('Prime Market', _index_by_date(prime)),
        ('Standard Market', _index_by_date(standard)),
        ('Growth Market', _index_by_date(growth)),
    )

    # Explicit object array filled element by element: np.array(list_of_frames)
    # tries to stack the frames' values and fails (or silently builds a 3-D
    # array) depending on the numpy version and on the frames' shapes.
    stocks = np.empty(len(stock_ids), dtype=object)
    for i, stock_id in enumerate(stock_ids):
        # Copy so the helpers never write into a slice of stock_prices_copy.
        stock = calculate_adjusted_prices(by_code.get_group(stock_id).copy())
        stock['RSI'] = RSI(stock['Close'], 14)
        stock = MACD_add(stock)
        stock = KDJ_add(stock)
        stock = Boll_add(stock)
        stock = stock.merge(options_pro,left_index = True,right_index = True, how = 'left')

        market = stock_info[stock_info["SecuritiesCode"]==stock_id].NewMarketSegment.to_list()[0]
        for market_name, market_trades in trades_by_market:
            if market_name in market:
                stock = stock.merge(market_trades,left_index = True,right_index = True, how = 'left')

        stocks[i] = stock

    del stock_prices_copy, by_code, options_pro, growth, prime, standard, trades_by_market
    gc.collect()

    return stocks, stock_ids

## Main

Load Data and add features

In [None]:
# Load Data
# Training files plus the stock list (read by data_processing for market segments).
stock_prices = pd.read_csv("../input/jpx-tokyo-stock-exchange-prediction/train_files/stock_prices.csv")
options_all = pd.read_csv("../input/jpx-tokyo-stock-exchange-prediction/train_files/options.csv")
trades_all = pd.read_csv("../input/jpx-tokyo-stock-exchange-prediction/train_files/trades.csv")
stock_list = pd.read_csv("../input/jpx-tokyo-stock-exchange-prediction/stock_list.csv")

# Supplemental files with the same three tables.
stock_prices_sup = pd.read_csv("../input/jpx-tokyo-stock-exchange-prediction/supplemental_files/stock_prices.csv")
options_sup = pd.read_csv("../input/jpx-tokyo-stock-exchange-prediction/supplemental_files/options.csv")
trades_sup = pd.read_csv("../input/jpx-tokyo-stock-exchange-prediction/supplemental_files/trades.csv")

# Append the supplemental rows after the training rows.
# Note: concat keeps each frame's own row labels, so index labels repeat.
stock_prices = pd.concat([stock_prices,stock_prices_sup])
options_all = pd.concat([options_all,options_sup])
trades_all = pd.concat([trades_all,trades_sup])

In [None]:
# The supplemental frames have been appended to the main ones; release them.
del stock_prices_sup
del options_sup
del trades_sup
gc.collect()

In [None]:
# Build one feature frame per security; stock_ids holds the matching security codes.
stocks, stock_ids = data_processing(stock_prices,options_all,trades_all)

Data processing and PCA

In [None]:
# Drop identifier and label columns so that only model features remain
for i in range(len(stocks)):
    stocks[i] = stocks[i].drop(columns=['RowId', 'SecuritiesCode','AdjustmentFactor','ExpectedDividend', 'Target'])

# Scale data: every column of every stock is scaled to [-1, 1] on its own
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(-1,1))
column_list = stocks[0].columns
for i in range(len(stocks)):
    for column in column_list:
        stocks[i][column] = scaler.fit_transform(stocks[i][column].values.reshape(-1,1))

# fill nan (after scaling, -1 is the lower bound of every column)
for i in range(len(stocks)):
    stocks[i] = stocks[i].fillna(-1)

# PCA: three feature groups reduced to 3 + 4 + 2 components, fitted per stock.
# fit_transform already fits the model, so no separate fit() call is needed.
# NOTE(review): after this loop pca_1 / pca_2 / pca_3 hold the fit of the LAST
# stock only; applying them to other stocks uses that single stock's projection.
from sklearn.decomposition import PCA
for i in range(len(stocks)):
    feature_1 = stocks[i][['Open', 'High', 'Low', 'Close', 'Volume', 'SupervisionFlag', 'RSI', 'DIFF', 'DEA', 'MACD',
       'KDJ_K', 'KDJ_D', 'KDJ_J', 'BollUp', 'BollDown']]
    pca_1 = PCA(n_components=3)
    PC_1 = pca_1.fit_transform(feature_1)
    feature_2 = stocks[i][['total_volume',
       'put_ratio', 'total_volume_day', 'put_ratio_day', 'call_call_ratio',
       'put_put_ratio', 'avg_vol', 'med_vol', 'std_vol', 'dividend', 'IR',
       'DR', 'vol']]
    pca_2 = PCA(n_components=4)
    PC_2 = pca_2.fit_transform(feature_2)
    feature_3 = stocks[i][['TotalSales', 'TotalPurchases', 'TotalTotal',
       'TotalBalance']]
    pca_3 = PCA(n_components=2)
    PC_3 = pca_3.fit_transform(feature_3)
    stocks[i] = np.hstack((PC_1,PC_2,PC_3))

# add Target as the last column (missing targets become -1)
for i in range(len(stocks)):
    s_code = stock_ids[i]
    target = stock_prices[stock_prices["SecuritiesCode"]==s_code][["Target"]].reset_index(drop = True).fillna(-1).values.reshape(-1,1)
    stocks[i] = np.hstack((stocks[i],target))

# padding: pad every stock to the longest history actually present; taking the
# length of stocks[0] breaks the stacking below as soon as another stock is longer
max_len = max(stock.shape[0] for stock in stocks)
for i in range(len(stocks)):
    stocks[i] = padding(stocks[i],max_len)

# Stack the equally sized per-stock arrays into one float64 array
# (stock, time step, feature)
stocks = stocks.tolist()
stocks = np.array(stocks, dtype=np.float64)


In [None]:
# Persist the combined history to the working directory; the inference loop
# reloads these three files at the start of every iteration.
stock_prices.to_csv("./stock_prices.csv",index=False)
options_all.to_csv("./options_all.csv",index=False)
trades_all.to_csv("./trades_all.csv",index=False)

In [None]:
# The raw frames are on disk now; release the in-memory copies.
del stock_prices, options_all, trades_all, stock_list
gc.collect()

In [None]:
# split data
# Each window spans `lookback` time steps: the first lookback-1 steps are the
# model input and the last column of the final step is the label (see split_data).
lookback = 14
x_train_list = []
y_train_list = []

# One (windows, labels) pair per stock.
for i in range(stocks.shape[0]):
    x_train, y_train = split_data(stocks[i,:,:], lookback)
    x_train_list.append(x_train)
    y_train_list.append(y_train)
    
# switch to tensor
# Stacked per stock, so the leading axis of both tensors is the stock axis.
X_train = np.array(x_train_list, dtype=np.float64)
X_train_tensor = torch.from_numpy(X_train)
# Note: y_train is reused here; it previously held the last stock's labels.
y_train = np.array(y_train_list, dtype=np.float64)
y_train_tensor = torch.from_numpy(y_train)


In [None]:
# Only the tensors are needed from here on; drop the numpy intermediates.
del X_train
del y_train
del x_train_list
del y_train_list
del stocks, stock_ids
gc.collect()

Dataset and Dataloader

In [None]:
class stock_Dataset(Dataset):
    """Pairs two index-aligned containers: item `i` is ``(X[i], y[i])``."""

    def __init__(self,X,y):
        self.X, self.y = X, y

    def __len__(self):
        # Side effect: the length of X is also stored on the instance as `len`.
        self.len = len(self.X)
        return self.len

    def __getitem__(self, i):
        return self.X[i], self.y[i]
    
# Number of dataset items (one item = one stock's full set of windows) per batch.
batch_size = 64
train_dataset = stock_Dataset(X_train_tensor,y_train_tensor)
# Items are reshuffled every epoch.
train_dataloader = DataLoader(train_dataset, batch_size, shuffle=True)
# No held-out test split is built in this notebook.
#test_dataset = stock_Dataset(X_test_tensor,y_test_tensor)
#test_dataloader = DataLoader(test_dataset, batch_size, shuffle=False)

LSTM Model

In [None]:
class LSTM(nn.Module):
    """Stacked batch-first LSTM followed by a linear layer on the last time step.

    Parameters
    ----------
    num_classes : int
        Size of the output produced by the linear layer.
    input_size : int
        Number of features per time step.
    hidden_size : int
        Width of the LSTM hidden state.
    num_layers : int
        Number of stacked LSTM layers.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super().__init__()
        # Keep the hyper-parameters on the module for later inspection.
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self,x):
        """Map `x` of shape (batch, seq_len, input_size) to (batch, num_classes)."""
        # No initial state is passed, so the LSTM starts from its default state.
        sequence_out, _ = self.lstm(x)
        # Only the output of the final time step feeds the linear head.
        last_step = sequence_out[:,-1,:]
        return self.fc(last_step)

In [None]:
def train(dataloader, model, criterion, optimizer):
    """Run one training epoch and report the mean loss per dataset item.

    Every item ``X[i]`` of a batch is passed to the model as its own input, and
    the batch loss is the mean of the per-item losses; one optimizer step is
    taken per batch. Tensors are moved to the notebook-level ``device``.

    Parameters
    ----------
    dataloader : torch.utils.data.DataLoader
        Yields ``(X, y)`` batches.
    model : torch.nn.Module
    criterion : callable
        Loss function called as ``criterion(prediction, target)``.
    optimizer : torch.optim.Optimizer

    Returns
    -------
    float
        Loss averaged over all dataset items (NaN for an empty dataset). The
        same value is printed as ``loss: ...``.
    """
    size = len(dataloader.dataset)
    model.train()

    epoch_loss = 0
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)

        # Mean of the per-item losses of this batch.
        loss = 0
        for i in range(len(X)):
            loss += criterion(model(X[i]),y[i])/len(X)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight by batch size so the epoch figure is a true per-item mean.
        epoch_loss += loss.item()*len(X)

    # Guard the division: an empty dataset previously raised ZeroDivisionError.
    epoch_loss = epoch_loss/size if size else float("nan")
    print("loss: %f" % epoch_loss)

    return epoch_loss

In [None]:
num_epochs = 200 # number of training epochs
learning_rate = 0.00005 # Adam learning rate
input_size = 10 # features per time step: 3 + 4 + 2 PCA components plus the Target column
hidden_size = 30 # number of features in the LSTM hidden state
num_layers = 2 # number of stacked LSTM layers
num_classes = 1 # size of the model output (a single regression value)

model = LSTM(num_classes, input_size, hidden_size, num_layers).to(device)
criterion = torch.nn.MSELoss()    # mean-squared error for regression
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# The training tensors are float64, so the model is cast to double precision.
# The cast is loop-invariant and is therefore done once, before the first epoch.
model = model.double()
for epoch in range(num_epochs):
    print("Epoch: %d" % epoch)
    train(train_dataloader, model, criterion, optimizer)

In [None]:
# Training is finished; release the dataset, the loader and the training tensors.
del train_dataset
del train_dataloader
del X_train_tensor
del y_train_tensor
gc.collect()

In [None]:
# The loss function and the optimizer are not used after training.
del criterion, optimizer
gc.collect()

In [None]:
import numpy as np
import jpx_tokyo_market_prediction
# Competition API objects; the inference loop iterates over iter_test and
# submits each prediction through env.predict.
env = jpx_tokyo_market_prediction.make_env()   # initialize the environment
iter_test = env.iter_test()    # an iterator which loops over the test files

In [None]:
# Inference only: put the model in evaluation mode once; gradient tracking is
# switched off around the forward passes below.
model.eval()

count = 0
for (prices, options, financials, trades, secondary_prices, sample_prediction) in iter_test:
    print("count = ", count)
    predictions = {}

    # Reload the running history written at the end of the previous iteration
    # (or by the training part of the notebook on the first one).
    stock_prices = pd.read_csv("./stock_prices.csv")
    options_all = pd.read_csv("./options_all.csv")
    trades_all = pd.read_csv("./trades_all.csv")
    stock_list = pd.read_csv("../input/jpx-tokyo-stock-exchange-prediction/stock_list.csv")

    # Only history strictly before the date being predicted is used.
    test_date = prices.Date.unique()[0]
    stock_prices = stock_prices[stock_prices["Date"]<test_date]
    options_all = options_all[options_all["Date"]<test_date]
    trades_all = trades_all[trades_all["Date"]<test_date]

    stocks, stock_ids = data_processing(stock_prices,options_all,trades_all)

    # Drop columns
    for i in range(len(stocks)):
        stocks[i] = stocks[i].drop(columns=['RowId', 'SecuritiesCode','AdjustmentFactor','ExpectedDividend','Target'])

    # Scale data
    scaler = MinMaxScaler(feature_range=(-1,1))
    column_list = stocks[0].columns
    for i in range(len(stocks)):
        for column in column_list:
            stocks[i][column] = scaler.fit_transform(stocks[i][column].values.reshape(-1,1))

    # fill nan
    for i in range(len(stocks)):
        stocks[i] = stocks[i].fillna(-1)

    # PCA
    # Fitted per stock, exactly as in training. The module-level pca_1/2/3
    # objects only hold the fit of the last training stock, so reusing them
    # would project every other stock with the wrong components.
    for i in range(len(stocks)):
        feature_1 = stocks[i][['Open', 'High', 'Low', 'Close', 'Volume', 'SupervisionFlag', 'RSI', 'DIFF', 'DEA', 'MACD',
       'KDJ_K', 'KDJ_D', 'KDJ_J', 'BollUp', 'BollDown']]
        PC_1 = PCA(n_components=3).fit_transform(feature_1)
        feature_2 = stocks[i][['total_volume',
       'put_ratio', 'total_volume_day', 'put_ratio_day', 'call_call_ratio',
       'put_put_ratio', 'avg_vol', 'med_vol', 'std_vol', 'dividend', 'IR',
       'DR', 'vol']]
        PC_2 = PCA(n_components=4).fit_transform(feature_2)
        feature_3 = stocks[i][['TotalSales', 'TotalPurchases', 'TotalTotal',
       'TotalBalance']]
        PC_3 = PCA(n_components=2).fit_transform(feature_3)
        stocks[i] = np.hstack((PC_1,PC_2,PC_3))

    # add Target
    for i in range(len(stocks)):
        s_code = stock_ids[i]
        target = stock_prices[stock_prices["SecuritiesCode"]==s_code][["Target"]].reset_index(drop = True).fillna(-1).values.reshape(-1,1)
        stocks[i] = np.hstack((stocks[i],target))
        del target, s_code

    # padding: pad to the longest history present, not to the first stock's length
    max_len = max(stock.shape[0] for stock in stocks)
    for i in range(len(stocks)):
        stocks[i] = padding(stocks[i],max_len)

    # Stack into one float64 array (stock, time step, feature)
    stocks = stocks.tolist()
    stocks = np.array(stocks, dtype=np.float64)

    gc.collect()

    # One forward pass per stock on its last lookback-1 time steps, the same
    # input length the model saw during training.
    with torch.no_grad():
        for i in range(stocks.shape[0]):
            X_test = stocks[i,:,:].reshape(1,stocks[i,:,:].shape[0],-1)
            X_test = X_test[:,-lookback+1:,:]
            X_tensor = torch.from_numpy(X_test).to(device)
            pred = model(X_tensor)
            predictions[stock_ids[i]] = pred.cpu().numpy().flatten()[0]
            del X_test, X_tensor, pred

    prices["Target"] = prices["SecuritiesCode"].map(predictions)
    prices_new = prices.copy()
    # na_option="bottom": a security without a prediction is ranked last instead
    # of producing a NaN rank that cannot be cast to int.
    prices_new["Rank"] = prices_new.groupby("Date")["Target"].rank(ascending=False, method="first", na_option="bottom") - 1
    prices_new["Rank"] = prices_new["Rank"].astype("int")
    pred_rank = prices_new.set_index("SecuritiesCode")["Rank"]
    sample_prediction['Rank'] = sample_prediction["SecuritiesCode"].map(pred_rank)
    del prices_new
    del pred_rank
    del stocks, stock_ids
    gc.collect()

    # Append today's rows to the history used by the next iteration.
    # Note: the "Target" stored for these rows is the model's own prediction.
    if test_date > stock_prices.Date.unique()[-1]:
        stock_prices = pd.concat([stock_prices,prices]).reset_index(drop = True)
        options_all = pd.concat([options_all,options]).reset_index(drop = True)
        trades_all = pd.concat([trades_all,trades]).reset_index(drop = True)

    del prices, options, trades, test_date

    stock_prices.to_csv("./stock_prices.csv",index=False)
    options_all.to_csv("./options_all.csv",index=False)
    trades_all.to_csv("./trades_all.csv",index=False)
    del stock_prices, options_all, trades_all, stock_list
    gc.collect()

    env.predict(sample_prediction)
    count += 1