In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import log_loss

In [3]:
%load_ext autoreload
%autoreload 2
from functions import loaddata, MACD, RSI
from portfolio import Portfolio

In [4]:
def sign(x):
    """Encode the direction of a scalar as a class label.

    Despite the name this is not the mathematical sign function; it
    returns the labels used as the classification target:

    * ``1`` when ``x > 0``
    * ``0`` when ``x < 0``
    * ``2`` when ``x == 0``

    NaN is returned unchanged (the same convention ``my_log`` follows), so
    a missing value is never silently labelled as the "zero" class and can
    still be removed by a later ``dropna()``.
    """
    # NaN compares False against everything, so without this guard it
    # would fall through to the final branch and be labelled 2.
    if np.isnan(x):
        return x
    if x > 0:
        return 1
    if x < 0:
        return 0
    return 2
    
def my_log(x):
    """Natural log of a scalar that leaves zero and NaN untouched.

    Zero and NaN are handed back as-is instead of being passed to
    ``np.log``; every other value is returned as ``np.log(x)``.
    Note that an input of exactly 0 therefore maps to 0, the same value
    that an input of 1 produces.
    """
    # Guard clause: the zero check runs first, so np.isnan is only
    # evaluated for non-zero inputs.
    if x == 0 or np.isnan(x):
        return x
    return np.log(x)

In [5]:
# Training window passed to every loaddata call below.
# NOTE(review): the integers look like YYYYMMDD dates -- confirm against
# `functions.loaddata`, which is not visible from this notebook.
start_date, end_date = 20180101, 20190101
train_window = (start_date, end_date)

# Inputs consumed by the feature-building loop, loaded in the same order
# as before.
rtxm_ti2 = loaddata("rtxm_ti2", *train_window)
rtxm_ti1 = loaddata("rtxm_ti1", *train_window)
r_ti2 = loaddata("r_ti2", *train_window)
volume = loaddata("volall_day", *train_window)
mid_close = loaddata("mid_close", *train_window)

In [6]:
# Build the pooled training set: one feature frame per row of `mid_close`,
# stacked into a single DataFrame `final_df`.
# NOTE(review): presumably each row is one instrument and each column one
# trading day -- confirm against `loaddata`.

# A row is kept only if it has MORE than this many complete observations.
MIN_TRAIN_ROWS = 100

total = []
for i in range(mid_close.shape[0]):
    price_i = mid_close.iloc[i]

    # X1 and the target use positions 1..end, while X2..X5 use positions
    # 0..end-1, so every sample pairs position k+1 of `rtxm_ti1` / `r_ti2`
    # with position k of the remaining predictors.
    overnight_i = rtxm_ti1.iloc[i, 1:]
    intraday_i = rtxm_ti2.iloc[i, :-1]
    volume_i = volume.iloc[i, :-1].map(my_log, na_action="ignore")

    # RSI / MACD come from the local `functions` module; their last
    # element is dropped to keep the same length as the other predictors.
    rsi_12 = RSI(rtxm_ti2.iloc[i], 12)[:-1]
    macd_26_12_9 = MACD(price_i, 26, 12, 9)[:-1]

    # Class label of the target; NaNs are skipped by na_action and
    # removed by dropna() below.
    y = r_ti2.iloc[i, 1:].map(sign, na_action="ignore")

    # np.array() strips the differing indexes so the columns are aligned
    # purely by position.
    data = {'X1': np.array(overnight_i), 'X2': np.array(intraday_i), "X3": np.array(volume_i),
            "X4": np.array(rsi_12), "X5": np.array(macd_26_12_9),
            "Y": np.array(y)}

    data_train = pd.DataFrame(data)
    new_train = data_train.dropna()

    if new_train.shape[0] > MIN_TRAIN_ROWS:
        total.append(new_train)

# pd.concat([]) fails with an opaque "No objects to concatenate"; report
# the actual cause instead.
if not total:
    raise ValueError(
        "No row of mid_close produced more than "
        f"{MIN_TRAIN_ROWS} complete observations; the training set is empty."
    )

final_df = pd.concat(total)

In [7]:
# Fit a logistic regression on the pooled training rows; the target Y
# carries the three class labels produced by `sign` (0, 1, 2).
LR = LogisticRegression(max_iter=1000)

# Shuffle the rows with a fixed seed so that Restart & Run All reproduces
# exactly the same row order (the original shuffle was unseeded).
temp = final_df.sample(frac=1, random_state=42).reset_index(drop=True)
final_x_training = temp[["X1","X2","X3","X4","X5"]]
final_y_training = temp[["Y"]]

# np.ravel flattens the single-column frame into the 1-D target fit() expects.
LR.fit(final_x_training, np.ravel(final_y_training))

# Last expression of the cell: display the fitted coefficients.
LR.coef_

array([[ 5.13531121e+00,  5.64270511e-01, -3.39915056e-02,
        -7.96109833e-05,  3.44841506e-02],
       [-5.19678581e+00, -3.26368829e-01, -5.18425874e-02,
         7.56905443e-05, -5.68939120e-02],
       [ 6.14745924e-02, -2.37901682e-01,  8.58340930e-02,
         3.92043894e-06,  2.24097614e-02]])

In [8]:
# Out-of-sample evaluation: reload the same inputs for the following
# window and score the already-fitted model `LR` separately on each row
# of `mid_close_test`.
start_date = 20190101
end_date = 20200101
rtxm_ti2_test = loaddata("rtxm_ti2", start_date, end_date)
rtxm_ti1_test = loaddata("rtxm_ti1", start_date, end_date)
r_ti2_test = loaddata("r_ti2", start_date, end_date)
volume_test = loaddata("volall_day", start_date, end_date)
mid_close_test = loaddata("mid_close", start_date, end_date)

# A row is scored only if it has MORE than this many complete observations.
MIN_TEST_ROWS = 100

chk = []  # one LR.score() value per scored row
for i in range(mid_close_test.shape[0]):
    price_i_test = mid_close_test.iloc[i]

    # Same positional alignment as the training features: X1 and the
    # target use positions 1..end, X2..X5 use positions 0..end-1.
    overnight_i_test = rtxm_ti1_test.iloc[i, 1:]
    intraday_i_test = rtxm_ti2_test.iloc[i, :-1]
    volume_i_test = volume_test.iloc[i, :-1].map(my_log, na_action="ignore")

    rsi_12_test = RSI(rtxm_ti2_test.iloc[i], 12)[:-1]
    macd_26_12_9_test = MACD(price_i_test, 26, 12, 9)[:-1]

    # Raw label series, kept under its own name so that `y_test` below
    # only ever refers to the cleaned target.
    label_i_test = r_ti2_test.iloc[i, 1:].map(sign, na_action="ignore")

    data_test = pd.DataFrame({
        'X1': np.array(overnight_i_test), 'X2': np.array(intraday_i_test),
        "X3": np.array(volume_i_test),
        "X4": np.array(rsi_12_test), "X5": np.array(macd_26_12_9_test),
        "Y": np.array(label_i_test),
    })
    new_test = data_test.dropna()
    X_test = new_test[["X1","X2","X3","X4","X5"]]
    # 1-D target, matching the raveled target the model was fitted on.
    y_test = new_test["Y"]

    if X_test.shape[0] > MIN_TEST_ROWS:
        chk.append(LR.score(X_test, y_test))

# Without this guard an empty `chk` surfaces as a ZeroDivisionError that
# says nothing about the real cause.
if not chk:
    raise ValueError(
        "No row of mid_close_test produced more than "
        f"{MIN_TEST_ROWS} complete observations; nothing was scored."
    )

# Unweighted mean, best and worst of the per-row scores.
print(sum(chk)/len(chk), max(chk), min(chk))

0.5233934336670714 0.6625514403292181 0.4074074074074074


In [9]:
# Labelled summary of the per-row scores collected in `chk`.
# The mean is evaluated first, exactly as before.
score_summary = (
    ("Mean Score: ", sum(chk)/len(chk)),
    ("Max Score: ", max(chk)),
    ("Min Score: ", min(chk)),
)
for label, value in score_summary:
    print(label, value)

Mean Score:  0.5233934336670714
Max Score:  0.6625514403292181
Min Score:  0.4074074074074074
