In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression, LinearRegression

In [2]:
%load_ext autoreload
%autoreload 2
from functions import loaddata, MACD, RSI
from portfolio import Portfolio

In [3]:
def sign(x):
    """Encode the direction of ``x`` as a class label.

    Returns 1 when ``x`` is strictly positive, 0 when it is strictly
    negative, and 2 otherwise (zero, or any value for which both
    comparisons are false, such as NaN).
    """
    if x > 0:
        return 1
    return 0 if x < 0 else 2
    
def my_log(x):
    """Natural logarithm of ``x`` that passes zero and NaN through unchanged.

    Zero is returned as-is (not ``-inf``) and NaN is returned as NaN; every
    other value is handed to ``np.log``.
    """
    # Guard clause: these two inputs are returned untouched.
    if x == 0 or np.isnan(x):
        return x
    return np.log(x)

In [4]:
# Date window passed to loaddata for the first data set (used below to build
# the training frame). NOTE(review): values look like YYYYMMDD integers — confirm
# against loaddata's contract.
start_date, end_date = 20180101, 20190101

# Load every dataset over the same window; the names on the left line up
# one-to-one, in order, with the dataset keys on the right.
rtxm_ti2, rtxm_ti1, r_ti2, volume, mid_close, risk = (
    loaddata(dataset, start_date, end_date)
    for dataset in (
        "rtxm_ti2",
        "rtxm_ti1",
        "r_ti2",
        "volall_day",
        "mid_close",
        "bfast_totrisk",
    )
)

In [16]:
# Row-wise mean of the risk frame (NaNs are skipped by default); rows whose
# mean is still NaN are dropped.
average_risk = risk.mean(axis=1).dropna()

# Cut-off value at the 0.3333 quantile of the per-row means.
bottom_third = average_risk.quantile(0.3333)

# Index labels of the rows strictly below the cut-off.
# NOTE(review): these labels are later used with .iloc (positional lookup),
# which presumably relies on a default 0..n-1 index — confirm.
bottom_third_companies = average_risk.loc[average_risk.lt(bottom_third)].index

In [18]:
# Build one feature/label frame per selected row and pool them into final_df.
total = []
for i in bottom_third_companies:
    price_i = mid_close.iloc[i]

    # Column alignment: X1 and the label drop the FIRST column, while X2..X5
    # drop the LAST one, so each X2..X5 value at column k is paired with the
    # X1 value and label at column k + 1.
    # NOTE(review): presumably columns are consecutive trading days — confirm.
    overnight_i = rtxm_ti1.iloc[i, 1:]
    intraday_i = rtxm_ti2.iloc[i, :-1]
    volume_i = volume.iloc[i, :-1].map(my_log, na_action="ignore")

    rsi_12 = RSI(rtxm_ti2.iloc[i], 12)[:-1]
    macd_26_12_9 = MACD(price_i, 26, 12, 9)[:-1]

    # Class label from sign(): 1 = positive, 0 = negative, 2 = zero; NaNs stay NaN.
    y = r_ti2.iloc[i, 1:].map(sign, na_action="ignore")

    # Positional arrays (index discarded) so the shifted slices line up row by row.
    data_train = pd.DataFrame(
        {
            "X1": np.array(overnight_i),
            "X2": np.array(intraday_i),
            "X3": np.array(volume_i),
            "X4": np.array(rsi_12),
            "X5": np.array(macd_26_12_9),
            "Y": np.array(y),
        }
    )
    new_train = data_train.dropna()

    # Keep only rows-sets with more than 100 complete observations.
    if len(new_train) > 100:
        total.append(new_train)

final_df = pd.concat(total)

In [19]:
# Fit a single logistic regression on the pooled training frame.
# The shuffle is seeded so that `temp` (and anything derived from it) is
# identical on every Restart & Run All; the original unseeded sample() gave a
# different row order each run.
SHUFFLE_SEED = 42

LR = LogisticRegression(max_iter=1000)
temp = final_df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)
final_x_training = temp[["X1", "X2", "X3", "X4", "X5"]]
final_y_training = temp[["Y"]]
# np.ravel flattens the single-column label frame to the 1-D target fit() expects.
LR.fit(final_x_training, np.ravel(final_y_training))
# One coefficient row per class label, one column per feature X1..X5.
LR.coef_

array([[ 8.87829533e+00,  1.60371126e+00, -4.66704502e-02,
        -1.32557914e-03,  5.86633663e-02],
       [-8.65992993e+00, -9.42983278e-01, -6.33651531e-02,
         5.82856820e-05, -9.74385558e-02],
       [-2.18365398e-01, -6.60727982e-01,  1.10035603e-01,
         1.26729346e-03,  3.87751895e-02]])

In [20]:
# Date window for the held-out data set. This rebinds start_date / end_date,
# replacing the values used for the earlier load.
start_date, end_date = 20190101, 20200101

# Same dataset keys as the earlier load (minus the risk table), fetched over
# the new window; names on the left match the keys on the right in order.
rtxm_ti2_test, rtxm_ti1_test, r_ti2_test, volume_test, mid_close_test = (
    loaddata(dataset, start_date, end_date)
    for dataset in (
        "rtxm_ti2",
        "rtxm_ti1",
        "r_ti2",
        "volall_day",
        "mid_close",
    )
)

# Score the fitted model separately on each selected row of the test frames.
chk = []
for i in bottom_third_companies:
    price_i_test = mid_close_test.iloc[i]

    # Same column alignment as the training loop: X1 and the label drop the
    # first column, X2..X5 drop the last.
    overnight_i_test = rtxm_ti1_test.iloc[i, 1:]
    intraday_i_test = rtxm_ti2_test.iloc[i, :-1]
    volume_i_test = volume_test.iloc[i, :-1].map(my_log, na_action="ignore")

    rsi_12_test = RSI(rtxm_ti2_test.iloc[i], 12)[:-1]
    macd_26_12_9_test = MACD(price_i_test, 26, 12, 9)[:-1]

    # Raw label series; the cleaned label frame is bound to y_test below.
    label_i_test = r_ti2_test.iloc[i, 1:].map(sign, na_action="ignore")

    data_test = pd.DataFrame(
        {
            "X1": np.array(overnight_i_test),
            "X2": np.array(intraday_i_test),
            "X3": np.array(volume_i_test),
            "X4": np.array(rsi_12_test),
            "X5": np.array(macd_26_12_9_test),
            "Y": np.array(label_i_test),
        }
    )
    new_test = data_test.dropna()
    X_test = new_test[["X1", "X2", "X3", "X4", "X5"]]
    y_test = new_test[["Y"]]

    # Only score rows-sets with more than 100 complete observations.
    if len(X_test) > 100:
        chk.append(LR.score(X_test, y_test))

# Unweighted mean, best and worst of the per-row accuracy scores.
print(sum(chk)/len(chk), max(chk), min(chk))

0.5244614713384114 0.6790123456790124 0.42083333333333334


In [21]:
# Labelled summary of the per-row accuracy scores collected in `chk`.
for label, value in (
    ("Mean Score: ", sum(chk) / len(chk)),
    ("Max Score: ", max(chk)),
    ("Min Score: ", min(chk)),
):
    print(label, value)

Mean Score:  0.5244614713384114
Max Score:  0.6790123456790124
Min Score:  0.42083333333333334
