In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression, LinearRegression

In [2]:
%load_ext autoreload
%autoreload 2
from functions import loaddata, MACD, RSI
from portfolio import Portfolio

In [4]:
def sign(x):
    """Encode the sign of ``x`` as an integer class label.

    Returns:
        1 if ``x`` is strictly positive, 0 if it is strictly negative, and 2
        in every other case (zero, or a value such as NaN for which both
        comparisons are false).
    """
    if x > 0:
        return 1
    return 0 if x < 0 else 2
    
def my_log(x):
    """Natural logarithm that passes zero and NaN through unchanged.

    Zero and NaN inputs are returned as-is (so ``log(0)`` never yields
    ``-inf``); every other value is handed to ``np.log``.
    """
    if x == 0 or np.isnan(x):
        return x
    return np.log(x)

In [5]:
# Training window, passed to loaddata as integer bounds.
# NOTE(review): the values look like YYYYMMDD dates, and whether end_date is
# inclusive depends on loaddata -- confirm against functions.py.
start_date, end_date = 20180101, 20190101

# Load the five input frames for the window, in the same order as before.
rtxm_ti2, rtxm_ti1, r_ti2, volume, mid_close = (
    loaddata(dataset, start_date, end_date)
    for dataset in ("rtxm_ti2", "rtxm_ti1", "r_ti2", "volall_day", "mid_close")
)

In [43]:
# Classifier instance shared by the notebook: it is fitted on the pooled
# training frame and then scored on the test rows.
# max_iter is set to 1000, above scikit-learn's documented default of 100.
# NOTE(review): the recorded fit output still shows the
# "TOTAL NO. of ITERATIONS REACHED LIMIT" warning, so the solver had not
# converged; that warning suggests scaling the features or raising max_iter.
LR = LogisticRegression(max_iter=1000)

In [25]:
# Build the pooled training table: one sub-frame per row index of the loaded
# frames, concatenated into final_df at the end.
#
# Column alignment within a row: the X2..X9 predictors drop the last column
# (":-1"), while X1 and the label Y drop the first column ("1:"), so each
# sample pairs values from column k with values from column k + 1.
# NOTE(review): presumably rows are instruments and columns are trading days,
# making Y the next day's return sign -- confirm against loaddata.
total_X = []
total_y = []
total = []
for i in range(len(mid_close)):
    price_i = mid_close.iloc[i]

    overnight_i = rtxm_ti1.iloc[i, 1:]
    intraday_i = rtxm_ti2.iloc[i, :-1]
    # Log-transform the volumes; NaN entries are skipped by na_action.
    volume_i = volume.iloc[i, :-1].map(my_log, na_action="ignore")

    # RSI of row i of rtxm_ti2 for three parameter values, last element dropped.
    rsi_10, rsi_12, rsi_14 = (
        RSI(rtxm_ti2.iloc[i], period)[:-1] for period in (10, 12, 14)
    )

    # MACD of row i of mid_close for three parameter triples, last element dropped.
    macd_50_20_10, macd_26_12_9, macd_20_10_8 = (
        MACD(price_i, *params)[:-1]
        for params in ((50, 20, 10), (26, 12, 9), (20, 10, 8))
    )

    # Class label per column: 1 = positive, 0 = negative, 2 = zero.
    y = r_ti2.iloc[i, 1:].map(sign, na_action="ignore")

    columns = (
        ("X1", overnight_i),
        ("X2", intraday_i),
        ("X3", volume_i),
        ("X4", rsi_10),
        ("X5", rsi_12),
        ("X6", rsi_14),
        ("X7", macd_20_10_8),
        ("X8", macd_26_12_9),
        ("X9", macd_50_20_10),
        ("Y", y),
    )
    data = {name: np.array(values) for name, values in columns}

    data_train = pd.DataFrame(data)
    new_train = data_train.dropna()

    # Only rows with more than 100 complete samples contribute to the pool.
    if len(new_train) > 100:
        total.append(new_train)

final_df = pd.concat(total)

In [44]:
# Shuffle the pooled training rows and fit the classifier.
# The shuffle is seeded so that Restart & Run All reproduces the same row
# order; the original unseeded sample() gave a different order on every run.
SHUFFLE_SEED = 0
FEATURE_COLUMNS = ["X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8", "X9"]

temp = final_df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)
final_x_training = temp[FEATURE_COLUMNS]
final_y_training = temp[["Y"]]

# np.ravel flattens the single-column label frame to the 1-D array fit() expects.
# NOTE(review): the recorded output shows the solver hitting its iteration
# limit on these unscaled features. Scaling is not added here because the
# scoring cell would have to apply the identical transform.
LR.fit(final_x_training, np.ravel(final_y_training))
LR.coef_

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


array([[ 2.60788796,  0.37172102, -0.05136446, -0.07533533,  0.20809874,
        -0.13534321, -0.04334461, -0.1439645 ,  0.14889402],
       [-2.60143746, -0.39885285, -0.05952417, -0.04213592,  0.12659379,
        -0.0858193 , -0.20732114,  0.13041857, -0.09675979],
       [-0.0064505 ,  0.02713183,  0.11088863,  0.11747126, -0.33469253,
         0.22116251,  0.25066576,  0.01354593, -0.05213423]])

In [45]:
# Out-of-sample evaluation: load the window that follows the training window
# and score the fitted classifier separately on each row.
start_date = 20190101
end_date = 20200101
rtxm_ti2_test = loaddata("rtxm_ti2", start_date, end_date)
rtxm_ti1_test = loaddata("rtxm_ti1", start_date, end_date)
r_ti2_test = loaddata("r_ti2", start_date, end_date)
volume_test = loaddata("volall_day", start_date, end_date)
mid_close_test = loaddata("mid_close", start_date, end_date)

# The original loop was hard-coded to range(4000), which raises IndexError
# when fewer rows are loaded. Keep the same cap but never exceed the rows
# that actually exist.
# NOTE(review): the training loop uses every row; raise or remove the cap if
# the evaluation is meant to cover every row as well.
MAX_TEST_ROWS = 4000
n_rows_test = min(MAX_TEST_ROWS, mid_close_test.shape[0])

chk = []  # per-row accuracy returned by LR.score
for i in range(n_rows_test):
    price_i_test = mid_close_test.iloc[i]

    # Same column alignment as the training features: predictors drop the
    # last column (":-1"), X1 and the label drop the first ("1:").
    overnight_i_test = rtxm_ti1_test.iloc[i, 1:]
    intraday_i_test = rtxm_ti2_test.iloc[i, :-1]
    volume_i_test = volume_test.iloc[i, :-1].map(my_log, na_action="ignore")

    rsi_10_test = RSI(rtxm_ti2_test.iloc[i], 10)[:-1]
    rsi_12_test = RSI(rtxm_ti2_test.iloc[i], 12)[:-1]
    rsi_14_test = RSI(rtxm_ti2_test.iloc[i], 14)[:-1]

    macd_50_20_10_test = MACD(price_i_test, 50, 20, 10)[:-1]
    macd_26_12_9_test = MACD(price_i_test, 26, 12, 9)[:-1]
    macd_20_10_8_test = MACD(price_i_test, 20, 10, 8)[:-1]

    # Class label per column: 1 = positive, 0 = negative, 2 = zero.
    # Held under its own name so y_test is not reused for two different objects.
    labels_i_test = r_ti2_test.iloc[i, 1:].map(sign, na_action="ignore")

    features_test = {
        "X1": np.array(overnight_i_test),
        "X2": np.array(intraday_i_test),
        "X3": np.array(volume_i_test),
        "X4": np.array(rsi_10_test),
        "X5": np.array(rsi_12_test),
        "X6": np.array(rsi_14_test),
        "X7": np.array(macd_20_10_8_test),
        "X8": np.array(macd_26_12_9_test),
        "X9": np.array(macd_50_20_10_test),
        "Y": np.array(labels_i_test),
    }

    data_test = pd.DataFrame(features_test)
    new_test = data_test.dropna()
    X_test = new_test[["X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8", "X9"]]
    y_test = new_test[["Y"]]

    # Only rows with more than 100 complete samples are scored, mirroring the
    # training filter. Labels are flattened to 1-D as in the fit call.
    if X_test.shape[0] > 100:
        chk.append(LR.score(X_test, np.ravel(y_test)))

0.5435684647302904


In [46]:
# Summarise the per-row accuracies in chk: mean, best and worst.
mean_accuracy = sum(chk) / len(chk)
print(mean_accuracy, max(chk), min(chk))

0.5214700336612658 0.6625514403292181 0.39090909090909093
