In [47]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.preprocessing import PolynomialFeatures
# Shared degree-2 polynomial feature expander; later cells apply it to the X1..X5 feature columns.
poly = PolynomialFeatures(degree=2)

In [2]:
%load_ext autoreload
%autoreload 2
from functions import loaddata, MACD, RSI
from portfolio import Portfolio

In [3]:
def sign(x):
    """Encode the sign of ``x`` as a class label.

    Returns 1 when ``x`` is strictly positive, 0 when it is strictly
    negative, and 2 for everything else (zero, or a NaN that fails both
    comparisons).
    """
    if x > 0:
        return 1
    return 0 if x < 0 else 2
    
def my_log(x):
    """Natural logarithm of ``x`` that passes zero and NaN through untouched.

    Zero and NaN are returned as-is instead of being handed to ``np.log``;
    every other value is returned as ``np.log(x)``.
    """
    if x == 0 or np.isnan(x):
        return x
    return np.log(x)

In [69]:
# Bounds passed to loaddata for the training tables
# (presumably YYYYMMDD dates -- confirm against functions.loaddata).
start_date = 20180101
end_date = 20200101

# One loaddata call per table, issued in the same order as the names below.
rtxm_ti2, rtxm_ti1, r_ti2, volume, mid_close, risk = (
    loaddata(table_name, start_date, end_date)
    for table_name in (
        "rtxm_ti2",
        "rtxm_ti1",
        "r_ti2",
        "volall_day",
        "mid_close",
        "bfast_totrisk",
    )
)

In [70]:
# Mean of `risk` across columns for every row, ignoring NaNs; rows whose mean
# is still NaN are discarded.
per_row_mean = risk.mean(axis=1, skipna=True)
average_risk = per_row_mean.dropna()

# Cut-off at the 0.3333 quantile of those means.
bottom_third = average_risk.quantile(q=0.3333)

# Index labels of the rows strictly below the cut-off
# (presumably one row per company, going by the variable name -- confirm).
below_cutoff = average_risk < bottom_third
bottom_third_companies = average_risk.loc[below_cutoff].index

In [77]:
# Number of rows selected by the low-risk filter.
bottom_third_companies.size

1272

In [71]:
def _training_frame(i):
    """Build the NaN-free feature/label table for the row at position ``i``.

    X1 and Y are read from columns 1..end while X2-X5 stop one column
    earlier, so every output row pairs the X2-X5 values of one column with
    X1 and the label taken from the column immediately after it.
    """
    columns = {
        'X1': np.array(rtxm_ti1.iloc[i, 1:]),
        'X2': np.array(rtxm_ti2.iloc[i, :-1]),
        # Log-transformed volume; my_log leaves zeros alone and NaNs are skipped by map.
        "X3": np.array(volume.iloc[i, :-1].map(my_log, na_action="ignore")),
        "X4": np.array(RSI(rtxm_ti2.iloc[i], 12)[:-1]),
        "X5": np.array(MACD(mid_close.iloc[i], 26, 12, 9)[:-1]),
        # Class label from sign(): 1 positive, 0 negative, 2 zero.
        "Y": np.array(r_ti2.iloc[i, 1:].map(sign, na_action="ignore")),
    }
    return pd.DataFrame(columns).dropna()


# Keep only rows that still have more than 100 complete observations,
# then pool them into a single training table.
per_company_frames = (_training_frame(i) for i in bottom_third_companies)
total = [frame for frame in per_company_frames if frame.shape[0] > 100]

final_df = pd.concat(total)

In [72]:
# Seed for the row shuffle below, so a fresh Restart & Run All reproduces the same fit.
SHUFFLE_SEED = 0

LR = LogisticRegression(max_iter=1000)

# Shuffle the pooled rows (seeded) and renumber them 0..n-1.
temp = final_df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)
final_x_training = temp[["X1","X2","X3","X4","X5"]]
final_y_training = temp[["Y"]]

# Fit the shared polynomial expander on the training features and expand them.
attempt = poly.fit_transform(final_x_training)

# NOTE(review): the recorded run of this cell still emitted the
# "iterations reached limit" convergence warning with max_iter=1000, and that
# warning recommends scaling the inputs. The coefficients shown may therefore
# not be fully converged.
LR.fit(attempt, np.ravel(final_y_training))
LR.coef_

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


array([[ 2.54104530e-03,  2.85653945e-03,  4.61982467e-04,
         1.98022542e-02,  3.40156999e-02, -7.07555999e-04,
         2.27544063e-05, -6.29548168e-06,  4.20896407e-02,
         1.36256366e-01,  3.13141593e-06,  1.92822124e-05,
         9.55242938e-03,  2.23619386e-02,  1.61852807e-04,
         3.34712325e-03, -2.01991256e-03,  4.10666127e-03,
        -4.23087821e-05,  1.26700624e-03,  8.03043304e-03],
       [ 1.84318034e-03, -2.79802201e-03, -2.86701525e-04,
         1.31171393e-02,  4.36660535e-02, -2.62720488e-04,
        -1.73326430e-05,  5.64341604e-06, -4.11685745e-02,
        -1.33792168e-01, -8.53824009e-06,  1.99613541e-06,
        -6.45566656e-03, -7.26679639e-03, -1.34279578e-04,
         4.31155730e-03, -2.95241202e-03, -1.63714859e-02,
         1.38089063e-05,  3.93232564e-03,  6.25656760e-03],
       [-4.38422564e-03, -5.85174403e-05, -1.75280941e-04,
        -3.29193935e-02, -7.76817534e-02,  9.70276487e-04,
        -5.42176326e-06,  6.52065640e-07, -9.21066142e

In [64]:
# Evaluation window.
# NOTE(review): if loaddata treats these as YYYYMMDD bounds, 20190101-20200101
# lies inside the 20180101-20200101 window used to build the training set, so
# the scores below would be in-sample rather than out-of-sample -- confirm.
start_date = 20190101
end_date = 20200101
rtxm_ti2_test = loaddata("rtxm_ti2", start_date, end_date)
rtxm_ti1_test = loaddata("rtxm_ti1", start_date, end_date)
r_ti2_test = loaddata("r_ti2", start_date, end_date)
volume_test = loaddata("volall_day", start_date, end_date)
mid_close_test = loaddata("mid_close", start_date, end_date)

# Accuracy of LR for every selected row that has more than 100 complete observations.
chk = []
for i in bottom_third_companies:
    price_i_test = mid_close_test.iloc[i]

    # X1 and the label come from columns 1..end; the other features stop one
    # column earlier, so each row pairs a column's X2-X5 with the next column's
    # X1 and label.
    overnight_i_test = rtxm_ti1_test.iloc[i, 1:]
    intraday_i_test = rtxm_ti2_test.iloc[i, :-1]
    volume_i_test = volume_test.iloc[i, :-1].map(my_log, na_action="ignore")

    rsi_12_test = RSI(rtxm_ti2_test.iloc[i], 12)[:-1]

    macd_26_12_9_test = MACD(price_i_test, 26, 12, 9)[:-1]

    label_i_test = r_ti2_test.iloc[i, 1:].map(sign, na_action="ignore")

    data_test = pd.DataFrame({
        'X1': np.array(overnight_i_test),
        'X2': np.array(intraday_i_test),
        "X3": np.array(volume_i_test),
        "X4": np.array(rsi_12_test),
        "X5": np.array(macd_26_12_9_test),
        "Y": np.array(label_i_test),
    })
    new_test = data_test.dropna()

    X_test = new_test[["X1","X2","X3","X4","X5"]]
    y_test = new_test[["Y"]]
    if X_test.shape[0] > 100:
        # Reuse the expander exactly as fitted on the training features;
        # evaluation data must not re-fit it.
        x_test = poly.transform(X_test)
        # Flatten the single-column label frame, matching how LR was fitted.
        chk.append(LR.score(x_test, np.ravel(y_test)))

# Mean, best and worst per-row accuracy; guarded so an empty result does not
# raise ZeroDivisionError.
if chk:
    print(sum(chk)/len(chk), max(chk), min(chk))
else:
    print("No row had more than 100 complete observations; nothing to score.")

0.5201889111820222 0.6639004149377593 0.4074074074074074


In [55]:
# Mean / best / worst of the per-row accuracies collected in `chk`.
score_summary = (
    ("Mean Score: ", sum(chk)/len(chk)),
    ("Max Score: ", max(chk)),
    ("Min Score: ", min(chk)),
)
for label, value in score_summary:
    print(label, value)

Mean Score:  0.5251412435215079
Max Score:  0.6419753086419753
Min Score:  0.40329218106995884


<font size="5"> How much do we make? </font>

In [73]:
# Bounds for the trading-simulation tables; the lower bound equals the upper
# bound of the training window (20200101).
start_date = 20200101
end_date = 20210101

# One loaddata call per table, issued in the same order as the names below.
(
    rtxm_ti2_test,
    rtxm_ti1_test,
    r_ti2_test,
    volume_test,
    mid_close_test,
    mid_open_test,
) = (
    loaddata(table_name, start_date, end_date)
    for table_name in (
        "rtxm_ti2",
        "rtxm_ti1",
        "r_ti2",
        "volall_day",
        "mid_close",
        "mid_open",
    )
)

In [74]:

total = []  # profit per traded row (one entry per row with > 100 complete observations)
test = []   # one flag per executed trade: did it make money?
pls = []    # one flag per observation: does the open-to-close direction match the Y label?
for i in bottom_third_companies:
    total_made = 0

    price_i_test = mid_close_test.iloc[i]
    open_price_i_test = mid_open_test.iloc[i]

    # X1 and the label come from columns 1..end; the other features stop one
    # column earlier, so each row pairs a column's X2-X5 with the next column's
    # X1 and label.
    overnight_i_test = rtxm_ti1_test.iloc[i, 1:]
    intraday_i_test = rtxm_ti2_test.iloc[i, :-1]
    volume_i_test = volume_test.iloc[i, :-1].map(my_log, na_action="ignore")

    rsi_12_test = RSI(rtxm_ti2_test.iloc[i], 12)[:-1]

    macd_26_12_9_test = MACD(price_i_test, 26, 12, 9)[:-1]

    label_i_test = r_ti2_test.iloc[i, 1:].map(sign, na_action="ignore")

    # Index the table by the label series' own index so each surviving row can
    # be looked up in the open/close price series below.
    data_test = pd.DataFrame({
        'X1': np.array(overnight_i_test),
        'X2': np.array(intraday_i_test),
        "X3": np.array(volume_i_test),
        "X4": np.array(rsi_12_test),
        "X5": np.array(macd_26_12_9_test),
        "Y": np.array(label_i_test),
    }, index=label_i_test.index)

    new_test = data_test.dropna()
    X_test = new_test[["X1","X2","X3","X4","X5"]]

    if X_test.shape[0] > 100:
        # Reuse the expander exactly as fitted on the training features;
        # hold-out data must not re-fit it.
        x_test = poly.transform(X_test)
        signals = LR.predict(x_test)

        signal_dates = new_test.index
        # Labels extracted once, instead of rebuilding the array on every iteration.
        labels = new_test["Y"].to_numpy()

        for signal_date, signal, label in zip(signal_dates, signals, labels):
            price1 = open_price_i_test.loc[signal_date]
            price2 = price_i_test.loc[signal_date]
            pls.append(int(price1 - price2 < 0) == label)

            if signal == 0:
                # Short: profit is open minus close.
                gain = price1 - price2
            elif signal == 1:
                # Long: profit is close minus open.
                gain = price2 - price1
            else:
                # Any other predicted class (2 = "no move" from sign()) places no trade.
                continue
            test.append(gain > 0)
            total_made += gain
        total.append(total_made)

# Summary; the ratios fall back to NaN instead of raising ZeroDivisionError
# when nothing was traded.
print("Proportion of correct labels: ", sum(pls)/len(pls) if pls else float("nan"))
print("Proportion of correct guesses: ", sum(test)/len(test) if test else float("nan"))
print("Total Money Made: ", sum(total))

Proportion of correct labels:  0.9495751202790675
Proportion of correct guesses:  0.4893855611400408
Total Money Made:  3667.12910771367


In [75]:
# Total simulated profit as a fraction of the summed first-column open prices
# of the selected rows (NaN prices excluded).
# NOTE(review): the denominator covers every selected row, including any that
# were skipped in the simulation for having <= 100 complete observations -- confirm intended.
first_column_open = mid_open_test.iloc[:, 0]
sum(total) / first_column_open[bottom_third_companies.values].dropna().sum()


0.2057963327777884

In [76]:
# Sum of the first-column open prices over the selected rows, NaNs excluded.
first_column_open = mid_open_test.iloc[:, 0]
first_column_open[bottom_third_companies.values].dropna().sum()

17819.2150375843