In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import log_loss

In [2]:
%load_ext autoreload
%autoreload 2
from functions import loaddata, MACD, RSI
from portfolio import Portfolio

In [3]:
def sign(x):
    """Encode the direction of ``x`` as a class label.

    Returns 1 when ``x`` is strictly positive, 0 when it is strictly
    negative, and 2 otherwise (zero, or any value such as NaN for which
    both comparisons are false).
    """
    if x > 0:
        return 1
    # Not positive: distinguish "negative" from "neither" in one expression.
    return 0 if x < 0 else 2
    
def my_log(x):
    """Natural logarithm of ``x`` that passes 0 and NaN through unchanged.

    Zero and NaN inputs are returned as-is (no ``np.log`` call is made for
    them); every other value is handed to ``np.log``.
    """
    # Guard clause: these inputs are returned untouched.
    if x == 0 or np.isnan(x):
        return x
    return np.log(x)

In [62]:
# Train a logistic-regression direction classifier on one date range, then
# evaluate it and run a simple open-to-close long/short simulation on a
# second date range.

# --- Configuration ---------------------------------------------------------
SEED = 42                    # fixes the training shuffle so re-runs are reproducible
MIN_ROWS = 100               # a series needs MORE than this many complete rows to be used
RSI_WINDOW = 12              # second argument passed to RSI
MACD_PARAMS = (26, 12, 9)    # positional arguments passed to MACD after the price series
FEATURE_COLS = ["X1", "X2", "X3", "X4", "X5"]


def _feature_frame(row, overnight, intraday, volumes, closes, targets, date_index=False):
    """Build the (unfiltered) feature/label frame for the series at position ``row``.

    Row ``k`` of the result pairs columns ``k + 1`` of ``overnight`` (X1) and
    ``targets`` (Y) with column ``k`` of ``intraday`` (X2), log-``volumes``
    (X3), ``RSI(intraday row)`` (X4) and ``MACD(closes row)`` (X5).

    Parameters
    ----------
    row : int
        Positional row index; all input frames are indexed with the same ``row``.
    overnight, intraday, volumes, closes, targets : pandas.DataFrame
        Frames as returned by ``loaddata`` (presumably one row per instrument
        and one column per trading date -- confirm against ``loaddata``).
    date_index : bool, default False
        When True the result is indexed by the target's column labels;
        otherwise it gets a default positional index.

    Returns
    -------
    pandas.DataFrame
        Columns X1..X5 and Y, NaNs still present (callers apply ``dropna``).
        Y holds the class labels produced by ``sign``.
    """
    labels = targets.iloc[row, 1:].map(sign, na_action="ignore")
    columns = {
        "X1": np.array(overnight.iloc[row, 1:]),
        "X2": np.array(intraday.iloc[row, :-1]),
        "X3": np.array(volumes.iloc[row, :-1].map(my_log, na_action="ignore")),
        "X4": np.array(RSI(intraday.iloc[row], RSI_WINDOW)[:-1]),
        "X5": np.array(MACD(closes.iloc[row], *MACD_PARAMS)[:-1]),
        "Y": np.array(labels),
    }
    return pd.DataFrame(columns, index=labels.index if date_index else None)


# --- Load data -------------------------------------------------------------
# NOTE(review): end_date equals start_date1; if loaddata treats both bounds as
# inclusive, that boundary date is in both the training and the test range --
# confirm against loaddata.
start_date = 20160101
end_date = 20180101
rtxm_ti2 = loaddata("rtxm_ti2", start_date, end_date)
rtxm_ti1 = loaddata("rtxm_ti1", start_date, end_date)
r_ti1 = loaddata("r_ti1", start_date, end_date)
r_ti2 = loaddata("r_ti2", start_date, end_date)
volume = loaddata("volall_day", start_date, end_date)
mid_open = loaddata("mid_open", start_date, end_date)
mid_close = loaddata("mid_close", start_date, end_date)

start_date1 = 20180101
end_date1 = 20190101
rtxm_ti2_test = loaddata("rtxm_ti2", start_date1, end_date1)
rtxm_ti1_test = loaddata("rtxm_ti1", start_date1, end_date1)
r_ti1_test = loaddata("r_ti1", start_date1, end_date1)
r_ti2_test = loaddata("r_ti2", start_date1, end_date1)
volume_test = loaddata("volall_day", start_date1, end_date1)
mid_close_test = loaddata("mid_close", start_date1, end_date1)
mid_open_test = loaddata("mid_open", start_date1, end_date1)

# --- Build the pooled training set -----------------------------------------
train_frames = []
for i in range(mid_close.shape[0]):
    new_train = _feature_frame(i, rtxm_ti1, rtxm_ti2, volume, mid_close, r_ti2).dropna()
    # Skip series with too few complete observations.
    if new_train.shape[0] > MIN_ROWS:
        train_frames.append(new_train)

if not train_frames:
    raise ValueError(
        "No training series has more than %d complete rows; cannot fit the model." % MIN_ROWS
    )
final_df = pd.concat(train_frames)

# --- Fit -------------------------------------------------------------------
LR = LogisticRegression(max_iter=1000)
# Seeded shuffle: the original shuffle was unseeded, so every run saw a
# different row order.
temp = final_df.sample(frac=1, random_state=SEED).reset_index(drop=True)
final_x_training = temp[FEATURE_COLS]
final_y_training = temp[["Y"]]
LR.fit(final_x_training, np.ravel(final_y_training))

# --- Evaluate and simulate on the test range -------------------------------
total = []   # P&L per evaluated series
test = []    # per-trade flag: did the trade make money?
pls = []     # per-row flag: does (open < close) agree with the Y label?
chk = []     # per-series accuracy from LR.score
for i in range(mid_close_test.shape[0]):
    total_made = 0

    price_i_test = mid_close_test.iloc[i]
    open_price_i_test = mid_open_test.iloc[i]

    new_test = _feature_frame(
        i, rtxm_ti1_test, rtxm_ti2_test, volume_test, mid_close_test, r_ti2_test,
        date_index=True,
    ).dropna()
    X_test = new_test[FEATURE_COLS]
    y_test = new_test[["Y"]]

    if X_test.shape[0] <= MIN_ROWS:
        continue

    chk.append(LR.score(X_test, y_test))
    signals = LR.predict(X_test)
    signal_dates = new_test.index

    # Hoisted out of the inner loop: the label array and both price lookups
    # are the same for every row of this series.
    labels = np.ravel(y_test)
    open_prices = open_price_i_test.loc[signal_dates].to_numpy()
    close_prices = price_i_test.loc[signal_dates].to_numpy()

    for signal, label, price1, price2 in zip(signals, labels, open_prices, close_prices):
        pls.append(int(price1 - price2 < 0) == label)
        if signal == 0:
            # Short: profit when the close is below the open.
            gain = price1 - price2
        elif signal == 1:
            # Long: profit when the close is above the open.
            gain = price2 - price1
        else:
            # Any other predicted class (label 2 from sign): no trade.
            continue
        test.append(gain > 0)
        total_made += gain
    total.append(total_made)

# --- Report ----------------------------------------------------------------
if chk:
    initial_funds = mid_open_test.iloc[:, 0].dropna().sum()
    print("Mean Score: ", sum(chk)/len(chk))
    print("Max Score: ", max(chk))
    print("Min Score: ", min(chk))
    print("Proportion of correct labels: ", sum(pls)/len(pls))
    # `test` is empty when the model never predicted class 0 or 1.
    print("Proportion of correct guesses: ", sum(test)/len(test) if test else float("nan"))
    print("Total Money Made: ", sum(total))
    print("Initial Funds: ", initial_funds)
    print("YoY: ", sum(total) / initial_funds)
else:
    print("No test series had more than", MIN_ROWS, "complete rows; nothing to report.")


Mean Score:  0.5097640812947896
Max Score:  0.6588235294117647
Min Score:  0.37916666666666665
Proportion of correct labels:  0.9437519453006556
Proportion of correct guesses:  0.4939268968563941
Total Money Made:  4109.165072724187
Initial Funds:  58307.81996846199
YoY:  0.07047365301852797
