In [27]:
import pandas as pd
import numpy as np
import rex
import targeter
from sklearn.model_selection import cross_val_score, RandomizedSearchCV, train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sqlalchemy import create_engine
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from imblearn.over_sampling import SMOTE

In [218]:
engine = create_engine("mysql+pymysql://rex:#Pass123@localhost/new_ml")
sql_d = "SELECT * FROM `gbpusd_d` ORDER BY `index` ASC"
df = pd.read_sql(sql_d, engine, index_col="index")

In [79]:
def double_up(in_df, fac=2):
    drops = ['bs3_mean', 'w3_mean', 's3_mean', 'bs3_std', 'w3_std', 's3_std',
       'bs5_mean', 'w5_mean', 's5_mean', 'bs5_std', 'w5_std', 's5_std',
       'bs10_mean', 'w10_mean', 's10_mean', 'bs10_std', 'w10_std', 's10_std',
       'stoch3_mean', 'rsi3_mean', 'macd3_mean', 'adx3_mean', 'stoch3_std',
       'rsi3_std', 'macd3_std', 'adx3_std', 'stoch5_mean', 'rsi5_mean',
       'macd5_mean', 'adx5_mean', 'stoch5_std', 'rsi5_std', 'macd5_std',
       'adx5_std', 'stoch10_mean', 'rsi10_mean', 'macd10_mean', 'adx10_mean',
       'stoch10_std', 'rsi10_std', 'macd10_std', 'adx10_std', 'target']
    
    df = in_df.copy(deep=True)
    df = targeter.reproduce_columns(df, col_count=fac)
    
    dropable = []
    for i in range(1, fac+1):
        for col in drops:
            dropable.append(f"{col}-{str(i)}")
            
    return(df.drop(dropable, axis=1))

In [220]:
def shape_df_d(in_df):
    df["bottom_bollinger"] = df["bb_lower"] - df["close"]
    df["top_bollinger"] = df["close"] - df["bb_upper"]
    df["mid_bollinger"] = df["close"] - df["bb_center"]
    df["bollinger_range"] = df["bb_upper"] - df["bb_center"]
    df["keltner_range"] = df["kelt_upper"] - df["kelt_center"]
    df["bottom_keltner"] = df["kelt_lower"] - df["close"]
    df["top_keltner"] = df["close"] - df["kelt_upper"]
    df["above_sma10"] = df["close"] - df["sma10"]
    df["above_sma5"] = df["close"] - df["sma10"]
    df["above_ema5"] = df["close"] - df["ema5"]
    df["above_ema10"] = df["close"] - df["ema10"]
    df["ema5_sma10"] = df["ema5"] - df["sma10"]
    df["sma5_sma10"] = df["sma5"] - df["sma10"]
    df["stoch_over_signal"] = df["stoch_stochastic"] - df["stoch_signal"]
    df["macd_over_signal"] = df["macd_macd"] - df["macd_signal"]
    df["adx_over"] = df["adx_pdmi"] - df["adx_ndmi"]
    
    
    levels = [
        "level_1",
        "level_2",
        "level_3",
        "level_4",
        "level_5"
    ]
    
    bull_conditions = []
    bear_conditions = []
    
    for level in levels:
        bull_conditions.append(
            (df['close'] > df[level]) & (df['open'] < df[level])
        )
        bear_conditions.append(
            (df['close'] < df[level]) & (df['open'] > df[level])
        )
        
    df["bull_levels"] = np.select(bull_conditions, np.ones(len(levels)), default=0)
    df["bear_levels"] = np.select(bear_conditions, np.ones(len(levels)), default=0)
    
    drop_cols = [
        "bb_center",
        "bb_upper",
        "bb_lower",
        "kelt_center",
        "kelt_upper",
        "kelt_lower",
        "ema5",
        "sma5",
        "sma10",
        "ema10",
        "psar_psar",
        "curr_trend_down",
        "curr_trend_up",
        "level_1",
        "level_2",
        "level_3",
        "level_4",
        "level_5",
        "adx_pdmi",
        "adx_ndmi",
        "open",
        "high",
        "low",
        "close"
    ]


    return(df.drop(drop_cols, axis=1))

In [229]:
def get_target_regression(in_df):
    df = in_df.copy(deep=True)
    df["target"] = df.shift(-1)["bodysize"]
    df.dropna(axis=0, inplace=True)
    return(df)

In [142]:
def new_means(in_df):
    hdf = in_df
    
    ndf = targeter.reproduce_columns(hdf, 10)
    
    s3 = []
    w3 = []
    b3 = []
    stoch_3 = []
    macd_3 = []
    rsi_3 = []
    adx_3 = []
    for x in range(1,4):
        s3.append(f"shadow-{str(x)}")
        b3.append(f"bodysize-{str(x)}")
        w3.append(f"wick-{str(x)}")
        stoch_3.append(f"stoch_stochastic-{str(x)}")
        macd_3.append(f"macd_macd-{str(x)}")
        rsi_3.append(f"rsi-{str(x)}")
        adx_3.append(f"adx_adx-{str(x)}")
    
    s5 = []
    w5 = []
    b5 = []
    stoch_5 = []
    macd_5 = []
    rsi_5 = []
    adx_5 = []
    for x in range(1,6):
        s5.append(f"shadow-{str(x)}")
        b5.append(f"bodysize-{str(x)}")
        w5.append(f"wick-{str(x)}")
        stoch_5.append(f"stoch_stochastic-{str(x)}")
        macd_5.append(f"macd_macd-{str(x)}")
        rsi_5.append(f"rsi-{str(x)}")
        adx_5.append(f"adx_adx-{str(x)}")
        
    s10 = []
    w10 = []
    b10 = []
    stoch_10 = []
    macd_10 = []
    rsi_10 = []
    adx_10 = []
    
    for x in range(1,11):
        s10.append(f"shadow-{str(x)}")
        b10.append(f"bodysize-{str(x)}")
        w10.append(f"wick-{str(x)}")
        stoch_10.append(f"stoch_stochastic-{str(x)}")
        macd_10.append(f"macd_macd-{str(x)}")
        rsi_10.append(f"rsi-{str(x)}")
        adx_10.append(f"adx_adx-{str(x)}")
        
    ndf["bs3_mean"] = ndf[b3].sum(axis=1)
    ndf["w3_mean"] = ndf[w3].mean(axis=1)
    ndf["s3_mean"] = ndf[s3].mean(axis=1)

    ndf["bs3_std"] = ndf[b3].std(axis=1)
    ndf["w3_std"] = ndf[w3].std(axis=1)
    ndf["s3_std"] = ndf[s3].std(axis=1)
    
    ndf["bs5_mean"] = ndf[b5].sum(axis=1)
    ndf["w5_mean"] = ndf[w5].mean(axis=1)
    ndf["s5_mean"] = ndf[s5].mean(axis=1)

    ndf["bs5_std"] = ndf[b5].std(axis=1)
    ndf["w5_std"] = ndf[w5].std(axis=1)
    ndf["s5_std"] = ndf[s5].std(axis=1)
    
    ndf["bs10_mean"] = ndf[b10].sum(axis=1)
    ndf["w10_mean"] = ndf[w10].mean(axis=1)
    ndf["s10_mean"] = ndf[s10].mean(axis=1)

    ndf["bs10_std"] = ndf[b10].std(axis=1)
    ndf["w10_std"] = ndf[w10].std(axis=1)
    ndf["s10_std"] = ndf[s10].std(axis=1)
    
    ndf["stoch3_mean"] = ndf[stoch_3].mean(axis=1)
    ndf["rsi3_mean"] = ndf[rsi_3].mean(axis=1)
    ndf["macd3_mean"] = ndf[macd_3].mean(axis=1)
    ndf["adx3_mean"] = ndf[adx_3].mean(axis=1)

    ndf["stoch3_std"] = ndf[stoch_3].std(axis=1)
    ndf["rsi3_std"] = ndf[rsi_3].std(axis=1)
    ndf["macd3_std"] = ndf[macd_3].std(axis=1)
    ndf["adx3_std"] = ndf[adx_3].std(axis=1)
    
    ndf["stoch5_mean"] = ndf[stoch_5].mean(axis=1)
    ndf["rsi5_mean"] = ndf[rsi_5].mean(axis=1)
    ndf["macd5_mean"] = ndf[macd_5].mean(axis=1)
    ndf["adx5_mean"] = ndf[adx_5].mean(axis=1)

    ndf["stoch5_std"] = ndf[stoch_5].std(axis=1)
    ndf["rsi5_std"] = ndf[rsi_5].std(axis=1)
    ndf["macd5_std"] = ndf[macd_5].std(axis=1)
    ndf["adx5_std"] = ndf[adx_5].std(axis=1)
    
    ndf["stoch10_mean"] = ndf[stoch_10].mean(axis=1)
    ndf["rsi10_mean"] = ndf[rsi_10].mean(axis=1)
    ndf["macd10_mean"] = ndf[macd_10].mean(axis=1)
    ndf["adx10_mean"] = ndf[adx_10].mean(axis=1)

    ndf["stoch10_std"] = ndf[stoch_10].std(axis=1)
    ndf["rsi10_std"] = ndf[rsi_10].std(axis=1)
    ndf["macd10_std"] = ndf[macd_10].std(axis=1)
    ndf["adx10_std"] = ndf[adx_10].std(axis=1)
    
    dropable = ["bs3", "bs6", "w3", "w6", "s3", "s6"]
    
    for x in range(1, 11):
        for col in hdf.columns:
            dropable.append(f"{col}-{str(x)}")
    
    return(ndf.drop(dropable, axis=1))

In [143]:
scale_cols = [
 'bodysize',
 'shadow',
 'wick',
 'macd_macd',
 'macd_signal',
 'rsi',
 'stoch_stochastic',
 'stoch_signal',
 'adx_adx',
 'williams',
 'bs3_mean',
 'w3_mean',
 's3_mean',
 'bs3_std',
 'w3_std',
 's3_std',
 'bs5_mean',
 'w5_mean',
 's5_mean',
 'bs5_std',
 'w5_std',
 's5_std',
 'bs10_mean',
 'w10_mean',
 's10_mean',
 'bs10_std',
 'w10_std',
 's10_std',
 'stoch3_mean',
 'rsi3_mean',
 'macd3_mean',
 'adx3_mean',
 'stoch3_std',
 'rsi3_std',
 'macd3_std',
 'adx3_std',
 'stoch5_mean',
 'rsi5_mean',
 'macd5_mean',
 'adx5_mean',
 'stoch5_std',
 'rsi5_std',
 'macd5_std',
 'adx5_std',
 'stoch10_mean',
 'rsi10_mean',
 'macd10_mean',
 'adx10_mean',
 'stoch10_std',
 'rsi10_std',
 'macd10_std',
 'adx10_std',
 'bottom_bollinger',
 'top_bollinger',
 'mid_bollinger',
 'bollinger_range',
 'keltner_range',
 'bottom_keltner',
 'top_keltner',
 'above_sma10',
 'above_sma5',
 'above_ema5',
 'above_ema10',
 'ema5_sma10',
 'sma5_sma10',
 'stoch_over_signal',
 'macd_over_signal',
 'adx_over',
 'bodysize-1',
 'shadow-1',
 'wick-1',
 'macd_macd-1',
 'macd_signal-1',
 'rsi-1',
 'stoch_stochastic-1',
 'stoch_signal-1',
 'adx_adx-1',
 'williams-1',
 'bottom_bollinger-1',
 'top_bollinger-1',
 'mid_bollinger-1',
 'bollinger_range-1',
 'keltner_range-1',
 'bottom_keltner-1',
 'top_keltner-1',
 'above_sma10-1',
 'above_sma5-1',
 'above_ema5-1',
 'above_ema10-1',
 'ema5_sma10-1',
 'sma5_sma10-1',
 'stoch_over_signal-1',
 'macd_over_signal-1',
 'adx_over-1',
 'bodysize-2',
 'shadow-2',
 'wick-2',
 'macd_macd-2',
 'macd_signal-2',
 'rsi-2',
 'stoch_stochastic-2',
 'stoch_signal-2',
 'adx_adx-2',
 'williams-2',
 'bottom_bollinger-2',
 'top_bollinger-2',
 'mid_bollinger-2',
 'bollinger_range-2',
 'keltner_range-2',
 'bottom_keltner-2',
 'top_keltner-2',
 'above_sma10-2',
 'above_sma5-2',
 'above_ema5-2',
 'above_ema10-2',
 'ema5_sma10-2',
 'sma5_sma10-2',
 'stoch_over_signal-2',
 'macd_over_signal-2',
 'adx_over-2',
]

In [144]:
non_scale_cols = [
 'singles',
 'bull_doubles',
 'bear_doubles',
 'morningstars',
 'whitesoldiers',
 'insideup',
 'eveningstars',
 'blackcrows',
 'insidedown',
 'direction_down',
 'direction_up',
 'psar_direction_bear',
 'psar_direction_bull',
 'bull_levels',
 'bear_levels',
 'singles-1',
 'bull_doubles-1',
 'bear_doubles-1',
 'morningstars-1',
 'whitesoldiers-1',
 'insideup-1',
 'eveningstars-1',
 'blackcrows-1',
 'insidedown-1',
 'direction_down-1',
 'direction_up-1',
 'psar_direction_bear-1',
 'psar_direction_bull-1',
 'bull_levels-1',
 'bear_levels-1',
 'singles-2',
 'bull_doubles-2',
 'bear_doubles-2',
 'morningstars-2',
 'whitesoldiers-2',
 'insideup-2',
 'eveningstars-2',
 'blackcrows-2',
 'insidedown-2',
 'direction_down-2',
 'direction_up-2',
 'psar_direction_bear-2',
 'psar_direction_bull-2',
 'bull_levels-2',
 'bear_levels-2']

In [230]:
df = pd.read_sql(sql_d, engine, index_col="index")

In [231]:
df = get_target_regression(df)
df = new_means(df)
df = shape_df_d(df)
df = double_up(df)

In [232]:
X = df[:-30].drop("target", axis=1)
y = df[:-30]["target"]

In [233]:
X_1 = X[non_scale_cols].values

In [234]:
X_2 = X[scale_cols]

In [235]:
X_scaled = np.concatenate([X_1, scaler.transform(X_2)], axis=1)

In [236]:
model = RandomForestRegressor()

In [237]:
model.fit(X_scaled, y)

RandomForestRegressor()

In [238]:
X_final = df[-30:].drop("target", axis=1)

In [239]:
Xfinal_1 = X_final[non_scale_cols].values
Xfinal_2 = X_final[scale_cols]

In [240]:
Xfinal_scaled = np.concatenate([Xfinal_1, scaler.transform(Xfinal_2)], axis=1)

In [241]:
preds = model.predict(Xfinal_scaled)

In [242]:
for x in range(len(preds)):
    if abs(preds[x])> 0:
        print(f"{preds[x]} vs actual: {df[-30:].iloc[x]['target']} on {X_final.iloc[x].name}")

-4.729999999997014e-05 vs actual: -0.006799999999999917 on 2021-09-17 00:00:00
0.00040780000000000704 vs actual: 9.999999999998899e-05 on 2021-09-20 00:00:00
0.000909500000000012 vs actual: -0.004289999999999905 on 2021-09-21 00:00:00
-0.0008025999999999955 vs actual: 0.010399999999999965 on 2021-09-22 00:00:00
-0.00014019999999999034 vs actual: -0.00470999999999977 on 2021-09-23 00:00:00
-0.0007853000000000265 vs actual: 0.0034000000000000696 on 2021-09-24 00:00:00
0.002217499999999997 vs actual: -0.016159999999999952 on 2021-09-27 00:00:00
0.00035920000000001285 vs actual: -0.010930000000000106 on 2021-09-28 00:00:00
-0.0006179000000000046 vs actual: 0.004850000000000021 on 2021-09-29 00:00:00
-0.0014923000000000087 vs actual: 0.007200000000000095 on 2021-09-30 00:00:00
-0.0012718000000000312 vs actual: 0.006530000000000147 on 2021-10-01 00:00:00
0.0003584999999999905 vs actual: 0.0019299999999999873 on 2021-10-04 00:00:00
-1.2500000000015276e-05 vs actual: -0.004209999999999825 on 2

In [243]:
actual = df[-30:]['target'].values


In [244]:
comparison = []
for x in range(len(preds)):
    if abs(preds[x] + actual[x]) == abs(preds[x]) + abs(actual[x]):
        comparison.append(1)
    else:
        comparison.append(0)

In [245]:
from collections import Counter

In [246]:
Counter(comparison)

Counter({1: 13, 0: 17})