In [65]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import cross_val_score, RandomizedSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sqlalchemy import create_engine

import rex
import targeter


In [230]:
# The connection URL is read from the REX_DB_URL environment variable so that
# credentials do not have to live in the notebook. The literal is only a legacy
# fallback so existing setups keep working.
# NOTE(review): rotate this password and remove the fallback once REX_DB_URL is set everywhere.
engine = create_engine(
    os.environ.get("REX_DB_URL", "mysql+pymysql://rex:#Pass123@localhost/new_ml")
)
# Load every row of `gbpusd_1h` (presumably hourly GBP/USD candles - confirm),
# ordered by the `index` column, which also becomes the DataFrame index.
sql_h = "SELECT * FROM `gbpusd_1h` ORDER BY `index` ASC"
df = pd.read_sql(sql_h, engine, index_col="index")

In [231]:
def _step_back_if_flagged(df, when, flag_col):
    """Return the label of the row just before `when` if the flag flips there.

    The previous row is chosen when `flag_col` is 0 at `when` and 1 on the row
    before it; otherwise `when` is returned unchanged. A row at position 0 has
    no predecessor and is returned as-is (a negative position would otherwise
    wrap around to the last row of the frame).
    """
    pos = df.index.get_loc(when)
    if pos == 0:
        return when
    if df.loc[when, flag_col] == 0 and df[flag_col].iloc[pos - 1] == 1:
        return df.index[pos - 1]
    return when


def clean_turns(turns, df):
    """De-duplicate consecutive same-direction turns and re-anchor each turn.

    Parameters
    ----------
    turns : dict
        Maps index labels of `df` to the strings "up" or "down", in
        chronological insertion order.
    df : pandas.DataFrame
        Must contain `open`, `close`, `direction_down` and `direction_up`
        columns and be indexed by the labels used as keys of `turns`.

    Returns
    -------
    dict
        Maps the (possibly shifted) turn label to "up" / "down". The last
        surviving turn is never emitted because it has no following turn to
        bound its search window.

    Raises
    ------
    KeyError
        If a key of `turns` is not present in `df.index`.

    Notes
    -----
    Stage 1: when two neighbouring turns share a direction, one is discarded.
    For "up" the one with the higher candle-body low goes, for any other
    direction the one with the lower candle-body high goes; on a tie the later
    turn is discarded.
    Stage 2: each kept turn is moved to the last candle attaining the extreme
    body value inside the window from one row before the turn up to (but not
    including) the next kept turn, then optionally stepped back one row (see
    `_step_back_if_flagged`).
    """
    body = df[["open", "close"]]
    body_low = body.min(axis=1)
    body_high = body.max(axis=1)

    # ---- Stage 1: drop the weaker of two consecutive same-direction turns ----
    turn_times = list(turns.keys())
    discarded = set()
    for prev_x, x in zip(turn_times, turn_times[1:]):
        if turns[x] != turns[prev_x]:
            continue
        if turns[x] == "up":
            later_is_weaker = body_low.loc[x] >= body_low.loc[prev_x]
        else:
            later_is_weaker = body_high.loc[x] <= body_high.loc[prev_x]
        # A set tolerates the same turn losing two comparisons in a row;
        # list.remove() raised ValueError on the second removal.
        discarded.add(x if later_is_weaker else prev_x)

    new_turns = {t: turns[t] for t in turn_times if t not in discarded}

    # ---- Stage 2: re-anchor every kept turn except the last one ----
    final_turns = {}
    kept_times = list(new_turns.keys())
    for peak_x, next_x in zip(kept_times, kept_times[1:]):
        # Clamp at 0: a start of -1 would be read as "last row" and empty the window.
        start_index = max(df.index.get_loc(peak_x) - 1, 0)
        stop_index = df.index.get_loc(next_x)
        window = df.index[start_index:stop_index]

        if new_turns[peak_x] == "up":
            peak_up_time = peak_x
            for candidate in window:
                # "<=" means a later candle wins a tie on the body low.
                if body_low.loc[candidate] <= body_low.loc[peak_up_time]:
                    peak_up_time = candidate
            peak_up_time = _step_back_if_flagged(df, peak_up_time, "direction_down")
            final_turns[peak_up_time] = "up"
        elif new_turns[peak_x] == "down":
            peak_down_time = peak_x
            for candidate in window:
                # ">=" means a later candle wins a tie on the body high.
                if body_high.loc[candidate] >= body_high.loc[peak_down_time]:
                    peak_down_time = candidate
            peak_down_time = _step_back_if_flagged(df, peak_down_time, "direction_up")
            final_turns[peak_down_time] = "down"

    return final_turns

In [232]:
def turn_target(in_df, turns):
    """Return a copy of `in_df` with binary `target_up` / `target_down` columns.

    For every turn except the last one in `turns`, the row two positions after
    the turn is labelled 1 in the column matching the turn's direction. All
    other rows stay 0.0.

    Parameters
    ----------
    in_df : pandas.DataFrame
        Frame whose index contains the keys of `turns`. It is not modified.
    turns : dict
        Maps index labels to "up" or "down".

    Returns
    -------
    pandas.DataFrame
        Deep copy of `in_df` with the two float target columns appended.

    Notes
    -----
    Turns that are missing from the index (KeyError) or lie within two rows of
    the end of the frame (IndexError) are reported with `print` and skipped.
    """
    df = in_df.copy(deep=True)
    df["target_up"] = np.zeros(len(df))
    df["target_down"] = np.zeros(len(df))

    target_cols = {"up": "target_up", "down": "target_down"}
    for t_time in list(turns.keys())[:-1]:
        try:
            turn2 = df.index[df.index.get_loc(t_time) + 2]
        except (KeyError, IndexError) as e:
            # Best effort: an unusable turn must not abort the whole labelling pass.
            print(e)
            continue

        target_col = target_cols.get(turns[t_time])
        if target_col is not None:
            df.loc[turn2, target_col] = 1

    return df

In [233]:
def new_means(in_df):
    """Add rolling summary features over the previous 3, 5 and 10 rows.

    `targeter.reproduce_columns(in_df, 10)` is expected to return a frame that
    contains, for every column `c`, lagged copies named `c-1` ... `c-10`
    (assumption based on how the names are used below - confirm against
    targeter). From those lagged copies this function derives, per window
    `w` in (3, 5, 10):

    * `bs{w}_mean`, `w{w}_mean`, `s{w}_mean` and the matching `_std` columns
      from `bodysize`, `wick` and `shadow`;
    * `stoch{w}_*`, `rsi{w}_*`, `macd{w}_*`, `adx{w}_*` (`mean` and `std`)
      from `stoch_stochastic`, `rsi`, `macd_macd` and `adx_adx`.

    The lagged helper columns and the columns `bs3`, `bs6`, `w3`, `w6`, `s3`,
    `s6` are dropped before returning.

    Parameters
    ----------
    in_df : pandas.DataFrame
        Source frame; must contain the base columns listed above as well as
        `bs3`, `bs6`, `w3`, `w6`, `s3`, `s6`.

    Returns
    -------
    pandas.DataFrame
        The original columns (minus the six dropped ones) plus the summary
        columns.

    Raises
    ------
    KeyError
        If an expected lagged column or one of the columns to drop is missing.
    """
    hdf = in_df
    ndf = targeter.reproduce_columns(hdf, 10)

    windows = (3, 5, 10)

    def lagged(base, window):
        # Names of the `window` most recent lagged copies of `base`.
        return [f"{base}-{lag}" for lag in range(1, window + 1)]

    # Candle-geometry summaries. The 10-row window was prepared but never
    # computed before, although downstream code expects bs10_*/w10_*/s10_*.
    for window in windows:
        body_cols = lagged("bodysize", window)
        wick_cols = lagged("wick", window)
        shadow_cols = lagged("shadow", window)

        # NOTE(review): bs*_mean is a row-wise SUM, not a mean, exactly as the
        # 3- and 5-row versions were; confirm whether .mean() was intended.
        ndf[f"bs{window}_mean"] = ndf[body_cols].sum(axis=1)
        ndf[f"w{window}_mean"] = ndf[wick_cols].mean(axis=1)
        ndf[f"s{window}_mean"] = ndf[shadow_cols].mean(axis=1)

        ndf[f"bs{window}_std"] = ndf[body_cols].std(axis=1)
        ndf[f"w{window}_std"] = ndf[wick_cols].std(axis=1)
        ndf[f"s{window}_std"] = ndf[shadow_cols].std(axis=1)

    # Indicator summaries: (output prefix, source column).
    indicators = (
        ("stoch", "stoch_stochastic"),
        ("rsi", "rsi"),
        ("macd", "macd_macd"),
        ("adx", "adx_adx"),
    )
    for window in windows:
        for stat in ("mean", "std"):
            for prefix, base in indicators:
                source = ndf[lagged(base, window)]
                ndf[f"{prefix}{window}_{stat}"] = getattr(source, stat)(axis=1)

    # Remove the raw lag columns again; only the summaries are kept.
    dropable = ["bs3", "bs6", "w3", "w6", "s3", "s6"]
    for x in range(1, 11):
        for col in hdf.columns:
            dropable.append(f"{col}-{x}")

    return ndf.drop(dropable, axis=1)

In [265]:
def double_up(in_df, fac=2):
    """Append `fac` lagged copies of the columns, minus lags of rolling stats.

    A deep copy of `in_df` is passed to `targeter.reproduce_columns` with
    `col_count=fac`. From the result, the `-1` ... `-fac` variants of the
    rolling summary columns (candle stats for windows 3/5/10, indicator stats
    for windows 3/5) are dropped again.

    Parameters
    ----------
    in_df : pandas.DataFrame
        Input frame; it is not modified.
    fac : int, default 2
        Number of lagged copies requested from `targeter.reproduce_columns`.

    Returns
    -------
    pandas.DataFrame
        The frame returned by `targeter.reproduce_columns` without the lagged
        summary columns.
    """
    candle_stats = [
        f"{prefix}{window}_{stat}"
        for window in (3, 5, 10)
        for stat in ("mean", "std")
        for prefix in ("bs", "w", "s")
    ]
    indicator_stats = [
        f"{prefix}{window}_{stat}"
        for window in (3, 5)
        for stat in ("mean", "std")
        for prefix in ("stoch", "rsi", "macd", "adx")
    ]
    summary_cols = candle_stats + indicator_stats

    expanded = targeter.reproduce_columns(in_df.copy(deep=True), col_count=fac)

    lagged_summaries = [
        f"{name}-{lag}"
        for lag in range(1, fac + 1)
        for name in summary_cols
    ]
    return expanded.drop(columns=lagged_summaries)

In [235]:
def shape_df(in_df):
    """Replace absolute price-level columns with relative (difference) features.

    Works on a copy of `in_df`, so the caller's frame is left untouched and the
    function no longer depends on a global named `df`.

    Added columns: distances of `close` to the Bollinger/Keltner bands and
    moving averages, band half-widths, indicator-minus-signal spreads,
    indicator-minus-3-row-mean spreads, and two flags `bull_levels` /
    `bear_levels` that are 1.0 when the candle body crosses any of the listed
    levels upwards / downwards.

    Parameters
    ----------
    in_df : pandas.DataFrame
        Must contain every column referenced below (price, band,
        moving-average, indicator, `*3_mean` and level columns).

    Returns
    -------
    pandas.DataFrame
        Copy of `in_df` with the new columns appended and the raw
        price/level/band columns removed.

    Raises
    ------
    KeyError
        If a required column is missing.
    """
    df = in_df.copy()
    close = df["close"]

    # Band distances and widths.
    df["bottom_bollinger"] = df["bb_lower"] - close
    df["top_bollinger"] = close - df["bb_upper"]
    df["mid_bollinger"] = close - df["bb_center"]
    df["bollinger_range"] = df["bb_upper"] - df["bb_center"]
    df["keltner_range"] = df["kelt_upper"] - df["kelt_center"]
    df["bottom_keltner"] = df["kelt_lower"] - close
    df["top_keltner"] = close - df["kelt_upper"]

    # Distance of the close to the moving averages, and MA spreads.
    df["above_sma10"] = close - df["sma10"]
    df["above_sma5"] = close - df["sma5"]  # was computed from sma10 by mistake
    df["above_ema5"] = close - df["ema5"]
    df["above_ema10"] = close - df["ema10"]
    df["ema5_sma10"] = df["ema5"] - df["sma10"]
    df["sma5_sma10"] = df["sma5"] - df["sma10"]

    # Indicator versus its signal line / counterpart.
    df["stoch_over_signal"] = df["stoch_stochastic"] - df["stoch_signal"]
    df["macd_over_signal"] = df["macd_macd"] - df["macd_signal"]
    df["adx_over"] = df["adx_pdmi"] - df["adx_ndmi"]

    # Indicator versus its own 3-row summary.
    df["stoch_over_mean"] = df["stoch_stochastic"] - df["stoch3_mean"]
    df["adx_over_mean"] = df["adx_adx"] - df["adx3_mean"]
    df["rsi_over_mean"] = df["rsi"] - df["rsi3_mean"]
    df["macd_over_mean"] = df["macd_macd"] - df["macd3_mean"]
    # NOTE(review): this subtracts a body-size statistic from a price; confirm intent.
    df["bs_over_mean"] = close - df["bs3_mean"]

    levels = [
        "level_1",
        "level_2",
        "level_3",
        "level_4",
        "level_5",
        "pp",
        "standard_r1",
        "standard_s1",
        "fib_r1",
        "fib_s1",
    ]

    # A level is crossed upwards when open < level < close, downwards when
    # close < level < open. The flag is 1.0 if ANY level is crossed.
    bull_conditions = [
        ((close > df[level]) & (df["open"] < df[level])).to_numpy()
        for level in levels
    ]
    bear_conditions = [
        ((close < df[level]) & (df["open"] > df[level])).to_numpy()
        for level in levels
    ]
    df["bull_levels"] = np.logical_or.reduce(bull_conditions).astype(float)
    df["bear_levels"] = np.logical_or.reduce(bear_conditions).astype(float)

    drop_cols = [
        "bb_center",
        "bb_upper",
        "bb_lower",
        "kelt_center",
        "kelt_upper",
        "kelt_lower",
        "ema5",
        "sma5",
        "sma10",
        "ema10",
        "psar_psar",
        "curr_trend_down",
        "curr_trend_up",
        *levels,
        "adx_pdmi",
        "adx_ndmi",
        "open",
        "high",
        "low",
        "close",
    ]

    return df.drop(drop_cols, axis=1)

In [236]:
# Re-import the local `targeter` module so edits to its source file are picked
# up without restarting the kernel.
import importlib
importlib.reload(targeter)

<module 'targeter' from 'C:\\Users\\dimad\\OneDrive\\projects\\rex\\targeter.py'>

In [239]:
# Re-read the whole table so `df` is the raw, unprocessed frame again.
df = pd.read_sql(sql_h, engine, index_col="index")

In [240]:
# Turn detection must run on the raw frame: clean_turns reads the open/close and
# direction_* columns, and shape_df later drops open/close from the frame.
# NOTE(review): rex.get_smoothed_turns is presumably returning a dict of
# index label -> "up"/"down" (that is how clean_turns consumes it) - confirm.
turns = rex.get_smoothed_turns(df)
turns = clean_turns(turns, df)
# Feature engineering, each step rebinding `df` to the transformed frame.
df = new_means(df)
df = shape_df(df)
df = double_up(df)
# Finally attach the target_up / target_down label columns.
df = turn_target(df, turns)


In [227]:
# Continuous per-candle features that exist both for the current row and as lags.
_base_numeric = [
    'bodysize', 'shadow', 'wick',
    'macd_macd', 'macd_signal', 'rsi',
    'stoch_stochastic', 'stoch_signal', 'adx_adx', 'williams',
]

# Rolling summaries: candle stats first (windows 3, 5, 10), then indicator stats.
_rolling_stats = [
    f"{prefix}{window}_{stat}"
    for window in (3, 5, 10)
    for stat in ('mean', 'std')
    for prefix in ('bs', 'w', 's')
] + [
    f"{prefix}{window}_{stat}"
    for window in (3, 5, 10)
    for stat in ('mean', 'std')
    for prefix in ('stoch', 'rsi', 'macd', 'adx')
]

# Difference features (distances to bands / moving averages / signal lines).
_derived = [
    'bottom_bollinger', 'top_bollinger', 'mid_bollinger',
    'bollinger_range', 'keltner_range', 'bottom_keltner', 'top_keltner',
    'above_sma10', 'above_sma5', 'above_ema5', 'above_ema10',
    'ema5_sma10', 'sma5_sma10',
    'stoch_over_signal', 'macd_over_signal', 'adx_over',
]

# Columns to standardise: current-row values, then the -1 and -2 lags of
# everything except the rolling summaries.
scale_cols = (
    _base_numeric
    + _rolling_stats
    + _derived
    + [f"{name}-{lag}" for lag in (1, 2) for name in _base_numeric + _derived]
)

# 0/1 pattern and direction flags; passed through without scaling.
_flags = [
    'singles', 'bull_doubles', 'bear_doubles',
    'morningstars', 'whitesoldiers', 'insideup',
    'eveningstars', 'blackcrows', 'insidedown',
    'direction_down', 'direction_up',
    'psar_direction_bear', 'psar_direction_bull',
    'bull_levels', 'bear_levels',
]

non_scale_cols = [f"{name}{suffix}" for suffix in ('', '-1', '-2') for name in _flags]

In [241]:
# `pprint` is never imported in this notebook; a bare last expression gets the
# same pretty-printed display from Jupyter without the extra dependency.
list(df.columns)

['bodysize',
 'shadow',
 'wick',
 'singles',
 'bull_doubles',
 'bear_doubles',
 'morningstars',
 'whitesoldiers',
 'insideup',
 'eveningstars',
 'blackcrows',
 'insidedown',
 'macd_macd',
 'macd_signal',
 'rsi',
 'stoch_stochastic',
 'stoch_signal',
 'adx_adx',
 'williams',
 'direction_down',
 'direction_up',
 'psar_direction_bear',
 'psar_direction_bull',
 'bs3_mean',
 'w3_mean',
 's3_mean',
 'bs3_std',
 'w3_std',
 's3_std',
 'bs5_mean',
 'w5_mean',
 's5_mean',
 'bs5_std',
 'w5_std',
 's5_std',
 'bs10_mean',
 'w10_mean',
 's10_mean',
 'bs10_std',
 'w10_std',
 's10_std',
 'stoch3_mean',
 'rsi3_mean',
 'macd3_mean',
 'adx3_mean',
 'stoch3_std',
 'rsi3_std',
 'macd3_std',
 'adx3_std',
 'stoch5_mean',
 'rsi5_mean',
 'macd5_mean',
 'adx5_mean',
 'stoch5_std',
 'rsi5_std',
 'macd5_std',
 'adx5_std',
 'stoch10_mean',
 'rsi10_mean',
 'macd10_mean',
 'adx10_mean',
 'stoch10_std',
 'rsi10_std',
 'macd10_std',
 'adx10_std',
 'bottom_bollinger',
 'top_bollinger',
 'mid_bollinger',
 'bollinger_ran

In [244]:
# Scaler for the continuous feature columns; it is fitted on X1 (= X[scale_cols]) a few cells below.
scale = StandardScaler()

In [253]:
# Everything except the last 720 rows is used for fitting/validation; the last
# 720 rows are evaluated separately further down (X_final / y_final).
X = df[:-720].drop(["target_up", "target_down"], axis=1)
y = df[:-720]["target_up"]  # only the "up" target is modelled in this section

In [254]:
# Split the features into the columns to standardise and the 0/1 flags to pass through.
X1 = X[scale_cols]
X2 = X[non_scale_cols]

In [255]:
# NOTE(review): the scaler is fitted before train_test_split, so its statistics
# also include rows that later end up in X_test.
scale = scale.fit(X1)

In [256]:
# Column order of X_scaled: standardised scale_cols first, then the untouched non_scale_cols.
X_scaled = np.concatenate([scale.transform(X1), X2.values], axis=1)

In [257]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y)

In [258]:
clf = RandomForestClassifier(n_estimators=300).fit(X_train, y_train)
preds = clf.predict(X_test)
confusion_matrix(y_test, preds)

array([[12820,    73],
       [  403,   194]], dtype=int64)

In [259]:
# Held-out evaluation window: the last 720 rows, which were excluded from X / y.
X_final = df[-720:].drop(columns=["target_up", "target_down"])
y_final = df[-720:]["target_up"]

# Same column split and the already-fitted scaler as used for the training data.
Xf1, Xf2 = X_final[scale_cols], X_final[non_scale_cols]

Xf_scaled = np.concatenate(
    [scale.transform(Xf1), Xf2.values],
    axis=1,
)


In [260]:
# Predict the "up" target for the held-out last 720 rows.
preds_final = clf.predict(Xf_scaled)

In [261]:
# Rows are the true class, columns the predicted class.
confusion_matrix(y_final, preds_final)

array([[682,   8],
       [ 24,   6]], dtype=int64)

In [217]:
# Positions (within the 720-row holdout) where the classifier predicted class 1.
pred_index = [pos for pos, label in enumerate(preds_final) if label == 1]

In [218]:
# Show the index label (timestamp) of every holdout row predicted as class 1.
holdout_times = df[-720:].index
for x in pred_index:
    print(holdout_times[x])

2021-05-21 16:00:00
2021-06-01 12:00:00
2021-06-09 16:00:00
2021-06-11 12:00:00
2021-07-12 08:00:00
2021-07-23 08:00:00
2021-07-30 16:00:00
2021-08-04 20:00:00
2021-08-30 12:00:00
2021-09-06 00:00:00
2021-09-10 20:00:00
2021-09-14 20:00:00
2021-10-14 20:00:00
2021-10-26 20:00:00


In [173]:
# Number of holdout rows predicted as class 1.
# NOTE(review): the saved output comes from an earlier run (execution count 173)
# and need not match the list printed above.
len(pred_index)

4

In [171]:
from collections import Counter

In [9]:
# Eight label columns: target_up-0..3 followed by target_down-0..3.
targets = [f"target_{side}-{lag}" for side in ("up", "down") for lag in range(4)]

# Labels are the target columns; features are everything else.
y = df[targets]
X = df.drop(columns=targets)

X_train, X_test, y_train, y_test = train_test_split(X, y)

In [10]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier

In [11]:
# Compare three classifier families on each of the four "-i" target variants,
# separately for the up and the down label.
for i in range(4):
    print(f"Working on {i}")
    sides = ("up", "down")
    labels_train = {side: y_train[f"target_{side}-{str(i)}"] for side in sides}
    labels_test = {side: y_test[f"target_{side}-{str(i)}"] for side in sides}

    # (report tag, factory) - a fresh estimator is built for every fit.
    model_factories = (
        ("RFC", lambda: RandomForestClassifier()),
        ("LSVC", lambda: LinearSVC(max_iter=5000)),
        ("KN", lambda: KNeighborsClassifier()),
    )

    # Fit everything first (same order as the reports), then print the reports.
    fitted = [
        (tag, side, make_model().fit(X_train, labels_train[side]))
        for tag, make_model in model_factories
        for side in sides
    ]

    for tag, side, model in fitted:
        print(f"{tag} {side} on {str(i)}:\n{confusion_matrix(labels_test[side], model.predict(X_test))}\n\n")

Working on 0




RFC up on 0:
[[13086     2]
 [  578     0]]


RFC down on 0:
[[13068     1]
 [  597     0]]


LSVC up on 0:
[[13088     0]
 [  578     0]]


LSVC down on 0:
[[13067     2]
 [  597     0]]


KN up on 0:
[[13022    66]
 [  569     9]]


KN down on 0:
[[13020    49]
 [  585    12]]


Working on 1




RFC up on 1:
[[13054    42]
 [  494    76]]


RFC down on 1:
[[13058    34]
 [  518    56]]


LSVC up on 1:
[[13093     3]
 [  564     6]]


LSVC down on 1:
[[12999    93]
 [  490    84]]


KN up on 1:
[[12994   102]
 [  510    60]]


KN down on 1:
[[13002    90]
 [  527    47]]


Working on 2




RFC up on 2:
[[12996    72]
 [  414   184]]


RFC down on 2:
[[12975    96]
 [  403   192]]


LSVC up on 2:
[[12622   446]
 [  230   368]]


LSVC down on 2:
[[13069     2]
 [  587     8]]


KN up on 2:
[[12970    98]
 [  490   108]]


KN down on 2:
[[12961   110]
 [  484   111]]


Working on 3




RFC up on 3:
[[13025    41]
 [  441   159]]


RFC down on 3:
[[13034    42]
 [  430   160]]


LSVC up on 3:
[[11885  1181]
 [   70   530]]


LSVC down on 3:
[[12902   174]
 [  298   292]]


KN up on 3:
[[12992    74]
 [  530    70]]


KN down on 3:
[[12990    86]
 [  524    66]]




In [18]:
from sklearn.preprocessing import StandardScaler



In [19]:
# Fresh, unfitted scaler (it is re-created and fitted on `ldf` in a later cell).
scaler = StandardScaler()


In [68]:
# Rebinds `df` to the frame with rolling summary columns added.
df = new_means(df)

In [71]:
# NOTE(review): the saved output shows this call was interrupted
# (KeyboardInterrupt), so `turns` was not refreshed by this cell in that run.
turns = rex.get_smoothed_turns(df)

KeyboardInterrupt: 

In [None]:
# Never executed in the saved run (no execution count); it relies on whatever
# `turns` the kernel currently holds.
turns = clean_turns(turns, df)

In [69]:
# Inspect the column names after new_means.
df.columns

Index(['open', 'high', 'low', 'close', 'bodysize', 'shadow', 'wick', 'singles',
       'bull_doubles', 'bear_doubles', 'morningstars', 'whitesoldiers',
       'insideup', 'eveningstars', 'blackcrows', 'insidedown', 'level_1',
       'level_2', 'level_3', 'level_4', 'level_5', 'pp', 'standard_r1',
       'standard_s1', 'fib_r1', 'fib_s1', 'sma5', 'ema5', 'sma10', 'ema10',
       'bb_center', 'bb_upper', 'bb_lower', 'kelt_center', 'kelt_upper',
       'kelt_lower', 'macd_macd', 'macd_signal', 'rsi', 'psar_psar',
       'stoch_stochastic', 'stoch_signal', 'adx_adx', 'adx_pdmi', 'adx_ndmi',
       'williams', 'direction_down', 'direction_up', 'curr_trend_down',
       'curr_trend_up', 'psar_direction_bear', 'psar_direction_bull',
       'bs3_mean', 'w3_mean', 's3_mean', 'bs3_std', 'w3_std', 's3_std',
       'bs5_mean', 'w5_mean', 's5_mean', 'bs5_std', 'w5_std', 's5_std',
       'bs10_mean', 'w10_mean', 's10_mean', 'bs10_std', 'w10_std', 's10_std',
       'stoch3_mean', 'rsi3_mean', 'macd3_

array([[ 5.40000000e-04,  3.60000000e-04,  4.20000000e-04, ...,
        -2.99860225e+01,  0.00000000e+00,  0.00000000e+00],
       [ 2.26000000e-03,  2.53000000e-03,  3.10000000e-04, ...,
        -3.27921673e+01,  0.00000000e+00,  0.00000000e+00],
       [ 6.50000000e-04,  5.80000000e-04,  1.63000000e-03, ...,
        -3.11406464e+01,  0.00000000e+00,  0.00000000e+00],
       ...,
       [ 1.00000000e-05,  1.20000000e-04,  3.20000000e-04, ...,
        -9.02707356e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.06000000e-03,  3.20000000e-04,  0.00000000e+00, ...,
        -3.72706592e+00,  1.00000000e+00,  0.00000000e+00],
       [ 3.80000000e-04,  2.60000000e-04,  8.80000000e-04, ...,
        -2.01274226e+00,  0.00000000e+00,  0.00000000e+00]])

In [64]:
# Working frame: everything except the last 720 rows.
wdf = df[:-720]
# NOTE(review): with the two lines below commented out, X and y come from
# whichever cell last assigned them, not from `wdf` - confirm which one is intended.
# X = wdf.drop(targets, axis=1)
# y = wdf["target_up-2"]

X_train, X_test, y_train, y_test = train_test_split(X, y)
# SMOTE is applied to the training split only; the test split keeps its original class balance.
over = SMOTE()

X_t, y_t = over.fit_resample(X_train, y_train)

clf = RandomForestClassifier(n_estimators=250).fit(X_t, y_t)

# Rows are the true class, columns the predicted class.
confusion_matrix(y_test, clf.predict(X_test))

array([[12779,   325],
       [  232,   330]], dtype=int64)

In [34]:
# Inspect the column names of the working frame.
wdf.columns

Index(['bodysize', 'shadow', 'wick', 'singles', 'bull_doubles', 'bear_doubles',
       'morningstars', 'whitesoldiers', 'insideup', 'eveningstars',
       ...
       'bull_levels-2', 'bear_levels-2', 'target_up-0', 'target_down-0',
       'target_up-1', 'target_down-1', 'target_up-2', 'target_down-2',
       'target_up-3', 'target_down-3'],
      dtype='object', length=185)

In [39]:
# NOTE: despite its name, this list holds the 0/1 flag columns plus all target
# columns; it is used below as the set of columns to REMOVE before scaling.
_flag_names = [
    'singles', 'bull_doubles', 'bear_doubles',
    'morningstars', 'whitesoldiers', 'insideup',
    'eveningstars', 'blackcrows', 'insidedown',
    'direction_down', 'direction_up',
    'psar_direction_bear', 'psar_direction_bull',
    'bull_levels', 'bear_levels',
]

scale_cols = (
    # Flags for the current row, then their -1 and -2 lags.
    [f"{name}{suffix}" for suffix in ('', '-1', '-2') for name in _flag_names]
    # Targets interleaved as up/down per lag: target_up-0, target_down-0, ...
    + [f"target_{side}-{lag}" for lag in range(4) for side in ('up', 'down')]
)

In [50]:
# The 0/1 flag columns (current row plus -1 and -2 lags) without any targets;
# these are appended unscaled to the feature matrix below.
_flag_names_2 = [
    'singles', 'bull_doubles', 'bear_doubles',
    'morningstars', 'whitesoldiers', 'insideup',
    'eveningstars', 'blackcrows', 'insidedown',
    'direction_down', 'direction_up',
    'psar_direction_bear', 'psar_direction_bull',
    'bull_levels', 'bear_levels',
]

scale_cols_2 = [
    f"{name}{suffix}"
    for suffix in ('', '-1', '-2')
    for name in _flag_names_2
]

In [40]:
ldf = df.drop(scale_cols, axis=1)

In [46]:
scaler = StandardScaler()
scaler.fit(ldf)
ldf_1 = scaler.transform(ldf)

In [56]:
scaled_df = np.concatenate([ldf, df[scale_cols_2].values], axis=1)

In [58]:
X = scaled_df
y = df["target_up-2"]

In [16]:
# NOTE(review): if `wdf` is df[:-720] (as assigned in the SMOTE cell), then
# wdf[-720:] is the tail of the data the model was drawn from, not the untouched
# df[-720:] holdout - confirm which rows this is meant to evaluate.
confusion_matrix(wdf[-720:]["target_up-2"], clf.predict(wdf.drop(targets, axis=1)[-720:]))

array([[686,   6],
       [  3,  25]], dtype=int64)

In [15]:
# Inspect the column names of the working frame.
wdf.columns

Index(['bodysize', 'shadow', 'wick', 'singles', 'bull_doubles', 'bear_doubles',
       'morningstars', 'whitesoldiers', 'insideup', 'eveningstars',
       ...
       'bull_levels-2', 'bear_levels-2', 'target_up-0', 'target_down-0',
       'target_up-1', 'target_down-1', 'target_up-2', 'target_down-2',
       'target_up-3', 'target_down-3'],
      dtype='object', length=185)