In [104]:
import pandas as pd
import numpy as np
import rex
import targeter
from datetime import datetime
from sklearn.model_selection import cross_val_score, RandomizedSearchCV, train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sqlalchemy import create_engine
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from imblearn.over_sampling import SMOTE

In [105]:
scale_cols = [
 'bodysize',
 'shadow',
 'wick',
 'macd_macd',
 'macd_signal',
 'rsi',
 'stoch_stochastic',
 'stoch_signal',
 'adx_adx',
 'williams',
 'bs3_mean',
 'w3_mean',
 's3_mean',
 'bs3_std',
 'w3_std',
 's3_std',
 'bs5_mean',
 'w5_mean',
 's5_mean',
 'bs5_std',
 'w5_std',
 's5_std',
 'stoch3_mean',
 'rsi3_mean',
 'macd3_mean',
 'adx3_mean',
 'stoch3_std',
 'rsi3_std',
 'macd3_std',
 'adx3_std',
 'stoch5_mean',
 'rsi5_mean',
 'macd5_mean',
 'adx5_mean',
 'stoch5_std',
 'rsi5_std',
 'macd5_std',
 'adx5_std',
 'bottom_bollinger',
 'top_bollinger',
 'mid_bollinger',
 'bollinger_range',
 'keltner_range',
 'bottom_keltner',
 'top_keltner',
 'above_sma10',
 'above_sma5',
 'above_ema5',
 'above_ema10',
 'ema5_sma10',
 'sma5_sma10',
 'stoch_over_signal',
 'macd_over_signal',
 'adx_over',
 "stoch_over_mean",
 "adx_over_mean",
 "rsi_over_mean",
 "macd_over_mean",
 "bs_over_mean",
 'bodysize-1',
 'shadow-1',
 'wick-1',
 'macd_macd-1',
 'macd_signal-1',
 'rsi-1',
 'stoch_stochastic-1',
 'stoch_signal-1',
 'adx_adx-1',
 'williams-1',
 'bottom_bollinger-1',
 'top_bollinger-1',
 'mid_bollinger-1',
 'bollinger_range-1',
 'keltner_range-1',
 'bottom_keltner-1',
 'top_keltner-1',
 'above_sma10-1',
 'above_sma5-1',
 'above_ema5-1',
 'above_ema10-1',
 'ema5_sma10-1',
 'sma5_sma10-1',
 'stoch_over_signal-1',
 'macd_over_signal-1',
 'adx_over-1',
 'bodysize-2',
 'shadow-2',
 'wick-2',
 'macd_macd-2',
 'macd_signal-2',
 'rsi-2',
 'stoch_stochastic-2',
 'stoch_signal-2',
 'adx_adx-2',
 'williams-2',
 'bottom_bollinger-2',
 'top_bollinger-2',
 'mid_bollinger-2',
 'bollinger_range-2',
 'keltner_range-2',
 'bottom_keltner-2',
 'top_keltner-2',
 'above_sma10-2',
 'above_sma5-2',
 'above_ema5-2',
 'above_ema10-2',
 'ema5_sma10-2',
 'sma5_sma10-2',
 'stoch_over_signal-2',
 'macd_over_signal-2',
 'adx_over-2',
]

non_scale_cols = [
 'singles',
 'bull_doubles',
 'bear_doubles',
 'morningstars',
 'whitesoldiers',
 'insideup',
 'eveningstars',
 'blackcrows',
 'insidedown',
 'direction_down',
 'direction_up',
 'psar_direction_bear',
 'psar_direction_bull',
 'bull_levels',
 'bear_levels',
 'bull_near_levels',
 'bear_near_levels',
 'singles-1',
 'bull_doubles-1',
 'bear_doubles-1',
 'morningstars-1',
 'whitesoldiers-1',
 'insideup-1',
 'eveningstars-1',
 'blackcrows-1',
 'insidedown-1',
 'direction_down-1',
 'direction_up-1',
 'psar_direction_bear-1',
 'psar_direction_bull-1',
 'bull_levels-1',
 'bear_levels-1',
 'bull_near_levels-1',
 'bear_near_levels-1',
 'singles-2',
 'bull_doubles-2',
 'bear_doubles-2',
 'morningstars-2',
 'whitesoldiers-2',
 'insideup-2',
 'eveningstars-2',
 'blackcrows-2',
 'insidedown-2',
 'direction_down-2',
 'direction_up-2',
 'psar_direction_bear-2',
 'psar_direction_bull-2',
 'bull_levels-2',
 'bear_levels-2',
 'bull_near_levels-2',
 'bear_near_levels-2']

In [336]:
def get_target_up_down(in_df, uptick=0.0050, downtick=0.0020, up_length=20):
    """
    Takes dataframe and returns df with added target_up, where 1 is classified as candle after which there is a raise in price `'uptick'`
    in the next `'up_length'` number  of candles before a drop of `'downtick'` and similar for target_down
    """
    df = in_df.copy(deep=True)
    df_keys = sorted(list(df.index))
    df["target_up"] = np.zeros(len(df))
    df["target_down"] = np.zeros(len(df))
    for index, row in df.iterrows():
        base_point = row["close"]
        goal_point_up = base_point + uptick
        fail_point_up = base_point - downtick
        goal_point_down = base_point - uptick
        fail_point_down = base_point + downtick
        small_df = df[df_keys.index(index)+1:df_keys.index(index)+up_length]
        small_keys = sorted(list(small_df.index))
      
        goal_reached_up = getfirst_pandas(lambda x: x.high >= goal_point_up, small_df)
        if goal_reached_up:
            goal_index = small_keys.index(goal_reached_up)
            if small_df[:goal_index]["low"].min() > fail_point_up and row["overall_trend"] > 0:
                df.at[index, 'target_up'] = 1
            
        goal_reached_down = getfirst_pandas(lambda x: x.low <= goal_point_down, small_df)
        if goal_reached_down:
            goal_index = small_keys.index(goal_reached_down)
            if small_df[:goal_index]["high"].max() < fail_point_down and row["overall_trend"] < 0:
                df.at[index, "target_down"] = 1

    return(df[:-up_length])

In [217]:
def clean_turns(turns, df):
    new_turns = {}
    t_list = list(turns.keys())
    good_list = list(turns.keys())
    final_turns = {}
    for x in t_list[1:]:
        prev_x = t_list[t_list.index(x)-1]
        if(turns[x] == turns[prev_x]):
            if turns[x] == "up":
                if df.loc[x, ['open', 'close']].min() >= df.loc[prev_x, ['open' ,'close']].min():
                    discard_x = x
                else:
                    discard_x = prev_x
            else:
                
                if df.loc[x, ['open', 'close']].max() <= df.loc[prev_x, ['open' ,'close']].max():
                    discard_x = x
                else:
                    discard_x = prev_x
            try:
                good_list.remove(discard_x)
            except(ValueError):
                print("Already removed")
            
    for new_x in good_list:
        new_turns[new_x] = turns[new_x]
        
    new_turns_keys = list(new_turns.keys())
    for peak_x in new_turns_keys[:-1]:
        start_index = df.index.get_loc(peak_x)-1
        stop_index = df.index.get_loc(new_turns_keys[new_turns_keys.index(peak_x)+1])
        
        temp_df = df[start_index:stop_index]
               
        if new_turns[peak_x] == "up":
            peak_up_time = peak_x
            for index, row in temp_df.iterrows():
                if row[["open", "close"]].min() <= df.loc[peak_up_time, ["open", "close"]].min():
                    peak_up_time = index
            if df.loc[peak_up_time, 'direction_down'] == 0 and df.iloc[df.index.get_loc(peak_up_time)-1]["direction_down"] == 1:
                peak_up_time = df.iloc[df.index.get_loc(peak_up_time)-1].name
            final_turns[peak_up_time] = "up"
                                
            
        if new_turns[peak_x] == "down":
            peak_down_time = peak_x
            for index, row in temp_df.iterrows():
                if row[["open", "close"]].max() >= df.loc[peak_down_time, ["open", "close"]].max():
                    peak_down_time = index
            if df.loc[peak_down_time, 'direction_up'] == 0 and df.iloc[df.index.get_loc(peak_down_time)-1]["direction_up"] == 1:
                peak_down_time = df.iloc[df.index.get_loc(peak_down_time)-1].name
            final_turns[peak_down_time] = "down"
        
    
    
    return(final_turns)

In [107]:
def new_means(in_df):
    hdf = in_df
    
    ndf = targeter.reproduce_columns(hdf, 5)
    
    s3 = []
    w3 = []
    b3 = []
    stoch_3 = []
    macd_3 = []
    rsi_3 = []
    adx_3 = []
    for x in range(1,4):
        s3.append(f"shadow-{str(x)}")
        b3.append(f"bodysize-{str(x)}")
        w3.append(f"wick-{str(x)}")
        stoch_3.append(f"stoch_stochastic-{str(x)}")
        macd_3.append(f"macd_macd-{str(x)}")
        rsi_3.append(f"rsi-{str(x)}")
        adx_3.append(f"adx_adx-{str(x)}")
    
    s5 = []
    w5 = []
    b5 = []
    stoch_5 = []
    macd_5 = []
    rsi_5 = []
    adx_5 = []
    for x in range(1,6):
        s5.append(f"shadow-{str(x)}")
        b5.append(f"bodysize-{str(x)}")
        w5.append(f"wick-{str(x)}")
        stoch_5.append(f"stoch_stochastic-{str(x)}")
        macd_5.append(f"macd_macd-{str(x)}")
        rsi_5.append(f"rsi-{str(x)}")
        adx_5.append(f"adx_adx-{str(x)}")
        
        
    ndf["bs3_mean"] = ndf[b3].mean(axis=1)
    ndf["w3_mean"] = ndf[w3].mean(axis=1)
    ndf["s3_mean"] = ndf[s3].mean(axis=1)

    ndf["bs3_std"] = ndf[b3].std(axis=1)
    ndf["w3_std"] = ndf[w3].std(axis=1)
    ndf["s3_std"] = ndf[s3].std(axis=1)
    
    ndf["bs5_mean"] = ndf[b5].mean(axis=1)
    ndf["w5_mean"] = ndf[w5].mean(axis=1)
    ndf["s5_mean"] = ndf[s5].mean(axis=1)

    ndf["bs5_std"] = ndf[b5].std(axis=1)
    ndf["w5_std"] = ndf[w5].std(axis=1)
    ndf["s5_std"] = ndf[s5].std(axis=1)
    
    ndf["stoch3_mean"] = ndf[stoch_3].mean(axis=1)
    ndf["rsi3_mean"] = ndf[rsi_3].mean(axis=1)
    ndf["macd3_mean"] = ndf[macd_3].mean(axis=1)
    ndf["adx3_mean"] = ndf[adx_3].mean(axis=1)

    ndf["stoch3_std"] = ndf[stoch_3].std(axis=1)
    ndf["rsi3_std"] = ndf[rsi_3].std(axis=1)
    ndf["macd3_std"] = ndf[macd_3].std(axis=1)
    ndf["adx3_std"] = ndf[adx_3].std(axis=1)
    
    ndf["stoch5_mean"] = ndf[stoch_5].mean(axis=1)
    ndf["rsi5_mean"] = ndf[rsi_5].mean(axis=1)
    ndf["macd5_mean"] = ndf[macd_5].mean(axis=1)
    ndf["adx5_mean"] = ndf[adx_5].mean(axis=1)

    ndf["stoch5_std"] = ndf[stoch_5].std(axis=1)
    ndf["rsi5_std"] = ndf[rsi_5].std(axis=1)
    ndf["macd5_std"] = ndf[macd_5].std(axis=1)
    ndf["adx5_std"] = ndf[adx_5].std(axis=1)
    
    
    

    dropable = ["bs3", "bs6", "w3", "w6", "s3", "s6"]
    
    for x in range(1, 6):
        for col in hdf.columns:
            dropable.append(f"{col}-{str(x)}")
    
    return(ndf.drop(dropable, axis=1))

In [538]:
def shape_df(in_df, del_cols=True):
    mean_bs = df["bodysize"].abs().mean()
    threshold = mean_bs/5
    df["bottom_bollinger"] = df["bb_lower"] - df["close"]
    df["top_bollinger"] = df["close"] - df["bb_upper"]
    df["mid_bollinger"] = df["close"] - df["bb_center"]
    df["bollinger_range"] = df["bb_upper"] - df["bb_center"]
    df["keltner_range"] = df["kelt_upper"] - df["kelt_center"]
    df["bottom_keltner"] = df["kelt_lower"] - df["close"]
    df["top_keltner"] = df["close"] - df["kelt_upper"]
    df["above_sma10"] = df["close"] - df["sma10"]
    df["above_sma5"] = df["close"] - df["sma10"]
    df["above_ema5"] = df["close"] - df["ema5"]
    df["above_ema10"] = df["close"] - df["ema10"]
    df["ema5_sma10"] = df["ema5"] - df["sma10"]
    df["sma5_sma10"] = df["sma5"] - df["sma10"]
    df["stoch_over_signal"] = df["stoch_stochastic"] - df["stoch_signal"]
    df["macd_over_signal"] = df["macd_macd"] - df["macd_signal"]
    df["adx_over"] = df["adx_pdmi"] - df["adx_ndmi"]
    df["stoch_over_mean"] = df["stoch_stochastic"] - df["stoch3_mean"]
    df["adx_over_mean"] = df["adx_adx"] - df["adx3_mean"]
    df["rsi_over_mean"] = df["rsi"] - df["rsi3_mean"]
    df["macd_over_mean"] = df["macd_macd"] - df["macd3_mean"]
    df["bs_over_mean"] = df["close"] - df["bs3_mean"]
    df["above_pp"] = df["close"] - df["pp"]
    
    
    levels = [
        "level_1",
        "level_2",
        "level_3",
        "level_4",
        "level_5",
        "pp",
        "standard_r1",
        "standard_s1",
        "fib_r1",
        "fib_s1"
    ]
    
    bull_conditions = []
    bear_conditions = []
    bear_near_conditions = []
    bull_near_conditions = []
    
    for level in levels:
        bull_conditions.append(
            (df['close'] > df[level]) & (df['open'] < df[level])
        )
        bear_conditions.append(
            (df['close'] < df[level]) & (df['open'] > df[level])
        )
        bear_near_conditions.append(
            (df['open'] < df[level]) & (df['close'] < df[level]) & (df[level]-df['close'] < threshold)
        )
        bull_near_conditions.append(
            (df['open'] > df[level]) & (df['close'] > df[level]) & (df['close']-df[level] < threshold)
        )
        
    df["bull_levels"] = np.select(bull_conditions, np.ones(len(levels)), default=0)
    df["bear_levels"] = np.select(bear_conditions, np.ones(len(levels)), default=0)
    df["bull_near_levels"] = np.select(bull_near_conditions, np.ones(len(levels)), default=0)
    df["bear_near_levels"] = np.select(bear_near_conditions, np.ones(len(levels)), default=0)
    
    if del_cols:
        drop_cols = [
            "bb_center",
            "bb_upper",
            "bb_lower",
            "kelt_center",
            "kelt_upper",
            "kelt_lower",
            "ema5",
            "sma5",
            "sma10",
            "ema10",
            "psar_psar",
            "curr_trend_down",
            "curr_trend_up",
            "level_1",
            "level_2",
            "level_3",
            "level_4",
            "level_5",
            "pp",
            "standard_r1",
            "standard_s1",
            "fib_r1",
            "fib_s1",
            "adx_pdmi",
            "adx_ndmi",
            "open",
            "high",
            "low",
            "close"
        ]
    else:
        drop_cols=[]

    
    return(df.drop(drop_cols, axis=1))

In [109]:
def double_up(in_df, fac=2):
    drops = ['bs3_mean', 'w3_mean', 's3_mean', 'bs3_std', 'w3_std', 's3_std',
       'bs5_mean', 'w5_mean', 's5_mean', 'bs5_std', 'w5_std', 's5_std',
       'stoch3_mean', 'rsi3_mean', 'macd3_mean', 'adx3_mean', 'stoch3_std',
       'rsi3_std', 'macd3_std', 'adx3_std', 'stoch5_mean', 'rsi5_mean',
       'macd5_mean', 'adx5_mean', 'stoch5_std', 'rsi5_std', 'macd5_std',
       'adx5_std', 'target_up', 'target_down']
    
    df = in_df.copy(deep=True)
    df = targeter.reproduce_columns(df, col_count=fac)
    
    dropable = []
    for i in range(1, fac+1):
        for col in drops:
            dropable.append(f"{col}-{str(i)}")
            
    return(df.drop(dropable, axis=1))

In [110]:
def getfirst_pandas(condition, df):
    cond = df[condition(df)]
    if not cond.empty:
        return(cond.iloc[0].name)
    else:
        return None


In [420]:
def turn_target(in_df, turns, fac=30):
    df = in_df.copy(deep=True)
    mean_bs = df["bodysize"].abs().mean()
    df["target_up"] = np.zeros(len(df))
    df["target_down"] = np.zeros(len(df))

    turns_keys = list(turns.keys())
    for t_time in  turns_keys:
        if df.index.get_loc(t_time) < len(df)+fac+2:
            try:
                turn0 = df.index.get_loc(t_time)
                turn2 = df.iloc[turn0 + 2].name
                start_point = df.index.get_loc(turn2)
                end_point = start_point + fac
                small_df = df[start_point:end_point]
                small_keys = sorted(list(small_df.index))
                sl = df.loc[t_time, 'close']
                goal_point_up = df.loc[turn2, "close"] + (mean_bs*4)
                goal_point_down = df.loc[turn2, "close"] - (mean_bs*4)

        
                if turns[t_time] == "up":
                    goal_reached_up = getfirst_pandas(lambda x: x.high >= goal_point_up, small_df)
                    if goal_reached_up:
                        goal_index = small_keys.index(goal_reached_up)
                        if small_df[:goal_index]["low"].min() > sl:
                            df.loc[turn2, "target_up"] = 1
            
                if turns[t_time] == "down":
                    goal_reached_down = getfirst_pandas(lambda x: x.high <= goal_point_down, small_df)
                    if goal_reached_down:
                        goal_index = small_keys.index(goal_reached_down)
                        if small_df[:goal_index]["high"].max() < sl:
                            df.loc[turn2, "target_down"] = 1
            except(IndexError):
                print("Fail")
            
    return(df)

In [112]:
def overall_trend(in_df, daily_df, fac=4):
    df = in_df.copy(deep=True)
    trend_df = targeter.reproduce_columns(daily_df, fac)
    summables= ['bodysize']
    for x in range(1, fac+1):
        summables.append(f"bodysize-{str(x)}")
    trend_df["mean_trend"] = trend_df[summables].mean(axis=1)
    
    trend_df_keys = list(trend_df.index)
    
    for index, row in df.iterrows():
        small_list = [item for item in trend_df_keys if item < index.replace(hour=0)]
        df.loc[index, 'overall_trend'] = trend_df.loc[max(small_list), "mean_trend"]

    return(df)

In [113]:
engine = create_engine("mysql+pymysql://rex:#Pass123@localhost/new_ml")
sql_h = "SELECT * FROM `gbpusd_1h` ORDER BY `index` ASC"
sql_d = "SELECT * FROM `gbpusd_d` ORDER BY `index` ASC"
new_df = pd.read_sql(sql_h, engine, index_col="index")
df_d = pd.read_sql(sql_d, engine, index_col="index")

In [319]:
df = new_df.copy(deep=True)
turns = rex.get_turns(df)
turns = clean_turns(turns, df)
df = overall_trend(df, df_d)

Already removed


In [321]:
cdf = df.copy(deep=True)

## Trying with new targeting method and no scaling

In [468]:
df = cdf.copy(deep=True)
df = turn_target(df, turns)
df = new_means(df)
df = shape_df(df)
df = double_up(df)

In [469]:
X = df.drop(["target_up", "target_down"], axis=1)
y = df["target_up"]


In [470]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [471]:
clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

In [472]:
clf.score(X_test, y_test)

0.9794665692363902

In [473]:
threshold = 0.5
predicted_proba = clf.predict_proba(X_test)
preds = (predicted_proba [:,1] >= threshold).astype('int')
cm = confusion_matrix(y_test, preds)
print(cm)

[[13376    11]
 [  269    29]]


In [474]:
threshold = 0.4
predicted_proba = clf.predict_proba(X_test)
preds = (predicted_proba [:,1] >= threshold).astype('int')
cm = confusion_matrix(y_test, preds)
print(cm)

[[13327    60]
 [  233    65]]


## Trying with old targeting method and no scaling

In [475]:
df = cdf.copy(deep=True)
df = targeter.get_target_up_down(df)
df = new_means(df)
df = shape_df(df)
df = double_up(df)

In [476]:
X = df.drop(["target_up", "target_down"], axis=1)
y = df["target_up"]
X_train, X_test, y_train, y_test = train_test_split(X, y)
clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

In [477]:
clf.score(X_test, y_test)

0.8044002631386594

In [478]:
threshold = 0.5
predicted_proba = clf.predict_proba(X_test)
preds = (predicted_proba [:,1] >= threshold).astype('int')
cm = confusion_matrix(y_test, preds)
print(cm)

[[10936     5]
 [ 2662    78]]


In [479]:
threshold = 0.4
predicted_proba = clf.predict_proba(X_test)
preds = (predicted_proba [:,1] >= threshold).astype('int')
cm = confusion_matrix(y_test, preds)
print(cm)

[[10849    92]
 [ 2451   289]]


## Trying with old df and old targeting method

In [686]:
df = cdf[-20000:].copy(deep=True)
df = targeter.get_target_up_down(df, up_length=30)

In [687]:
X = df[:-720].drop(["target_up", "target_down"], axis=1)
y = df[:-720]["target_up"]
X_train, X_test, y_train, y_test = train_test_split(X, y)
clf = RandomForestClassifier(random_state=42, class_weight={0:1,1:5}).fit(X_train, y_train)

In [688]:
clf.score(X_test, y_test)

0.8622480781217536

In [689]:
threshold = 0.5
predicted_proba = clf.predict_proba(X_test)
preds = (predicted_proba [:,1] >= threshold).astype('int')
cm = confusion_matrix(y_test, preds)
print(cm)

[[3464   82]
 [ 576  691]]


In [690]:
X_new = df[-720:]
X_new = X_new.drop(["target_up", "target_down"], axis=1)
y_new = df[-720:]["target_up"]

threshold = 0.5
predicted_proba = clf.predict_proba(X_new)
preds = (predicted_proba [:,1] >= threshold).astype('int')
cm = confusion_matrix(y_new, preds)
print(cm)

[[442  58]
 [185  35]]


In [691]:
for x in range(len(X_new)):
    if preds[x] == 1 and y_new[x] == 0:
        print(X_new.iloc[x].name)

2021-09-22 18:00:00
2021-09-23 12:00:00
2021-09-30 16:00:00
2021-09-30 17:00:00
2021-09-30 19:00:00
2021-09-30 20:00:00
2021-09-30 22:00:00
2021-09-30 23:00:00
2021-10-01 00:00:00
2021-10-01 01:00:00
2021-10-01 19:00:00
2021-10-01 20:00:00
2021-10-01 21:00:00
2021-10-01 22:00:00
2021-10-04 01:00:00
2021-10-04 02:00:00
2021-10-04 03:00:00
2021-10-05 11:00:00
2021-10-05 12:00:00
2021-10-05 13:00:00
2021-10-05 17:00:00
2021-10-05 18:00:00
2021-10-05 19:00:00
2021-10-05 20:00:00
2021-10-05 21:00:00
2021-10-05 22:00:00
2021-10-05 23:00:00
2021-10-06 01:00:00
2021-10-06 02:00:00
2021-10-06 03:00:00
2021-10-08 01:00:00
2021-10-11 05:00:00
2021-10-11 06:00:00
2021-10-11 07:00:00
2021-10-12 13:00:00
2021-10-13 05:00:00
2021-10-13 06:00:00
2021-10-13 08:00:00
2021-10-13 09:00:00
2021-10-13 10:00:00
2021-10-13 11:00:00
2021-10-13 12:00:00
2021-10-13 14:00:00
2021-10-21 01:00:00
2021-10-21 02:00:00
2021-10-21 03:00:00
2021-10-21 04:00:00
2021-10-21 13:00:00
2021-10-21 14:00:00
2021-10-21 15:00:00


## Trying with old df and new targeting method

In [484]:
df = cdf.copy(deep=True)
df = turn_target(df, turns)

In [485]:
X = df.drop(["target_up", "target_down"], axis=1)
y = df["target_up"]
X_train, X_test, y_train, y_test = train_test_split(X, y)
clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

In [486]:
clf.score(X_test, y_test)

0.9807846861985826

In [487]:
threshold = 0.5
predicted_proba = clf.predict_proba(X_test)
preds = (predicted_proba [:,1] >= threshold).astype('int')
cm = confusion_matrix(y_test, preds)
print(cm)

[[13415     8]
 [  258     6]]


## Trying to improve new df old targeting

### Dropping columns

In [832]:
df = cdf[-30000:].copy(deep=True)
df = targeter.get_target_up_down(df, up_length=30)
df = new_means(df)
df = shape_df(df, del_cols=False)
df = double_up(df)

In [833]:
pprint(list(df.columns))

['open',
 'high',
 'low',
 'close',
 'bodysize',
 'shadow',
 'wick',
 'singles',
 'bull_doubles',
 'bear_doubles',
 'morningstars',
 'whitesoldiers',
 'insideup',
 'eveningstars',
 'blackcrows',
 'insidedown',
 'level_1',
 'level_2',
 'level_3',
 'level_4',
 'level_5',
 'pp',
 'standard_r1',
 'standard_s1',
 'fib_r1',
 'fib_s1',
 'sma5',
 'ema5',
 'sma10',
 'ema10',
 'bb_center',
 'bb_upper',
 'bb_lower',
 'kelt_center',
 'kelt_upper',
 'kelt_lower',
 'macd_macd',
 'macd_signal',
 'rsi',
 'psar_psar',
 'stoch_stochastic',
 'stoch_signal',
 'adx_adx',
 'adx_pdmi',
 'adx_ndmi',
 'williams',
 'direction_down',
 'direction_up',
 'curr_trend_down',
 'curr_trend_up',
 'psar_direction_bear',
 'psar_direction_bull',
 'overall_trend',
 'target_up',
 'target_down',
 'bs3_mean',
 'w3_mean',
 's3_mean',
 'bs3_std',
 'w3_std',
 's3_std',
 'bs5_mean',
 'w5_mean',
 's5_mean',
 'bs5_std',
 'w5_std',
 's5_std',
 'stoch3_mean',
 'rsi3_mean',
 'macd3_mean',
 'adx3_mean',
 'stoch3_std',
 'rsi3_std',
 'mac

In [834]:
drop_cols =[ 
#  'open-1',
#  'high-1',
#  'low-1',
#  'close-1',
#  'bodysize-1',
#  'shadow-1',
#  'wick-1',
#  'singles-1',
#  'bull_doubles-1',
#  'bear_doubles-1',
#  'morningstars-1',
#  'whitesoldiers-1',
#  'insideup-1',
#  'eveningstars-1',
#  'blackcrows-1',
#  'insidedown-1',
#  'level_1-1',
#  'level_2-1',
#  'level_3-1',
#  'level_4-1',
#  'level_5-1',
#  'pp-1',
#  'standard_r1-1',
#  'standard_s1-1',
#  'fib_r1-1',
#  'fib_s1-1',
#  'sma5-1',
#  'ema5-1',
#  'sma10-1',
#  'ema10-1',
#  'bb_center-1',
#  'bb_upper-1',
#  'bb_lower-1',
#  'kelt_center-1',
#  'kelt_upper-1',
#  'kelt_lower-1',
#  'macd_macd-1',
#  'macd_signal-1',
#  'rsi-1',
#  'psar_psar-1',
#  'stoch_stochastic-1',
#  'stoch_signal-1',
#  'adx_adx-1',
#  'adx_pdmi-1',
#  'adx_ndmi-1',
#  'williams-1',
#  'direction_down-1',
#  'direction_up-1',
#  'curr_trend_down-1',
#  'curr_trend_up-1',
#  'psar_direction_bear-1',
#  'psar_direction_bull-1',
#  'overall_trend-1',
#  'bottom_bollinger-1',
#  'top_bollinger-1',
#  'mid_bollinger-1',
#  'bollinger_range-1',
#  'keltner_range-1',
#  'bottom_keltner-1',
#  'top_keltner-1',
#  'above_sma10-1',
#  'above_sma5-1',
#  'above_ema5-1',
#  'above_ema10-1',
#  'ema5_sma10-1',
#  'sma5_sma10-1',
#  'stoch_over_signal-1',
#  'macd_over_signal-1',
#  'adx_over-1',
#  'stoch_over_mean-1',
#  'adx_over_mean-1',
#  'rsi_over_mean-1',
#  'macd_over_mean-1',
#  'bs_over_mean-1',
#  'above_pp-1',
#  'bull_levels-1',
#  'bear_levels-1',
#  'bull_near_levels-1',
#  'bear_near_levels-1',
#  'open-2',
#  'high-2',
#  'low-2',
#  'close-2',
#  'bodysize-2',
#  'shadow-2',
#  'wick-2',
#  'singles-2',
#  'bull_doubles-2',
#  'bear_doubles-2',
#  'morningstars-2',
#  'whitesoldiers-2',
#  'insideup-2',
#  'eveningstars-2',
#  'blackcrows-2',
#  'insidedown-2',
#  'level_1-2',
#  'level_2-2',
#  'level_3-2',
#  'level_4-2',
#  'level_5-2',
#  'pp-2',
#  'standard_r1-2',
#  'standard_s1-2',
#  'fib_r1-2',
#  'fib_s1-2',
#  'sma5-2',
#  'ema5-2',
#  'sma10-2',
#  'ema10-2',
#  'bb_center-2',
#  'bb_upper-2',
#  'bb_lower-2',
#  'kelt_center-2',
#  'kelt_upper-2',
#  'kelt_lower-2',
#  'macd_macd-2',
#  'macd_signal-2',
#  'rsi-2',
#  'psar_psar-2',
#  'stoch_stochastic-2',
#  'stoch_signal-2',
#  'adx_adx-2',
#  'adx_pdmi-2',
#  'adx_ndmi-2',
#  'williams-2',
#  'direction_down-2',
#  'direction_up-2',
#  'curr_trend_down-2',
#  'curr_trend_up-2',
#  'psar_direction_bear-2',
#  'psar_direction_bull-2',
#  'overall_trend-2',
#  'bottom_bollinger-2',
#  'top_bollinger-2',
#  'mid_bollinger-2',
#  'bollinger_range-2',
#  'keltner_range-2',
#  'bottom_keltner-2',
#  'top_keltner-2',
#  'above_sma10-2',
#  'above_sma5-2',
#  'above_ema5-2',
#  'above_ema10-2',
#  'ema5_sma10-2',
#  'sma5_sma10-2',
#  'stoch_over_signal-2',
#  'macd_over_signal-2',
#  'adx_over-2',
#  'stoch_over_mean-2',
#  'adx_over_mean-2',
#  'rsi_over_mean-2',
#  'macd_over_mean-2',
#  'bs_over_mean-2',
#  'above_pp-2',
#  'bull_levels-2',
#  'bear_levels-2',
#  'bull_near_levels-2',
#  'bear_near_levels-2'
]

In [835]:
Xd = df[:-720].drop(drop_cols, axis=1)
X = Xd.drop(["target_up", "target_down"], axis=1)
y = df[:-720]["target_up"]
X_train, X_test, y_train, y_test = train_test_split(X, y)
X_train, y_train = over.fit_resample(X_train, y_train)
clf = RandomForestClassifier(random_state=42, n_estimators=1000, min_impurity_decrease=0.001).fit(X_train, y_train)

In [836]:
clf.score(X_test, y_test)

0.7325947202845028

In [837]:
threshold = 0.5
# predicted_proba = clf.predict_proba(X_test)
# preds = (predicted_proba [:,1] >= threshold).astype('int')
preds = clf.predict(X_test)
cm = confusion_matrix(y_test, preds)
print(cm)

[[5345   28]
 [1927   11]]


In [838]:
X_new = df[-720:].drop(drop_cols, axis=1)
X_new = X_new.drop(["target_up", "target_down"], axis=1)
y_new = df[-720:]["target_up"]

threshold = 0.5
predicted_proba = clf.predict_proba(X_new)
preds = (predicted_proba [:,1] >= threshold).astype('int')
cm = confusion_matrix(y_new, preds)
print(cm)

[[490  10]
 [220   0]]


In [780]:
for x in range(len(X_new)):
    if preds[x] == 1 and y_new[x] == 0:
        print(X_new.iloc[x].name)

2021-09-28 15:00:00
2021-09-28 16:00:00
2021-09-28 17:00:00
2021-09-28 20:00:00
2021-10-05 14:00:00
2021-10-05 17:00:00
2021-10-13 11:00:00
2021-10-20 18:00:00
2021-10-20 22:00:00


## Trying to improve new df new targeting

In [827]:
df = cdf.copy(deep=True)
df = turn_target(df, turns)
df = new_means(df)
df = shape_df(df)
df = double_up(df)

In [828]:
drop_cols = [

]

In [829]:
Xd = df[-30000:-720].drop(drop_cols, axis=1)
X = Xd.drop(["target_up", "target_down"], axis=1)
y = df[-30000:-720]["target_up"]
X_train, X_test, y_train, y_test = train_test_split(X, y)
X_train, y_train = over.fit_resample(X_train, y_train)

clf = RandomForestClassifier(random_state=42, class_weight={0:1,1:2}).fit(X_train, y_train)

In [830]:
threshold = 0.5
predicted_proba = clf.predict_proba(X_test)
preds = (predicted_proba [:,1] >= threshold).astype('int')
cm = confusion_matrix(y_test, preds)
print(cm)

[[7062  101]
 [ 104   53]]


In [831]:
X_new = df[-720:].drop(drop_cols, axis=1)
X_new = X_new.drop(["target_up", "target_down"], axis=1)
y_new = df[-720:]["target_up"]

threshold = 0.5
predicted_proba = clf.predict_proba(X_new)
preds = (predicted_proba [:,1] >= threshold).astype('int')
cm = confusion_matrix(y_new, preds)
print(cm)

[[697   8]
 [  7   8]]
