In [1]:
import pandas as pd
import numpy as np
import rex
import targeter
from sklearn.model_selection import cross_val_score, RandomizedSearchCV, train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sqlalchemy import create_engine
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from imblearn.over_sampling import SMOTE


In [2]:
engine = create_engine("mysql+pymysql://rex:#Pass123@localhost/new_ml")
sql_h = "SELECT * FROM `gbpusd_1h` ORDER BY `index` ASC"
df = pd.read_sql(sql_h, engine, index_col="index")

In [3]:
def clean_turns(turns, df):
    new_turns = {}
    t_list = list(turns.keys())
    good_list = list(turns.keys())
    final_turns = {}
    for x in t_list[1:]:
        prev_x = t_list[t_list.index(x)-1]
        if(turns[x] == turns[prev_x]):
            if turns[x] == "up":
                if df.loc[x, ['open', 'close']].min() >= df.loc[prev_x, ['open' ,'close']].min():
                    discard_x = x
                else:
                    discard_x = prev_x
            else:
                
                if df.loc[x, ['open', 'close']].max() <= df.loc[prev_x, ['open' ,'close']].max():
                    discard_x = x
                else:
                    discard_x = prev_x
            good_list.remove(discard_x)
            
    for new_x in good_list:
        new_turns[new_x] = turns[new_x]
        
    new_turns_keys = list(new_turns.keys())
    for peak_x in new_turns_keys[:-1]:
        start_index = df.index.get_loc(peak_x)-1
        stop_index = df.index.get_loc(new_turns_keys[new_turns_keys.index(peak_x)+1])
        
        temp_df = df[start_index:stop_index]
               
        if new_turns[peak_x] == "up":
            peak_up_time = peak_x
            for index, row in temp_df.iterrows():
                if row[["open", "close"]].min() <= df.loc[peak_up_time, ["open", "close"]].min():
                    peak_up_time = index
            if df.loc[peak_up_time, 'direction_down'] == 0 and df.iloc[df.index.get_loc(peak_up_time)-1]["direction_down"] == 1:
                peak_up_time = df.iloc[df.index.get_loc(peak_up_time)-1].name
            final_turns[peak_up_time] = "up"
                                
            
        if new_turns[peak_x] == "down":
            peak_down_time = peak_x
            for index, row in temp_df.iterrows():
                if row[["open", "close"]].max() >= df.loc[peak_down_time, ["open", "close"]].max():
                    peak_down_time = index
            if df.loc[peak_down_time, 'direction_up'] == 0 and df.iloc[df.index.get_loc(peak_down_time)-1]["direction_up"] == 1:
                peak_down_time = df.iloc[df.index.get_loc(peak_down_time)-1].name
            final_turns[peak_down_time] = "down"
        
    
    
    return(final_turns)
                    
            

In [4]:
def retarget(in_df, turns):
    df = in_df.copy(deep=True)
    baseline = df["bodysize"].abs().mean() * 4
    
    for x in range(1,4):
        df[f"target_up-{str(x)}"] = np.zeros(len(df))
        df[f"target_down-{str(x)}"] = np.zeros(len(df))

    
    turns_keys = list(turns.keys())
    for t_time in  turns_keys[:-1]:
        turn0 = df.index.get_loc(t_time)
        turn1 = df.iloc[turn0 + 1].name
        turn2 = df.iloc[turn0 + 2].name
        turn3 = df.iloc[turn0 + 3].name
        last_loc = turns_keys[turns_keys.index(t_time)+1]

        if turns[t_time] == "up":
            last_loc_high = df.loc[last_loc, "high"]
            
            if (last_loc_high - df.loc[turn1, "close"]) >= baseline:
                df.loc[turn1, "target_up-1"] = 1
            
            if (last_loc_high - df.loc[turn2, "close"]) >= baseline:
                df.loc[turn2, "target_up-2"] = 1
            
            if (last_loc_high - df.loc[turn3, "close"]) >= baseline:
                df.loc[turn3, "target_up-3"] = 1
                
        if turns[t_time] == "down":
            last_loc_low = df.loc[last_loc, "low"]
            
            if (df.loc[turn1, "close"] - last_loc_low) >= baseline:
                df.loc[turn1, "target_down-1"] = 1
            
            if (df.loc[turn2, "close"] - last_loc_low) >= baseline:
                df.loc[turn2, "target_down-2"] = 1
            
            if (df.loc[turn3, "close"] - last_loc_low) >= baseline:
                df.loc[turn3, "target_down-3"] = 1
    
    return(df[:df.index.get_loc(turns_keys[-1])])
    

In [5]:
def turn_target(in_df, turns):
    df = in_df.copy(deep=True)
    for x in range(0,4):
        df[f"target_up-{str(x)}"] = np.zeros(len(df))
        df[f"target_down-{str(x)}"] = np.zeros(len(df))

    turns_keys = list(turns.keys())
    for t_time in  turns_keys[:-1]:
        turn0 = df.index.get_loc(t_time)
        turn1 = df.iloc[turn0 + 1].name
        turn2 = df.iloc[turn0 + 2].name
        turn3 = df.iloc[turn0 + 3].name
        
        if turns[t_time] == "up":
            df.loc[t_time, "target_up-0"] = 1
            df.loc[turn1, "target_up-1"] = 1
            df.loc[turn2, "target_up-2"] = 1
            df.loc[turn3, "target_up-3"] = 1
            
        if turns[t_time] == "down":
            df.loc[t_time, "target_down-0"] = 1
            df.loc[turn1, "target_down-1"] = 1
            df.loc[turn2, "target_down-2"] = 1
            df.loc[turn3, "target_down-3"] = 1
            
    return(df)

In [34]:
def new_means(in_df):
    hdf = in_df
    
    ndf = rex.reproduce_columns(hdf, 10)
    
    s3 = []
    w3 = []
    b3 = []
    stoch_3 = []
    macd_3 = []
    rsi_3 = []
    for x in range(1,4):
        s3.append(f"shadow-{str(x)}")
        b3.append(f"bodysize-{str(x)}")
        w3.append(f"wick-{str(x)}")
        stoch_3.append(f"stoch_stochastic-{str(x)}")
        macd_3.append(f"macd_macd-{str(x)}")
        rsi_3.append(f"rsi-{str(x)}")
    
    s5 = []
    w5 = []
    b5 = []
    stoch_5 = []
    macd_5 = []
    rsi_5 = []
    for x in range(1,6):
        s5.append(f"shadow-{str(x)}")
        b5.append(f"bodysize-{str(x)}")
        w5.append(f"wick-{str(x)}")
        stoch_5.append(f"stoch_stochastic-{str(x)}")
        macd_5.append(f"macd_macd-{str(x)}")
        rsi_5.append(f"rsi-{str(x)}")
        
    s10 = []
    w10 = []
    b10 = []
    stoch_10 = []
    macd_10 = []
    rsi_10 = []
    
    for x in range(1,11):
        s10.append(f"shadow-{str(x)}")
        b10.append(f"bodysize-{str(x)}")
        w10.append(f"wick-{str(x)}")
        stoch_10.append(f"stoch_stochastic-{str(x)}")
        macd_10.append(f"macd_macd-{str(x)}")
        rsi_10.append(f"rsi-{str(x)}")
        
    ndf["bs3_mean"] = ndf[b3].sum(axis=1)
    ndf["w3_mean"] = ndf[w3].mean(axis=1)
    ndf["s3_mean"] = ndf[s3].mean(axis=1)

    ndf["bs3_std"] = ndf[b3].std(axis=1)
    ndf["w3_std"] = ndf[w3].std(axis=1)
    ndf["s3_std"] = ndf[s3].std(axis=1)
    
    ndf["bs5_mean"] = ndf[b5].sum(axis=1)
    ndf["w5_mean"] = ndf[w5].mean(axis=1)
    ndf["s5_mean"] = ndf[s5].mean(axis=1)

    ndf["bs5_std"] = ndf[b5].std(axis=1)
    ndf["w5_std"] = ndf[w5].std(axis=1)
    ndf["s5_std"] = ndf[s5].std(axis=1)
    
    ndf["bs10_mean"] = ndf[b10].sum(axis=1)
    ndf["w10_mean"] = ndf[w10].mean(axis=1)
    ndf["s10_mean"] = ndf[s10].mean(axis=1)

    ndf["bs10_std"] = ndf[b10].std(axis=1)
    ndf["w10_std"] = ndf[w10].std(axis=1)
    ndf["s10_std"] = ndf[s10].std(axis=1)
    
    ndf["stoch3_mean"] = ndf[stoch_3].mean(axis=1)
    ndf["rsi3_mean"] = ndf[rsi_3].mean(axis=1)
    ndf["macd3_mean"] = ndf[macd_3].mean(axis=1)

    ndf["stoch3_std"] = ndf[stoch_3].std(axis=1)
    ndf["rsi3_std"] = ndf[rsi_3].std(axis=1)
    ndf["macd3_std"] = ndf[macd_3].std(axis=1)
    
    ndf["stoch5_mean"] = ndf[stoch_5].mean(axis=1)
    ndf["rsi5_mean"] = ndf[rsi_5].mean(axis=1)
    ndf["macd5_mean"] = ndf[macd_5].mean(axis=1)

    ndf["stoch5_std"] = ndf[stoch_5].std(axis=1)
    ndf["rsi5_std"] = ndf[rsi_5].std(axis=1)
    ndf["macd5_std"] = ndf[macd_5].std(axis=1)
    
    ndf["stoch10_mean"] = ndf[stoch_5].mean(axis=1)
    ndf["rsi10_mean"] = ndf[rsi_5].mean(axis=1)
    ndf["macd10_mean"] = ndf[macd_5].mean(axis=1)

    ndf["stoch10_std"] = ndf[stoch_5].std(axis=1)
    ndf["rsi10_std"] = ndf[rsi_5].std(axis=1)
    ndf["macd10_std"] = ndf[macd_5].std(axis=1)
    
    dropable = ["bs3", "bs6", "w3", "w6", "s3", "s6"]
    
    for x in range(1, 11):
        for col in hdf.cols:
            dropable.append(f"{col}-{str(x)}")
    ndf.drop(dropable, axis=1)
    
    return(ndf.drop(dropable, axis=1))

In [None]:
def double_up(in_df, fac=2):
    drops = ["bs3_mean",
            "w3_mean",
            "s3_mean",
            "bs3_std",
            "w3_std",
            "s3_std",
            "bs5_mean",
            "w5_mean",
            "s5_mean",
            "bs5_std",
            "w5_std",
            "s5_std",    
            "bs10_mean",
            "w10_mean",
            "s10_mean",
            "bs10_std",
            "w10_std",
            "s10_std",    
            "stoch3_mean",
            "rsi3_mean",
            "macd3_mean",
            "stoch3_std",
            "rsi3_std",
            "macd3_std",    
            "stoch5_mean",
            "rsi5_mean",
            "macd5_mean",
            "stoch5_std",
            "rsi5_std",
            "macd5_std",
            "stoch10_mean",
            "rsi10_mean",
            "macd10_mean",
            "stoch10_std",
            "rsi10_std",
            "macd10_std"
    ]
    
    df = in_df.copy(deep=True)
    df = targeter.reproduce_columns(df, col_count=fac)
    
    dropable = []
    for i in range(1, fac+1):
        for col in drops:
            dropable.append(f"{col}-{str(i)}")
            
    return(df.drop(dropable, axis=1))
    

In [7]:
df.drop(["bs3", "bs6", "w3", "w6", "s3", "s6"], axis=1, inplace=True)
ndf = targeter.reproduce_columns(df, 9)

In [8]:
ndf = new_means(ndf)

In [9]:
for col in df.columns:
    for x in range(1,10):
        ndf.drop(f"{col}-{str(x)}", axis=1, inplace=True)

In [10]:
ndf.columns

Index(['open', 'high', 'low', 'close', 'bodysize', 'shadow', 'wick', 'singles',
       'bull_doubles', 'bear_doubles', 'morningstars', 'whitesoldiers',
       'insideup', 'eveningstars', 'blackcrows', 'insidedown', 'level_1',
       'level_2', 'level_3', 'level_4', 'level_5', 'pp', 'standard_r1',
       'standard_s1', 'fib_r1', 'fib_s1', 'sma5', 'ema5', 'sma10', 'ema10',
       'bb_center', 'bb_upper', 'bb_lower', 'kelt_center', 'kelt_upper',
       'kelt_lower', 'macd_macd', 'macd_signal', 'rsi', 'psar_psar',
       'stoch_stochastic', 'stoch_signal', 'adx_adx', 'adx_pdmi', 'adx_ndmi',
       'williams', 'direction_down', 'direction_up', 'curr_trend_down',
       'curr_trend_up', 'psar_direction_bear', 'psar_direction_bull',
       'bs3_mean', 'w3_mean', 's3_mean', 'bs3_std', 'w3_std', 's3_std',
       'bs6_mean', 'w6_mean', 's6_mean', 'bs6_std', 'w6_std', 's6_std',
       'stoch3_mean', 'rsi3_mean', 'macd3_mean', 'stoch6_mean', 'rsi6_mean',
       'macd6_mean'],
      dtype='object')

In [11]:
turns = rex.get_smoothed_turns(ndf)

In [12]:
turns = clean_turns(turns, ndf)

In [13]:
tdf = turn_target(ndf, turns)

In [14]:
tdf.columns

Index(['open', 'high', 'low', 'close', 'bodysize', 'shadow', 'wick', 'singles',
       'bull_doubles', 'bear_doubles', 'morningstars', 'whitesoldiers',
       'insideup', 'eveningstars', 'blackcrows', 'insidedown', 'level_1',
       'level_2', 'level_3', 'level_4', 'level_5', 'pp', 'standard_r1',
       'standard_s1', 'fib_r1', 'fib_s1', 'sma5', 'ema5', 'sma10', 'ema10',
       'bb_center', 'bb_upper', 'bb_lower', 'kelt_center', 'kelt_upper',
       'kelt_lower', 'macd_macd', 'macd_signal', 'rsi', 'psar_psar',
       'stoch_stochastic', 'stoch_signal', 'adx_adx', 'adx_pdmi', 'adx_ndmi',
       'williams', 'direction_down', 'direction_up', 'curr_trend_down',
       'curr_trend_up', 'psar_direction_bear', 'psar_direction_bull',
       'bs3_mean', 'w3_mean', 's3_mean', 'bs3_std', 'w3_std', 's3_std',
       'bs6_mean', 'w6_mean', 's6_mean', 'bs6_std', 'w6_std', 's6_std',
       'stoch3_mean', 'rsi3_mean', 'macd3_mean', 'stoch6_mean', 'rsi6_mean',
       'macd6_mean', 'target_up-0', 'target

In [15]:
wdf = tdf[:-720]

In [16]:
len(wdf)

53947

In [21]:
targets = [
    "target_up-0",
    "target_up-1",
    "target_up-2",
    "target_up-3",
    "target_down-0",
    "target_down-1",
    "target_down-2",
    "target_down-3",
]

X = wdf.drop(targets, axis=1)
y = wdf[targets]

X_train, X_test, y_train, y_test = train_test_split(X, y)

In [23]:
y_test

Unnamed: 0_level_0,target_up-0,target_up-1,target_up-2,target_up-3,target_down-0,target_down-1,target_down-2,target_down-3
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-04-20 05:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-11-21 07:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-02-01 08:00:00,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015-08-31 09:00:00,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2017-03-17 15:00:00,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
2016-07-04 13:00:00,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2016-07-28 06:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2015-10-27 13:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-06-23 08:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier

In [27]:
for i in range(4):
    up_train = y_train[f"target_up-{str(i)}"]
    down_train = y_train[f"target_down-{str(i)}"]
    up_test = y_test[f"target_up-{str(i)}"]
    down_test = y_test[f"target_down-{str(i)}"]
    
    rf_up = RandomForestClassifier().fit(X_train, up_train)
    rf_down = RandomForestClassifier().fit(X_train, down_train)
    
    lsvc_up = LinearSVC(max_iter=5000).fit(X_train, up_train)
    lsvc_down = LinearSVC(max_iter=5000).fit(X_train, down_train)
    
    kn_up = KNeighborsClassifier().fit(X_train, up_train)
    kn_down = KNeighborsClassifier().fit(X_train, down_train)
    
    
    print(f"RFC up on {str(i)}:\n{confusion_matrix(up_test, rf_up.predict(X_test))}\n\n")
    print(f"RFC down on {str(i)}:\n{confusion_matrix(down_test, rf_down.predict(X_test))}\n\n")
    
    print(f"LSVC up on {str(i)}:\n{confusion_matrix(up_test, lsvc_up.predict(X_test))}\n\n")
    print(f"LSVC down on {str(i)}:\n{confusion_matrix(down_test, lsvc_down.predict(X_test))}\n\n")

    print(f"KN up on {str(i)}:\n{confusion_matrix(up_test, kn_up.predict(X_test))}\n\n")
    print(f"KN down on {str(i)}:\n{confusion_matrix(down_test, kn_down.predict(X_test))}\n\n")



RFC up on 0:
[[12891     5]
 [  591     0]]


RFC down on 0:
[[12909     2]
 [  576     0]]


LSVC up on 0:
[[12896     0]
 [  591     0]]


LSVC down on 0:
[[8352 4559]
 [  63  513]]


KN up on 0:
[[12827    69]
 [  576    15]]


KN down on 0:
[[12849    62]
 [  565    11]]






RFC up on 1:
[[12898    22]
 [  534    33]]


RFC down on 1:
[[12821    20]
 [  600    46]]


LSVC up on 1:
[[ 2538 10382]
 [    0   567]]


LSVC down on 1:
[[12841     0]
 [  646     0]]


KN up on 1:
[[12879    41]
 [  547    20]]


KN down on 1:
[[12799    42]
 [  627    19]]






RFC up on 2:
[[12885    18]
 [  518    66]]


RFC down on 2:
[[12858    15]
 [  581    33]]


LSVC up on 2:
[[12884    19]
 [  524    60]]


LSVC down on 2:
[[12718   155]
 [  472   142]]


KN up on 2:
[[12829    74]
 [  544    40]]


KN down on 2:
[[12785    88]
 [  571    43]]






RFC up on 3:
[[12898     5]
 [  563    21]]


RFC down on 3:
[[12872     5]
 [  600    10]]


LSVC up on 3:
[[12903     0]
 [  584     0]]


LSVC down on 3:
[[12877     0]
 [  610     0]]


KN up on 3:
[[12832    71]
 [  545    39]]


KN down on 3:
[[12806    71]
 [  575    35]]




In [30]:
from sklearn.model_selection  import GridSearchCV

In [33]:
X = wdf.drop(targets, axis=1)
y = wdf["target_up-2"]

X_train, X_test, y_train, y_test = train_test_split(X, y)

rf_grid = {
    "n_estimators": [100,300, 600],
    "criterion": ["gini", "entropy"],
    "max_depth": [20, 50, None],
    "min_samples_split": [2, 4, 6],
    "max_features": ["auto", 0.2, 0.5, 0.9],
    "bootstrap": [True, False]
}

clf = RandomForestClassifier(n_jobs=2)
up_clf = GridSearchCV(estimator=clf, scoring='roc_auc', param_grid=rf_grid,
                    cv=5, return_train_score=True, n_jobs=2, verbose=2)
up_clf.fit(X_train, y_train)

preds = up_clf.predict(X_test)
print(confusion_matrix(y_test, preds))


Fitting 5 folds for each of 432 candidates, totalling 2160 fits
[[12854    32]
 [  514    87]]


In [35]:
up_clf.best_params_

{'bootstrap': True,
 'criterion': 'entropy',
 'max_depth': 50,
 'max_features': 0.5,
 'min_samples_split': 6,
 'n_estimators': 600}

In [93]:
wdf[wdf["target_up-0"]==1][["target_up-0", "target_up-1", "target_up-2"]]

Unnamed: 0_level_0,target_up-0,target_up-1,target_up-2
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-01-04 13:00:00,1.0,0.0,0.0
2013-01-07 06:00:00,1.0,0.0,0.0
2013-01-08 16:00:00,1.0,0.0,0.0
2013-01-09 04:00:00,1.0,0.0,0.0
2013-01-09 16:00:00,1.0,0.0,0.0
...,...,...,...
2021-09-09 23:00:00,1.0,0.0,0.0
2021-09-13 09:00:00,1.0,0.0,0.0
2021-09-13 21:00:00,1.0,0.0,0.0
2021-09-15 05:00:00,1.0,0.0,0.0


In [94]:
df.columns

Index(['open', 'high', 'low', 'close', 'bodysize', 'shadow', 'wick', 'singles',
       'bull_doubles', 'bear_doubles', 'morningstars', 'whitesoldiers',
       'insideup', 'eveningstars', 'blackcrows', 'insidedown', 'level_1',
       'level_2', 'level_3', 'level_4', 'level_5', 'pp', 'standard_r1',
       'standard_s1', 'fib_r1', 'fib_s1', 'sma5', 'ema5', 'sma10', 'ema10',
       'bb_center', 'bb_upper', 'bb_lower', 'kelt_center', 'kelt_upper',
       'kelt_lower', 'macd_macd', 'macd_signal', 'rsi', 'psar_psar',
       'stoch_stochastic', 'stoch_signal', 'adx_adx', 'adx_pdmi', 'adx_ndmi',
       'williams', 'direction_down', 'direction_up', 'curr_trend_down',
       'curr_trend_up', 'psar_direction_bear', 'psar_direction_bull'],
      dtype='object')

In [None]:
def shape_df(in_df):
    df["bottom_bollinger"] = df["bb_lower"] - df["close"]
    df["top_bollinger"] = df["close"] - df["bb_upper"]
    df["mid_bollinger"] = df["close"] - df["bb_center"]
    df["bollinger_range"] = df["bb_upper"] - df["bb_center"]
    df["keltner_range"] = df["kelt_upper"] - df["kelt_center"]
    df["bottom_keltner"] = df["kelt_lower"] - df["close"]
    df["top_keltner"] = df["close"] - df["kelt_upper"]
    df["above_sma10"] = df["close"] - df["sma10"]
    df["above_sma5"] = df["close"] - df["sma10"]
    df["above_ema5"] = df["close"] - df["ema5"]
    df["above_ema10"] = df["close"] - df["ema10"]
    df["ema5_sma10"] = df["ema5"] - df["sma10"]
    df["sma5_sma10"] = df["sma5"] - df["sma10"]
    df["stoch_over_signal"] = df["stoch_stochastic"] - df["stoch_signal"]
    df["macd_over_signal"] = df["macd_macd"] - df["macd_signal"]
    df["adx_over"] = df["adx_pdmi"] - df["adx_ndmi"]
    
    
    levels = [
        "level_1",
        "level_2",
        "level_3",
        "level_4",
        "level_5",
        "pp",
        "standard_r1",
        "standard_s1",
        "fib_r1",
        "fib_s1"
    ]
    
    bull_conditions = []
    bear_conditions = []
    
    for level in levels:
        bull_conditions.append(
            (df['close'] > df[level]) & (df['open'] < df[level])
        )
        bear_conditions.append(
            (df['close'] < df[level]) & (df['open'] > df[level])
        )
        
    df["bull_levels"] = np.select(bull_conditions, np.ones(len(levels)), default=0)
    df["bear_levels"] = np.select(bear_conditions, np.ones(len(levels)), default=0)
    
    drop_cols = [
        "bb_center",
        "bb_upper",
        "bb_lower",
        "kelt_center",
        "kelt_upper",
        "kelt_lower",
        "ema5",
        "sma5",
        "sma10",
        "ema10",
        "psar_psar",
        "curr_trend_down",
        "curr_trend_up",
        "level_1",
        "level_2",
        "level_3",
        "level_4",
        "level_5",
        "pp",
        "standard_r1",
        "standard_s1",
        "fib_r1",
        "fib_s1",
        "adx_pdmi",
        "adx_ndmi",
        "open",
        "high",
        "low",
        "close"
    ]
    
    df["bodysize"] = df["bodysize"].abs()
    

In [95]:
dturns = rex.get_smoothed_turns(df)

In [96]:
dturns = clean_turns(dturns,df)

In [97]:
kdf = turn_target(df, dturns)

In [178]:
X = kdf.drop(targets, axis=1)
y = kdf["target_up-2"]

In [179]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, shuffle=False)

In [181]:
neigh = KNeighborsClassifier(weights='distance', algorithm='ball_tree', n_neighbors=5, leaf_size=300, n_jobs=3, p=1)
clf = neigh.fit(X_train, y_train)
pred = clf.predict(X_test)
cm = confusion_matrix(y_test, pred)
cm

array([[10393,    80],
       [  420,    42]], dtype=int64)

In [182]:
from sklearn.model_selection import GridSearchCV

In [183]:
neigh = KNeighborsClassifier()

In [None]:
grid = {
    "weights": ["uniform", "distance"],
    "n_neighbors": [1,2,3,4,5],
    "algorithm": ['ball_tree', 'kd_tree', 'brute'],
    'leaf_size': [30, 50, 100, 200, 500, 1000],
    'p': [1, 2]
}

up_clf = GridSearchCV(estimator=neigh, scoring='roc_auc', param_grid=grid,
                    cv=5, return_train_score=True, n_jobs=3, verbose=2)
up_clf.fit(X_train, y_train)

Fitting 5 folds for each of 360 candidates, totalling 1800 fits


In [None]:
preds = up_clf.predict(X_test)
cm = confusion_matrix(y_test, preds)
cm