In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score

In [None]:
# Load the raw candle data; the "timestamp" column is parsed as datetimes and used as the index.
df = pd.read_csv(
    "btc_usdt_2h_raw.csv",
    index_col="timestamp",
    parse_dates=["timestamp"],
)

In [None]:
# Work on a copy of the raw frame and add the two base columns:
#   return_1 - one-bar percentage change of the close
#   vol_30   - rolling 30-bar standard deviation of return_1
# assign() returns a new frame, and later keyword arguments can see earlier ones.
df_feat = df.assign(
    return_1=lambda frame: frame["close"].pct_change(),
    vol_30=lambda frame: frame["return_1"].rolling(30).std(),
)

In [None]:
# Start with an all-NaN (float) target column; rows that never receive a label stay NaN.
df_feat["target"] = float("nan")

In [None]:
# Labeling rule (both thresholds scale with the 30-bar rolling volatility, vol_30):
#   - the price must not fall more than 0.6 * vol_30 below the entry price, and
#   - the price must rise at least 1.0 * vol_30 above the entry price.
# Example: with vol_30 = 1% and an entry price of 100, a trade requires the price to reach
# at least 101, and there is no trade if the price drops below 99.4.

#worst_allowed_drop = -0.6 * vol_30
#minimum_required_rise = 1.0 * vol_30

In [None]:
# Label each bar t with 1 when, over the next HORIZON_BARS bars, the highest high
# is at least MIN_RISE_VOL_MULT * vol_30 above the close at t AND the lowest low
# is never more than MAX_DROP_VOL_MULT * vol_30 below it; otherwise label 0.
# Only the window's extreme high/low are compared, so the order in which the
# rise and the drop happen inside the window is not taken into account.
#
# This is a vectorised equivalent of looping over range(30, len(df_feat) - 5)
# and writing one cell at a time with .iloc.
WARMUP_BARS = 30         # first labeled position (rows before it keep NaN)
HORIZON_BARS = 5         # look-ahead window length, in bars
MAX_DROP_VOL_MULT = 0.6  # tolerated drop, as a multiple of vol_30
MIN_RISE_VOL_MULT = 1.0  # required rise, as a multiple of vol_30

entry_price = df_feat["close"]
vol = df_feat["vol_30"]

# Extreme high/low over bars t+1 .. t+HORIZON_BARS: take a trailing window that
# ends at t+HORIZON_BARS and shift it back so the value sits on row t.
# min_periods=1 makes NaNs inside the window be skipped, like Series.max()/min().
future_high = (
    df_feat["high"].rolling(HORIZON_BARS, min_periods=1).max().shift(-HORIZON_BARS)
)
future_low = (
    df_feat["low"].rolling(HORIZON_BARS, min_periods=1).min().shift(-HORIZON_BARS)
)

best_rise = (future_high - entry_price) / entry_price
worst_drop = (future_low - entry_price) / entry_price

# "~(drop < limit)" rather than "drop >= limit" so that comparisons involving
# NaN resolve exactly like the row-wise if/elif chain did (they end up as 0).
is_trade = ~(worst_drop < -MAX_DROP_VOL_MULT * vol) & (
    best_rise >= MIN_RISE_VOL_MULT * vol
)

# Only rows with a full look-ahead window after the warm-up are labeled;
# the last HORIZON_BARS rows are left untouched.
first_pos = WARMUP_BARS
end_pos = len(df_feat) - HORIZON_BARS  # exclusive
if end_pos > first_pos:
    target_loc = df_feat.columns.get_loc("target")
    df_feat.iloc[first_pos:end_pos, target_loc] = (
        is_trade.iloc[first_pos:end_pos].astype(float).to_numpy()
    )

In [None]:
# Features
#   momentum_10           - close minus its 10-bar rolling mean, relative to the close
#   distance_from_mean_30 - close minus its 30-bar rolling mean, relative to the close
#   range_position_10     - position of the close inside the 10-bar low/high range

close_px = df_feat["close"]

for window, column in ((10, "momentum_10"), (30, "distance_from_mean_30")):
    rolling_mean = close_px.rolling(window).mean()
    df_feat[column] = (close_px - rolling_mean) / close_px

low_10 = df_feat["low"].rolling(10).min()
high_10 = df_feat["high"].rolling(10).max()

range_width_10 = high_10 - low_10
df_feat["range_position_10"] = (close_px - low_10) / range_width_10



In [None]:
# Summary statistics of the engineered columns (displayed as the cell output).
feature_cols = [
    "return_1",
    "vol_30",
    "momentum_10",
    "distance_from_mean_30",
    "range_position_10",
]
df_feat[feature_cols].describe()


In [None]:
# Drop every row that has a NaN in any column (rolling warm-up rows and unlabeled rows).
df_feat = df_feat.dropna(axis="index", how="any")

In [None]:
# Total number of missing values left in the frame.
df_feat.isna().sum().sum()

In [None]:
# Rows kept after dropna() vs. rows in the raw frame. At least the 30 volatility
# warm-up rows and the 5 unlabeled trailing rows are gone.
len(df_feat), len(df)

In [None]:
# Chronological 60 / 20 / 20 split by row position (no shuffling).
n = len(df_feat)
train_end, val_end = int(n * 0.6), int(n * 0.8)

df_train, df_val, df_test = (
    df_feat.iloc[:train_end],
    df_feat.iloc[train_end:val_end],
    df_feat.iloc[val_end:],
)

In [None]:
# Share of positive labels in the train, validation and test splits, in that order.
tuple(split["target"].mean() for split in (df_train, df_val, df_test))

In [None]:
def evaluate(features):
    """Fit a depth-3 decision tree on the training split and score it by ROC AUC.

    Reads the module-level frames ``df_train``, ``df_val`` and ``df_test``;
    the label is taken from their ``"target"`` column.

    Parameters
    ----------
    features : list of str
        Names of the columns used as model inputs.

    Returns
    -------
    tuple
        ``(val_score, test_score)``: ROC AUC on the validation split and on
        the test split.
    """
    label_col = "target"

    tree = DecisionTreeClassifier(max_depth=3, random_state=1)
    tree.fit(df_train[features], df_train[label_col])

    def _split_auc(split):
        # Score with the second predict_proba column (the probability of
        # class 1 when both classes were present in the training labels).
        scores = tree.predict_proba(split[features])[:, 1]
        return roc_auc_score(split[label_col], scores)

    return _split_auc(df_val), _split_auc(df_test)




In [None]:
# Baseline feature set: (validation AUC, test AUC).
evaluate(features=['return_1', 'vol_30', 'momentum_10'])

In [None]:
# Baseline plus distance_from_mean_30.
# Decision: remove this feature (it is not part of the final feature set).
evaluate(features=['return_1', 'vol_30', 'momentum_10', 'distance_from_mean_30'])

In [None]:
# Baseline plus range_position_10.
# Decision: add this feature (it is part of the final feature set).
evaluate(features=['return_1', 'vol_30', 'momentum_10', 'range_position_10'])

In [None]:
# Final model: refit the depth-3 tree on the selected features and keep the
# fitted model, the split matrices and the predictions as notebook variables.

features = ['return_1', 'vol_30', 'momentum_10', 'range_position_10']

X_train, y_train = df_train[features], df_train["target"]
X_val, y_val = df_val[features], df_val["target"]
X_test, y_test = df_test[features], df_test["target"]

model = DecisionTreeClassifier(max_depth=3, random_state=1)
model.fit(X_train, y_train)

# Second predict_proba column is used as the ranking score for ROC AUC.
val_pred = model.predict_proba(X_val)[:, 1]
val_score = roc_auc_score(y_val, val_pred)

test_pred = model.predict_proba(X_test)[:, 1]
test_score = roc_auc_score(y_test, test_pred)

In [None]:
# Next, compute the ROC AUC separately for the high-volatility and low-volatility halves of df_val.

In [None]:
# Median validation-set volatility: the cut-off between "low" and "high" volatility.
mid_vol_30 = df_val.loc[:, "vol_30"].median()

In [None]:
# Boolean masks over df_val: at-or-below the median volatility vs. strictly above it.
val_vol = df_val["vol_30"]
low_vol_mask = val_vol.le(mid_vol_30)
high_vol_mask = val_vol.gt(mid_vol_30)

In [None]:
# Validation scores from the final model: second predict_proba column.
val_class_proba = model.predict_proba(X_val)
y_val_prob = val_class_proba[:, 1]

In [None]:
# ROC AUC of the final model within each volatility regime of the validation split.
low_vol_auc = roc_auc_score(y_val[low_vol_mask], y_val_prob[low_vol_mask])
high_vol_auc = roc_auc_score(y_val[high_vol_mask], y_val_prob[high_vol_mask])

for regime_label, regime_auc in (
    ("Low vol ROC:", low_vol_auc),
    ("High vol ROC:", high_vol_auc),
):
    print(regime_label, regime_auc)

This means our model performs slightly better in high-volatility markets than in calm markets.