In [218]:
import pandas as pd
import numpy as np
import pandas_ta as ta
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from binance.client import Client
from binance import ThreadedWebsocketManager
from sklearn.preprocessing import LabelEncoder
from os import getenv

In [219]:
api_key = getenv('api_key')
secret_key = getenv('api_secret')

In [220]:
# Encode the target labels
lr_trend = LabelEncoder()
lr_action = LabelEncoder()
lr_amount = LabelEncoder()

In [221]:
client = Client(api_key = api_key, api_secret= secret_key, tld= 'com')

In [222]:
client

<binance.client.Client at 0x2946d667b20>

In [223]:
symbol = 'BTCUSDT'
interval = Client.KLINE_INTERVAL_5MINUTE
start_date = '2023-01-01'
end_date = '2024-11-02'

In [224]:
data = client.get_historical_klines(symbol, interval, start_str=start_date,end_str=end_date)

In [225]:
df = pd.DataFrame(data, columns=[
    'timestamp', 'open', 'high', 'low', 'close', 'volume', 'close_time',
    'quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume',
    'taker_buy_quote_asset_volume', 'ignore'
])

In [226]:
#Data Manipulation
# Convert the 'timestamp' column to a readable date format
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
df.set_index('timestamp', inplace=True)

# Select relevant columns
df = df[['open', 'high', 'low', 'close', 'volume']]

# Convert data types to numeric for analysis
df = df.apply(pd.to_numeric)

In [227]:
dataset = df

In [228]:
# Assuming 'data' is your DataFrame with 'close' and 'volume' columns
dataset['RSI'] = ta.rsi(dataset['close'], length=14)
# Adding the MA calculations
dataset['EMA_5'] = dataset['close'].ewm(span=5, adjust=False).mean()
dataset['EMA_30'] = dataset['close'].ewm(span=30, adjust=False).mean()
dataset['EMA_90'] = dataset['close'].ewm(span=90, adjust=False).mean()
#Volume 
dataset['volume_sma_3'] = dataset['volume'].rolling(window=3).mean()  # 3-day SMA for volume
# Calculate Bollinger Bands
bbands = ta.bbands(dataset['close'], length=20, std=2)

In [229]:
#Adding the limits of the Bollinger Bands
dataset['BB_upper'] = bbands['BBU_20_2.0']
dataset['BB_middle'] = bbands['BBM_20_2.0']
dataset['BB_lower'] = bbands['BBL_20_2.0']

In [230]:
# Step 2: Determine trend based on EMA alignment
def determine_trend(row):
    if (row['EMA_5'] > row['EMA_30'] > row['EMA_90']) & (row['close'] > row['BB_middle']) & (row['RSI'] < 70) & (row['volume'] > row['volume_sma_3']):
        return 'strong_upward'
    elif (row['EMA_5'] < row['EMA_30'] < row['EMA_90']) & (row['close'] < row['BB_middle']) & (row['RSI'] > 30):
        return 'strong_downward'
    elif (row['EMA_5'] > row['EMA_30'] < row['EMA_90']) & (row['RSI'] > 30) & (row['close'] < row['BB_middle']) & (row['volume'] < row['volume_sma_3']):
        return 'downward'
    elif (row['EMA_5'] < row['EMA_30'] > row['EMA_90']) & (row['RSI'] < 70) & (row['close'] > row['BB_lower']) & (row['volume'] > row['volume_sma_3']):
        return 'upward'
    else:
        return 'sideways'

In [231]:
dataset['trend'] = dataset.apply(determine_trend, axis=1)

In [232]:
# Step 3: Define actions and amounts based on the trend
def determine_action_amount(trend):
    if trend == 'strong_upward':
        return 'buy', 'all'
    elif trend == 'upward':
        return 'buy', 'half'
    elif trend == 'sideways':
        return 'hold', 'half'
    elif trend == 'downward':
        return 'sell', 'half'
    elif trend == 'strong_downward':
        return 'sell', 'all'
    else:
        return 'hold', 'none'

In [233]:
dataset[['action', 'amount']] = dataset['trend'].apply(lambda x: pd.Series(determine_action_amount(x)))

In [234]:
# Target columns (shifted to predict the next action)
dataset['target_trend'] = dataset['trend'].shift(-1)
dataset['target_action'] = dataset['action'].shift(-1)
dataset['target_amount'] = dataset['amount'].shift(-1)

In [235]:
dataset

Unnamed: 0_level_0,open,high,low,close,volume,RSI,EMA_5,EMA_30,EMA_90,volume_sma_3,BB_upper,BB_middle,BB_lower,trend,action,amount,target_trend,target_action,target_amount
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2023-01-01 00:00:00,16541.77,16544.76,16527.51,16535.38,486.60903,,16535.380000,16535.380000,16535.380000,,,,,sideways,hold,half,sideways,hold,half
2023-01-01 00:05:00,16534.91,16540.43,16522.55,16526.67,391.19043,,16532.476667,16534.818065,16535.188571,,,,,sideways,hold,half,sideways,hold,half
2023-01-01 00:10:00,16526.67,16530.87,16520.00,16520.69,294.73889,,16528.547778,16533.906576,16534.869922,390.846117,,,,sideways,hold,half,sideways,hold,half
2023-01-01 00:15:00,16521.26,16537.73,16517.72,16534.94,481.18777,,16530.678519,16533.973249,16534.871462,389.039030,,,,sideways,hold,half,sideways,hold,half
2023-01-01 00:20:00,16534.94,16540.66,16532.33,16535.54,309.53189,,16532.299012,16534.074330,16534.886155,361.819517,,,,sideways,hold,half,sideways,hold,half
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-01 23:40:00,69485.12,69515.16,69479.90,69489.83,27.34582,57.351248,69499.354209,69397.646767,69503.901125,20.330643,69614.752757,69435.4080,69256.063243,sideways,hold,half,sideways,hold,half
2024-11-01 23:45:00,69489.83,69499.99,69472.00,69484.00,36.44869,56.777375,69494.236140,69403.217944,69503.463738,26.405213,69619.073497,69441.0075,69262.941503,sideways,hold,half,sideways,hold,half
2024-11-01 23:50:00,69484.01,69560.00,69478.00,69540.00,33.85343,60.831645,69509.490760,69412.042592,69504.266732,32.549313,69630.007115,69447.5075,69265.007885,sideways,hold,half,sideways,hold,half
2024-11-01 23:55:00,69539.99,69546.00,69496.00,69496.01,10.94950,56.359467,69504.997173,69417.459845,69504.085266,27.083873,69632.878937,69454.7080,69276.537063,sideways,hold,half,sideways,hold,half


In [236]:
# Drop rows with NaN values in the target
df2 = dataset.dropna().copy()

In [237]:
# Fit and transform the labels
df2['target_trend'] = lr_trend.fit_transform(df2['target_trend'])
df2['target_action'] = lr_action.fit_transform(df2['target_action'])
df2['target_amount'] = lr_amount.fit_transform(df2['target_amount'])

In [238]:
# Define features 
X = df2[['EMA_5', 'EMA_30', 'EMA_90', 'volume', 'RSI', 'BB_upper', 'BB_middle', 'BB_lower', 'volume_sma_3']]

In [239]:
# Define targets
y_trend = df2['target_trend']
y_action = df2['target_action']
y_amount = df2['target_amount']

In [240]:
# Split the data
X_train_trend, X_test_trend, y_train_trend, y_test_trend = train_test_split(X, y_trend, test_size=0.2, random_state=101)
X_train_action, X_test_action, y_train_action, y_test_action = train_test_split(X, y_action, test_size=0.2, random_state=101)
X_train_amount, X_test_amount, y_train_amount, y_test_amount = train_test_split(X, y_amount, test_size=0.2, random_state=101)

In [259]:
# Without Optimization
#Trend Model
model = LogisticRegression()
model.fit(X_train_trend, y_train_trend)
y_pred_trend = model.predict(X_test_trend)
print("accuracy: "+ str(accuracy_score(y_test_trend, y_pred_trend)))
print("precision: "+ str(precision_score(y_test_trend, y_pred_trend, average="weighted")))
print("recall: " + str(recall_score(y_test_trend, y_pred_trend, average="weighted")))
print( "f1_score: " + str(f1_score(y_test_trend, y_pred_trend, average="weighted")))
print(confusion_matrix(y_test_trend, y_pred_trend))

accuracy: 0.6165670367207515
precision: 0.5412508572522897
recall: 0.6165670367207515
f1_score: 0.5333879011182161
[[    0   311     2     0     0]
 [    0 21657  1187   513     0]
 [    0  6686  2030     0     0]
 [    0  3992     0   139     0]
 [    0  2116     0    10     0]]


In [260]:
#Action Model
model_action = LogisticRegression()
model_action.fit(X_train_action, y_train_action)
y_pred_action = model_action.predict(X_test_action)
print("accuracy: "+ str(accuracy_score(y_test_action, y_pred_action)))
print("precision: "+ str(precision_score(y_test_action, y_pred_action, average="weighted")))
print("recall: " + str(recall_score(y_test_action, y_pred_action, average="weighted")))
print( "f1_score: " + str(f1_score(y_test_action, y_pred_action, average="weighted")))
print(confusion_matrix(y_test_action, y_pred_action))

accuracy: 0.6153507750433455
precision: 0.5646016397831869
recall: 0.6153507750433455
f1_score: 0.5449784394700956
[[  261  5996     0]
 [  770 21156  1431]
 [    0  6667  2362]]


In [261]:
#Amount Model
model_amount = LogisticRegression()
model_amount.fit(X_train_amount, y_train_amount)
y_pred_amount = model_amount.predict(X_test_amount)
print("accuracy: "+ str(accuracy_score(y_test_amount, y_pred_amount)))
print("precision: "+ str(precision_score(y_test_amount, y_pred_amount, average="weighted")))
print("recall: " + str(recall_score(y_test_amount, y_pred_amount, average="weighted")))
print( "f1_score: " + str(f1_score(y_test_amount, y_pred_amount, average="weighted")))
print(confusion_matrix(y_test_amount, y_pred_amount))

accuracy: 0.6868773128380302
precision: 0.6786632885567833
recall: 0.6868773128380302
f1_score: 0.6053119932471733
[[ 1552 11295]
 [  805 24991]]


In [256]:
import warnings
warnings.filterwarnings('ignore')

In [262]:
# With Parameter Optimization
param_grid = {"C": [0.1, 1, 10], "max_iter": [500,1000],"penalty": ["l2", "elasticnet"]}
#grid_search = GridSearchCV(LogisticRegression(max_iter=500), param_grid, cv=5)

In [263]:
# Grid search for each target model
def optimize_model(X_train, y_train):
    model = LogisticRegression()
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5,scoring='accuracy')
    grid_search.fit(X_train, y_train)
    print(f"Best parameters found: {grid_search.best_params_}")
    return grid_search.best_estimator_

In [264]:
# Optimize Trend Model
print("Optimizing trend model...")
model_trend = optimize_model(X_train_trend, y_train_trend)

Optimizing trend model...
Best parameters found: {'C': 0.1, 'max_iter': 1000, 'penalty': 'l2'}


In [265]:
# Optimize Action Model
print("Optimizing action model...")
model_action = optimize_model(X_train_action, y_train_action)

Optimizing action model...
Best parameters found: {'C': 0.1, 'max_iter': 500, 'penalty': 'l2'}


In [266]:
# Optimize Amount Model
print("Optimizing amount model...")
model_amount = optimize_model(X_train_amount, y_train_amount)

Optimizing amount model...
Best parameters found: {'C': 10, 'max_iter': 500, 'penalty': 'l2'}


In [267]:
#Trend Model
y_pred_optimized_trend = model_trend.predict(X_test_trend)
print("accuracy: "+ str(accuracy_score(y_test_trend, y_pred_optimized_trend)))
print("precision: "+ str(precision_score(y_test_trend, y_pred_optimized_trend, average="weighted")))
print("recall: " + str(recall_score(y_test_trend, y_pred_optimized_trend, average="weighted")))
print( "f1_score: " + str(f1_score(y_test_trend, y_pred_optimized_trend, average="weighted")))
print(confusion_matrix(y_test_trend, y_pred_optimized_trend))

accuracy: 0.656858939523329
precision: 0.586042448136812
recall: 0.656858939523329
f1_score: 0.5886966894042013
[[    0   310     3     0     0]
 [    0 21360  1735   211    51]
 [    0  4773  3943     0     0]
 [    0  4059     1    71     0]
 [    0  2020    97     0     9]]


In [268]:
#Action Model
y_pred_optimized_action = model_action.predict(X_test_action)
print("accuracy: "+ str(accuracy_score(y_test_action, y_pred_optimized_action)))
print("precision: "+ str(precision_score(y_test_action, y_pred_optimized_action, average="weighted")))
print("recall: " + str(recall_score(y_test_action, y_pred_optimized_action, average="weighted")))
print( "f1_score: " + str(f1_score(y_test_action, y_pred_optimized_action, average="weighted")))
print(confusion_matrix(y_test_action, y_pred_optimized_action))

accuracy: 0.652899619594752
precision: 0.5939979886903722
recall: 0.652899619594752
f1_score: 0.5840824369899223
[[   78  6155    24]
 [  242 21486  1629]
 [    0  5363  3666]]


In [269]:
#Amount Model
y_pred_optimized_amount = model_amount.predict(X_test_amount)
print("accuracy: "+ str(accuracy_score(y_test_amount, y_pred_optimized_amount)))
print("precision: "+ str(precision_score(y_test_amount, y_pred_optimized_amount, average="weighted")))
print("recall: " + str(recall_score(y_test_amount, y_pred_optimized_amount, average="weighted")))
print( "f1_score: " + str(f1_score(y_test_amount, y_pred_optimized_amount, average="weighted")))
print(confusion_matrix(y_test_amount, y_pred_optimized_amount))

accuracy: 0.68390135341459
precision: 0.673508161459408
recall: 0.68390135341459
f1_score: 0.5979590645810814
[[ 1380 11467]
 [  748 25048]]
