In [1]:
!pip install xgboost tensorflow



In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense, Input
from tensorflow.keras.callbacks import EarlyStopping

In [4]:
# Load dataset with technical analysis features
df_daily = pd.read_csv('../binance_dataset_extract/df_daily.csv')
df_daily

Unnamed: 0,open_time,open,high,low,close,volume,number_of_trades,volatility,log_returns,SMA_20,SMA_50,RSI
0,2020-12-25,0.000009,0.000168,0.000009,0.000093,31915448.00,226447,,,,,
1,2020-12-26,0.000093,0.000100,0.000060,0.000060,16588695.00,93597,,-0.436802,,,
2,2020-12-27,0.000060,0.000062,0.000039,0.000040,18382334.00,83251,,-0.400495,,,
3,2020-12-28,0.000040,0.000047,0.000040,0.000041,9797253.00,42828,,0.020366,,,
4,2020-12-29,0.000041,0.000041,0.000029,0.000032,11666843.00,48704,,-0.240787,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
687,2022-11-12,0.000033,0.000033,0.000029,0.000030,839696.80,9573,1.363364e-06,-0.080348,0.000030,0.00003,50.834724
688,2022-11-13,0.000030,0.000032,0.000030,0.000032,893477.70,10205,1.291739e-06,0.053754,0.000030,0.00003,58.083838
689,2022-11-14,0.000032,0.000032,0.000029,0.000031,640879.06,4708,1.278455e-06,-0.028610,0.000031,0.00003,54.096049
690,2022-11-15,0.000031,0.000032,0.000031,0.000031,297733.00,2425,1.044792e-06,0.012498,0.000031,0.00003,56.175853


In [6]:
# Define features and target variable
X = df_daily[['open', 'high', 'low', 'volume', 'number_of_trades']]
y = df_daily['close']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### 1. Random Forest Regressor 

In [7]:
# --- Model 1: Random Forest Regressor ---
rf_model = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42)
rf_model.fit(X_train, y_train)

In [8]:
y_pred_rf = rf_model.predict(X_test)
mse_rf = mean_squared_error(y_test, y_pred_rf)
r2_rf = r2_score(y_test, y_pred_rf)

print(f'Random Forest MSE: {mse_rf}, R2: {r2_rf}')

Random Forest MSE: 5.754517857895185e-12, R2: 0.991543536468198


### Model 2: XGBoost Regressor

In [9]:
xgb_model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, max_depth=6, learning_rate=0.1)
xgb_model.fit(X_train, y_train)

In [10]:
y_pred_xgb = xgb_model.predict(X_test)
mse_xgb = mean_squared_error(y_test, y_pred_xgb)
r2_xgb = r2_score(y_test, y_pred_xgb)

print(f'XGBoost MSE: {mse_xgb}, R2: {r2_xgb}')

XGBoost MSE: 6.928955141909784e-10, R2: -0.018233984462495245


### Model 3: LSTM for TimeSeries Prediction

In [13]:
# Scale the features for LSTM
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

In [14]:
# Create LSTM data with a specific number of timesteps
def create_lstm_data(X, y, timesteps):
    X_lstm, y_lstm = [], []
    for i in range(len(X) - timesteps):
        X_lstm.append(X[i:(i + timesteps), :])
        y_lstm.append(y.iloc[i + timesteps])
    return np.array(X_lstm), np.array(y_lstm)

In [16]:
import numpy as np

# Prepare the data with timesteps
timesteps = 10  # Adjust the number of timesteps as needed
X_lstm, y_lstm = create_lstm_data(X_scaled, y, timesteps)

# Train-test split
X_train_lstm, X_test_lstm, y_train_lstm, y_test_lstm = train_test_split(X_lstm, y_lstm, test_size=0.2, random_state=42)

In [17]:
# Reshape the data for LSTM
X_train_lstm = np.reshape(X_train_lstm, (X_train_lstm.shape[0], X_train_lstm.shape[1], X_train_lstm.shape[2]))
X_test_lstm = np.reshape(X_test_lstm, (X_test_lstm.shape[0], X_test_lstm.shape[1], X_test_lstm.shape[2]))

In [22]:
from tensorflow.keras.layers import Input

# Define and train the LSTM model
lstm_model = Sequential()
lstm_model.add(Input(shape=(X_train_lstm.shape[1], X_train_lstm.shape[2])))  # Use Input layer
lstm_model.add(LSTM(units=50, return_sequences=False))  # LSTM layer
lstm_model.add(Dropout(0.2))  # Dropout layer for regularization
lstm_model.add(Dense(1))  # Output layer

In [25]:
# Compile the model
lstm_model.compile(optimizer='adam', loss='mean_squared_error')

# Early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [26]:
# Train the model
lstm_model.fit(X_train_lstm, y_train_lstm, 
                epochs=50, 
                batch_size=32, 
                validation_split=0.2, 
                callbacks=[early_stopping], 
                verbose=1)

Epoch 1/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - loss: 0.0034 - val_loss: 9.1558e-04
Epoch 2/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0011 - val_loss: 1.1780e-04
Epoch 3/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 7.2159e-04 - val_loss: 2.2542e-05
Epoch 4/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 4.6706e-04 - val_loss: 6.6835e-05
Epoch 5/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 4.3818e-04 - val_loss: 4.0459e-05
Epoch 6/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 3.0910e-04 - val_loss: 9.4130e-06
Epoch 7/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 2.9171e-04 - val_loss: 1.1967e-05
Epoch 8/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 3.0192e-04 - val_loss: 1.1325e-0

<keras.src.callbacks.history.History at 0x7fd7ea0b7340>

In [27]:
# Predictions and evaluation
y_pred_lstm = lstm_model.predict(X_test_lstm)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step


In [28]:
# Calculate MSE and R2 Score
mse_lstm = mean_squared_error(y_test_lstm, y_pred_lstm)
r2_lstm = r2_score(y_test_lstm, y_pred_lstm)

print(f'LSTM MSE: {mse_lstm}, R2: {r2_lstm}')

LSTM MSE: 9.118016099574456e-06, R2: -14797.832348613507
