# Import Libraries

In [None]:
#Load the required libraries

from datetime import datetime

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
# import plotly.express as px
# import plotly.graph_objects as go

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error,r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import TimeSeriesSplit
# from sklearn.preprocessing import MinMaxScaler

plt.style.use('seaborn-v0_8-darkgrid')


# Import Data

In [None]:
# Read the BTCUSDT CSV into a DataFrame; the path is relative to the
# notebook's working directory, so run the notebook from its own folder.
data_BTC = pd.read_csv("../data/BTCUSDT_final_clean.csv")

In [None]:
# Preview the first rows of the freshly loaded frame.
data_BTC.head()

Here's a breakdown of each feature:

- **open_time**: This represents the opening time (start timestamp) of the given period.

- **open**: This refers to the opening price of the trading instrument at the given period.

- **high**: This represents the highest price reached by the trading instrument during the specified period.

- **low**: This denotes the lowest price reached by the trading instrument during the specified period.

- **close**: This indicates the closing price of the trading instrument at the given period.

- **volume**: This refers to the total volume, typically the quantity of the trading instrument, traded during the specified period.

- **quote_volume**: This refers to the total volume in terms of the quote asset. The quote asset is the asset used to determine the value of the trading instrument.

- **count**: This represents the number of trades that occurred during the specified period.

- **taker_buy_volume**: This denotes the volume of the base asset bought by takers (market takers) during the specified period.

- **taker_buy_quote_volume**: This represents the volume of the quote asset bought by takers (market takers) during the specified period.

In [None]:
# Align the column names with the earlier bitstamp notebook so the rest of
# that notebook's code can be reused without edits.
data_BTC = data_BTC.rename(columns={"open_time": "date", "volume": "Volume"})

In [None]:
# Preview again to confirm the 'date' and 'Volume' column names took effect.
data_BTC.head()

In [None]:
# Inspect column dtypes and non-null counts before the datetime conversion.
data_BTC.info()

In [None]:
# NOTE(review): same preview as the cell before info(); nothing modifies the
# frame in between, so this cell looks redundant.
data_BTC.head()

# Datetime Conversion

In [None]:
# Parse the 'date' column (object dtype after read_csv) into pandas datetimes
# so it can be compared against the split cutoff later on.
data_BTC["date"] = pd.to_datetime(data_BTC["date"])

In [None]:
# Re-check the dtypes: 'date' should now be reported as a datetime column.
data_BTC.info()

# Modeling

In [None]:
# (rows, columns) of the full dataset before it is split.
data_BTC.shape

In [None]:
# Train/Test Split

# Chronological hold-out: rows dated up to and including the cutoff are used
# for training, rows strictly after it for testing.
split_date = "2022-09-01"

train_split = data_BTC['date'] <= split_date
test_split = data_BTC['date'] > split_date

train_rf = data_BTC.loc[train_split]
test_rf = data_BTC.loc[test_split]

# Feature columns fed to the model; 'close' is the target.
# NOTE(review): 'high' and 'low' belong to the same period as the target
# 'close' - confirm these are known at prediction time (possible leakage).
columns = [
    'open',
    'high',
    'low',
    'Volume',
    'quote_volume',
    'taker_buy_volume',
    'taker_buy_quote_volume',
]

X_train = train_rf.loc[:, columns]
y_train = train_rf['close']

X_test = test_rf.loc[:, columns]
y_test = test_rf['close']

In [None]:
### GridSearchCV

# Hyperparameter candidates. Every combination is evaluated
# (2 x 2 x 2 x 2 = 16 settings, each fitted once per CV split).
num_estimators = [120, 130]
learn_rates = [0.07, 0.06]
max_depths = [4, 3]
min_samples_leaf = [1,2]

# Model: gradient boosting with Huber loss. The seed is fixed so that
# Restart & Run All selects the same estimator every time.
regressor = GradientBoostingRegressor(loss='huber', random_state=42)


# Param Grid
param_grid = {
              'n_estimators': num_estimators,
              'learning_rate': learn_rates,
              'max_depth': max_depths,
              'min_samples_leaf': min_samples_leaf,
              }

# Time-ordered cross-validation: each split trains on an earlier window and
# validates on the window that follows it. Plain cv=5 (unshuffled K-fold)
# would train on future prices to score past ones, inflating the CV score.
# Assumes the rows of X_train are in chronological order - TODO confirm.
time_series_cv = TimeSeriesSplit(n_splits=5)

# Model Fit
grid_search = GridSearchCV(
                           regressor,
                           param_grid,
                           cv=time_series_cv,
                           n_jobs=-1,  # fit candidates in parallel; results are unaffected
                           )

grid_search.fit(X_train, y_train)

In [None]:
# Report the winning configuration: mean CV score, the refit estimator and
# the hyperparameters that produced it (one line each).
print(
    f"Model Best Score : {grid_search.best_score_}",
    f"Model Best Estimator : {grid_search.best_estimator_}",
    f"Model Best Parameters : {grid_search.best_params_}",
    sep="\n",
)

In [None]:
# Keep the estimator selected by the grid search for prediction below.
# NOTE(review): the "rf" in the name is a leftover; the model is a
# GradientBoostingRegressor, not a random forest.
best_rf_model = grid_search.best_estimator_

In [None]:
# Plot Actual vs Prediction (training period)

# assign() returns a new frame, so the slice taken from data_BTC is never
# written to (this is what avoids the SettingWithCopy warning).
train_rf = train_rf.assign(Predicted_close=best_rf_model.predict(X_train))

# Plot against the 'date' column rather than the positional row index, and
# label the figure so it can be read on its own.
ax = train_rf.plot(
    x='date',
    y=['close', 'Predicted_close'],
    figsize=(15, 5),
    title='Training period: actual vs. predicted close',
)
ax.set(xlabel='Date', ylabel='Close price');

In [None]:
# assign() returns a new frame, so the slice taken from data_BTC is never
# written to (this is what avoids the SettingWithCopy warning).
test_rf = test_rf.assign(Forecast_GBR=best_rf_model.predict(X_test))

# Stack train and test rows; 'Forecast_GBR' is NaN for the training rows, so
# the forecast line only appears over the test period.
overall_data = pd.concat([train_rf, test_rf], sort=False)

# Plot whole timeframe against the 'date' column rather than the row index,
# and label the figure so it can be read on its own.
ax = overall_data.plot(
    x='date',
    y=['close', 'Forecast_GBR'],
    figsize=(15, 5),
    title='Whole timeframe: actual close vs. GBR forecast',
)
ax.set(xlabel='Date', ylabel='Close price');

In [None]:
# Results GBR Model

# Hold-out actuals and the model's forecasts for the same rows.
actual_close = test_rf['close']
forecast_close = test_rf['Forecast_GBR']

# MAE and RMSE are rounded to whole numbers; R2 is kept at full precision.
test_mae_rf = round(mean_absolute_error(actual_close, forecast_close))
test_rmse_rf = round(np.sqrt(mean_squared_error(actual_close, forecast_close)))
test_r2_rf = r2_score(actual_close, forecast_close)

print(
    f"test MAE GBR : {test_mae_rf}",
    f"test RMSE GBR : {test_rmse_rf}",
    f"test R2 GBR : {test_r2_rf}",
    sep="\n",
)

## Results Modeling

In [None]:
# Overall Results

# One row per metric, one column per model, so further models can be added
# as extra columns later.
results = {
    "METRIC": ["MAE", "RMSE", "R2"],
    "GradientBoostingRegressor": [test_mae_rf, test_rmse_rf, test_r2_rf]
}


# Four decimals keep R2 informative; rounding to one decimal would turn e.g.
# 0.996 into 1.0. MAE and RMSE were already rounded to whole numbers above,
# so they are unaffected.
df_results = pd.DataFrame(results).round(4)
df_results