# **<div align="center">Price Models </div>**

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
import os

In [2]:
# Put the parent of the current working directory on the import path so that
# the `src` package below can be resolved from this notebook.
parent_dir = os.path.abspath("..")
sys.path.append(parent_dir)

from src.train_price_models import (
    evaluate_model,
    run_arima,
    run_arimax,
    run_prophet,
    run_random_forest,
    run_ruptures,
    run_sarima,
    run_sarimax,
    run_xgboost,
)

  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.


## **Load dataset**

In [3]:
# Read both splits with identical options: the first CSV column becomes the
# index and the 'date.1' column is parsed as datetimes.
train, test = (
    pd.read_csv(csv_path, index_col=0, parse_dates=['date.1'])
    for csv_path in (
        "../data/processed/binance_train_price.csv",
        "../data/processed/binance_test_price.csv",
    )
)

In [4]:
# Build a flat (non time-indexed) view of both splits with calendar features.
# NOTE(review): the cell that follows rebinds X_train / X_test / y_train /
# y_test directly from `train` / `test`, so the model cells do not see these
# engineered features unless that cell is skipped — confirm which is intended.

# Move the index into an ordinary column; reset_index already returns a new
# frame, so `train` / `test` are left untouched.
train_reset = train.reset_index()
test_reset  = test.reset_index()

# After reset_index the former index is the first column.
date_col = train_reset.columns[0]

# Extract date features
for df in (train_reset, test_reset):
    # The load cell only lists 'date.1' in parse_dates, so the index column is
    # not guaranteed to be datetime64. Convert explicitly (a no-op when it
    # already is) so the `.dt` accessor cannot fail on string dates.
    dates = pd.to_datetime(df[date_col])
    df['day_of_week'] = dates.dt.dayofweek
    df['month']       = dates.dt.month
    df['quarter']     = dates.dt.quarter

# Drop original date column (we only use derived features)
train_reset = train_reset.drop(columns=[date_col])
test_reset  = test_reset.drop(columns=[date_col])

# Define target
y_train = train_reset['target_price']
y_test  = test_reset['target_price']

# Define features: every remaining column except the target
X_train = train_reset.drop(columns=['target_price'])
X_test  = test_reset.drop(columns=['target_price'])


In [5]:
# Separate predictors from the 'target_price' column for each split, keeping
# the original index of `train` / `test`. This rebinds the four names that the
# previous cell assigned.
X_train, y_train = train.drop(columns=['target_price']), train['target_price']
X_test, y_test = test.drop(columns=['target_price']), test['target_price']

## **Train and evaluate models**

#### Random Forest Regressor

In [6]:
# run_random_forest returns a pair that is unpacked into the test-set
# predictions and the model object.
y_pred_rf, rf_model = run_random_forest(X_train, X_test, y_train)
# Label the evaluation through `model_name`, matching the Prophet cell, rather
# than passing the fitted estimator object as the third positional argument.
evaluate_model(y_test, y_pred_rf, model_name="Random Forest")

{'MAE': 17.687859564872763,
 'RMSE': np.float64(19.266498898794563),
 'R2': -3.1958902386026553}

#### XGBoost Regressor

In [7]:
# run_xgboost returns a pair that is unpacked into the test-set predictions
# and the model object.
y_pred_xgb, xgb_model = run_xgboost(X_train, X_test, y_train)
# Label the evaluation through `model_name`, matching the Prophet cell, rather
# than passing the fitted estimator object as the third positional argument.
evaluate_model(y_test, y_pred_xgb, model_name="XGBoost")

{'MAE': 17.37667023813417,
 'RMSE': np.float64(19.00505027498867),
 'R2': -3.0827854695789494}

#### ARIMA

In [8]:
# Univariate model: only the training target series and the number of steps to
# forecast (one per test row) are passed in.
y_pred_arima, arima_model = run_arima(train['target_price'], len(y_test))
# Label the evaluation through `model_name`, matching the Prophet cell, rather
# than passing the fitted model object as the third positional argument.
evaluate_model(y_test, y_pred_arima, model_name="ARIMA")

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


{'MAE': 13.161363749302568,
 'RMSE': np.float64(15.832522801222524),
 'R2': -1.8334709157215912}

#### SARIMA

In [9]:
# Univariate model: only the training target series and the number of steps to
# forecast (one per test row) are passed in.
y_pred_sarima, sarima_model = run_sarima(train['target_price'], len(y_test))
# Label the evaluation through `model_name`, matching the Prophet cell, rather
# than passing the fitted model object as the third positional argument.
evaluate_model(y_test, y_pred_sarima, model_name="SARIMA")

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


{'MAE': 33.388534210435566,
 'RMSE': np.float64(36.1448906244491),
 'R2': -13.767682433967325}

#### ARIMAX

In [10]:
# Target series plus the train/test feature frames are passed in, with a
# fixed (1, 1, 1) order.
y_pred_arimax, arimax_model = run_arimax(
    train['target_price'], X_train, X_test, order=(1, 1, 1)
)
# Label the evaluation through `model_name`, matching the Prophet cell, rather
# than passing the fitted model object as the third positional argument.
evaluate_model(y_test, y_pred_arimax, model_name="ARIMAX")

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


{'MAE': 47.19846809401746,
 'RMSE': np.float64(58.741715856724106),
 'R2': -38.00423561690015}

In [11]:
# Target series plus the train/test feature frames are passed in, with a
# fixed (1, 1, 1) order.
y_pred_sarimax, sarimax_model = run_sarimax(
    train['target_price'], X_train, X_test, order=(1, 1, 1)
)
# Label the evaluation through `model_name`, matching the Prophet cell, rather
# than passing the fitted model object as the third positional argument.
evaluate_model(y_test, y_pred_sarimax, model_name="SARIMAX")

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


{'MAE': 36.091831740733355,
 'RMSE': np.float64(42.21270659344775),
 'R2': -19.14210766836442}

#### Prophet

In [12]:
# Turn the index into the first column, then rename that column to 'ds' and
# the target to 'y'; only those two columns are handed to run_prophet.
prophet_source = train.reset_index()
prophet_train = prophet_source.rename(
    columns={prophet_source.columns[0]: 'ds', 'target_price': 'y'}
)

# Forecast one step per test row.
horizon = len(y_test)
forecast_df, prophet_model = run_prophet(prophet_train[['ds', 'y']], horizon=horizon)

# Keep the trailing `horizon` values of 'yhat' as the test-set predictions
# (assumes forecast_df ends with the forecast rows — TODO confirm in run_prophet).
y_pred_prophet = forecast_df['yhat'].iloc[-horizon:].to_numpy()

# Compare plain arrays so the differing indexes play no role.
evaluate_model(y_test.to_numpy(), y_pred_prophet, model_name="Prophet")

16:40:00 - cmdstanpy - INFO - Chain [1] start processing
16:40:01 - cmdstanpy - INFO - Chain [1] done processing


{'MAE': 217.85355993800877,
 'RMSE': np.float64(423.1479065560931),
 'R2': -2022.9645808109374}