### Import, mount, and configure

In [None]:
import os
import sys
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn
import sklearn
import statsmodels
import tensorflow as tf
import tqdm
from google.colab import drive

In [None]:
# Attach Google Drive to the Colab filesystem; every path below lives under it.
drive.mount('/content/drive')

# Project directory tree on Drive: <MyDrive>/projects/btc/data/cleaned.
DRIVE_ROOT = Path('/content/drive/MyDrive')
PROJECT_ROOT = DRIVE_ROOT.joinpath('projects/btc')
PROJECT_DATA_DIR = PROJECT_ROOT.joinpath('data')
PROJECT_CLEANED_DIR = PROJECT_DATA_DIR.joinpath('cleaned')

# Cleaned BTC CSV: its bare filename and its full location on Drive.
BTC_CLEANED_CSV_FILENAME = 'btc_274_cleaned.csv'
BTC_CLEANED_FILEPATH = PROJECT_CLEANED_DIR.joinpath(BTC_CLEANED_CSV_FILENAME)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Quietly install (or upgrade) keras_tuner, but only when the google.colab
# module is loaded in this session.
if 'google.colab' in sys.modules:
    %pip install -q -U keras_tuner

# Imported after the install step so the freshly installed package is the one picked up.
import keras_tuner as kt

In [None]:
%pip install finta



In [None]:
# Register the project directory with the import system so that the
# `from src... import` statements in the following cell can be resolved.
project_root = os.path.abspath(PROJECT_ROOT)

# Append at most once, so re-running this cell does not pile up duplicates.
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

print(f'Project root "{project_root}" added to sys.path.')

Project root "/content/drive/MyDrive/projects/btc" added to sys.path.


In [None]:
from src.analysis import *
from src.data_loader import *
from src.data_splitter import *
from src.model import *
from src.preprocessing import *
from src.utils import *
from src.visualization import *

### Load the cleaned dataset, and resample it

In [None]:
# NOTE: one date (2025-03-15) is missing from the test portion of the data.
# Load the cleaned CSV. 'date' and True are forwarded to load_btc_dataset;
# presumably the datetime column name and a parse/index flag — TODO confirm.
X_btc_cleaned = load_btc_dataset(BTC_CLEANED_FILEPATH, 'date', True)
# Resample to hourly, daily, weekly, and monthly frequency. Later cells index
# the result by timeframe name ('daily', 'weekly').
btc_resampled = resample_btc_data(X_btc_cleaned)


Loading dataset from: btc_274_cleaned.csv...
Dataset loaded successfully.

--- Resampling BTC Data to Multiple Timeframes ---
Resampling to hourly frequency...
Resampling to daily frequency...
Resampling to weekly frequency...
Resampling to monthly frequency...
--- BTC Data Resampling Complete ---


### Take returns, then split the resampled datasets into training, validation, and test sets for each timeframe

In [None]:
# Add close-price returns to every resampled timeframe.
# Rebinding the loop variable alone never changes what `btc_resampled` holds,
# so the result is stored back under its key explicitly instead of relying on
# calculate_returns mutating its argument in place.
for timeframe, X_btc_resampled in btc_resampled.items():
    with_returns = calculate_returns(X_btc_resampled, 'close')
    # If calculate_returns works purely in place and returns nothing, keep the
    # (already updated) input frame rather than storing None.
    btc_resampled[timeframe] = X_btc_resampled if with_returns is None else with_returns

In [None]:
# Partition the daily bars into training, validation, and test sets.
(X_btc_daily_train,
 X_btc_daily_valid,
 X_btc_daily_test) = split_btc_dataset(btc_resampled['daily'], 'Day')

# Same partitioning for the weekly bars.
(X_btc_weekly_train,
 X_btc_weekly_valid,
 X_btc_weekly_test) = split_btc_dataset(btc_resampled['weekly'], 'Week')


--- Splitting BTC Dataset to Training, Validation, and Test Set (Day) ---
--- BTC Dataset Splitting Complete ---

--- Splitting BTC Dataset to Training, Validation, and Test Set (Week) ---
--- BTC Dataset Splitting Complete ---


### Run the ADF test on the daily and weekly training sets to verify stationarity (the ADF test does not assess seasonality)

In [None]:
# ADF test on the close-price returns of the daily training set ...
run_adf_test(
    X_btc_daily_train,
    'returns_close',
    'BTC Daily Closing Returns',
)
# ... and of the weekly training set.
run_adf_test(
    X_btc_weekly_train,
    'returns_close',
    'BTC Weekly Closing Returns',
)

= ADF Test: BTC Daily Closing Returns =
ADF Statistics: -10.1710
p-value: 0.0000
Critical Values:
	1%:-3.4320
	5%:-2.8623
	10%:-2.5672
Conclusion: The p-value is less than or equal to 0.05. The data is likely stationary and seasonal.

= ADF Test: BTC Weekly Closing Returns =
ADF Statistics: -11.0168
p-value: 0.0000
Critical Values:
	1%:-3.4421
	5%:-2.8667
	10%:-2.5695
Conclusion: The p-value is less than or equal to 0.05. The data is likely stationary and seasonal.



### Hyperparameters to fine-tune and techniques to apply
1.   Architectural Hyperparameters
  *   Number of hidden layers (Conv1D and RNN)
      * Deep networks have a much higher parameter efficiency than shallow ones.
  *   Number of neurons per RNN layer
      * It's generally more effective to add more layers than to increase the number of neurons to improve performance.
  *   Number of filters per Conv1D layer
  *   Kernel size (Conv1D)
  *   Stride (Conv1D)
  *   Padding (Conv1D)
  *   Type of RNN cell (LSTM/GRU)
  *   Activation function (Conv1D, RNN and Dense)
      * A poor choice of activation functions potentially creates unstable gradients.

2.   Training & Optimization Hyperparameters
  *   Type of optimizer (e.g., Adam, Nadam, etc.)
      * A faster optimizer can hugely boost training speed.
  *   Learning rate
      * The most important hyperparameter. The optimal learning rate is generally about half of the maximum learning rate (i.e., the learning rate above which the training algorithm diverges).
  *   Batch size
      * Batch size can have a significant impact on a model's performance and training time.

3.   Optimization & Regularization Techniques
  *   Kernel Initialization (e.g., Glorot/He)
  *   Normalization (Layer normalization)
      * Batch normalization cannot be used as efficiently with RNNs as with deep feedforward networks.
      * Layer normalization can prevent unstable gradients during training.
  *   Learning Rate Scheduling (e.g., ReduceLROnPlateau)
  *   Gradient Clipping
      * Gradient clipping mitigates exploding gradients by clipping the gradients during backpropagation.
  *   L1/L2 Regularization
  *   Dropout (Recurrent Dropout for RNN)
  *   Early Stopping (to prevent overfitting and save time)

### LSTM Model Evaluation and Final Forecast (1-Month)

In [None]:
# Seed Python's `random`, NumPy, and TensorFlow in a single call.
# tf.random.set_seed alone leaves the first two generators unseeded, so any
# shuffling or sampling that goes through them would differ between runs.
tf.keras.utils.set_random_seed(42)

In [None]:
# Settings for the daily (1-month) experiment.

# Window sizes handed to run_lstm_model below; presumably counted in daily
# rows — TODO confirm against the model code.
FORECAST_HORIZON = 30
INPUT_WINDOW = 270
TARGET_WINDOW = 30

# Columns used as model inputs, and the column being forecast.
TARGET_COL = 'close'
FEATURE_COLS = ['open', 'high', 'low', 'close', 'volume']

In [None]:
# Naive benchmark on the daily validation set.
# The target column and horizon come from the constants defined in the
# previous cell, so the benchmark cannot drift from the settings passed to
# the LSTM run that it is compared against.
naive_metrics, naive_pred = run_naive_model(
    X_btc_daily_valid, TARGET_COL, FORECAST_HORIZON, 'Days', 'Validation'
)

= Naive Model Evaluation on Validation Set (Horizon: 30 Days) =
--- Skipping Naive Model Fitting ---

--- Evaluating Naive Model ---
- Mean Absolute Percentage Error (MAPE): 10.5643%
- Directional Accuracy (DA): 54.8611%

--- Generating Final Naive Forecast ---
- Forecast for 2024-03-15: $51475.00


In [None]:
# Run the LSTM pipeline on the daily data with the 'direct' strategy.
# naive_metrics (from the validation-set naive run above) is passed along as
# the benchmark.
lstm_metrics, lstm_pred = run_lstm_model(
    X_btc_daily_train,
    X_btc_daily_valid,
    X_btc_daily_test,
    FEATURE_COLS,
    TARGET_COL,
    FORECAST_HORIZON,
    INPUT_WINDOW,
    TARGET_WINDOW,
    'Days',
    'Validation',
    naive_metrics,
    'direct',
)

Trial 15 Complete [00h 01m 12s]
val_mae: 0.3603381812572479

Best val_mae So Far: 0.36025407910346985
Total elapsed time: 05h 01m 02s

LSTM hyperparameter tuning complete:
- n_conv_layers: 2
- kernel_size: 5
- n_rnn_layers: 1
- use_l2: True
- dropout_rate: 0.0
- learning_rate: 0.005218086982564764
- optimizer: nadam
- clipnorm: 0.8
- filters_0: 192
- units_0: 192
- filters_1: 64
- units_1: 224
- filters_2: 96
- units_2: 32
- l2_rate: 0.0025124079357885637

--- Fitting LSTM Model ---
Epoch 1/100
113/113 - 5s - 46ms/step - loss: 0.7517 - mae: 0.6574 - val_loss: 0.1596 - val_mae: 0.3612 - learning_rate: 0.0052
Epoch 2/100
113/113 - 2s - 18ms/step - loss: 0.3390 - mae: 0.6298 - val_loss: 0.1333 - val_mae: 0.3607 - learning_rate: 0.0052
Epoch 3/100
113/113 - 2s - 18ms/step - loss: 0.3328 - mae: 0.6297 - val_loss: 0.1339 - val_mae: 0.3622 - learning_rate: 0.0052
Epoch 4/100
113/113 - 2s - 18ms/step - loss: 0.3321 - mae: 0.6285 - val_loss: 0.1348 - val_mae: 0.3644 - learning_rate: 0.0052
Epoc



--- Fitting LSTM Model ---
--- Preparing Data for LSTM ---
Epoch 1/100
128/128 - 6s - 44ms/step - loss: 0.7209 - mae: 0.6584 - val_loss: 0.1951 - val_mae: 0.4540 - learning_rate: 0.0052
Epoch 2/100
128/128 - 2s - 17ms/step - loss: 0.3379 - mae: 0.6318 - val_loss: 0.1848 - val_mae: 0.4667 - learning_rate: 0.0052
Epoch 3/100
128/128 - 2s - 17ms/step - loss: 0.3332 - mae: 0.6289 - val_loss: 0.1790 - val_mae: 0.4553 - learning_rate: 0.0052
Epoch 4/100
128/128 - 2s - 17ms/step - loss: 0.3337 - mae: 0.6307 - val_loss: 0.1777 - val_mae: 0.4531 - learning_rate: 0.0052
Epoch 5/100
128/128 - 2s - 18ms/step - loss: 0.3336 - mae: 0.6297 - val_loss: 0.1775 - val_mae: 0.4517 - learning_rate: 0.0052
Epoch 6/100
128/128 - 2s - 18ms/step - loss: 0.3335 - mae: 0.6298 - val_loss: 0.1782 - val_mae: 0.4523 - learning_rate: 0.0052
Epoch 7/100
128/128 - 2s - 18ms/step - loss: 0.3328 - mae: 0.6282 - val_loss: 0.1782 - val_mae: 0.4530 - learning_rate: 0.0052
Epoch 8/100
128/128 - 2s - 17ms/step - loss: 0.3339 



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 142ms/step
- Forecast for 2025-07-22: $102997.64


In [None]:
# Naive benchmark on the daily test set, using the same target column and
# horizon constants as the daily LSTM run instead of repeating the literals.
# Note: this rebinds naive_metrics / naive_pred, replacing the validation-set
# values computed earlier.
naive_metrics, naive_pred = run_naive_model(
    X_btc_daily_test, TARGET_COL, FORECAST_HORIZON, 'Days', 'Test'
)

= Naive Model Evaluation on Test Set (Horizon: 30 Days) =
--- Skipping Naive Model Fitting ---

--- Evaluating Naive Model ---
- Mean Absolute Percentage Error (MAPE): 9.8882%
- Directional Accuracy (DA): 42.0323%

--- Generating Final Naive Forecast ---
- Forecast for 2025-07-22: $102958.00


### LSTM Model Evaluation and Final Forecast (1-Year)

In [None]:
# Re-seed before the weekly experiment. set_random_seed covers Python's
# `random`, NumPy, and TensorFlow, whereas tf.random.set_seed only seeds
# TensorFlow's own generator.
tf.keras.utils.set_random_seed(42)

In [None]:
# Naive benchmark on the weekly validation set over a 52-week horizon.
# The horizon and target column are literals here because the weekly
# constants are only assigned in the next cell. The resulting naive_metrics
# is later passed to the weekly run_lstm_model call.
naive_metrics, naive_pred = run_naive_model(X_btc_weekly_valid, 'close', 52, 'Weeks', 'Validation')

= Naive Model Evaluation on Validation Set (Horizon: 52 Weeks) =
--- Skipping Naive Model Fitting ---

--- Evaluating Naive Model ---
- Mean Absolute Percentage Error (MAPE): 50.6933%
- Directional Accuracy (DA): 50.0000%

--- Generating Final Naive Forecast ---
- Forecast for 2025-02-09: $48295.00


In [None]:
# Settings for the weekly (1-year) experiment; these rebind the names used
# by the daily experiment.

# Window sizes handed to run_lstm_model below; presumably counted in weekly
# rows — TODO confirm against the model code.
FORECAST_HORIZON = 52
INPUT_WINDOW = 52
TARGET_WINDOW = 1

# Columns used as model inputs, and the column being forecast.
TARGET_COL = 'close'
FEATURE_COLS = ['open', 'high', 'low', 'close', 'volume']

In [None]:
# Run the LSTM pipeline on the weekly data with the 'iterative' strategy.
# naive_metrics (from the weekly validation-set naive run above) is passed
# along as the benchmark.
lstm_metrics, lstm_pred = run_lstm_model(
    X_btc_weekly_train,
    X_btc_weekly_valid,
    X_btc_weekly_test,
    FEATURE_COLS,
    TARGET_COL,
    FORECAST_HORIZON,
    INPUT_WINDOW,
    TARGET_WINDOW,
    'Weeks',
    'Validation',
    naive_metrics,
    'iterative',
)

Trial 15 Complete [00h 00m 11s]
val_mae: 0.3718857765197754

Best val_mae So Far: 0.3718857765197754
Total elapsed time: 00h 15m 33s

LSTM hyperparameter tuning complete:
- n_conv_layers: 2
- kernel_size: 5
- n_rnn_layers: 3
- use_l2: True
- dropout_rate: 0.30000000000000004
- learning_rate: 0.005117045969850107
- optimizer: adam
- clipnorm: 1.1
- filters_0: 96
- units_0: 96
- filters_1: 192
- units_1: 160
- filters_2: 224
- units_2: 32
- l2_rate: 0.005343634220268847

--- Fitting LSTM Model ---
Epoch 1/100
15/15 - 15s - 977ms/step - loss: 5.4017 - mae: 1.0635 - val_loss: 3.1071 - val_mae: 0.4329 - learning_rate: 0.0051
Epoch 2/100
15/15 - 5s - 308ms/step - loss: 2.4607 - mae: 0.8343 - val_loss: 1.3747 - val_mae: 0.4007 - learning_rate: 0.0051
Epoch 3/100
15/15 - 4s - 300ms/step - loss: 1.3164 - mae: 0.7680 - val_loss: 0.7444 - val_mae: 0.3948 - learning_rate: 0.0051
Epoch 4/100
15/15 - 5s - 306ms/step - loss: 0.8550 - mae: 0.7330 - val_loss: 0.4414 - val_mae: 0.3762 - learning_rate: 0

Walk-Forward Validation: 100%|██████████| 17/17 [01:43<00:00,  6.09s/it]


- Mean Absolute Percentage Error (MAPE): 49.8245%
- Directional Accuracy (DA): 62.5000%

--- Selecting Best LSTM Model ---
- Naive Model Benchmark: MAPE: 50.6933%, DA: 50.0000%

Found 1 candidate model(s) that beat the naive model:
- Model: hyperparameters, MAPE: 49.8245%, DA: 62.5000%

--- Best LSTM Model Chosen ---
- MAPE: 49.8245%
- DA: 62.5000%

Fitting, evaluating, and predicting LSTM model with the best hyperparameters...





--- Fitting LSTM Model ---
--- Preparing Data for LSTM ---
Epoch 1/100
18/18 - 16s - 891ms/step - loss: 5.0671 - mae: 1.0087 - val_loss: 2.5966 - val_mae: 0.4159 - learning_rate: 0.0051
Epoch 2/100
18/18 - 5s - 295ms/step - loss: 1.9669 - mae: 0.7459 - val_loss: 1.0324 - val_mae: 0.3915 - learning_rate: 0.0051
Epoch 3/100
18/18 - 5s - 291ms/step - loss: 1.0041 - mae: 0.7136 - val_loss: 0.5220 - val_mae: 0.3578 - learning_rate: 0.0051
Epoch 4/100
18/18 - 5s - 299ms/step - loss: 0.6765 - mae: 0.7055 - val_loss: 0.3227 - val_mae: 0.3578 - learning_rate: 0.0051
Epoch 5/100
18/18 - 5s - 293ms/step - loss: 0.5370 - mae: 0.7125 - val_loss: 0.2338 - val_mae: 0.3672 - learning_rate: 0.0051
Epoch 6/100
18/18 - 5s - 300ms/step - loss: 0.4551 - mae: 0.6900 - val_loss: 0.1878 - val_mae: 0.3637 - learning_rate: 0.0051
Epoch 7/100
18/18 - 5s - 296ms/step - loss: 0.4383 - mae: 0.7159 - val_loss: 0.1575 - val_mae: 0.3578 - learning_rate: 0.0051
Epoch 8/100
18/18 - 5s - 297ms/step - loss: 0.3795 - mae: 

Walk-Forward Validation: 100%|██████████| 18/18 [01:53<00:00,  6.32s/it]



- Mean Absolute Percentage Error (MAPE): 29.1709%
- Directional Accuracy (DA): 52.9412%

--- Generating Final LSTM Forecast ---
- Forecast for 2025-08-13: $104530.32


In [None]:
# Naive benchmark on the weekly test set, using the weekly target column and
# horizon constants (the same ones given to the weekly LSTM run) instead of
# repeating the literals.
# Note: this rebinds naive_metrics / naive_pred, replacing the validation-set values.
naive_metrics, naive_pred = run_naive_model(
    X_btc_weekly_test, TARGET_COL, FORECAST_HORIZON, 'Weeks', 'Test'
)

= Naive Model Evaluation on Test Set (Horizon: 52 Weeks) =
--- Skipping Naive Model Fitting ---

--- Evaluating Naive Model ---
- Mean Absolute Percentage Error (MAPE): 30.4386%
- Directional Accuracy (DA): 50.0000%

--- Generating Final Naive Forecast ---
- Forecast for 2026-06-21: $102958.00
