

## 1. Install necessary packages



In [None]:
!pip install -U -q PyDrive
!pip install keras-tuner


Collecting keras-tuner
  Downloading keras_tuner-1.3.5-py3-none-any.whl (176 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.1/176.1 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.3.5 kt-legacy-1.0.5


## 2. Import necessary packages

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras import optimizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Conv2D, MaxPooling2D, SimpleRNN
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Flatten
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import os
import time
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from keras.utils import to_categorical
from keras_tuner import RandomSearch
import keras

## 3. If using Colab, run these cells to import the datasets from Google Drive

In [None]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Colab-only: authenticate the current user, then build the PyDrive client
# (`drive`) that the following cells use to fetch the dataset files.
auth.authenticate_user()
gauth = GoogleAuth()
# Hand PyDrive the application-default credentials
# (presumably those established by the authenticate_user() call above — confirm).
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)


In [None]:
# Google Drive share links for the three CSV files; each file ID used below is the
# path segment that follows /d/ in the matching link.
# On-chain + technical indicators (your_file_all):
#https://drive.google.com/file/d/1CjJyTJbyIs3rvtQh1cG0RyoKX_1inXb2/view?usp=drive_link
# Technical indicators only (your_file_ta):
#https://drive.google.com/file/d/1HObEZNhDxOCCFcVnRQzW9y3w4nd_yiDx/view?usp=drive_link
# On-chain only (your_file_oc):
#https://drive.google.com/file/d/156TEVK1L99FXA7FFfMWle-Y4nvxhfOot/view?usp=drive_link
your_file_all = drive.CreateFile({'id':'1CjJyTJbyIs3rvtQh1cG0RyoKX_1inXb2'})
your_file_ta = drive.CreateFile({'id':'1HObEZNhDxOCCFcVnRQzW9y3w4nd_yiDx'})
your_file_oc = drive.CreateFile({'id':'156TEVK1L99FXA7FFfMWle-Y4nvxhfOot'})

In [None]:
# Save each Drive file into the working directory under its dataset file name.
your_file_all.GetContentFile('bitcoin-all-on-chain-and-technical-indicators.csv')
your_file_ta.GetContentFile('bitcoin-all-technical-indicators.csv')
your_file_oc.GetContentFile('bitcoin-all-on-chain.csv')

# Read every CSV and convert its 'timestamp' column to datetime in the same step.
ta_df = (
    pd.read_csv('bitcoin-all-technical-indicators.csv')
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
)
oc_df = (
    pd.read_csv('bitcoin-all-on-chain.csv')
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
)
oc_ta_df = (
    pd.read_csv('bitcoin-all-on-chain-and-technical-indicators.csv')
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
)

# Price-only view: just the timestamp and price columns of the technical-indicator data.
price_only_df = ta_df.loc[:, ['timestamp', 'price']]

## 4. If running locally, run this cell to import the datasets

In [None]:
# Read every CSV from the working directory and convert its 'timestamp' column
# to datetime in the same step.
ta_df = (
    pd.read_csv('bitcoin-all-technical-indicators.csv')
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
)
oc_df = (
    pd.read_csv('bitcoin-all-on-chain.csv')
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
)
oc_ta_df = (
    pd.read_csv('bitcoin-all-on-chain-and-technical-indicators.csv')
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
)

# Price-only view: just the timestamp and price columns of the technical-indicator data.
price_only_df = ta_df.loc[:, ['timestamp', 'price']]

## 5. Function to plot two or more time series

In [None]:
def plot_values(timestamp, *args):
    """Plot any number of named series against one shared time axis.

    Args:
        timestamp: x-values handed unchanged to ``Axes.plot``; the x-axis is
            formatted as ``YYYY-MM-DD`` dates with a tick every third month.
        *args: Alternating ``name, values`` pairs, one pair per plotted line.

    Raises:
        ValueError: If ``args`` has odd length, i.e. a name or its values is missing.

    Side effects:
        Switches the global matplotlib style to ``'ggplot'`` and shows the figure.
    """
    import matplotlib

    if len(args) % 2:
        raise ValueError("Every feature should have a corresponding name")

    # Global style switch, then one 12x6 figure for all series.
    plt.style.use('ggplot')
    _, ax = plt.subplots(figsize=(12,6))

    # Each series gets the 'tab10' colour at fraction (position / number of series).
    palette = matplotlib.colormaps['tab10']
    series_count = len(args) // 2
    named_series = zip(args[0::2], args[1::2])
    for position, (feature_name, feature_values) in enumerate(named_series):
        line_color = palette(position / series_count)
        ax.plot(timestamp, feature_values, label=feature_name, color=line_color, linewidth=1)

    # Axis titles; the y label and its ticks are both placed on the right-hand side.
    ax.set_xlabel('Date')
    ax.yaxis.set_label_position("right")
    ax.set_ylabel('Feature Values')
    ax.yaxis.tick_right()
    ax.set_title('Features over Time')

    # Date ticks: year-month-day text, one label every third month, rotated 45 degrees.
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
    plt.xticks(rotation=45)

    # Finishing touches, then render.
    ax.grid(True)
    ax.legend()
    plt.show()

## 6. Function to scale the data and split it for training and testing

In [None]:
def _make_windows(values, lookback, future):
    """Slice a scaled 2-D array into sliding-window inputs and single-step targets.

    Every sample uses ``lookback`` consecutive rows as input. Its target is column 0
    of the row that lies ``future`` rows after the last row of that window.
    """
    values = np.asarray(values)
    windows, targets = [], []
    for end in range(lookback, len(values) - future + 1):
        windows.append(values[end - lookback:end, :])
        targets.append(values[end + future - 1:end + future, 0])

    if not windows:
        # Too few rows for even one window: keep the (samples, lookback, features)
        # rank so callers that read X.shape[1] / X.shape[2] do not hit an IndexError.
        return np.empty((0, lookback, values.shape[1])), np.empty((0, 1))
    return np.array(windows), np.array(targets)


def data_preperation(df, lookback, future, scale, test_size=0.1):
    """Split, scale and window a feature table for sequence-model training.

    Args:
        df: DataFrame whose first non-timestamp column is the prediction target
            (targets are read from column 0 after the optional 'timestamp' column
            is dropped). All remaining columns must be convertible to float.
        lookback: Number of consecutive rows in each input window.
        future: How many rows after the end of a window the target row lies.
        scale: Scaler object exposing ``fit_transform`` and ``transform``. It is
            fitted on the training split only and then reused for the test split.
        test_size: Passed to ``train_test_split``; the split is chronological
            (``shuffle=False``), so the test split is the tail of ``df``.

    Returns:
        ``(X_train, X_test, y_train, y_test, date_train)`` where the X arrays have
        shape ``(samples, lookback, features)``, the y arrays have shape
        ``(samples, 1)``, and ``date_train`` holds the parsed timestamps of the
        WHOLE input frame (not only the training rows), or ``[0]`` when ``df`` has
        no 'timestamp' column.
    """
    # Pull the 'timestamp' column out (if present) so only numeric features remain.
    # An explicit column check replaces the former bare `except:` so that genuine
    # date-parsing errors surface instead of being silently swallowed.
    if 'timestamp' in df.columns:
        date_train = pd.to_datetime(df['timestamp'])
        df = df.drop(columns=['timestamp'])
    else:
        date_train = [0]

    # Ensure all data is float type
    df = df.astype(float)

    # Split before scaling so the scaler never sees test-period statistics.
    df_train, df_test = train_test_split(df, test_size=test_size, shuffle=False)

    df_train_scaled = scale.fit_transform(df_train)
    df_test_scaled = scale.transform(df_test)  # use the scaler fitted on the training data

    # Build the windows separately per split; test windows use test rows only.
    X_train, y_train = _make_windows(df_train_scaled, lookback, future)
    X_test, y_test = _make_windows(df_test_scaled, lookback, future)

    return X_train, X_test, y_train, y_test, date_train

## 7. Function that defines the RNN

In [None]:
def build_rnn_model(hp, input_shape):
    """Assemble and compile the tunable two-layer SimpleRNN regressor.

    Args:
        hp: Keras Tuner hyperparameter container; queried for ``units_1``,
            ``dropout_1``, ``units_2``, ``units_3`` and ``learning_rate``.
        input_shape: Shape of one input sample, forwarded to the first layer.

    Returns:
        A compiled ``Sequential`` model named ``"rnn"`` that ends in a single-unit
        ``Dense`` layer and uses mean-squared-error loss with Adam.
    """
    layer_stack = [
        # First recurrent layer returns the whole sequence so the next one can consume it.
        keras.layers.SimpleRNN(
            units=hp.Int('units_1', min_value=32, max_value=512, step=32),
            return_sequences=True,
            input_shape=input_shape,
        ),
        keras.layers.Dropout(hp.Float('dropout_1', min_value=0.1, max_value=0.5, step=0.1)),
        # Second recurrent layer keeps only its final output.
        keras.layers.SimpleRNN(
            units=hp.Int('units_2', min_value=32, max_value=256, step=32),
            return_sequences=False,
        ),
        keras.layers.Dense(hp.Int('units_3', min_value=10, max_value=100, step=10)),
        keras.layers.Dense(1),
    ]
    model = keras.models.Sequential(layer_stack, name="rnn")

    # Learning rate is sampled on a log scale between 1e-4 and 1e-2.
    tuned_rate = hp.Float('learning_rate', min_value=0.0001, max_value=0.01, sampling='LOG')
    model.compile(loss='mean_squared_error', optimizer=optimizers.Adam(learning_rate=tuned_rate))
    return model

## 8. Function that defines the Bidirectional RNN

In [None]:
def build_birnn_model(hp, input_shape):
    """Assemble and compile the tunable two-layer bidirectional SimpleRNN regressor.

    Args:
        hp: Keras Tuner hyperparameter container; queried for ``units_1``,
            ``dropout_1``, ``units_2``, ``units_3`` and ``learning_rate``.
        input_shape: Shape of one input sample, forwarded to the first
            ``Bidirectional`` wrapper.

    Returns:
        A compiled ``Sequential`` model named ``"birnn"`` that ends in a single-unit
        ``Dense`` layer and uses mean-squared-error loss with Adam.
    """
    layer_stack = [
        # First wrapped RNN returns the whole sequence so the next one can consume it.
        keras.layers.Bidirectional(
            keras.layers.SimpleRNN(
                units=hp.Int('units_1', min_value=32, max_value=512, step=32),
                return_sequences=True,
            ),
            input_shape=input_shape,
        ),
        keras.layers.Dropout(hp.Float('dropout_1', min_value=0.1, max_value=0.5, step=0.1)),
        # Second wrapped RNN keeps only its final output.
        keras.layers.Bidirectional(
            keras.layers.SimpleRNN(
                units=hp.Int('units_2', min_value=32, max_value=256, step=32),
                return_sequences=False,
            )
        ),
        keras.layers.Dense(hp.Int('units_3', min_value=10, max_value=100, step=10)),
        keras.layers.Dense(1),
    ]
    model = keras.models.Sequential(layer_stack, name="birnn")

    # Learning rate is sampled on a log scale between 1e-4 and 1e-2.
    tuned_rate = hp.Float('learning_rate', min_value=0.0001, max_value=0.01, sampling='LOG')
    model.compile(loss='mean_squared_error', optimizer=optimizers.Adam(learning_rate=tuned_rate))
    return model

## 9. Function that defines the LSTM

In [None]:
def build_lstm_model(hp, input_shape):
    """Build and compile the tunable two-layer LSTM regressor.

    Args:
        hp: Keras Tuner hyperparameter container; queried for ``units_1``
            (224-512), ``units_2`` (32-112) and ``units_3`` (10-100).
        input_shape: Shape of one input sample, forwarded to the first LSTM layer.

    Returns:
        A compiled ``Sequential`` model named ``"lstm"`` that ends in a single-unit
        ``Dense`` layer and uses mean-squared-error loss with Adam.

    Note:
        Unlike the RNN builders in this notebook, the dropout rate (0.2) and the
        learning rate (0.01) are fixed here rather than tuned.
    """
    model = keras.models.Sequential(name="lstm")

    # First LSTM returns the full sequence so the second LSTM can consume it.
    model.add(keras.layers.LSTM(units=hp.Int('units_1', min_value=224, max_value=512, step=32), return_sequences=True, input_shape=input_shape))
    # Fix: this call was missing its closing parenthesis, which made the whole
    # cell a SyntaxError and left build_lstm_model undefined.
    model.add(keras.layers.Dropout(0.2))
    # Second LSTM keeps only its final output.
    model.add(keras.layers.LSTM(units=hp.Int('units_2', min_value=32, max_value=112, step=32), return_sequences=False))
    model.add(keras.layers.Dense(hp.Int('units_3', min_value=10, max_value=100, step=10)))
    model.add(keras.layers.Dense(1))

    adam = optimizers.Adam(learning_rate=0.01)
    model.compile(loss='mean_squared_error', optimizer=adam)

    return model

## 10. Function that defines the Bidirectional LSTM

In [None]:
def build_bilstm_model(hp, input_shape):
    """Assemble and compile the tunable two-layer bidirectional LSTM regressor.

    Args:
        hp: Keras Tuner hyperparameter container; queried for ``units_1``,
            ``units_2``, ``units_3`` and ``learning_rate``.
        input_shape: Shape of one input sample, forwarded to the first
            ``Bidirectional`` wrapper.

    Returns:
        A compiled ``Sequential`` model named ``"bilstm"`` that ends in a single-unit
        ``Dense`` layer and uses mean-squared-error loss with Adam. The dropout
        rate is fixed at 0.2.
    """
    layer_stack = [
        # First wrapped LSTM returns the whole sequence so the next one can consume it.
        keras.layers.Bidirectional(
            keras.layers.LSTM(
                units=hp.Int('units_1', min_value=224, max_value=512, step=32),
                return_sequences=True,
            ),
            input_shape=input_shape,
        ),
        keras.layers.Dropout(0.2),
        # Second wrapped LSTM keeps only its final output.
        keras.layers.Bidirectional(
            keras.layers.LSTM(
                units=hp.Int('units_2', min_value=32, max_value=112, step=32),
                return_sequences=False,
            )
        ),
        keras.layers.Dense(hp.Int('units_3', min_value=10, max_value=100, step=10)),
        keras.layers.Dense(1),
    ]
    model = keras.models.Sequential(layer_stack, name="bilstm")

    # Learning rate is sampled on a log scale between 1e-4 and 1e-2.
    tuned_rate = hp.Float('learning_rate', min_value=0.0001, max_value=0.01, sampling='LOG')
    model.compile(loss='mean_squared_error', optimizer=optimizers.Adam(learning_rate=tuned_rate))
    return model

## 11. Function for running the hyperparameter search





In [None]:
def run_hyperparameter_search(build_model_function, X_train, y_train, *,
                              max_trials=15, executions_per_trial=3,
                              epochs=15, batch_size=32, validation_split=0.1,
                              directory='my_dir', project_name='helloworld',
                              overwrite=True):
    """Run a Keras Tuner random search and return the tuner.

    Prints the search-space summary before the search and the results summary
    after it.

    Args:
        build_model_function: Callable taking a hyperparameter container ``hp`` and
            returning a compiled model.
        X_train: Training inputs passed to ``tuner.search``.
        y_train: Training targets passed to ``tuner.search``.
        max_trials: Number of hyperparameter combinations to try.
        executions_per_trial: Number of models trained per combination.
        epochs: Training epochs per execution.
        batch_size: Batch size per execution.
        validation_split: Fraction of the training data held out to compute the
            ``val_loss`` objective.
        directory: Root directory for the tuner's files.
        project_name: Sub-directory under ``directory`` for this search.
        overwrite: When True (default) any existing project with the same
            ``directory``/``project_name`` is discarded, so searches for different
            models or datasets do not resume each other's saved state. Pass False
            (ideally with a distinct ``project_name``) to resume an earlier search.

    Returns:
        The ``RandomSearch`` tuner after the search, so callers can retrieve the
        best hyperparameters or models. Previously nothing was returned.
    """
    tuner = RandomSearch(
        build_model_function,
        objective='val_loss',
        max_trials=max_trials,
        executions_per_trial=executions_per_trial,
        directory=directory,
        project_name=project_name,
        overwrite=overwrite)

    tuner.search_space_summary()

    tuner.search(X_train, y_train, epochs=epochs, batch_size=batch_size,
                 validation_split=validation_split)

    tuner.results_summary()

    return tuner

## 12. Function for running the hyperparameter search on each dataset and model


In [None]:
def run_hyperparameter_search_on_datasets_and_models(datasets, models):
    """Run a hyperparameter search for every (dataset, model builder) combination.

    Args:
        datasets: Mapping of dataset name to a 4-tuple
            ``(X_train, X_test, y_train, y_test)``; only the training pair is used.
        models: Mapping of model name to a builder called as
            ``builder(hp, input_shape)``.
    """
    for dataset_name, splits in datasets.items():
        X_train, _, y_train, _ = splits
        # Per-sample shape handed to each builder: axes 1 and 2 of X_train
        # (assumed to be timesteps and features of a 3-D array).
        input_shape = (X_train.shape[1], X_train.shape[2])

        for model_name, build_model_function in models.items():
            print(f"Running hyperparameter search for {model_name} model on {dataset_name} dataset...")

            def build_for_search(hp):
                # The search passes only `hp`; the sample shape is bound here.
                return build_model_function(hp, input_shape)

            run_hyperparameter_search(build_for_search, X_train, y_train)


In [None]:
# Prepare both feature sets with the same windowing: 60 past rows predict the next row.
# Each dataset gets its own scaler because data_preperation fits the scaler it is given.
scale_ta = MinMaxScaler(feature_range=(0,1))
X_train_ta, X_test_ta, y_train_ta, y_test_ta, date_train = data_preperation(
    df=ta_df, lookback=60, future=1, scale=scale_ta)

# NOTE: date_train is reassigned here, so afterwards it holds the oc_ta_df timestamps.
scale_oc_ta = MinMaxScaler(feature_range=(0,1))
X_train_oc_ta, X_test_oc_ta, y_train_oc_ta, y_test_oc_ta, date_train = data_preperation(
    df=oc_ta_df, lookback=60, future=1, scale=scale_oc_ta)

# Model builders to tune; uncomment an entry to include it in the search.
models = {
    #'rnn': build_rnn_model,
    #'birnn': build_birnn_model,
    #'lstm': build_lstm_model,
    'bilstm': build_bilstm_model
}

# Datasets to tune on; uncomment an entry to include it in the search.
datasets = {
    'ta_df': (X_train_ta, X_test_ta, y_train_ta, y_test_ta),
    #'oc_ta_df': (X_train_oc_ta, X_test_oc_ta, y_train_oc_ta, y_test_oc_ta)
}

# Launch the search for every enabled dataset/model combination.
run_hyperparameter_search_on_datasets_and_models(datasets, models)

Trial 15 Complete [00h 01m 57s]
val_loss: 0.001811000828941663

Best val_loss So Far: 0.0011915800860151649
Total elapsed time: 00h 30m 04s
Results summary
Results in my_dir/helloworld
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 05 summary
Hyperparameters:
units_1: 480
units_2: 96
units_3: 10
learning_rate: 0.0012625225902271942
Score: 0.0011915800860151649

Trial 00 summary
Hyperparameters:
units_1: 288
units_2: 64
units_3: 20
learning_rate: 0.0017825900024172885
Score: 0.001268486026674509

Trial 06 summary
Hyperparameters:
units_1: 480
units_2: 32
units_3: 40
learning_rate: 0.0035072152287283025
Score: 0.0013616127350057166

Trial 04 summary
Hyperparameters:
units_1: 224
units_2: 64
units_3: 100
learning_rate: 0.004430408635132608
Score: 0.0014001492333287995

Trial 07 summary
Hyperparameters:
units_1: 384
units_2: 96
units_3: 90
learning_rate: 0.00012763229197396264
Score: 0.0015230241309230526

Trial 12 summary
Hyperparameters:
units_1: 384
units_2: 6