In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import pandas_ta as ta

# sklearn imports
from sklearn.model_selection import (train_test_split, RandomizedSearchCV, TimeSeriesSplit)
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.pipeline import Pipeline

# metrics
from sklearn.metrics import RocCurveDisplay, ConfusionMatrixDisplay
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score, roc_curve, auc, precision_score, recall_score, f1_score

# import base models
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# import meta model
from xgboost import XGBClassifier

from boruta import BorutaPy
import quantstats as qs

In [17]:
def load_and_prepare_data(data):
    """
    Loads 1m OHLCV data from a CSV file and initializes additional columns for analysis.

    Parameters
    ----------
    data : str, path object or file-like object
        Anything accepted by ``pd.read_csv``. The first column is used as the
        index and is converted to a ``DatetimeIndex``. The file must provide
        'high', 'low', 'close' and 'volume' columns.

    Returns
    -------
    pd.DataFrame
        The loaded frame with two extra columns:
        'avg_price'      - typical price, (high + low + close) / 3
        'avg_price_volm' - avg_price * volume

    """
    df = pd.read_csv(data, index_col=0)

    # read_csv leaves the index as plain strings unless asked to parse dates;
    # time-based operations such as resample() need a real DatetimeIndex.
    if not isinstance(df.index, pd.DatetimeIndex):
        df.index = pd.to_datetime(df.index)

    df['avg_price'] = (df['high'] + df['low'] + df['close']) / 3
    df['avg_price_volm'] = df['avg_price'] * df['volume']

    return df

In [18]:
def momentum(df, periods=(5, 10, 15, 30, 45)):
    """
    Calculates momentum for price and volume, and their respective changes, over specified periods.

    For every ``period`` the following columns are added to ``df`` in place:
        pm{period}        - percentage change of 'close' over ``period`` rows
        delta_pm{period}  - one-row change of pm{period}
        lv{period}        - change of log(volume + 1) over ``period`` rows
        delta_lv{period}  - one-row change of lv{period}

    Parameters
    ----------
    df : pd.DataFrame
        Frame with 'close' and 'volume' columns. It is modified in place.
    periods : iterable of int, optional
        Look-back lengths, in rows. Defaults to (5, 10, 15, 30, 45).

    Returns
    -------
    pd.DataFrame
        The same ``df`` object, so the call can be used either for its side
        effect or as ``df = momentum(df)``.

    """
    # Loop-invariant, so computed once. log1p(x) == log(x + 1) but is more
    # accurate for small volumes, and is defined at zero volume.
    log_volume = np.log1p(df['volume'])

    for period in periods:

        # price momentum
        df[f'pm{period}'] = df['close'].pct_change(periods=period)
        df[f'delta_pm{period}'] = df[f'pm{period}'].diff(1)

        # log-volume momentum
        df[f'lv{period}'] = log_volume.diff(periods=period)
        df[f'delta_lv{period}'] = df[f'lv{period}'].diff(1)

    return df

In [20]:
def resample_data(df):
    """
    Resamples the dataframe from 1m intervals to 1hour intervals, applying aggregation functions.

    Parameters
    ----------
    df : pd.DataFrame
        Frame indexed by a ``DatetimeIndex`` with 'open', 'high', 'low',
        'close', 'volume' and 'avg_price_volm' (price * volume) columns.

    Returns
    -------
    pd.DataFrame
        One row per hour with 'open' (first), 'high' (max), 'low' (min),
        'close' (last) and 'VWAP_1h', the volume-weighted average price
        sum(price * volume) / sum(volume) of the rows in that hour.
        'VWAP_1h' is NaN for hours with no rows or zero total volume.

    """
    ohlc_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last'}
    df_1h = df.resample('1h').agg(ohlc_dict)

    # VWAP = sum(P * V) / sum(V). The previous implementation divided
    # sum(P * P * V) by sum(P * V), which weights each price by its traded
    # notional instead of its volume and is therefore not a VWAP.
    # Plain resampled sums are also vectorised, unlike a per-group lambda.
    hourly_price_volume = df['avg_price_volm'].resample('1h').sum()
    hourly_volume = df['volume'].resample('1h').sum()
    df_1h['VWAP_1h'] = hourly_price_volume / hourly_volume

    return df_1h