<a href="https://colab.research.google.com/github/availe/Spystat-Engine-Colab/blob/main/StatsQuant.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Project setup

!pip install itables > /dev/null 2>&1
!pip install feature-engine > /dev/null 2>&1

from pathlib import Path
import pandas as pd
from itables import init_notebook_mode, show
init_notebook_mode(all_interactive=True)

repo_dir = "/content/Spystat-Engine-Colab"

if Path(repo_dir).exists():
    !git -C Spystat-Engine-Colab pull
else:
    !git clone https://github.com/availe/Spystat-Engine-Colab.git

data_path = Path('/content/Spystat-Engine-Colab/data/')

Cloning into 'Spystat-Engine-Colab'...
remote: Enumerating objects: 109, done.[K
remote: Counting objects: 100% (109/109), done.[K
remote: Compressing objects: 100% (76/76), done.[K
remote: Total 109 (delta 43), reused 86 (delta 24), pack-reused 0 (from 0)[K
Receiving objects: 100% (109/109), 3.00 MiB | 8.72 MiB/s, done.
Resolving deltas: 100% (43/43), done.


In [2]:
#@title Merge daily datasets and generate financial features

from feature_engine.creation import CyclicalFeatures

# Read each daily CSV, namespace its columns by instrument, then inner-join
# all of them on the shared 'date' column.
file_names = [
    'dxy_nyb_us_dollar_index.csv',
    'fvy_t_yield_5_years.csv',
    'irx_13_week_t_bill.csv',
    'spy.csv',
    'tnx_interest_rate_10_year.csv',
    'vix.csv',
]

dfs = []
for csv_name in file_names:
    frame = pd.read_csv(data_path / csv_name)
    # The instrument tag is the file name without its '.csv' suffix.
    tag = csv_name[:-4].lower()
    renamed = []
    for raw_col in frame.columns.str.lower():
        # 'date' is the join key and already-tagged columns are left alone;
        # everything else gets the instrument tag prepended.
        if raw_col == 'date' or raw_col.startswith(tag):
            renamed.append(raw_col)
        else:
            renamed.append(f'{tag}_{raw_col}')
    frame.columns = renamed
    dfs.append(frame)

# Fold the frames together left to right; the inner join keeps only dates
# present in every dataset.
df = dfs[0]
for other in dfs[1:]:
    df = df.merge(other, on='date', how='inner')



# Calculate technical indicators
def calculate_metrics(df, instrument, window):
    """Add rolling-window indicator columns for one instrument to ``df`` in place.

    Columns written (names are built from ``instrument`` and ``window``):
      * ``{instrument}_{window}_ma_close``: rolling mean of ``{instrument}_close``.
      * ``{instrument}_oscillator_{window}``: percentage gap between the close
        and that rolling mean.
      * ``{instrument}_volume_z_{window}``: volume minus its rolling mean,
        divided by its rolling standard deviation. Only written when
        ``{instrument}_volume`` is a column of ``df``.

    Args:
        df: DataFrame containing at least ``{instrument}_close``; mutated in place.
        instrument: Column-name prefix of the instrument (e.g. ``'spy'``).
        window: Rolling window size passed to ``Series.rolling``.

    Returns:
        None. The results are stored as new columns on ``df``.
    """
    close = df[f'{instrument}_close']

    # Moving average of the close
    ma_name = f'{instrument}_{window}_ma_close'
    df[ma_name] = close.rolling(window=window).mean()
    moving_avg = df[ma_name]

    # Oscillator: distance of the close from its moving average, in percent
    df[f'{instrument}_oscillator_{window}'] = (close - moving_avg) / moving_avg * 100

    # Volume z-score is only defined for instruments that carry a volume column
    vol_name = f'{instrument}_volume'
    if vol_name not in df.columns:
        return

    volume = df[vol_name]
    rolling_volume = volume.rolling(window=window)
    df[f'{instrument}_volume_z_{window}'] = (
        (volume - rolling_volume.mean()) / rolling_volume.std()
    )

# Rolling indicators: 5- and 20-row windows for SPY, a 20-row window for VIX
calculate_metrics(df, 'spy', 5)
calculate_metrics(df, 'spy', 20)
calculate_metrics(df, 'vix', 20)

# Calculate percentage changes, all measured against the previous row's close
prev_close = df['spy_close'].shift(1)
# The difference must be taken before dividing; without the parentheses the
# expression evaluates as close - (prev_close / close * 100), which is not a
# percentage change. The denominator is the previous close, matching the
# high/low features below.
df['spy_percent_close_change'] = (df['spy_close'] - prev_close) / prev_close * 100
df['spy_percent_change:high_to_previous_close'] = (df['spy_high'] - prev_close) / prev_close * 100
df['spy_percent_change:low_to_previous_close'] = (df['spy_low'] - prev_close) / prev_close * 100

# Create labels for prediction: each label is the next row's value of the
# corresponding percentage-change column (shift(-1)).
label_cols = [
    'spy_percent_close_change',
    'spy_percent_change:high_to_previous_close',
    'spy_percent_change:low_to_previous_close'
]
label_names = [f'{col}_label' for col in label_cols]
df[label_names] = df[label_cols].shift(-1)

# Rows with no following row have no label to predict; drop them.
df.dropna(subset=label_names, inplace=True)

show(df, lengthMenu=[5, 10, 25, 50, 100], paging=True)
df.to_csv(data_path / 'merged.csv')

Unnamed: 0,date,dxy_nyb_us_dollar_index_open,dxy_nyb_us_dollar_index_high,dxy_nyb_us_dollar_index_low,dxy_nyb_us_dollar_index_close,dxy_nyb_us_dollar_index_adj close,fvy_t_yield_5_years_open,fvy_t_yield_5_years_high,fvy_t_yield_5_years_low,fvy_t_yield_5_years_close,fvy_t_yield_5_years_adj close,irx_13_week_t_bill_open,irx_13_week_t_bill_high,irx_13_week_t_bill_low,irx_13_week_t_bill_close,irx_13_week_t_bill_adj close,spy_open,spy_high,spy_low,spy_close,spy_adj close,spy_volume,tnx_interest_rate_10_year_open,tnx_interest_rate_10_year_high,tnx_interest_rate_10_year_low,tnx_interest_rate_10_year_close,tnx_interest_rate_10_year_adj close,vix_open,vix_high,vix_low,vix_close,vix_adj close,spy_5_ma_close,spy_oscillator_5,spy_volume_z_5,spy_20_ma_close,spy_oscillator_20,spy_volume_z_20,vix_20_ma_close,vix_oscillator_20,spy_percent_close_change,spy_percent_change:high_to_previous_close,spy_percent_change:low_to_previous_close,spy_percent_close_change_label,spy_percent_change:high_to_previous_close_label,spy_percent_change:low_to_previous_close_label
Loading ITables v2.2.2 from the internet... (need help?),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [3]:
#@title Merge with monthly datasets and generate temporal features

# Load the CPI and PPI tables from the shared data directory. They are only
# read here; nothing in this cell merges them into df yet.
cpi, ppi = (pd.read_csv(data_path / csv_name) for csv_name in ('cpi.csv', 'ppi.csv'))

# show(ppi)

# show(df, lengthMenu=[5, 10, 25, 50, 100], paging=True)

In [4]:
#@title testing

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

# Drop every row that still contains a missing value (the rolling-window
# columns are NaN until a full window of rows is available).
df.dropna(inplace=True)
show(df, lengthMenu=[5, 10, 25, 50, 100], paging=True)
print(df.shape)

# Columns excluded from the feature matrix: the three label columns, the
# percentage-change columns they are shifted from, and the SPY close columns.
columns_to_drop = [
    'spy_close', 'spy_adj close', 'spy_percent_close_change',
    'spy_percent_change:high_to_previous_close',
    'spy_percent_change:low_to_previous_close',
    'spy_percent_close_change_label',
    'spy_percent_change:high_to_previous_close_label',
    'spy_percent_change:low_to_previous_close_label'
]
# Keep numeric columns only. 'date' is loaded from CSV without date parsing,
# so it is non-numeric and StandardScaler would raise on it when fitting.
X = df.drop(columns=columns_to_drop).select_dtypes(include='number')
y = df['spy_percent_close_change_label']

# Scale the features, then fit a random forest on them.
pipe = Pipeline([
    ("scale", StandardScaler()),
    ("model", RandomForestRegressor())
])

# Only n_estimators is searched for now; the other candidates are parked below.
param_grid = {
    'model__n_estimators': [100],          # Number of trees
    # 'model__max_depth': [None, 10],        # Maximum tree depth
    # 'model__min_samples_split': [2, 5],    # Minimum samples to split
    # 'model__min_samples_leaf': [1, 2],     # Minimum samples per leaf
    # 'model__bootstrap': [True, False]      # Bootstrap samples
}

# Set up GridSearchCV; error_score='raise' surfaces any fit failure immediately.
# NOTE(review): the rows look time-ordered (features use shift/rolling), so
# plain 5-fold CV trains on later rows to score earlier ones. Consider
# TimeSeriesSplit; confirm before relying on these scores.
model = GridSearchCV(pipe, param_grid, cv=5, scoring='neg_mean_squared_error', verbose=1, error_score='raise')

# Fit the model


Unnamed: 0,date,dxy_nyb_us_dollar_index_open,dxy_nyb_us_dollar_index_high,dxy_nyb_us_dollar_index_low,dxy_nyb_us_dollar_index_close,dxy_nyb_us_dollar_index_adj close,fvy_t_yield_5_years_open,fvy_t_yield_5_years_high,fvy_t_yield_5_years_low,fvy_t_yield_5_years_close,fvy_t_yield_5_years_adj close,irx_13_week_t_bill_open,irx_13_week_t_bill_high,irx_13_week_t_bill_low,irx_13_week_t_bill_close,irx_13_week_t_bill_adj close,spy_open,spy_high,spy_low,spy_close,spy_adj close,spy_volume,tnx_interest_rate_10_year_open,tnx_interest_rate_10_year_high,tnx_interest_rate_10_year_low,tnx_interest_rate_10_year_close,tnx_interest_rate_10_year_adj close,vix_open,vix_high,vix_low,vix_close,vix_adj close,spy_5_ma_close,spy_oscillator_5,spy_volume_z_5,spy_20_ma_close,spy_oscillator_20,spy_volume_z_20,vix_20_ma_close,vix_oscillator_20,spy_percent_close_change,spy_percent_change:high_to_previous_close,spy_percent_change:low_to_previous_close,spy_percent_close_change_label,spy_percent_change:high_to_previous_close_label,spy_percent_change:low_to_previous_close_label
Loading ITables v2.2.2 from the internet... (need help?),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


(7902, 46)
