### Importing cleaned dataframes 

In [1]:
import pandas as pd
import logging
import sys
import numpy as np
from dataclasses import asdict
from pathlib import Path

sys.path.append(r"C:\Users\giuli\Repositories\intraday-momentum\src")
from classes.metrics.metrics import MetricsCalculator
from classes.backtest.engine import BacktestEngine  
from classes.backtest.default import BacktestDefaults 

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
    force=True,   # <-- allow printing in jupyter notebooks
)

logger = logging.getLogger(__name__)


In [2]:
# Load files
df_spy = pd.read_pickle("C:\\Users\\giuli\\Repositories\\intraday-momentum\\data\\cleaned\\SPY_1min_20231027_20251027.pkl")  
df_vixx = pd.read_pickle("C:/Users/giuli/Repositories/intraday-momentum/data/cleaned/^VIX_1day_20231027_20251027.pkl")
df_dividends = pd.read_csv("C:\\Users\\giuli\\Repositories\\intraday-momentum\\data\\cleaned\\SPY_dividends_20231027_20250919.csv")
df_spy_daily = pd.read_pickle("C:/Users/giuli/Repositories/intraday-momentum/data/cleaned/SPY_daily_from27-10-2023.pkl")

# Print information about the dataframes
df_spy.info()
print("\n")
df_vixx.info()
print("\n")
df_dividends.info()
print("\n")
df_spy_daily.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 194105 entries, 2023-10-30 09:30:00 to 2025-10-27 15:59:00
Data columns (total 5 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   volume  194105 non-null  float64
 1   open    194105 non-null  float64
 2   high    194105 non-null  float64
 3   low     194105 non-null  float64
 4   close   194105 non-null  float64
dtypes: float64(5)
memory usage: 8.9 MB


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 500 entries, 2023-10-27 05:00:00+00:00 to 2025-10-24 05:00:00+00:00
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   volume  500 non-null    int64  
 1   open    500 non-null    float64
 2   high    500 non-null    float64
 3   low     500 non-null    float64
 4   close   500 non-null    float64
dtypes: float64(4), int64(1)
memory usage: 23.4 KB


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (t

In [3]:
dfs = {
    "df_spy": df_spy,
    "df_vixx": df_vixx,
    "df_spy_daily": df_spy_daily,
}

for name, df in dfs.items():
    if not isinstance(df.index, pd.DatetimeIndex):
        raise TypeError(f"{name}: index must be a DatetimeIndex.")
    else:
        logger.info(f"{name}: index is a valid DatetimeIndex with timezone={df.index.tz}.")


2025-12-01 13:48:44,758 | INFO | df_spy: index is a valid DatetimeIndex with timezone=None.
2025-12-01 13:48:44,758 | INFO | df_vixx: index is a valid DatetimeIndex with timezone=UTC.
2025-12-01 13:48:44,760 | INFO | df_spy_daily: index is a valid DatetimeIndex with timezone=None.


Two dfs have no timezone, so we need to make sure everything is uniform.

In [4]:
dfs = {
    "df_spy": df_spy,
    "df_vixx": df_vixx,
    "df_spy_daily": df_spy_daily
}

for name, df in dfs.items():
    # Ensure index is DatetimeIndex
    if not isinstance(df.index, pd.DatetimeIndex):
        df.index = pd.to_datetime(df.index)

    # Localize naive timestamps (assuming US/Eastern for intraday and daily SPY; VIX depends)
    if df.index.tz is None:
        if name in ["df_spy", "df_spy_daily"]:
            df.index = df.index.tz_localize("US/Eastern")
        else:  
            df.index = df.index.tz_localize("UTC")  

    # Convert to common timezone UTC
    df.index = df.index.tz_convert("UTC")

    # Assign back to dict
    dfs[name] = df

    # Log result
    if isinstance(df.index, pd.DatetimeIndex):
        logger.info(f"{name}: index is a valid DatetimeIndex (tz={df.index.tz})")
    else:
        logger.warning(f"{name}: index is NOT a DatetimeIndex (type={type(df.index)})")

#reassign to original variables
df_spy = dfs["df_spy"]
df_vixx = dfs["df_vixx"]
df_spy_daily = dfs["df_spy_daily"]


2025-12-01 13:48:44,956 | INFO | df_spy: index is a valid DatetimeIndex (tz=UTC)
2025-12-01 13:48:44,956 | INFO | df_vixx: index is a valid DatetimeIndex (tz=UTC)
2025-12-01 13:48:44,956 | INFO | df_spy_daily: index is a valid DatetimeIndex (tz=UTC)


### Add key variables

In [5]:
df_spy[500:510]


Unnamed: 0_level_0,volume,open,high,low,close
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-10-31 15:20:00+00:00,114037.0,416.035,416.07,415.83,415.88
2023-10-31 15:21:00+00:00,113127.0,415.88,415.99,415.72,415.8
2023-10-31 15:22:00+00:00,133653.0,415.8,415.94,415.765,415.91
2023-10-31 15:23:00+00:00,73109.0,415.91,415.91,415.735,415.77
2023-10-31 15:24:00+00:00,168774.0,415.78,415.79,415.6002,415.75
2023-10-31 15:25:00+00:00,128031.0,415.76,416.06,415.68,415.98
2023-10-31 15:26:00+00:00,136299.0,415.98,416.33,415.98,416.3
2023-10-31 15:27:00+00:00,102300.0,416.32,416.35,416.13,416.27
2023-10-31 15:28:00+00:00,83196.0,416.27,416.31,416.1006,416.13
2023-10-31 15:29:00+00:00,81919.0,416.13,416.15,415.65,415.71


In [6]:
# Ensure df_spy index is DatetimeIndex
if not isinstance(df_spy.index, pd.DatetimeIndex):
    df_spy.index = pd.to_datetime(df_spy.index)

# Add 'day' column for dividends merge
df_spy["day"] = df_spy.index.date

project_root = Path().resolve().parents[0]  # adjust if needed
data_path = project_root / "data" / "cleaned" / "SPY_dividends_20231027_20250919.csv"

dividends = pd.read_csv(data_path)

dividends["caldt"] = pd.to_datetime(dividends["caldt"]).dt.date

In [7]:
# Compute metrics
mc = MetricsCalculator()
mc.from_clean_df(df_spy_daily)
mc._validate_input(df_spy_daily)

mc.compute_intraday_cum_vwap(df_spy)
mc.compute_move_open(df_spy)
mc.compute_daily_returns_and_vol(df_spy)
mc.compute_minute_features(df_spy)  # requires DatetimeIndex
mc.merge_dividends(df_spy, dividends)
mc.quality_check(df_spy, df_spy_daily)

mc.logger.info("All SPY metrics successfully computed.")

df_spy.head()


2025-12-01 13:48:45,062 | INFO | Starting computation of market microstructure metrics...
2025-12-01 13:48:45,556 | ERROR | Error computing metrics.
Traceback (most recent call last):
  File "c:\Users\giuli\Repositories\intraday-momentum\.venv\Lib\site-packages\pandas\core\indexes\base.py", line 3812, in get_loc
    return self._engine.get_loc(casted_key)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "pandas/_libs/index.pyx", line 167, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 196, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 7088, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 7096, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Datetime'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\giuli\Repositories\intraday-momentum\src\classes\metrics

RuntimeError: Metric computation failed.

In [8]:

df_spy.info()
df_spy.describe()
df_spy.isnull().sum()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 194105 entries, 2023-10-30 13:30:00+00:00 to 2025-10-27 19:59:00+00:00
Data columns (total 6 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   volume  194105 non-null  float64
 1   open    194105 non-null  float64
 2   high    194105 non-null  float64
 3   low     194105 non-null  float64
 4   close   194105 non-null  float64
 5   day     194105 non-null  object 
dtypes: float64(5), object(1)
memory usage: 10.4+ MB


volume    0
open      0
high      0
low       0
close     0
day       0
dtype: int64

In [9]:

defaults = BacktestDefaults(
    minute_path="",
    daily_path="",
    initial_aum=100_000.0,
    commission_rate=0.0035,
    min_comm_per_order=0.35,
    slippage_bps=0,
    band_mult=1.0,
    trade_freq=30,
    sizing_type="vol_target",
    target_vol=0.02,
    max_leverage=4.0,
)

engine = BacktestEngine()
trade_log_df, daily_pnl_df, equity_curve_df = engine.run_backtest(asdict(defaults))

FileNotFoundError: [Errno 2] No such file or directory: ''