### Importing cleaned dataframes 

In [33]:
import pandas as pd
import logging
import sys
import numpy as np
from dataclasses import asdict
from pathlib import Path

sys.path.append(r"C:\Users\giuli\Repositories\intraday-momentum\src")
sys.path.append("/home/corwyn/Black_swan/Intraday_project/intraday-momentum/src")
from classes.metrics.metrics import MetricsCalculator
from classes.backtest.engine import BacktestEngine  
from classes.backtest.default import BacktestDefaults 

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
    force=True,   # <-- allow printing in jupyter notebooks
)

logger = logging.getLogger(__name__)


In [34]:
# Load files
"""df_spy = pd.read_pickle("C:\\Users\\giuli\\Repositories\\intraday-momentum\\data\\cleaned\\SPY_1min_20231027_20251027.pkl")  
df_vixx = pd.read_pickle("C:/Users/giuli/Repositories/intraday-momentum/data/cleaned/^VIX_1day_20231027_20251027.pkl")
df_dividends = pd.read_csv("C:\\Users\\giuli\\Repositories\\intraday-momentum\\data\\cleaned\\SPY_dividends_20231027_20250919.csv")
df_spy_daily = pd.read_pickle("C:/Users/giuli/Repositories/intraday-momentum/data/cleaned/SPY_daily_from27-10-2023.pkl")
"""


df_vixx = pd.read_pickle("/home/corwyn/Black_swan/Intraday_project/intraday-momentum/data/cleaned/^VIX_1day_20231027_20251027.pkl")  
df_spy = pd.read_pickle("/home/corwyn/Black_swan/Intraday_project/intraday-momentum/data/cleaned/SPY_1min_20231027_20251027.pkl")
df_dividends = pd.read_csv("/home/corwyn/Black_swan/Intraday_project/intraday-momentum/data/cleaned/SPY_dividends_20231027_20250919.csv")
df_spy_daily = pd.read_pickle("/home/corwyn/Black_swan/Intraday_project/intraday-momentum/data/cleaned/SPY_daily_from27-10-2023.pkl")

# Print information about the dataframes
df_spy.info()
print("\n")
df_vixx.info()
print("\n")
df_dividends.info()
print("\n")
df_spy_daily.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 194105 entries, 2023-10-30 09:30:00 to 2025-10-27 15:59:00
Data columns (total 5 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   volume  194105 non-null  float64
 1   open    194105 non-null  float64
 2   high    194105 non-null  float64
 3   low     194105 non-null  float64
 4   close   194105 non-null  float64
dtypes: float64(5)
memory usage: 8.9 MB


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 500 entries, 2023-10-27 05:00:00+00:00 to 2025-10-24 05:00:00+00:00
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   volume  500 non-null    int64  
 1   open    500 non-null    float64
 2   high    500 non-null    float64
 3   low     500 non-null    float64
 4   close   500 non-null    float64
dtypes: float64(4), int64(1)
memory usage: 23.4 KB


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (t

In [35]:
dfs = {
    "df_spy": df_spy,
    "df_vixx": df_vixx,
    "df_spy_daily": df_spy_daily,
}

for name, df in dfs.items():
    if not isinstance(df.index, pd.DatetimeIndex):
        raise TypeError(f"{name}: index must be a DatetimeIndex.")
    else:
        logger.info(f"{name}: index is a valid DatetimeIndex with timezone={df.index.tz}.")


2025-12-01 18:04:58,888 | INFO | df_spy: index is a valid DatetimeIndex with timezone=None.
2025-12-01 18:04:58,889 | INFO | df_vixx: index is a valid DatetimeIndex with timezone=UTC.
2025-12-01 18:04:58,890 | INFO | df_spy_daily: index is a valid DatetimeIndex with timezone=None.


Two dfs have no timezone, so we need to make sure everything is uniform.

In [36]:
dfs = {
    "df_spy": df_spy,
    "df_vixx": df_vixx,
    "df_spy_daily": df_spy_daily
}

for name, df in dfs.items():
    # Ensure index is DatetimeIndex
    if not isinstance(df.index, pd.DatetimeIndex):
        df.index = pd.to_datetime(df.index)

    # Localize naive timestamps (assuming US/Eastern for intraday and daily SPY; VIX depends)
    if df.index.tz is None:
        if name in ["df_spy", "df_spy_daily"]:
            df.index = df.index.tz_localize("US/Eastern")
        else:  
            df.index = df.index.tz_localize("UTC")  

    # Convert to common timezone UTC
    df.index = df.index.tz_convert("UTC")

    # Assign back to dict
    dfs[name] = df

    # Log result
    if isinstance(df.index, pd.DatetimeIndex):
        logger.info(f"{name}: index is a valid DatetimeIndex (tz={df.index.tz})")
    else:
        logger.warning(f"{name}: index is NOT a DatetimeIndex (type={type(df.index)})")

#reassign to original variables
df_spy = dfs["df_spy"]
df_vixx = dfs["df_vixx"]
df_spy_daily = dfs["df_spy_daily"]


2025-12-01 18:04:58,915 | INFO | df_spy: index is a valid DatetimeIndex (tz=UTC)
2025-12-01 18:04:58,916 | INFO | df_vixx: index is a valid DatetimeIndex (tz=UTC)
2025-12-01 18:04:58,917 | INFO | df_spy_daily: index is a valid DatetimeIndex (tz=UTC)


### Add key variables

In [37]:
df_spy[500:510]


Unnamed: 0_level_0,volume,open,high,low,close
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-10-31 15:20:00+00:00,114037.0,416.035,416.07,415.83,415.88
2023-10-31 15:21:00+00:00,113127.0,415.88,415.99,415.72,415.8
2023-10-31 15:22:00+00:00,133653.0,415.8,415.94,415.765,415.91
2023-10-31 15:23:00+00:00,73109.0,415.91,415.91,415.735,415.77
2023-10-31 15:24:00+00:00,168774.0,415.78,415.79,415.6002,415.75
2023-10-31 15:25:00+00:00,128031.0,415.76,416.06,415.68,415.98
2023-10-31 15:26:00+00:00,136299.0,415.98,416.33,415.98,416.3
2023-10-31 15:27:00+00:00,102300.0,416.32,416.35,416.13,416.27
2023-10-31 15:28:00+00:00,83196.0,416.27,416.31,416.1006,416.13
2023-10-31 15:29:00+00:00,81919.0,416.13,416.15,415.65,415.71


In [38]:
# Ensure df_spy index is DatetimeIndex
if not isinstance(df_spy.index, pd.DatetimeIndex):
    df_spy.index = pd.to_datetime(df_spy.index)

# Add 'day' column for dividends merge
df_spy["day"] = df_spy.index.date

project_root = Path().resolve().parents[0]  # adjust if needed
data_path = project_root / "data" / "cleaned" / "SPY_dividends_20231027_20250919.csv"

dividends = pd.read_csv(data_path)

dividends["caldt"] = pd.to_datetime(dividends["caldt"]).dt.date

In [41]:
df_spy

Unnamed: 0_level_0,volume,open,high,low,close,day
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-10-30 13:30:00+00:00,630285.0,413.560,413.940,413.530,413.770,2023-10-30
2023-10-30 13:31:00+00:00,322290.0,413.780,414.010,413.750,413.882,2023-10-30
2023-10-30 13:32:00+00:00,455364.0,413.910,414.210,413.845,414.140,2023-10-30
2023-10-30 13:33:00+00:00,269190.0,414.130,414.240,414.050,414.205,2023-10-30
2023-10-30 13:34:00+00:00,330914.0,414.205,414.320,414.205,414.270,2023-10-30
...,...,...,...,...,...,...
2025-10-27 19:55:00+00:00,826675.0,685.490,685.520,685.280,685.395,2025-10-27
2025-10-27 19:56:00+00:00,670327.0,685.390,685.540,685.380,685.460,2025-10-27
2025-10-27 19:57:00+00:00,832993.0,685.460,685.460,685.345,685.415,2025-10-27
2025-10-27 19:58:00+00:00,942046.0,685.420,685.425,685.330,685.340,2025-10-27


In [43]:
# Compute metrics
mc = MetricsCalculator()
df_spy, df_spy_daily= mc.from_clean_df(df_spy)
#mc._validate_input(df_spy_daily)

mc.compute_intraday_cum_vwap(df_spy)
mc.compute_move_open(df_spy)
mc.compute_daily_returns_and_vol(df_spy)
mc.compute_minute_features(df_spy)  # requires DatetimeIndex
mc.merge_dividends(df_spy, dividends)
mc.quality_check(df_spy, df_spy_daily)

mc.logger.info("All SPY metrics successfully computed.")

df_spy.head()


2025-12-01 18:09:16,270 | INFO | Starting computation of market microstructure metrics...
2025-12-01 18:09:16,892 | INFO | Running quality checks...
2025-12-01 18:09:16,908 | INFO | NaN summary:
log_returns    1
RV             0
BV             0
vwap           0
price          0
day            0
dtype: int64
2025-12-01 18:09:16,910 | INFO | df_all_days shape: (194105, 12)
2025-12-01 18:09:16,911 | INFO | df_daily_profiles shape: (450, 5)
2025-12-01 18:09:17,170 | INFO | Metrics saved to data/processed/df_and_metrics.pkl
2025-12-01 18:09:19,201 | INFO | Running quality checks...
2025-12-01 18:09:19,219 | INFO | NaN summary:
log_returns    1
RV             0
BV             0
vwap           0
price          0
day            0
dtype: int64
2025-12-01 18:09:19,221 | INFO | df_all_days shape: (194105, 20)
2025-12-01 18:09:19,221 | INFO | df_daily_profiles shape: (450, 5)
2025-12-01 18:09:19,222 | INFO | All SPY metrics successfully computed.


Unnamed: 0_level_0,volume,open,high,low,close,day,log_returns,RV,BV,price,vwap,minute_of_day,hlc,move_open,spy_dvol,spy_ret,min_from_open,move_open_rolling_mean,sigma_open,dividend
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2023-10-30 13:30:00+00:00,630285.0,413.56,413.94,413.53,413.77,2023-10-30,,0.006868,3.1e-05,413.746667,413.746667,241,413.746667,0.000508,,,241.0,,,0.0
2023-10-30 13:31:00+00:00,322290.0,413.78,414.01,413.75,413.882,2023-10-30,0.000271,0.006868,3.1e-05,413.880667,413.792004,242,413.880667,0.000779,,,242.0,,,0.0
2023-10-30 13:32:00+00:00,455364.0,413.91,414.21,413.845,414.14,2023-10-30,0.000623,0.006868,3.1e-05,414.065,413.880298,243,414.065,0.001402,,,243.0,,,0.0
2023-10-30 13:33:00+00:00,269190.0,414.13,414.24,414.05,414.205,2023-10-30,0.000157,0.006868,3.1e-05,414.165,413.925994,244,414.165,0.00156,,,244.0,,,0.0
2023-10-30 13:34:00+00:00,330914.0,414.205,414.32,414.205,414.27,2023-10-30,0.000157,0.006868,3.1e-05,414.265,413.98186,245,414.265,0.001717,,,245.0,,,0.0


In [44]:
df_spy_daily.head()

Unnamed: 0_level_0,vwap,RV,BV,price,log_returns
minute_of_day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
810,573.12957,0.00833,4.5e-05,573.082406,0.000611
811,573.12957,0.00833,4.5e-05,573.065087,3e-06
812,573.12957,0.00833,4.5e-05,573.079475,1.3e-05
813,573.12957,0.00833,4.5e-05,573.071366,-1e-05
814,573.12957,0.00833,4.5e-05,573.073817,9e-06


In [45]:

df_spy.info()
df_spy.describe()
df_spy.isnull().sum()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 194105 entries, 2023-10-30 13:30:00+00:00 to 2025-10-27 19:59:00+00:00
Data columns (total 20 columns):
 #   Column                  Non-Null Count   Dtype  
---  ------                  --------------   -----  
 0   volume                  194105 non-null  float64
 1   open                    194105 non-null  float64
 2   high                    194105 non-null  float64
 3   low                     194105 non-null  float64
 4   close                   194105 non-null  float64
 5   day                     194105 non-null  object 
 6   log_returns             194104 non-null  float64
 7   RV                      194105 non-null  float64
 8   BV                      194105 non-null  float64
 9   price                   194105 non-null  float64
 10  vwap                    194105 non-null  float64
 11  minute_of_day           194105 non-null  int64  
 12  hlc                     194105 non-null  float64
 13  move_open               1941

volume                       0
open                         0
high                         0
low                          0
close                        0
day                          0
log_returns                  1
RV                           0
BV                           0
price                        0
vwap                         0
minute_of_day                0
hlc                          0
move_open                    0
spy_dvol                  5850
spy_ret                    390
min_from_open                0
move_open_rolling_mean    5400
sigma_open                5850
dividend                     0
dtype: int64

In [46]:

defaults = BacktestDefaults(
    minute_path="",
    daily_path="",
    initial_aum=100_000.0,
    commission_rate=0.0035,
    min_comm_per_order=0.35,
    slippage_bps=0,
    band_mult=1.0,
    trade_freq=30,
    sizing_type="vol_target",
    target_vol=0.02,
    max_leverage=4.0,
)

engine = BacktestEngine()
trade_log_df, daily_pnl_df, equity_curve_df = engine.run_backtest(asdict(defaults))

FileNotFoundError: [Errno 2] No such file or directory: ''