In [9]:
import os
from datetime import date, datetime, timedelta, timezone

import pandas as pd
from dateutil.relativedelta import relativedelta
from massive import RESTClient

# Read the API key from the environment so the credential never lives in
# source control or in an exported notebook.
# NOTE(review): a key that was previously hard-coded here should be treated
# as leaked and rotated with the data provider.
API_KEY_ENV_VAR = "MASSIVE_API_KEY"
_api_key = os.environ.get(API_KEY_ENV_VAR)
if not _api_key:
    raise RuntimeError(
        f"Set the {API_KEY_ENV_VAR} environment variable to your API key."
    )

client = RESTClient(api_key=_api_key)
TICKER = "SPY"      # symbol whose history is sampled
ADJUSTED = True     # forwarded as the `adjusted` flag of every aggregates request

# Look-back anchor dates, all measured from the local calendar date at run time.
# Note that the "1d" anchor is today itself, not yesterday.
today = date.today()
anchors = {
    "4y": today - relativedelta(years=4),
    "2y": today - relativedelta(years=2),
    "1y": today - relativedelta(years=1),
    "6m": today - relativedelta(months=6),
    "3m": today - relativedelta(months=3),
    "1m": today - relativedelta(months=1),
    "1d": today,
}

def nearest_trading_bar(symbol: str, target_day: date, window_days: int = 7,
                        *, rest_client=None, adjusted=None):
    """Return the daily bar dated on, or most recently before, ``target_day``.

    Daily bars are requested for ``target_day`` +/- ``window_days`` and the
    latest bar whose date does not exceed ``target_day`` is returned.

    Args:
        symbol: Ticker to query.
        target_day: Calendar date to anchor on.
        window_days: Days fetched on each side of ``target_day``.
        rest_client: Optional object exposing ``list_aggs``; defaults to the
            module-level ``client``.
        adjusted: Optional override for the request's ``adjusted`` flag;
            defaults to the module-level ``ADJUSTED``.

    Returns:
        The matching bar object, or ``None`` when the window contains no bar
        on or before ``target_day``.
    """
    api = client if rest_client is None else rest_client
    use_adjusted = ADJUSTED if adjusted is None else adjusted

    window = timedelta(days=window_days)
    start = (target_day - window).isoformat()
    end = (target_day + window).isoformat()

    bars = api.list_aggs(
        ticker=symbol,
        multiplier=1,
        timespan="day",
        from_=start,  # trailing underscore because 'from' is a reserved word
        to=end,
        adjusted=use_adjusted,
        sort="asc",
        limit=50000,
    )

    best = None
    for bar in bars:
        ts_ms = getattr(bar, "timestamp", None)
        if ts_ms is None:
            # A bar without a timestamp cannot be placed on the calendar.
            continue
        # Compare calendar dates in UTC rather than against a local-midnight
        # cutoff: a naive datetime's .timestamp() is interpreted in the
        # machine's timezone, so the old cutoff shifted from host to host.
        # NOTE(review): assumes the UTC date of a bar's epoch-millisecond
        # timestamp is its trading date -- confirm against the API docs.
        bar_day = datetime.fromtimestamp(ts_ms / 1000, tz=timezone.utc).date()
        if bar_day > target_day:
            continue
        # Keep the latest qualifying bar without relying on response order.
        if best is None or ts_ms >= best.timestamp:
            best = bar
    return best

# Bar attributes copied verbatim into each output row.
_PRICE_FIELDS = ("open", "high", "low", "close", "volume")

# One row per anchor. Anchors with no matching bar keep None in every price
# column so the table always has one line per requested date.
rows = []
for label, d in anchors.items():
    bar = nearest_trading_bar(TICKER, d)
    row = {
        "anchor": label,
        "requested_date": d.isoformat(),
        "actual_trading_date": None,
    }
    row.update(dict.fromkeys(_PRICE_FIELDS))
    if bar is not None:
        # Bar timestamps are epoch milliseconds; take the UTC calendar date.
        # (Timezone-aware replacement for the deprecated utcfromtimestamp.)
        row["actual_trading_date"] = (
            datetime.fromtimestamp(bar.timestamp / 1000, tz=timezone.utc)
            .date()
            .isoformat()
        )
        row.update({field: getattr(bar, field) for field in _PRICE_FIELDS})
    rows.append(row)

# ISO-8601 date strings sort chronologically, so a plain string sort suffices.
df = pd.DataFrame(rows).sort_values("requested_date")

# Write the CSV into the current working directory.
out_file = os.path.join(os.getcwd(), "benchmark_history.csv")
df.to_csv(out_file, index=False)
print(f"Saved benchmark history to {out_file}")
print(df)


Saved benchmark history to /Users/brendantorok/Documents/Schooling/BU MET/CS767 - Advanced Machine Learning/Assignments/Assignment 2/benchmark_history.csv
  anchor requested_date actual_trading_date    open      high       low  \
0     4y     2021-11-08          2021-11-08  469.70  470.2300  468.2031   
1     2y     2023-11-08          2023-11-08  437.55  438.0900  434.8700   
2     1y     2024-11-08          2024-11-08  596.17  599.6400  596.1650   
3     6m     2025-05-08          2025-05-08  565.24  570.3100  561.7000   
4     3m     2025-08-08          2025-08-08  634.06  637.6450  633.7400   
5     1m     2025-10-08          2025-10-08  670.25  673.2056  669.4200   
6     1d     2025-11-08          2025-11-07  667.91  671.0800  661.2050   

    close       volume  
0  468.93   50192592.0  
1  437.25   61731027.0  
2  598.19   46422893.0  
3  565.06   65130787.0  
4  637.18   64051626.0  
5  673.11   60702241.0  
6  670.97  100549762.0  
