Day 1 — Build the Benchmark

Today’s goals:
- Define the benchmark tickers
- Define the benchmark weights (sum = 1)
- Download 15 years of historical prices
- Save benchmark data for Day 2

In [3]:
!python -m pip install --upgrade yfinance pandas pyarrow



In [4]:
import yfinance as yf
import pandas as pd
import json
from pathlib import Path

In [5]:
# Benchmark constituents, listed in the order they are handed to the downloader.
benchmark_tickers = [
    "VT",    # Global equity (Vanguard Total World Stock ETF)
    "BNDW",  # Global bonds (Vanguard Total World Bond ETF)
]

# Target allocation per ticker, expressed as fractions of the whole portfolio.
benchmark_weights = {
    "VT": 0.88,
    "BNDW": 0.12,
}

# Fail fast if the definition is inconsistent: every ticker needs exactly one
# weight, and the weights must add up to 1 (within floating-point tolerance).
assert set(benchmark_weights) == set(benchmark_tickers), (
    "benchmark_weights must have exactly one entry per benchmark ticker"
)
assert abs(sum(benchmark_weights.values()) - 1.0) < 1e-9, (
    "benchmark_weights must sum to 1"
)

In [6]:
def get_fund_name(ticker):
    """
    Look up a human-readable fund name for `ticker` through yfinance.

    The ``longName`` entry of the ticker's info mapping is preferred, then
    ``shortName``. When neither holds a truthy value, or the lookup raises
    for any reason, the ticker symbol itself is returned.
    """
    try:
        metadata = yf.Ticker(ticker).info
        for field in ("longName", "shortName"):
            label = metadata.get(field)
            if label:
                return label
    except Exception:
        # Best-effort lookup: any failure degrades to the bare symbol below.
        pass
    return ticker

In [7]:
# Resolve a display name for every benchmark ticker; get_fund_name returns the
# symbol itself when no name can be fetched.
benchmark_names = {}
for symbol in benchmark_tickers:
    benchmark_names[symbol] = get_fund_name(symbol)
benchmark_names

{'VT': 'Vanguard Total World Stock Index Fund ETF Shares',
 'BNDW': 'Vanguard Total World Bond ETF'}

In [8]:
from datetime import date
from dateutil.relativedelta import relativedelta

def get_price_data(tickers, years=15):
    """
    Download daily Close prices for `tickers` covering the last `years` years.

    Parameters
    ----------
    tickers : list of str
        Ticker symbols, forwarded unchanged to ``yf.download``.
    years : int, default 15
        Length of the look-back window, counted back from today's date.

    Returns
    -------
    pandas.DataFrame
        The ``"Close"`` slice of the download, ordered by ascending index,
        with gaps forward-filled and any remaining leading gaps back-filled.

    Raises
    ------
    ValueError
        If the download contains no rows.

    Notes
    -----
    * ``auto_adjust=True`` is passed explicitly so the result does not depend
      on the installed yfinance version's default (and no default-change
      warning is emitted).
    * yfinance documents ``end`` as exclusive, so today's own bar is not
      requested.
    * Back-filling copies a ticker's first observed price onto every earlier
      date in the window. For a ticker with a shorter history than `years`,
      those rows are synthetic flat prices - NOTE(review): confirm that
      downstream return calculations are meant to see them.
    """
    end = date.today()
    start = end - relativedelta(years=years)

    prices = yf.download(
        tickers=tickers,
        start=start,
        end=end,
        progress=False,
        auto_adjust=True,
    )["Close"]

    if prices.empty:
        # An empty frame would otherwise be saved silently and break later steps.
        raise ValueError(f"No price data returned for {tickers!r}")

    # Sort first: filling is positional, so filling an unordered index could
    # propagate a later price into an earlier date.
    return prices.sort_index().ffill().bfill()

In [9]:
# Fetch the Close-price history for every benchmark ticker.
benchmark_prices = get_price_data(tickers=benchmark_tickers)

  data = yf.download(


In [10]:
# Build a presentation-only copy whose column headers read "TICKER (fund name)";
# benchmark_prices itself keeps the plain ticker columns.
labelled_columns = [
    f"{t} ({benchmark_names[t]})" for t in benchmark_prices.columns
]
display_df = benchmark_prices.set_axis(labelled_columns, axis="columns")

# Show the first and last rows, then the overall dimensions.
for preview in (display_df.head(), display_df.tail()):
    display(preview)
print("Shape:", benchmark_prices.shape)

Unnamed: 0_level_0,BNDW (Vanguard Total World Bond ETF),VT (Vanguard Total World Stock Index Fund ETF Shares)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-11-15,60.844543,33.206192
2010-11-16,60.844543,32.533569
2010-11-17,60.844543,32.625603
2010-11-18,60.844543,33.291153
2010-11-19,60.844543,33.291153


Unnamed: 0_level_0,BNDW (Vanguard Total World Bond ETF),VT (Vanguard Total World Stock Index Fund ETF Shares)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-11-10,69.769997,140.770004
2025-11-11,69.910004,141.149994
2025-11-12,69.879997,141.490005
2025-11-13,69.709999,139.289993
2025-11-14,69.589996,139.240005


Shape: (3774, 2)


In [11]:
# Output folder: ../data relative to the working directory, created on demand.
data_dir = Path("..") / "data"
data_dir.mkdir(exist_ok=True)

# --- JSON artifacts: tickers, weights, and the looked-up fund names ---
json_artifacts = {
    "benchmark_tickers.json": benchmark_tickers,
    "benchmark_weights.json": benchmark_weights,
    "benchmark_names.json": benchmark_names,
}
for filename, payload in json_artifacts.items():
    with open(data_dir / filename, "w") as f:
        json.dump(payload, f, indent=4)

# --- Price history, written once as CSV and once as Parquet ---
benchmark_prices.to_csv(data_dir / "benchmark_prices.csv")
benchmark_prices.to_parquet(data_dir / "benchmark_prices.parquet")

# Summarise what was written, in the same order the files were produced.
print("✅ Benchmark artifacts saved:")
for filename in [*json_artifacts, "benchmark_prices.csv", "benchmark_prices.parquet"]:
    print(f" - {filename}")


✅ Benchmark artifacts saved:
 - benchmark_tickers.json
 - benchmark_weights.json
 - benchmark_names.json
 - benchmark_prices.csv
 - benchmark_prices.parquet
