# DSCI 510 Final Project – Building a Set-and-Forget ETF Strategy for Graduate Students: Results

This notebook runs the main steps of my project and shows a few tables and charts.
The data-loading, metrics, compounding, and chart helpers are imported from the Python modules in the `src` folder.

In [None]:
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

from src.config import RESULTS_DIR, TICKERS, START_DATE
from src.fetch import get_prices
from src.metrics import cagr, ann_vol, max_drawdown, sharpe
from src.compounding import (
    chart_start_early_vs_late,
    chart_early_small_vs_late_big,
    chart_monthly_to_1M,
    chart_monthly_to_1M_multi_returns,
    dca_constant_return,
)
from src.viz import line_chart, risk_return_scatter


In [None]:
# Load the price table for every configured ticker, starting at START_DATE.
# NOTE(review): end=None presumably means "up to the latest available data" --
# confirm against src.fetch.get_prices.
prices = get_prices(
    TICKERS,
    start=START_DATE,
    end=None,
)

# Preview the first five rows as a quick sanity check on the loaded data.
prices.head(5)


In [None]:
# Build a one-row-per-ticker summary table of return and risk metrics.
metric_columns = ["Ticker", "CAGR", "AnnualVolatility", "MaxDD", "Sharpe(0%)"]
rows = []

for ticker in prices.columns:
    # Drop missing prices so each metric only sees this ticker's observed values.
    s = prices[ticker].dropna()
    if s.empty:
        # No observations at all for this ticker: there is nothing to measure,
        # so skip it (the rebasing step handles empty columns the same way).
        continue
    rows.append(
        {
            "Ticker": ticker,
            "CAGR": round(cagr(s), 4),
            "AnnualVolatility": round(ann_vol(s), 4),
            "MaxDD": round(max_drawdown(s), 4),
            # Sharpe ratio computed with a 0% annual risk-free rate.
            "Sharpe(0%)": round(sharpe(s, rf_annual=0.0), 4),
        }
    )

# Passing the column list keeps the schema stable even if every ticker was skipped.
metrics_df = pd.DataFrame(rows, columns=metric_columns)
metrics_df


In [None]:
# Normalise every ETF to a common starting level of 100 so that their growth
# paths can be compared on a single axis.
rebased = prices.copy()

for name, column in prices.items():
    observed = column.dropna()
    if observed.empty:
        # No price to anchor on; leave the column exactly as copied.
        continue
    # Divide by the first observed price, then scale so the series starts at 100.
    rebased[name] = column / observed.iloc[0] * 100.0

# Output locations for the two figures produced below.
growth_png = RESULTS_DIR / "etf_growth.png"
scatter_png = RESULTS_DIR / "risk_return_scatter.png"

# Growth chart of the rebased price series.
line_chart(
    rebased,
    title="ETF Growth (rebased to 100)",
    xlab="Date",
    ylab="Index (Start = 100)",
    out_path=growth_png,
)

# Risk vs return scatter, drawn from the per-ticker metrics table.
risk_return_scatter(metrics_df, scatter_png)

print("Saved etf_growth.png and risk_return_scatter.png in", RESULTS_DIR)


In [None]:
# Compounding illustrations that only need an output path, listed in the order
# they are rendered.
single_path_charts = (
    (chart_start_early_vs_late, "early_vs_late_200mo.png"),
    (chart_early_small_vs_late_big, "early_small_vs_late_big.png"),
    (chart_monthly_to_1M, "monthly_needed_to_1M.png"),
)

for draw_chart, file_name in single_path_charts:
    draw_chart(RESULTS_DIR / file_name)

# The multi-return variant additionally takes the set of annual return
# assumptions to compare.
multi_return_rates = (0.07, 0.09, 0.13, 0.15, 0.20)
chart_monthly_to_1M_multi_returns(
    RESULTS_DIR / "monthly_to_1M_multi_returns.png",
    returns=multi_return_rates,
)

print("Saved compounding charts in", RESULTS_DIR)


In [None]:
# Dollar-cost-averaging (DCA) scenario: a fixed monthly contribution into a
# single ETF, compounded at a constant assumed annual return.
#
# The scenario parameters are defined once here and reused by the simulation,
# the axis ticks and the title, so the three cannot drift apart.
dca_tickers = ["QQQ", "SPY", "VIG"]
# NOTE(review): these rates are hard-coded here rather than read from
# metrics_df, while the chart title says "ETF Historical CAGR" -- confirm they
# still match the computed CAGRs.
assumed_returns = {"QQQ": 0.13, "SPY": 0.09, "VIG": 0.07}
monthly_contribution = 200.0
start_age = 22
end_age = 65

# One column per ticker, built in a single constructor call. `.values` drops
# whatever index the simulation returns, so rows are plain positions.
dca_df = pd.DataFrame(
    {
        t: dca_constant_return(
            cagr_value=assumed_returns[t],
            monthly=monthly_contribution,
            start_age=start_age,
            end_age=end_age,
        ).values
        for t in dca_tickers
    }
)

fig, ax = plt.subplots(figsize=(10, 6))

for col in dca_df.columns:
    ax.plot(range(len(dca_df)), dca_df[col], label=col)

# X axis: label every fifth year of age. Row positions are treated as months
# elapsed since start_age.
# NOTE(review): assumes dca_constant_return yields one value per month -- confirm.
ages = list(range(start_age, end_age + 1, 5))
age_months = [(age - start_age) * 12 for age in ages]
ax.set_xticks(age_months)
ax.set_xticklabels([str(age) for age in ages])

# Y axis: round the top of the axis up to the next whole million. The max(1.0, ...)
# keeps the axis at least one step tall when the largest value is 0 or NaN,
# which would otherwise produce identical lower and upper limits.
max_val = dca_df.max().max()
step_dollars = 1_000_000
upper = step_dollars * max(1.0, np.ceil(max_val / step_dollars))

ax.set_ylim(0, upper)
ticks = np.arange(0, upper + step_dollars, step_dollars)
ax.set_yticks(ticks)
ax.yaxis.set_major_formatter(
    mtick.FuncFormatter(lambda x, _: f"${x / 1_000_000:.0f}M")
)

ax.set_title(
    f"Investing ${monthly_contribution:,.0f}/Month in a Single ETF (DCA)\n"
    f"Modeled from Age {start_age} to {end_age} using ETF Historical CAGR"
)
ax.set_xlabel("Age")
ax.set_ylabel("Portfolio Value ($ in millions)")
ax.legend()
ax.grid(True, linestyle=":", linewidth=0.5)
fig.tight_layout()
# The file name is kept fixed so existing references to this image keep working.
fig.savefig(RESULTS_DIR / "dca_200_single_etf.png")
plt.show()

print("Saved dca_200_single_etf.png in", RESULTS_DIR)


## Notes

All charts and CSV files used in my presentation are saved in the `results` folder.
This notebook shows how they are created from the raw ETF prices and from the simple compounding simulations.
