# Vanguard ETF Analyzer (Polygon.io Only)
This notebook loops through 11 Vanguard ETFs from 2023 to 2024 using Polygon.io data, calculates indicators and risk metrics, and saves each dataset to the ETFs folder.


In [48]:
import os
import time
import requests
import numpy as np
import pandas as pd
import pandas_ta as ta
from dotenv import load_dotenv

# Load Polygon API key
load_dotenv()
api_key = os.getenv("POLYGON_API_KEY")

# Create output folder
os.makedirs("ETFs", exist_ok=True)


In [49]:
def calculate_risk_metrics(df):
    returns = df['close'].pct_change().dropna()
    volatility = returns.std()
    sharpe_ratio = returns.mean() / volatility if volatility != 0 else 0
    cumulative = (1 + returns).cumprod()
    peak = cumulative.cummax()
    drawdown = (cumulative - peak) / peak
    max_drawdown = drawdown.min()
    return volatility, sharpe_ratio, max_drawdown


In [50]:
from datetime import datetime

start_date = "2023-01-01"
end_date = datetime.today().strftime("%Y-%m-%d")  # dynamic end date


In [51]:
# Discover all active Vanguard ETFs from Polygon
search_url = f"https://api.polygon.io/v3/reference/tickers?market=stocks&type=ETF&active=true&limit=1000&apiKey={api_key}"
response = requests.get(search_url).json()

# Pull all Vanguard ETFs with their name
vanguard_etfs = {}
for result in response.get("results", []):
    name = result.get("name", "")
    ticker = result.get("ticker", "")
    if "Vanguard" in name:
        vanguard_etfs[ticker] = name  # you can assign asset class manually after if needed

print("Vanguard ETFs Found:", list(vanguard_etfs.keys()))


Vanguard ETFs Found: ['BIV', 'BLV', 'BND', 'BNDW', 'BNDX', 'BSV', 'EDV']


## Step 1: Pull OHLCV Data from Polygon.io
Checks for a cached CSV first. If it doesn't exist, fetches from the API.


In [52]:
from datetime import datetime

start_date = "2023-01-01"
end_date = datetime.today().strftime("%Y-%m-%d")  # dynamic end date

all_dfs = []

for symbol, asset_class in vanguard_etfs.items():
    print(f"\n--- Processing {symbol} ({asset_class}) ---")

    # Validate ticker
    if not validate_ticker_exists(symbol):
        print(f"Skipping {symbol} — not active or not in supported markets.")
        continue

    # Fetch OHLCV from Polygon.io
    url = f"https://api.polygon.io/v2/aggs/ticker/{symbol}/range/1/day/{start_date}/{end_date}?adjusted=true&sort=asc&limit=50000&apiKey={api_key}"
    response = requests.get(url)
    # Throttle to avoid 429 errors (free tier = 5 calls/minute)
    if response.status_code == 429:
        print(f"Rate limit hit for {symbol}, sleeping for 60 seconds...")
        time.sleep(60)
        response = requests.get(url)

    # Add small delay to avoid hitting rate limit over time
    time.sleep(13)  # spacing 11 requests evenly across a 3-minute window

    data = response.json().get('results', [])
    
    if not data:
        print(f"Warning: No data returned for {symbol}")
        continue

    df = pd.DataFrame(data)
    df['t'] = pd.to_datetime(df['t'], unit='ms')
    df.rename(columns={'t': 'date', 'o': 'open', 'h': 'high', 'l': 'low', 'c': 'close', 'v': 'volume'}, inplace=True)
    df = df[['date', 'open', 'high', 'low', 'close', 'volume']]
    df.set_index('date', inplace=True)

    # Add technical indicators
    df['rsi'] = ta.rsi(df['close'], length=14)
    df['macd'] = ta.macd(df['close'])['MACD_12_26_9']
    df['sma50'] = ta.sma(df['close'], length=50)
    df['ema20'] = ta.ema(df['close'], length=20)
    df['atr'] = ta.atr(df['high'], df['low'], df['close'])

    # Add target and drop incomplete rows
    df['next_day_return'] = df['close'].pct_change().shift(-1)
    df['target'] = (df['next_day_return'] > 0).astype(int)
    df.dropna(subset=['rsi', 'macd', 'sma50', 'ema20', 'atr', 'next_day_return'], inplace=True)

    # Add risk metrics
    volatility, sharpe, max_dd = calculate_risk_metrics(df)
    df['volatility'] = volatility
    df['sharpe_ratio'] = sharpe
    df['max_drawdown'] = max_dd

    # Simulate sentiment
    np.random.seed(42)
    df['daily_sentiment'] = np.random.normal(loc=0.02, scale=0.1, size=len(df))

    # Add symbol and asset class
    df['symbol'] = symbol
    df['asset_class'] = asset_class

    # Save individual CSV
    df.to_csv(f"ETFs/{symbol}_enriched.csv")
    all_dfs.append(df)

    print(f"{symbol} data range: {df.index.min().date()} to {df.index.max().date()}")


print("All ETFs processed and saved.")



--- Processing BIV (Vanguard Intermediate-Term Bond ETF) ---


NameError: name 'validate_ticker_exists' is not defined

In [None]:
etf_master_df = pd.concat(all_dfs)
etf_master_df.to_csv("ETFs/vanguard_etfs_combinedv1.1.3.csv")
print("Saved combined dataset.")
etf_master_df.head()


Saved combined dataset.


Unnamed: 0_level_0,open,high,low,close,volume,rsi,macd,sma50,ema20,atr,next_day_return,target,volatility,sharpe_ratio,max_drawdown,daily_sentiment,symbol,asset_class
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2023-06-20 04:00:00,403.49,404.32,401.26,403.38,4246881.0,69.667455,6.187024,383.988,394.373661,4.104436,-0.005479,0,0.008412,0.057968,-0.123317,0.069671,VOO,US Equity - Large Cap
2023-06-21 04:00:00,402.31,403.06,400.67,401.17,3423543.0,64.792494,5.953397,384.4842,395.020931,4.002323,0.003639,1,0.008412,0.057968,-0.123317,0.006174,VOO,US Equity - Large Cap
2023-06-22 04:00:00,400.28,402.72,399.95,402.63,3141021.0,66.46213,5.818978,385.0086,395.745604,3.912243,-0.007501,0,0.008412,0.057968,-0.123317,0.084769,VOO,US Equity - Large Cap
2023-06-23 04:00:00,399.33,401.28,398.93,399.61,4215307.0,60.111947,5.406439,385.5026,396.113642,3.896754,-0.004129,0,0.008412,0.057968,-0.123317,0.172303,VOO,US Equity - Large Cap
2023-06-26 04:00:00,399.04,400.86,397.73,397.96,3250700.0,56.912454,4.889989,385.8664,396.289486,3.840886,0.011082,1,0.008412,0.057968,-0.123317,-0.003415,VOO,US Equity - Large Cap
