In [None]:
This notebook runs the full ingestion→cleaning→storage pipeline via our `ingest.py` script, then reads the merged data and produces key exploratory charts:

1. Price history  
2. Return distributions  
3. Return correlation heatmap  
4. Drawdown curve (shown for the first symbol as an example)  

In [None]:
# Run the end‐to‐end pipeline
# The leading `!` is IPython's shell escape: this launches `python ingest.py`
# as a separate process using whichever `python` is first on the shell's PATH.
# The merged output path given here (data/prices.parquet) is the file that is
# read back with pandas later in this notebook.
# NOTE(review): the flag meanings below are inferred from their names only
# (symbol list file, cleaned-output location, merged-output file, start date);
# confirm against ingest.py's argument parser.
!python ingest.py \
  --symbols symbols.txt \
  --out-clean data/clean \
  --out-merged data/prices.parquet \
  --start 2015-01-01


In [None]:
import pandas as pd

# Load the merged price table from the parquet file written by the pipeline
# step; everything below works from this `prices` frame.
prices = pd.read_parquet(path="data/prices.parquet")

# Sanity check: report (rows, columns), then leave the first five rows as the
# cell's displayed output.
print("Shape:", prices.shape)
prices.head(5)


In [None]:
import matplotlib.pyplot as plt

# Overlay every column of `prices` on a single set of axes, one line per
# symbol, using the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(12,6))
for sym in prices.columns:
    ax.plot(prices.index, prices[sym], label=sym)
ax.set_title("Price History")
ax.legend(loc="upper left")
plt.show()


In [None]:
import seaborn as sns

# Period-over-period percentage change of each price column. dropna() with
# its defaults removes every row that has a NaN in *any* column, so all
# symbols end up sharing exactly the same set of rows.
returns = prices.pct_change().dropna()

# One standalone 50-bin histogram per symbol, each on its own figure.
for sym in prices.columns:
    fig, ax = plt.subplots(figsize=(6,4))
    sns.histplot(returns[sym], bins=50, kde=False, ax=ax)
    ax.set_title(f"{sym} Daily Return Distribution")
    ax.set_xlabel("Return")
    plt.show()


In [None]:
# Pairwise correlation between the return columns (DataFrame.corr defaults to
# the Pearson coefficient).
corr = returns.corr()

plt.figure(figsize=(8,6))
# "vlag" is a diverging palette, so the colour scale is pinned to the full
# correlation range [-1, 1] with its neutral midpoint at 0. Left at the
# defaults, the scale is stretched over the observed min/max, which puts the
# neutral colour on an arbitrary mid-range correlation instead of on
# "uncorrelated" and makes colours incomparable between runs.
sns.heatmap(corr, annot=True, fmt=".2f", cmap="vlag", vmin=-1, vmax=1, center=0)
plt.title("Return Correlation Heatmap")
plt.show()


In [None]:
# Example: drawdown for the first symbol.
# Drawdown at each point is the fractional decline of cumulative wealth from
# its running peak (wealth / peak - 1), so values are always <= 0.
sym0 = prices.columns[0]

# Growth of one unit invested immediately before the first row of `returns`.
cum = (1 + returns[sym0]).cumprod()

# `cum` starts *after* the first return has been applied, so the initial
# wealth of 1.0 never appears in the series. Flooring the running peak at 1.0
# restores that starting value as a candidate high-water mark; without it, a
# loss in the very first period is reported as a drawdown of 0 and every
# later drawdown is measured from a peak that is too low.
running_peak = cum.cummax().clip(lower=1.0)
drawdown = cum / running_peak - 1

plt.figure(figsize=(10,4))
plt.plot(drawdown.index, drawdown, label=sym0)
plt.title(f"{sym0} Drawdown Curve")
plt.ylabel("Drawdown")
# The label passed to plot() is only rendered when a legend is drawn.
plt.legend(loc="lower left")
plt.show()
