In [1]:
# Cell 1 – Imports & data load
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller, kpss

# ── load the feature-ready parquet created in Phase 1 ──
DATA = Path("../data/processed/nasdaq_features.parquet")  # adjust if your path differs

# DATA is relative, so it is resolved against the kernel's working directory.
# Fail early with the resolved location spelled out, so a wrong working
# directory or a missing Phase 1 artifact is obvious from the message.
if not DATA.exists():
    raise FileNotFoundError(
        f"Feature file not found: {DATA.resolve()} "
        f"(working directory: {Path.cwd()}). "
        "Create it with the Phase 1 step or adjust DATA."
    )

df = pd.read_parquet(DATA)

df.head()        # quick sanity check


FileNotFoundError: [Errno 2] No such file or directory: '../data/processed/nasdaq_features.parquet'

In [None]:
# Cell 2 – Price level (top panel) & log-returns (bottom panel), shared x-axis
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(12, 6), sharex=True)

# One (column, title, y-label) entry per panel, listed top to bottom.
panel_specs = [
    ("Close", "NASDAQ Close Price", "Price ($)"),
    ("log_ret", "Daily Log Returns", "Log return"),
]
for panel_ax, (col_name, panel_title, y_label) in zip(ax, panel_specs):
    df[col_name].plot(ax=panel_ax, title=panel_title)
    panel_ax.set_ylabel(y_label)

plt.tight_layout()
plt.show()


In [None]:
# Cell 3 – ACF (top) & PACF (bottom) of the log returns
returns_clean = df["log_ret"].dropna()  # both plots are drawn from the same NaN-free sample
max_lag = 50                            # number of lags shown in each plot

fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(10, 6))
acf_ax, pacf_ax = ax

plot_acf(returns_clean, lags=max_lag, ax=acf_ax)
acf_ax.set_title("ACF (log returns)")

plot_pacf(returns_clean, lags=max_lag, method="ywm", ax=pacf_ax)
pacf_ax.set_title("PACF (log returns)")

plt.tight_layout()
plt.show()


In [None]:
# Cell 4 – Variance of log returns over a rolling 252-row window
fig, ax = plt.subplots(figsize=(12, 4))

# Positions that do not yet have a full 252-row window come out as NaN
# and are simply not drawn.
rolling_var = df["log_ret"].rolling(252).var()

rolling_var.plot.line(ax=ax, title="252-Day Rolling Variance (≈1 trading year)")
ax.set_ylabel("Variance")

plt.tight_layout()
plt.show()


In [None]:
# Cell 5 – Stationarity diagnostics
# ADF and KPSS test opposite null hypotheses, so they are read as a pair:
#   ADF  H0: the series has a unit root (non-stationary)
#   KPSS H0: the series is stationary around a constant (regression="c")
ALPHA = 0.05  # significance level applied to both tests

series = df["log_ret"].dropna()

adf_stat, adf_p, *_ = adfuller(series)
# NOTE: statsmodels interpolates the KPSS p-value from a table and clips it to
# [0.01, 0.1]; a printed 0.01 / 0.1 therefore means "at most" / "at least".
kpss_stat, kpss_p, *_ = kpss(series, regression="c", nlags="auto")

print(f"ADF statistic   : {adf_stat: .4f}  |  p-value = {adf_p: .4g}")
print(f"KPSS statistic  : {kpss_stat: .4f}  |  p-value = {kpss_p: .4g}\n")

adf_rejects_unit_root = adf_p < ALPHA
kpss_keeps_stationarity = kpss_p > ALPHA

# Four possible outcomes; the two "disagreement" cases call for different
# remedies, so they are reported separately rather than lumped together.
if adf_rejects_unit_root and kpss_keeps_stationarity:
    print("→ Likely stationary (reject unit-root, fail to reject stationarity).")
elif adf_rejects_unit_root:
    print("→ Tests disagree (both nulls rejected): difference-stationary indication; "
          "consider differencing.")
elif kpss_keeps_stationarity:
    print("→ Tests disagree (neither null rejected): trend-stationary indication; "
          "consider detrending.")
else:
    print("→ Non-stationary indication; consider differencing or transformation.")

In [None]:
# Cell 6 – Correlation heat-map: NASDAQ vs. macro indicators
#   - keep the set small enough to be readable; adjust list as desired
base_feats = ["Close", "log_ret", "pct_change", "sma_20", "rsi_14"]

# Restrict the candidates to numeric columns before slicing: with pandas 2.x,
# DataFrame.corr() raises on string/datetime columns instead of dropping them,
# so one non-numeric column among the first ten would break the cell.
numeric_cols = df.select_dtypes(include="number").columns

# First 10 numeric columns that are not base features.
# NOTE(review): assumes every remaining numeric column is a macro indicator — confirm.
macro_feats = [c for c in numeric_cols if c not in base_feats][:10]
corr = df[base_feats + macro_feats].corr()

# Explicit figure/axes, matching the other plotting cells.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", square=True, ax=ax)
ax.set_title("Correlation Matrix")
plt.tight_layout()
plt.show()
