# 02 — Rolling Correlation Construction
Compute rolling correlations for stock pairs using a fixed window (e.g., 100 days).


In [None]:
import os
import numpy as np
import pandas as pd

# Path to the processed returns table, relative to the notebook's working directory.
RET_PATH = "../data/processed/returns.parquet"
# NOTE(review): presumably one row per date and one column per ticker — confirm against the upstream step.
rets = pd.read_parquet(RET_PATH)
# Bare expression: as the last line of a notebook cell it displays the (rows, columns) tuple.
rets.shape

In [None]:
# Define rolling correlation builder
def rolling_corr_series(rets_df, a, b, window=100):
    """Return the rolling correlation between two columns of ``rets_df``.

    Args:
        rets_df: DataFrame holding the two series to correlate.
        a: Label of the first column.
        b: Label of the second column.
        window: Rolling window passed to ``Series.rolling``.

    Returns:
        A Series of rolling correlations with every NaN entry removed
        (e.g. the leading rows where the window is not yet full).
    """
    left, right = rets_df[a], rets_df[b]
    windowed = left.rolling(window)
    corr = windowed.corr(right)
    # Drop positions where the correlation is undefined.
    return corr.dropna()

def sample_pairs(columns, n_pairs=200, seed=42):
    """Randomly sample distinct unordered pairs of column labels.

    Each pair is stored as ``(a, b)`` with ``a <= b`` so that ``(x, y)`` and
    ``(y, x)`` count as the same pair.

    Args:
        columns: Sequence of column labels to draw from. Labels are assumed
            to be unique and mutually comparable with ``>``.
        n_pairs: Number of distinct pairs to return.
        seed: Seed for ``numpy.random.default_rng``.

    Returns:
        A list of ``n_pairs`` tuples in the order they were first drawn, so
        the result is fully determined by ``columns``, ``n_pairs`` and
        ``seed``. A non-positive ``n_pairs`` yields an empty list.

    Raises:
        ValueError: If ``n_pairs`` exceeds the number of distinct unordered
            pairs that ``columns`` can form.
    """
    rng = np.random.default_rng(seed)
    cols = np.array(columns)
    n_cols = len(cols)

    # Without this guard the rejection-sampling loop below would never end.
    max_pairs = n_cols * (n_cols - 1) // 2
    if n_pairs > max_pairs:
        raise ValueError(
            f"n_pairs={n_pairs} exceeds the {max_pairs} distinct pairs "
            f"available from {n_cols} columns"
        )

    # Keep an ordered list next to the membership set: iterating a set of
    # string tuples depends on per-process hash randomisation, which would
    # make the output order irreproducible despite the fixed seed.
    pairs = []
    seen = set()
    while len(pairs) < n_pairs:
        i, j = rng.choice(n_cols, size=2, replace=False)
        a, b = cols[i], cols[j]
        if a > b:
            a, b = b, a
        pair = (a, b)
        if pair not in seen:
            seen.add(pair)
            pairs.append(pair)
    return pairs

In [None]:
# Build correlations (start with sampled pairs)
# Rolling window length shared by every sampled pair.
WINDOW = 100
pairs = sample_pairs(rets.columns, n_pairs=200, seed=1)

# One rolling-correlation series per sampled pair, keyed "<a>__<b>".
corr_dict = {
    f"{a}__{b}": rolling_corr_series(rets, a, b, window=WINDOW)
    for a, b in pairs
}

# Align the series on their index, then keep only rows where every pair has a value.
corr_df = pd.DataFrame(corr_dict)
corr_df = corr_df.dropna()
corr_df.shape

In [None]:
# Create the output directory if it is missing; exist_ok avoids an error when it already exists.
os.makedirs("../data/processed", exist_ok=True)
# Persist the rolling-correlation table to a parquet file in that directory.
corr_df.to_parquet("../data/processed/rolling_corr_sample.parquet")