In [1]:
import numpy as np
import pandas as pd

In [None]:
# Fix the global NumPy seed so every run produces the same dataset
np.random.seed(42)

# Problem dimensions: time steps, number of assets, number of latent factors
T, N, K = 2000, 10, 3

# GARCH(1,1) coefficients; alpha + beta = 0.95 < 1
omega, alpha, beta = 0.01, 0.1, 0.85

# Latent factor realisations: standard-normal draws, shape (T, K)
F = np.random.standard_normal(size=(T, K))

# Factor loadings: uniform draws on [-1, 1), shape (N, K)
B = np.random.uniform(low=-1, high=1, size=(N, K))

# Noise-free factor-model signal: each row is F[t] projected through B, shape (T, N)
linear_signal = np.matmul(F, B.T)

# === GARCH(1,1) noise generator ===
def garch_noise(T, N, omega=0.1, alpha=0.05, beta=0.9):
    """Simulate N independent GARCH(1,1) noise series of length T.

    Every column evolves as

        sigma2[t]  = omega + alpha * epsilon[t-1]**2 + beta * sigma2[t-1]
        epsilon[t] = z[t] * sqrt(sigma2[t]),    z[t] ~ N(0, 1)

    with sigma2[0] set to the unconditional variance
    omega / (1 - alpha - beta).

    Parameters
    ----------
    T : int
        Number of time steps (rows of the result).
    N : int
        Number of independent series (columns of the result).
    omega : float
        Constant term of the conditional-variance recursion.
    alpha : float
        Weight on the previous squared innovation.
    beta : float
        Weight on the previous conditional variance.

    Returns
    -------
    numpy.ndarray
        Array of shape (T, N) holding the simulated innovations.

    Raises
    ------
    ValueError
        If ``1 - alpha - beta <= 0``, in which case the unconditional
        variance used for initialisation is undefined.

    Notes
    -----
    Draws come from NumPy's global random state (``np.random.randn``), so
    seed it beforehand for reproducible output.
    """
    stationarity_gap = 1 - alpha - beta
    if stationarity_gap <= 0:
        raise ValueError(
            "alpha + beta must be < 1 for a finite unconditional variance; "
            f"got alpha={alpha}, beta={beta}"
        )

    z = np.random.randn(T, N)
    epsilon = np.zeros((T, N))
    sigma2 = np.ones((T, N)) * omega / stationarity_gap  # unconditional variance init

    # Seed the first observation from the unconditional variance. Without this
    # row 0 stays at exactly zero. Slicing (rather than indexing) keeps T == 0
    # working.
    epsilon[:1] = z[:1] * np.sqrt(sigma2[:1])

    # The recursion is inherently sequential in t; it is vectorised across the
    # N columns only.
    for t in range(1, T):
        sigma2[t] = omega + alpha * epsilon[t - 1] ** 2 + beta * sigma2[t - 1]
        epsilon[t] = z[t] * np.sqrt(sigma2[t])

    return epsilon

# Generate GARCH noise with the GARCH parameters configured above.
# The call previously omitted them, so garch_noise fell back to its own
# defaults and the configured omega/alpha/beta were never used.
epsilon = garch_noise(T, N, omega=omega, alpha=alpha, beta=beta)

# Combine linear signal + GARCH noise
X = linear_signal + epsilon

# Optional z-score normalization of each asset column (currently disabled;
# the raw X is what gets saved below)
#X_mean = X.mean(axis=0)
#X_std = X.std(axis=0)
#X_normalized = (X - X_mean) / X_std

# Shared labels so the three tables stay consistent with each other
asset_names = [f"Asset_{i+1}" for i in range(N)]
factor_names = [f"Factor_{j+1}" for j in range(K)]

# Wrap in DataFrames
X_df = pd.DataFrame(X, columns=asset_names)
F_df = pd.DataFrame(F, columns=factor_names)
B_df = pd.DataFrame(B, columns=factor_names, index=asset_names)

# Save to CSVs (the loadings table keeps its asset index; the others do not)
X_df.to_csv("assetReturns_garch.csv", index=False)
F_df.to_csv("latentFactors_garch.csv", index=False)
B_df.to_csv("factorLoadings_garch.csv")

print("GARCH-like toy dataset saved")

GARCH-like toy dataset saved
