In [92]:
# imports 
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from scipy.linalg import orthogonal_procrustes
from sklearn.decomposition import PCA
import os
import random
from torch.utils.data import DataLoader

In [93]:
seed = 42
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


In [None]:
def compute_optimal_A_b_mu(X_np: np.ndarray, r: int, eps: float = 1e-8):
    # ---------- 1. mean & covariance ----------
    mu  = X_np.mean(axis=0)                         # (d,)
    X0  = X_np - mu
    T   = X_np.shape[0]
    S   = (X0.T @ X0) / (T - 1)                    # (d,d)

    try:
        K = np.linalg.cholesky(S + eps * np.eye(S.shape[0]))  # upper-triangular
    except np.linalg.LinAlgError:                             # still not D
        # fall back to eig-sqrt so the factor always exists
        eigvals, V = np.linalg.eigh(S)
        K = V @ np.diag(np.sqrt(np.maximum(eigvals, 0)))      # (d,d)

    U, _, _ = np.linalg.svd(K, full_matrices=False)           # U ∈ ℝ^{d×d}
    U_r   = U[:, :r]                                          # (d,r)

    A_map = U_r @ U_r.T                                       # (d,d)
    b     = (np.eye(S.shape[0]) - A_map) @ mu                 # (d,)

    return A_map.astype(np.float32), U_r.astype(np.float32), b.astype(np.float32), mu.astype(np.float32)


In [95]:
# python packaged affine autoencoder
"""A

class ClassicAffineAutoencoder(nn.Module):
    def __init__(self, input_dim, bottleneck_dim):
        super().__init__()
        self.encoder = nn.Linear(input_dim, bottleneck_dim, bias=True)
        self.decoder = nn.Linear(bottleneck_dim, input_dim, bias=True)

    def forward(self, x):
        z = self.encoder(x)
        x_hat = self.decoder(z)
        return x_hat
        """
class ClassicAffineAutoencoder(nn.Module):
    def __init__(self, input_dim, r):
        super().__init__()
        self.input_dim = input_dim
        self.r = r
        
        # The projection matrix A (10x10)
        self.projection = nn.Linear(input_dim, input_dim, bias=True)
        
        # Decoder from r-dimensional space
        self.decoder = nn.Linear(r, input_dim, bias=True)
        
    def encoder(self, x):
        """Extract r-dimensional factors from projected data"""
        projected = self.projection(x)  # Apply A matrix
        # Take first r dimensions or use SVD to get factors
        return projected[:, :self.r]  # Simple approach: take first r dims

    def forward(self, x):
        z = self.encoder(x)
        x_hat = self.decoder(z)
        return x_hat

In [96]:
class OptimalAffineAutoencoder(nn.Module):
    def __init__(self, input_dim, r, A_map, U_r, b_img, mu):
        super().__init__()
        self.register_buffer("A_map", torch.tensor(A_map, dtype=torch.float32))   # (d,d)
        self.register_buffer("Ur",    torch.tensor(U_r,   dtype=torch.float32))   # (d,r)
        self.register_buffer("b_img", torch.tensor(b_img, dtype=torch.float32))   # (d,)  image‑space bias
        self.register_buffer("mu",    torch.tensor(mu,    dtype=torch.float32))   # (d,)

    # -------- encoder / decoder pair used for reconstruction --------
    def encoder(self, x):                      # x : (batch,d)
        return (x - self.mu) @ self.Ur         # (batch,r)   NO bias

    def decoder(self, z):                      # z : (batch,r)
        return z @ self.Ur.T + self.mu         # (batch,d)

    # full affine projector  x ↦ Âx + b  (rarely used here)
    def full_map(self, x):
        return x @ self.A_map + self.b_img

    # -------- forward pass required by nn.Module --------
    def forward(self, x):
        return self.decoder(self.encoder(x))


In [97]:
# NONLINEAR AUTOENCODER 
class NonlinearAutoencoder(nn.Module):
    def __init__(self, input_dim, bottleneck_dim, hidden_dim=5):
        super().__init__()
        
        # Encoder: input -> hidden -> hidden -> bottleneck
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, bottleneck_dim)
        )
        
        # Decoder: bottleneck -> hidden -> hidden -> output
        self.decoder = nn.Sequential(
            nn.Linear(bottleneck_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim)
        )

    def forward(self, x):
        z = self.encoder(x)
        x_hat = self.decoder(z)
        return x_hat
    

In [98]:
class OptimalTrainableAffineAutoencoder(nn.Module):
    def __init__(self, input_dim, r, U_r_init, mu_init):
        super().__init__()
        self.Ur = nn.Parameter(torch.tensor(U_r_init, dtype=torch.float32))   # (d,r)
        self.mu = nn.Parameter(torch.tensor(mu_init, dtype=torch.float32))    # (d,)

    def encoder(self, x):
        return (x - self.mu) @ self.Ur

    def decoder(self, z):
        return z @ self.Ur.T + self.mu

    def forward(self, x):
        return self.decoder(self.encoder(x))

In [99]:
# 3. Modify train_autoencoder to accept loaders instead of raw tensor
def train_autoencoder(model, train_loader, val_loader, num_epochs, lr=1e-3):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0
        for batch in train_loader:
            batch = batch.to(device)
            optimizer.zero_grad()
            recon = model(batch)
            loss = criterion(recon, batch)
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item() * batch.size(0)

        avg_train_loss = total_train_loss / len(train_loader.dataset)
        train_losses.append(avg_train_loss)

        model.eval()
        total_val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                batch = batch.to(device)
                recon = model(batch)
                loss = criterion(recon, batch)
                total_val_loss += loss.item() * batch.size(0)

        avg_val_loss = total_val_loss / len(val_loader.dataset)
        val_losses.append(avg_val_loss)


    return model, train_losses, val_losses

In [100]:
# validation function only code for the optimal affine autoencoder
def valOnlyOptimalAffineAutoencoder(model, val_loader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    
    model.eval()
    total_loss = 0
    criterion = nn.MSELoss()

    with torch.no_grad():
        for batch in val_loader:
            batch = batch.to(device)
            recon = model(batch)
            loss = criterion(recon, batch)
            total_loss += loss.item() * batch.size(0)

    avg_loss = total_loss / len(val_loader.dataset)
    return avg_loss 

In [101]:
# %% -------------------------------------------------------------------- #
#  0.  DATA  &  THEORY‑CONSISTENT  OPTIMAL  PARAMETERS
# ----------------------------------------------------------------------- #
X_df      = pd.read_csv("assetReturns_ff3factor.csv")          # (T,d)
X_np      = X_df.to_numpy(dtype=np.float32)
X_tensor  = torch.tensor(X_np)

print(f"Data shape: {X_np.shape}")                         # (T, d)

input_dim = X_np.shape[1]
r         = 3                                              # bottleneck width

# --- new: returns projector A_map (d,d)  AND  frame U_r (d,r) ---
A_map, U_r, b, mu = compute_optimal_A_b_mu(X_np, r)

# %% -------------------------------------------------------------------- #
#  1.  RESULTS  DICT  (place‑holders kept for future variants)
# ----------------------------------------------------------------------- #
results = {
    'classic_mse':   [], 'optimal_mse':   [], 'pca_mse':      [],
    'classic_train_loss': [], 'classic_val_loss': [],
    'classic_factors':    [], 'optimal_factors':   [], 'pca_factors': []
}

# %% -------------------------------------------------------------------- #
#  2.  TRAIN / VAL  SPLIT  (chronological)
# ----------------------------------------------------------------------- #
n_samples  = X_tensor.shape[0]
train_size = int(0.8 * n_samples)

train_data = X_tensor[:train_size]
train_indices = np.arange(train_size)                      # <-- fixed
val_data   = X_tensor[train_size:]
val_indices = np.arange(train_size, n_samples)

# dates if you ever need them
val_dates = X_df.index[val_indices]

# %% -------------------------------------------------------------------- #
#  3.  DATALOADERS
# ----------------------------------------------------------------------- #
g = torch.Generator().manual_seed(seed)
batch_size  = 64
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True,  generator=g)
val_loader   = DataLoader(val_data,   batch_size=batch_size, shuffle=False)

# %% -------------------------------------------------------------------- #
#  4.  MODEL INSTANTIATION
# ----------------------------------------------------------------------- #
model_classic = ClassicAffineAutoencoder(input_dim, r)
model_optimal = OptimalAffineAutoencoder(input_dim, r, A_map, U_r, b, mu)

# %% -------------------------------------------------------------------- #
#  5.  TRAIN / EVAL
# ----------------------------------------------------------------------- #
# ----- Classic AE -----
model_classic, losses_classic_train, losses_classic_val = train_autoencoder(
    model_classic, train_loader, val_loader, num_epochs=70, lr=1e-3)

# ----- Optimal analytic projector (no training) -----
loss_optimal_val = valOnlyOptimalAffineAutoencoder(model_optimal, val_loader)

# store scalar metrics
results['classic_mse'].append(min(losses_classic_val))
results['optimal_mse'].append(loss_optimal_val)
results['classic_train_loss'].append(losses_classic_train[-1])
results['classic_val_loss'].append(losses_classic_val[-1])

# keep whole histories (optional diagnostics)
results.setdefault('classic_train_histories', []).append(losses_classic_train)
results.setdefault('classic_val_histories',   []).append(losses_classic_val)

# %% -------------------------------------------------------------------- #
#  6.  PCA  BASELINE
# ----------------------------------------------------------------------- #
X_train_np = train_data.numpy()
X_val_np   = val_data.numpy()

pca = PCA(n_components=r).fit(X_train_np)
X_val_recon = pca.inverse_transform(pca.transform(X_val_np))
mse_pca     = np.mean((X_val_np - X_val_recon) ** 2)
print(f"PCA baseline MSE: {mse_pca:.6f}")

# align PCA factors to true factors via Procrustes
F_true_full   = pd.read_csv("latentFactors_ff3factor.csv").to_numpy(dtype=np.float32)
F_true_tensor = torch.tensor(F_true_full[val_indices])
F_val         = F_true_tensor.numpy()

Z_pca         = pca.transform(X_val_np)
R_pca, _      = orthogonal_procrustes(Z_pca, F_val)
Z_pca_aligned = Z_pca @ R_pca
corr_pca      = np.abs([np.corrcoef(Z_pca_aligned[:, i], F_val[:, i])[0, 1]
                        for i in range(r)])

# store PCA stats
results['pca_mse'].append(mse_pca)
results['pca_factors'].append(corr_pca)

# %% -------------------------------------------------------------------- #
#  7.  ALIGNMENT HELPER  &  FACTOR  CORRELATIONS
# ----------------------------------------------------------------------- #
# ------------------------------------------------------------------ #
#  Drop‑in replacement for aligned_corr
# ------------------------------------------------------------------ #
def aligned_corr(model, X_val, F_true_val):
    """
    Align latent factors from `model` to F_true_val (Procrustes) and
    return absolute per‑factor correlations.
    Works even if the model has no nn.Parameters (buffers only).
    """
    # --- find the device of the model ---
    try:                                  # usual case: at least one Parameter
        dev = next(model.parameters()).device
    except StopIteration:                 # parameter‑less model (buffers only)
        dev = next(model.buffers()).device

    X_val = X_val.to(dev)

    with torch.no_grad():
        Z = model.encoder(X_val).cpu().numpy()
        F = F_true_val.cpu().numpy()

    R, _ = orthogonal_procrustes(Z, F)
    Z_aligned = Z @ R
    return np.abs([
        np.corrcoef(Z_aligned[:, i], F[:, i])[0, 1] for i in range(F.shape[1])
    ])


val_tensor = X_tensor[val_indices]                         # still CPU

results['classic_factors'].append(
    aligned_corr(model_classic, val_tensor, F_true_tensor))
results['optimal_factors'].append(
    aligned_corr(model_optimal, val_tensor, F_true_tensor))

# %% -------------------------------------------------------------------- #
#  8.  REPORT UTILITY
# ----------------------------------------------------------------------- #
def print_results_summary(res):
    # ---------- MSE ----------
    classic_mse = np.array(res['classic_mse'])
    optimal_mse = np.array(res['optimal_mse'])
    pca_mse     = np.array(res['pca_mse'])

    print("\nVALIDATION MSE")
    print(f"  Classic AE :  {classic_mse.mean():.8f}")
    print(f"  Optimal AE :  {optimal_mse.mean():.8f}")
    print(f"  PCA        :  {pca_mse.mean():.8f}")

    # ---------- train / val losses ----------
    tr_final = np.array(res['classic_train_loss'])
    va_final = np.array(res['classic_val_loss'])
    print("\nFINAL CLASSIC LOSSES")
    print(f"  Train: {tr_final.mean():.8f} | Val: {va_final.mean():.8f}")

    # ---------- factor correlations ----------
    classic_f  = np.array(res['classic_factors'])
    optimal_f  = np.array(res['optimal_factors'])
    pca_f      = np.array(res['pca_factors'])

    print("\nFACTOR RECOVERY")
    print(f"  Classic mean | std : {classic_f.mean():.4f} | {classic_f.std():.4f}")
    print(f"  Optimal mean | std : {optimal_f.mean():.4f} | {optimal_f.std():.4f}")
    print(f"  PCA     mean | std : {pca_f.mean():.4f} | {pca_f.std():.4f}")

    print(f"\nPer‑factor means:")
    header = f"{'Factor':<6}{'Classic':>10}{'Optimal':>10}{'PCA':>10}"
    print(header + "\n" + "-" * len(header))
    for i in range(r):
        print(f"{i+1:<6}{classic_f[:, i].mean():>10.4f}"
                    f"{optimal_f[:, i].mean():>10.4f}"
                    f"{pca_f[:, i].mean():>10.4f}")

    # ---------- summary table ----------
    summary_df = pd.DataFrame({
        'Model': ['Classic', 'Optimal', 'PCA'],
        'MSE_Mean': [classic_mse.mean(), optimal_mse.mean(), pca_mse.mean()],
        'Factor_Corr_Mean': [classic_f.mean(), optimal_f.mean(), pca_f.mean()]
    })
    print("\nSUMMARY")
    print(summary_df.round(6))

# %% -------------------------------------------------------------------- #
print_results_summary(results)


Data shape: (2000, 10)
PCA baseline MSE: 0.000016

VALIDATION MSE
  Classic AE :  0.00002010
  Optimal AE :  0.00001494
  PCA        :  0.00001562

FINAL CLASSIC LOSSES
  Train: 0.00001980 | Val: 0.00002010

FACTOR RECOVERY
  Classic mean | std : 0.2224 | 0.0425
  Optimal mean | std : 0.3555 | 0.3188
  PCA     mean | std : 0.3185 | 0.3446

Per‑factor means:
Factor   Classic   Optimal       PCA
------------------------------------
1         0.2821    0.8052    0.8057
2         0.1862    0.1584    0.0886
3         0.1988    0.1030    0.0613

SUMMARY
     Model  MSE_Mean  Factor_Corr_Mean
0  Classic  0.000020          0.222354
1  Optimal  0.000015          0.355529
2      PCA  0.000016          0.318534


In [102]:
# ------------------------------------------------------------------------- #
#  REPRODUCIBLE SEEDS
# ------------------------------------------------------------------------- #
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


# ------------------------------------------------------------------------- #
#  DATA SHAPES & DEVICE
# ------------------------------------------------------------------------- #
X_val   = val_data.numpy()            # only for external use later
r       = 3                           # number of latent factors
device  = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ------------------------------------------------------------------------- #
#  RESULTS CONTAINERS
# ------------------------------------------------------------------------- #
results = {
    'linear_train_mse':   [], 'linear_val_mse':   [],
    'nonlinear_train_mse':[], 'nonlinear_val_mse':[],
    'classic_factors':    [], 'nonlinear_factors':[],
    'classic_analysis':   [], 'nonlinear_analysis':[]
}

# ------------------------------------------------------------------------- #
#  CONSTANT VALIDATION TENSORS
# ------------------------------------------------------------------------- #
val_tensor        = X_tensor[val_indices]          # CPU master copy
val_tensor_device = val_tensor.to(device)          # GPU copy for factor grab

# ------------------------------------------------------------------------- #
#  HELPER: ALIGN & CORRELATE  (robust to param‑less models)
# ------------------------------------------------------------------------- #
def aligned_corr(model, X_val, F_true_val):
    """Align latent factors from `model` to true factors and return abs corr."""
    try:                     # get model device from parameters if present
        dev = next(model.parameters()).device
    except StopIteration:    # fall back to buffers (e.g. analytic models)
        dev = next(model.buffers()).device

    X_val = X_val.to(dev)
    with torch.no_grad():
        Z = model.encoder(X_val).cpu().numpy()
        F = F_true_val.cpu().numpy()

    R, _ = orthogonal_procrustes(Z, F)
    Z_aligned = Z @ R
    return np.abs([
        np.corrcoef(Z_aligned[:, i], F[:, i])[0, 1] for i in range(F.shape[1])
    ])


# ------------------------------------------------------------------------- #
#  MONTE‑CARLO RUNS
# ------------------------------------------------------------------------- #
for run in range(100):
    seed = 42 + run
    set_seed(seed)

    if (run + 1) % 10 == 0:
        print(f"Completed run {run + 1}/100")

    # ---------- models ----------
    modellinear    = ClassicAffineAutoencoder(input_dim, r).to(device)
    modelnonlinear = NonlinearAutoencoder(input_dim, r).to(device)

    modellinear,   train_losslinear,   val_losslinear   = train_autoencoder(
        modellinear,   train_loader, val_loader, num_epochs=150, lr=1e-3)
    modelnonlinear,train_lossnonlinear,val_lossnonlinear= train_autoencoder(
        modelnonlinear,train_loader, val_loader, num_epochs=100, lr=1e-3)

    # ---------- store MSE ----------
    results['linear_train_mse'].append(train_losslinear[-1])
    results['linear_val_mse']  .append(val_losslinear[-1])
    results['nonlinear_train_mse'].append(train_lossnonlinear[-1])
    results['nonlinear_val_mse']  .append(val_lossnonlinear[-1])

    # ---------- factors on GPU, then back to CPU ----------
    classic_factors   = modellinear   .encoder(val_tensor_device) \
                                     .detach().cpu().numpy()
    nonlinear_factors = modelnonlinear.encoder(val_tensor_device) \
                                     .detach().cpu().numpy()

    results['classic_factors'].append(classic_factors)
    results['nonlinear_factors'].append(nonlinear_factors)

    # ---------- correlations ----------
    results['classic_analysis'].append(
        aligned_corr(modellinear,   val_tensor, F_true_tensor))
    results['nonlinear_analysis'].append(
        aligned_corr(modelnonlinear,val_tensor, F_true_tensor))

# ------------------------------------------------------------------------- #
#  SUMMARY  (MSE + FACTOR CORR)
# ------------------------------------------------------------------------- #
print("\n" + "="*60 + "\nRESULTS SUMMARY\n" + "="*60)

print("\nMSE RESULTS:")
print(f"  Linear   Train: {np.mean(results['linear_train_mse']):.8f} ± "
      f"{np.std(results['linear_train_mse']):.8f}")
print(f"  Linear   Val  : {np.mean(results['linear_val_mse']):.8f} ± "
      f"{np.std(results['linear_val_mse']):.8f}")
print(f"  Nonlinear Train: {np.mean(results['nonlinear_train_mse']):.8f} ± "
      f"{np.std(results['nonlinear_train_mse']):.8f}")
print(f"  Nonlinear Val  : {np.mean(results['nonlinear_val_mse']):.8f} ± "
      f"{np.std(results['nonlinear_val_mse']):.8f}")

def print_results_summary(res):
    print("\nFACTOR RECOVERY ANALYSIS")

    classic_corr   = np.array(res['classic_analysis'])     # (runs,r)
    nonlinear_corr = np.array(res['nonlinear_analysis'])   # (runs,r)

    print(f"\nOverall correlations:")
    print(f"  Classic  : {classic_corr.mean():.4f} ± {classic_corr.std():.4f}")
    print(f"  Nonlinear: {nonlinear_corr.mean():.4f} ± "
          f"{nonlinear_corr.std():.4f}")

    print("\nPer‑factor means:")
    header = f"{'Factor':<8}{'Classic':>12}{'Nonlinear':>12}"
    print(header + "\n" + "-" * len(header))
    for i in range(classic_corr.shape[1]):
        print(f"{i+1:<8}{classic_corr[:, i].mean():>12.4f}"
                    f"{nonlinear_corr[:, i].mean():>12.4f}")

print_results_summary(results)


  c /= stddev[:, None]
  c /= stddev[None, :]


Completed run 10/100
Completed run 20/100
Completed run 30/100
Completed run 40/100
Completed run 50/100
Completed run 60/100
Completed run 70/100
Completed run 80/100
Completed run 90/100
Completed run 100/100

RESULTS SUMMARY

MSE RESULTS:
  Linear   Train: 0.00001887 ± 0.00000100
  Linear   Val  : 0.00001960 ± 0.00000120
  Nonlinear Train: 0.00002541 ± 0.00000162
  Nonlinear Val  : 0.00002642 ± 0.00000181

FACTOR RECOVERY ANALYSIS

Overall correlations:
  Classic  : 0.1944 ± 0.1892
  Nonlinear: nan ± nan

Per‑factor means:
Factor       Classic   Nonlinear
--------------------------------
1             0.3723         nan
2             0.1263         nan
3             0.0845         nan
