In [1]:
# imports 
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from scipy.linalg import orthogonal_procrustes
from sklearn.decomposition import PCA
import os
import random
from torch.utils.data import DataLoader

In [2]:
seed = 42
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


In [3]:
def compute_optimal_A_b_mu(X_np, r):
    mu = np.mean(X_np, axis=0)
    cov = (X_np - mu).T @ (X_np - mu)
    U, S, _ = np.linalg.svd(cov)
    Ur = U[:, :r]  # shape (d, r)
    
    A = Ur @ Ur.T  # encoder projects from d->r: (r, d)
    b = np.zeros(r)  # no bias needed in latent space
    
    return A, b, mu

In [4]:
# python packaged affine autoencoder
"""A

class ClassicAffineAutoencoder(nn.Module):
    def __init__(self, input_dim, bottleneck_dim):
        super().__init__()
        self.encoder = nn.Linear(input_dim, bottleneck_dim, bias=True)
        self.decoder = nn.Linear(bottleneck_dim, input_dim, bias=True)

    def forward(self, x):
        z = self.encoder(x)
        x_hat = self.decoder(z)
        return x_hat
        """
class ClassicAffineAutoencoder(nn.Module):
    def __init__(self, input_dim, r):
        super().__init__()
        self.input_dim = input_dim
        self.r = r
        
        # The projection matrix A (10x10)
        self.projection = nn.Linear(input_dim, input_dim, bias=True)
        
        # Decoder from r-dimensional space
        self.decoder = nn.Linear(r, input_dim, bias=True)
        
    def encoder(self, x):
        """Extract r-dimensional factors from projected data"""
        projected = self.projection(x)  # Apply A matrix
        # Take first r dimensions or use SVD to get factors
        return projected[:, :self.r]  # Simple approach: take first r dims

    def forward(self, x):
        z = self.encoder(x)
        x_hat = self.decoder(z)
        return x_hat

In [5]:
class OptimalAffineAutoencoder(nn.Module):
    def __init__(self, input_dim, r, A, b, mu):
        super().__init__()
        self.A = torch.tensor(A, dtype=torch.float32)
        self.b = torch.tensor(b, dtype=torch.float32)
        self.mu = torch.tensor(mu, dtype=torch.float32)

    def encoder(self, x):
        # Return centered projected latent space: Ur^T(x - mu)
        x_centered = x - self.mu  # center by mean
        UrT = self.A  # A = Ur Ur^T ⇒ A.T = Ur^T Ur ⇒ encoder ~ Ur^T
        #return x_centered @ UrT  # [batch_size, d] x [d, r] → [batch_size, r]
        return x_centered @ UrT  # i changed this switched the order
    def decoder(self, z):
        # Reconstruct from latent space: Ur * z + mu
        return z @ self.A + self.mu # [batch_size, r] x [r, d] → [batch_size, d]

    def forward(self, x):
        # Complete encode-decode cycle
        z = self.encoder(x)
    
        return self.decoder(z)

In [6]:
# NONLINEAR AUTOENCODER 
class NonlinearAutoencoder(nn.Module):
    def __init__(self, input_dim, bottleneck_dim, hidden_dim=5):
        super().__init__()
        
        # Encoder: input -> hidden -> hidden -> bottleneck
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, bottleneck_dim)
        )
        
        # Decoder: bottleneck -> hidden -> hidden -> output
        self.decoder = nn.Sequential(
            nn.Linear(bottleneck_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim)
        )

    def forward(self, x):
        z = self.encoder(x)
        x_hat = self.decoder(z)
        return x_hat
    

In [7]:
# this is a combination modeel that allows for training after instantiation of optimal theoretical weights. 
class OptimalTrainableAffineAutoencoder(nn.Module):
    def __init__(self, input_dim, r, A_init, b_init, mu_init):
        super().__init__()
        self.A = nn.Parameter(torch.tensor(A_init, dtype=torch.float32))  # (r, d)
        self.b = nn.Parameter(torch.tensor(b_init, dtype=torch.float32))  # (r,)
        self.mu = nn.Parameter(torch.tensor(mu_init, dtype=torch.float32))  # (d,)

    def encoder(self, x):
        x_centered = x - self.mu  # (batch, d)
        return x_centered @ self.A + self.b  # (batch, r)

    def decoder(self, z):
        return z @ self.A + self.mu  # (batch, d) I changed this too

    def forward(self, x):
        z = self.encoder(x)
        return self.decoder(z)


In [8]:
# 3. Modify train_autoencoder to accept loaders instead of raw tensor
def train_autoencoder(model, train_loader, val_loader, num_epochs, lr=1e-3):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0
        for batch in train_loader:
            batch = batch.to(device)
            optimizer.zero_grad()
            recon = model(batch)
            loss = criterion(recon, batch)
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item() * batch.size(0)

        avg_train_loss = total_train_loss / len(train_loader.dataset)
        train_losses.append(avg_train_loss)

        model.eval()
        total_val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                batch = batch.to(device)
                recon = model(batch)
                loss = criterion(recon, batch)
                total_val_loss += loss.item() * batch.size(0)

        avg_val_loss = total_val_loss / len(val_loader.dataset)
        val_losses.append(avg_val_loss)


    return model, train_losses, val_losses

In [9]:
# validation function only code for the optimal affine autoencoder
def valOnlyOptimalAffineAutoencoder(model, val_loader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    
    model.eval()
    total_loss = 0
    criterion = nn.MSELoss()

    with torch.no_grad():
        for batch in val_loader:
            batch = batch.to(device)
            recon = model(batch)
            loss = criterion(recon, batch)
            total_loss += loss.item() * batch.size(0)

    avg_loss = total_loss / len(val_loader.dataset)
    return avg_loss 

In [10]:
# get data ready
X_df = pd.read_csv("assetReturns_garch.csv")
X_np = X_df.to_numpy().astype(np.float32)
X_tensor = torch.tensor(X_np)

print(f"Data shape: {X_np.shape}")

# set dims and latent space size 
input_dim = X_np.shape[1]
r = 3  # bottleneck dimension

# compute the optimal params (only once, outside the loop)
A, b, mu = compute_optimal_A_b_mu(X_np, r)

# Initialize results stoage

results = {
    'classic_mse': [],
    'optimal_mse': [],
    'trainable_mse': [],
    'nonlinear_mse': [],    
    'classic_train_loss': [],
    'classic_val_loss': [],
    'trainable_train_loss': [],
    'trainable_val_loss': [],
    'nonlinear_train_loss': [],
    'nonlinear_val_loss': [],
    'classic_factors': [],
    'optimal_factors': [],
    'trainable_factors': [],
    'nonlinear_factors': [],    
    'classic_train_histories': [],
    'trainable_train_histories': [],
    'classic_val_histories': [],
    'trainable_val_histories': [],
    'nonlinear_train_histories': [],
    'nonlinear_val_histories': []
}
  
    # split data into training and validation with current seed

n_samples = X_tensor.shape[0]
train_size = int(0.8 * n_samples)
val_size = n_samples - train_size

# Slice chronologically
train_data = X_tensor[:train_size]
train_indices = train_data.indices
val_data = X_tensor[train_size:]
val_indices = np.arange(train_size, len(X_np))
val_dates = X_df.index[val_indices]

# create the data loaders
g = torch.Generator()
g.manual_seed(seed)
batch_size = 64
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
    
    # initialize models for this run
model_classic = ClassicAffineAutoencoder(input_dim, r)
model_optimal = OptimalAffineAutoencoder(input_dim, r, A, b, mu)
#model_optimal_trainable = OptimalTrainableAffineAutoencoder(input_dim, r, A, b, mu)
#model_nonlinear = NonlinearAutoencoder(input_dim, r, hidden_dim=128)
    
    # Train classic model
model_classic, losses_classic_train, losses_classic_val = train_autoencoder(
    model_classic, 
    train_loader,
    val_loader,
    70,
    0.001
    )
    
    # Validation only on optimal model
losses_optimal_val = valOnlyOptimalAffineAutoencoder(model_optimal, val_loader)
    
    # Train optimal trainable model
"""
    model_optimal_trainable, losses_trainable_train, losses_trainable_val = train_autoencoder(
    model_optimal_trainable,
    train_loader,
    val_loader,
    20,
    0.001
)
"""

"""
model_nonlinear, losses_nonlinear_train, losses_nonlinear_val = train_autoencoder(
    model_nonlinear,
    train_loader,
    val_loader,
    70,
    0.001
)
"""
    
    # Store results for this run
results['classic_mse'].append(min(losses_classic_val))
results['optimal_mse'].append(losses_optimal_val)
#results['trainable_mse'].append(min(losses_trainable_val))
#results['nonlinear_mse'].append(min(losses_nonlinear_val))

results['classic_train_loss'].append(losses_classic_train[-1])
results['classic_val_loss'].append(losses_classic_val[-1])
#results['trainable_train_loss'].append(losses_trainable_train[-1])
#results['trainable_val_loss'].append(losses_trainable_val[-1])
#results['nonlinear_train_loss'].append(losses_nonlinear_train[-1])
#results['nonlinear_val_loss'].append(losses_nonlinear_val[-1])

    # Store full training histories
results['classic_train_histories'].append(losses_classic_train)
results['classic_val_histories'].append(losses_classic_val)
#results['trainable_train_histories'].append(losses_trainable_train)
#results['trainable_val_histories'].append(losses_trainable_val)
#results['nonlinear_train_histories'].append(losses_nonlinear_train)
#results['nonlinear_val_histories'].append(losses_nonlinear_val)

    # Convert to NumPy arrays
X_train_np = train_data.numpy()
X_val_np = val_data.numpy()

    # Load ground truth latent factors (full set) and slice val rows
F_true_full = pd.read_csv("latentFactors_garch.csv").to_numpy().astype(np.float32)
F_true_tensor = torch.tensor(F_true_full[val_indices])

    # --------------------------------------------
    # PCA BASELINE: Fit on training data, evaluate on val
    # --------------------------------------------

    # Fit PCA on training data
pca = PCA(n_components=r)
pca.fit(X_train_np)

    # Reconstruct validation data
X_val_recon = pca.inverse_transform(pca.transform(X_val_np))

    # Compute MSE on validation set
pca_mse = np.mean((X_val_np - X_val_recon) ** 2)
print(f"PCA baseline MSE: {pca_mse:.6f}")
    
    # Transform validation data
Z_pca = pca.transform(X_val_np)

    # Align to true latent factors using Procrustes
F = F_true_tensor.numpy()
R_pca, _ = orthogonal_procrustes(Z_pca, F)
Z_pca_aligned = Z_pca @ R_pca

    # Compute correlation per factor
corr_pca = np.abs([
        np.corrcoef(Z_pca_aligned[:, i], F[:, i])[0, 1]
        for i in range(F.shape[1])
])
    
    # Compute reconstruction MSE on validation data
X_val_reconstructed = pca.inverse_transform(Z_pca)
mse_pca = np.mean((X_val_np - X_val_reconstructed) ** 2)

    # Store results
results.setdefault('pca_mse', []).append(mse_pca)
results.setdefault('pca_factors', []).append(corr_pca)

 # FACTOR ANALYSIS: Procrustes-aligned correlations
  
    # Get validation data as tensor
val_tensor = X_tensor[val_indices]

    # Helper function to align & compute correlation
def aligned_corr(model, X_val, F_true_val):
    with torch.no_grad():
        Z = model.encoder(X_val).cpu().numpy()
        F = F_true_val.cpu().numpy()
    R, _ = orthogonal_procrustes(Z, F)
    Z_aligned = Z @ R
    return np.abs([
        np.corrcoef(Z_aligned[:, i], F[:, i])[0, 1]
        for i in range(F.shape[1])
    ])

    # Compute and store factor correlations
results['classic_factors'].append(
    aligned_corr(model_classic, val_tensor, F_true_tensor)
)
results['optimal_factors'].append(
    aligned_corr(model_optimal, val_tensor, F_true_tensor)
)
#results['trainable_factors'].append(
    #aligned_corr(model_optimal_trainable, val_tensor, F_true_tensor)
#)
#results['nonlinear_factors'].append(
    #aligned_corr(model_nonlinear, val_tensor, F_true_tensor)
#)

def print_results_summary(results):
    # MSE Results (Validation Loss)
    print("\n VALIDATION MSE RESULTS")
    
    classic_mse = np.array(results['classic_mse'])
    optimal_mse = np.array(results['optimal_mse'])
    #trainable_mse = np.array(results['trainable_mse'])
    #nonlinear_mse = np.array(results['nonlinear_mse'])  # NEW
    pca_mse = np.array(results['pca_mse'])  # NEW

    print(f"Classic Autoencoder:")
    print(f"  Mean MSE: {classic_mse.mean():.8f} ")
    print(f"  Median:   {np.median(classic_mse):.8f}")
    print(f"  Min/Max:  {classic_mse.min():.8f}  ")
    
    print(f"\nOptimal Autoencoder:")
    print(f"  Mean MSE: {optimal_mse.mean():.8f} ")
    print(f"  Median:   {np.median(optimal_mse):.8f}")
    print(f"  Min/Max:  {optimal_mse.min():.8f}  {optimal_mse.max():.8f}")
    
    #print(f"\nTrainable Optimal Autoencoder:")
    #print(f"  Mean MSE: {trainable_mse.mean():.8f} ")
    #print(f"  Median:   {np.median(trainable_mse):.8f}")
    #print(f"  Min/Max:  {trainable_mse.min():.8f}  {trainable_mse.max():.8f}")

    #print(f"\nNonlinear Autoencoder:")
    #print(f"  Mean MSE: {nonlinear_mse.mean():.8f} ")
    #print(f"  Median:   {np.median(nonlinear_mse):.8f}")
    #print(f"  Min/Max:  {nonlinear_mse.min():.8f}  {nonlinear_mse.max():.8f}")

    print(f"\nPCA Baseline:")
    print(f"  Mean MSE: {pca_mse.mean():.8f} ")
    print(f"  Median:   {np.median(pca_mse):.8f}")
    print(f"  Min/Max:  {pca_mse.min():.8f}  {pca_mse.max():.8f}")
    
    # Final Training/Validation Losses
    print(f"\n FINAL TRAINING/VALIDATION LOSSES")
    
    classic_train_final = np.array(results['classic_train_loss'])
    classic_val_final = np.array(results['classic_val_loss'])
    #trainable_train_final = np.array(results['trainable_train_loss'])
    #trainable_val_final = np.array(results['trainable_val_loss'])
    #nonlinear_train_final = np.array(results['nonlinear_train_loss'])  # NEW
    #nonlinear_val_final = np.array(results['nonlinear_val_loss'])  # NEW            
    
    print(f"Classic Autoencoder:")
    print(f"  Final Train Loss: {classic_train_final.mean():.8f} ")
    print(f"  Final Val Loss:   {classic_val_final.mean():.8f} ")
    print(f"  Train/Val Ratio:  {(classic_train_final.mean()/classic_val_final.mean()):.4f}")
    
    #print(f"\nTrainable Optimal Autoencoder:")
    #print(f"  Final Train Loss: {trainable_train_final.mean():.8f} ")
    #print(f"  Final Val Loss:   {trainable_val_final.mean():.8f} ")
    #print(f"  Train/Val Ratio:  {(trainable_train_final.mean()/trainable_val_final.mean()):.4f}")

    #print(f"\nNonlinear Autoencoder:")
    #print(f"  Final Train Loss: {nonlinear_train_final.mean():.8f} ")
    #print(f"  Final Val Loss:   {nonlinear_val_final.mean():.8f} ")
    #print(f"  Train/Val Ratio:  {(nonlinear_train_final.mean()/nonlinear_val_final.mean()):.4f}")       
    
    # Factor Analysis Results
    print(f"\n FACTOR RECOVERY ANALYSIS")

    classic_factors = np.array(results['classic_factors'])
    optimal_factors = np.array(results['optimal_factors'])
   #trainable_factors = np.array(results['trainable_factors'])
    #nonlinear_factors = np.array(results['nonlinear_factors'])  # NEW
    pca_factors = np.array(results['pca_factors'])  # NEW
    
    print(f"Factor Correlations:")
    print(f"  Classic:           {classic_factors.mean():.4f}")
    print(f"  Optimal:           {optimal_factors.mean():.4f} ")
    #print(f"  Trainable Optimal: {trainable_factors.mean():.4f} ")
    #print(f"  Nonlinear:        {nonlinear_factors.mean():.4f} ")
    print(f"  PCA:               {pca_factors.mean():.4f} ")
    
    print(f"\nPer-Factor Correlations:")
    print(f"{'Factor':<8} {'Classic':<10} {'Optimal':<10} {'Trainable':<10} {'PCA':<10}")
    
    for i in range(classic_factors.shape[1]):
        factor_classic = classic_factors[:, i].mean()
        factor_optimal = optimal_factors[:, i].mean()
        #factor_trainable = trainable_factors[:, i].mean()
        factor_pca = pca_factors[:, i].mean()
        #factor_nonlinear = nonlinear_factors[:, i].mean()  # NEW
        #print(f"{i+1:<8} {factor_classic:<10.4f} {factor_optimal:<10.4f} {factor_trainable:<10.4f} {factor_pca:<10.4f} {factor_nonlinear:<10.4f}")

    # Summary Table
    print(f"\n SUMMARY TABLE")
    
    summary_df = pd.DataFrame({
        'Model': ['Classic', 'Optimal', 'Trainable', 'PCA', 'Nonlinear'],
        'MSE_Mean': [
            classic_mse.mean(), 
            optimal_mse.mean(), 
            #trainable_mse.mean(), 
            pca_mse.mean(),
            #nonlinear_mse.mean()  # NEW
        ],

        'Factor_Corr_Mean': [
            classic_factors.mean(), 
            optimal_factors.mean(), 
            #trainable_factors.mean(),
            pca_factors.mean(),
            #nonlinear_factors.mean()
        ]
    })
    
    print(summary_df.round(6))
print_results_summary(results)  

FileNotFoundError: [Errno 2] No such file or directory: 'assetReturns_garch.csv'

In [None]:
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

X_val = val_data.numpy()
r = 3  # number of latent factors
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

results = {
    'linear_train_mse': [],
    'linear_val_mse': [],
    'nonlinear_train_mse': [],
    'nonlinear_val_mse': [],
    'classic_factors': [],
    'nonlinear_factors': [],
    'classic_analysis': [],
    'nonlinear_analysis': []
}

# Get validation data as tensor (move this before the loop)
val_tensor = X_tensor[val_indices]

# Helper function to align & compute correlation
def aligned_corr(model, X_val, F_true_val):
    with torch.no_grad():
        Z = model.encoder(X_val).cpu().numpy()
        F = F_true_val.cpu().numpy()
    R, _ = orthogonal_procrustes(Z, F)
    Z_aligned = Z @ R
    return np.abs([
        np.corrcoef(Z_aligned[:, i], F[:, i])[0, 1]
        for i in range(F.shape[1])
    ])

# Run 100 iterations
for run in range(100):
    seed = 42 + run  # Different seed for each run
    set_seed(seed)
    
    # Print progress every 10 runs
    if (run + 1) % 10 == 0:
        print(f"Completed run {run + 1}/100")
    
    # Init model and train model
    modellinear = ClassicAffineAutoencoder(input_dim, r).to(device)
    modelnonlinear = NonlinearAutoencoder(input_dim, r).to(device)
    
    modellinear, train_losslinear, val_losslinear = train_autoencoder(
        modellinear, train_loader, val_loader, num_epochs=150, lr=0.001)
    modelnonlinear, train_lossnonlinear, val_lossnonlinear = train_autoencoder(
        modelnonlinear, train_loader, val_loader, num_epochs=100, lr=0.001)
    
    # Store final MSE values (assuming losses are MSE)
    results['linear_train_mse'].append(train_losslinear[-1])  # Final training MSE
    results['linear_val_mse'].append(val_losslinear[-1])      # Final validation MSE
    results['nonlinear_train_mse'].append(train_lossnonlinear[-1])  # Final training MSE
    results['nonlinear_val_mse'].append(val_lossnonlinear[-1])      # Final validation MSE
    
    # Get factors for this run
    classic_factors = modellinear.encoder(val_data).detach().cpu().numpy()
    nonlinear_factors = modelnonlinear.encoder(val_data).detach().cpu().numpy()
    
    # Store factors for this run
    results['classic_factors'].append(classic_factors)
    results['nonlinear_factors'].append(nonlinear_factors)
    
    # Compute and store factor correlations (MOVED INSIDE THE LOOP)
    results['classic_analysis'].append(
        aligned_corr(modellinear, val_tensor, F_true_tensor)
    )
    results['nonlinear_analysis'].append(
        aligned_corr(modelnonlinear, val_tensor, F_true_tensor)
    )

# After all runs are complete, compute summary statistics
print("\n" + "="*60)
print("RESULTS SUMMARY")
print("="*60)

# Print MSE results
print("\nMSE RESULTS:")
print(f"  Linear Train MSE:    {np.mean(results['linear_train_mse']):.8f} ± {np.std(results['linear_train_mse']):.8f}")
print(f"  Linear Val MSE:      {np.mean(results['linear_val_mse']):.8f} ± {np.std(results['linear_val_mse']):.8f}")
print(f"  Nonlinear Train MSE: {np.mean(results['nonlinear_train_mse']):.8f} ± {np.std(results['nonlinear_train_mse']):.8f}")
print(f"  Nonlinear Val MSE:   {np.mean(results['nonlinear_val_mse']):.8f} ± {np.std(results['nonlinear_val_mse']):.8f}")

def print_results_summary(results):
    # Factor Analysis Results
    print(f"\nFACTOR RECOVERY ANALYSIS")
    
    # Use the correct keys - these contain correlation results
    classic_factors = np.array(results['classic_analysis'])  # Shape: (100, 3)
    nonlinear_factors = np.array(results['nonlinear_analysis'])  # Shape: (100, 3)
    
    print(f"\nOverall Factor Correlations (mean across all factors and runs):")
    print(f"  Classic:    {classic_factors.mean():.4f} ± {classic_factors.std():.4f}")
    print(f"  Nonlinear:  {nonlinear_factors.mean():.4f} ± {nonlinear_factors.std():.4f}")
    
    print(f"\nPer-Factor Correlations (mean across runs):")
    print(f"{'Factor':<8} {'Classic':<12} {'Nonlinear':<12}")
    print("-" * 35)
    
    for i in range(classic_factors.shape[1]):
        factor_classic_mean = classic_factors[:, i].mean()
        factor_classic_std = classic_factors[:, i].std()
        factor_nonlinear_mean = nonlinear_factors[:, i].mean()
        factor_nonlinear_std = nonlinear_factors[:, i].std()
        print(f"{i+1:<8} {factor_classic_mean:.4f}±{factor_classic_std:.4f}   {factor_nonlinear_mean:.4f}±{factor_nonlinear_std:.4f}")

# Call the function
print_results_summary(results)

  c /= stddev[:, None]
  c /= stddev[None, :]


Completed run 10/100
Completed run 20/100
Completed run 30/100
Completed run 40/100
Completed run 50/100
Completed run 60/100
Completed run 70/100
Completed run 80/100
Completed run 90/100
Completed run 100/100

RESULTS SUMMARY

MSE RESULTS:
  Linear Train MSE:    0.00001867 ± 0.00000089
  Linear Val MSE:      0.00001944 ± 0.00000109
  Nonlinear Train MSE: 0.00002553 ± 0.00000157
  Nonlinear Val MSE:   0.00002658 ± 0.00000172

FACTOR RECOVERY ANALYSIS

Overall Factor Correlations (mean across all factors and runs):
  Classic:    0.2052 ± 0.1830
  Nonlinear:  nan ± nan

Per-Factor Correlations (mean across runs):
Factor   Classic      Nonlinear   
-----------------------------------
1        0.3908±0.2062   nan±nan
2        0.1299±0.0517   nan±nan
3        0.0950±0.0548   nan±nan


In [None]:
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

X_val = val_data.numpy()
r = 3  # number of latent factors
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

results = {
    'linear_train_mse': [],
    'linear_val_mse': [],
    'nonlinear_train_mse': [],
    'nonlinear_val_mse': [],
    'classic_factors': [],
    'nonlinear_factors': [],
    'classic_analysis': [],
    'nonlinear_analysis': []
}

# Get validation data as tensor (move this before the loop)
val_tensor = X_tensor[val_indices]

# Helper function to align & compute correlation
def aligned_corr(model, X_val, F_true_val):
    with torch.no_grad():
        Z = model.encoder(X_val).cpu().numpy()
        F = F_true_val.cpu().numpy()
    
    # Check for problematic values
    if np.any(np.isnan(Z)) or np.any(np.isinf(Z)):
        print(f"Warning: NaN or Inf values in encoded factors Z")
        return np.full(F.shape[1], np.nan)
    
    # Check variance of encoded factors
    z_vars = np.var(Z, axis=0)
    if np.any(z_vars < 1e-10):
        print(f"Warning: Very low variance in encoded factors: {z_vars}")
        return np.full(F.shape[1], np.nan)
    
    R, _ = orthogonal_procrustes(Z, F)
    Z_aligned = Z @ R
    
    correlations = []
    for i in range(F.shape[1]):
        # Check if either variable has zero variance
        if np.var(Z_aligned[:, i]) < 1e-10 or np.var(F[:, i]) < 1e-10:
            correlations.append(np.nan)
        else:
            corr = np.corrcoef(Z_aligned[:, i], F[:, i])[0, 1]
            correlations.append(np.abs(corr) if not np.isnan(corr) else np.nan)
    
    return np.array(correlations)

# Run 100 iterations
for run in range(100):
    seed = 42 + run  # Different seed for each run
    set_seed(seed)
    
    # Print progress every 10 runs
    if (run + 1) % 10 == 0:
        print(f"Completed run {run + 1}/100")
    
    # Init model and train model
    modellinear = ClassicAffineAutoencoder(input_dim, r).to(device)
    modelnonlinear = NonlinearAutoencoder(input_dim, r).to(device)
    
    modellinear, train_losslinear, val_losslinear = train_autoencoder(
        modellinear, train_loader, val_loader, num_epochs=150, lr=0.001)
    modelnonlinear, train_lossnonlinear, val_lossnonlinear = train_autoencoder(
        modelnonlinear, train_loader, val_loader, num_epochs=100, lr=0.001)
    
    # Store final MSE values (assuming losses are MSE)
    results['linear_train_mse'].append(train_losslinear[-1])  # Final training MSE
    results['linear_val_mse'].append(val_losslinear[-1])      # Final validation MSE
    results['nonlinear_train_mse'].append(train_lossnonlinear[-1])  # Final training MSE
    results['nonlinear_val_mse'].append(val_lossnonlinear[-1])      # Final validation MSE
    
    # Get factors for this run
    classic_factors = modellinear.encoder(val_data).detach().cpu().numpy()
    nonlinear_factors = modelnonlinear.encoder(val_data).detach().cpu().numpy()
    
    # Store factors for this run
    results['classic_factors'].append(classic_factors)
    results['nonlinear_factors'].append(nonlinear_factors)
    
    # Compute and store factor correlations (MOVED INSIDE THE LOOP)
    classic_corr = aligned_corr(modellinear, val_tensor, F_true_tensor)
    nonlinear_corr = aligned_corr(modelnonlinear, val_tensor, F_true_tensor)
    
    # Debug: Print info for first few runs
    if run < 3:
        print(f"\nRun {run + 1} Debug Info:")
        print(f"  Classic correlations: {classic_corr}")
        print(f"  Nonlinear correlations: {nonlinear_corr}")
        
        # Check the actual factor values
        with torch.no_grad():
            classic_factors_debug = modellinear.encoder(val_tensor).cpu().numpy()
            nonlinear_factors_debug = modelnonlinear.encoder(val_tensor).cpu().numpy()
        
        print(f"  Classic factors - mean: {classic_factors_debug.mean(axis=0)}, std: {classic_factors_debug.std(axis=0)}")
        print(f"  Nonlinear factors - mean: {nonlinear_factors_debug.mean(axis=0)}, std: {nonlinear_factors_debug.std(axis=0)}")
    
    results['classic_analysis'].append(classic_corr)
    results['nonlinear_analysis'].append(nonlinear_corr)

# After all runs are complete, compute summary statistics
print("\n" + "="*60)
print("RESULTS SUMMARY")
print("="*60)

# Print MSE results
print("\nMSE RESULTS:")
print(f"  Linear Train MSE:    {np.mean(results['linear_train_mse']):.8f} ± {np.std(results['linear_train_mse']):.8f}")
print(f"  Linear Val MSE:      {np.mean(results['linear_val_mse']):.8f} ± {np.std(results['linear_val_mse']):.8f}")
print(f"  Nonlinear Train MSE: {np.mean(results['nonlinear_train_mse']):.8f} ± {np.std(results['nonlinear_train_mse']):.8f}")
print(f"  Nonlinear Val MSE:   {np.mean(results['nonlinear_val_mse']):.8f} ± {np.std(results['nonlinear_val_mse']):.8f}")

def print_results_summary(results):
    # Factor Analysis Results
    print(f"\nFACTOR RECOVERY ANALYSIS")
    
    # Use the correct keys - these contain correlation results
    classic_factors = np.array(results['classic_analysis'])  # Shape: (100, 3)
    nonlinear_factors = np.array(results['nonlinear_analysis'])  # Shape: (100, 3)
    
    print(f"\nOverall Factor Correlations (mean across all factors and runs):")
    print(f"  Classic:    {classic_factors.mean():.4f} ± {classic_factors.std():.4f}")
    print(f"  Nonlinear:  {nonlinear_factors.mean():.4f} ± {nonlinear_factors.std():.4f}")
    
    print(f"\nPer-Factor Correlations (mean across runs):")
    print(f"{'Factor':<8} {'Classic':<12} {'Nonlinear':<12}")
    print("-" * 35)
    
    for i in range(classic_factors.shape[1]):
        factor_classic_mean = classic_factors[:, i].mean()
        factor_classic_std = classic_factors[:, i].std()
        factor_nonlinear_mean = nonlinear_factors[:, i].mean()
        factor_nonlinear_std = nonlinear_factors[:, i].std()
        print(f"{i+1:<8} {factor_classic_mean:.4f}±{factor_classic_std:.4f}   {factor_nonlinear_mean:.4f}±{factor_nonlinear_std:.4f}")

# Call the function
print_results_summary(results)


Run 1 Debug Info:
  Classic correlations: [0.06767839 0.10384607 0.03067196]
  Nonlinear correlations: [nan nan nan]
  Classic factors - mean: [ 0.35990378 -0.49302375  0.21532026], std: [0.00661348 0.00365904 0.00460045]
  Nonlinear factors - mean: [-0.5608257   0.16102599  0.25119022], std: [2.2649765e-06 2.0861626e-07 2.9802322e-08]

Run 2 Debug Info:
  Classic correlations: [0.03021932 0.09925218 0.06766467]
  Nonlinear correlations: [0.62704009 0.05655566 0.1443164 ]
  Classic factors - mean: [-0.15627536 -0.38007554  0.28854406], std: [0.00462682 0.00541145 0.00418584]
  Nonlinear factors - mean: [-0.2991768   0.474788    0.29565907], std: [4.803569e-05 9.585928e-04 7.269078e-04]

Run 3 Debug Info:
  Classic correlations: [0.4432214  0.20031793 0.14000445]
  Nonlinear correlations: [0.67800499 0.11785022 0.17414877]
  Classic factors - mean: [-0.38652635  0.2551032   0.27147216], std: [0.00590612 0.0052268  0.00454902]
  Nonlinear factors - mean: [-0.08521908 -0.11870618  0.1739