In [14]:
# imports 
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
from torch.utils.data import random_split, DataLoader
from scipy import stats
from scipy.linalg import orthogonal_procrustes
from sklearn.decomposition import PCA

In [3]:
def compute_optimal_A_b_mu(X_np, r):
    mu = np.mean(X_np, axis=0)
    cov = (X_np - mu).T @ (X_np - mu)
    U, S, _ = np.linalg.svd(cov)
    Ur = U[:, :r]  # shape (d, r)
    
    A = Ur.T  # encoder projects from d->r: (r, d)
    b = np.zeros(r)  # no bias needed in latent space
    
    return A, b, mu

In [4]:
# python packaged affine autoencoder

class ClassicAffineAutoencoder(nn.Module):
    def __init__(self, input_dim, bottleneck_dim):
        super().__init__()
        self.encoder = nn.Linear(input_dim, bottleneck_dim, bias=True)
        self.decoder = nn.Linear(bottleneck_dim, input_dim, bias=True)

    def forward(self, x):
        z = self.encoder(x)
        x_hat = self.decoder(z)
        return x_hat

In [5]:
class OptimalAffineAutoencoder(nn.Module):
    def __init__(self, input_dim, r, A, b, mu):
        super().__init__()
        self.A = torch.tensor(A, dtype=torch.float32)
        self.b = torch.tensor(b, dtype=torch.float32)
        self.mu = torch.tensor(mu, dtype=torch.float32)

    def encoder(self, x):
        # Return centered projected latent space: Ur^T(x - mu)
        x_centered = x - self.mu  # center by mean
        UrT = self.A.T  # A = Ur Ur^T ⇒ A.T = Ur^T Ur ⇒ encoder ~ Ur^T
        return x_centered @ UrT  # [batch_size, d] x [d, r] → [batch_size, r]
    
    def decoder(self, z):
        # Reconstruct from latent space: Ur * z + mu
        return z @ self.A + self.mu  # [batch_size, r] x [r, d] → [batch_size, d]

    def forward(self, x):
        # Complete encode-decode cycle
        z = self.encoder(x)
        return self.decoder(z)

In [6]:
# this is a combination modeel that allows for training after instantiation of optimal theoretical weights. 
class OptimalTrainableAffineAutoencoder(nn.Module):
    def __init__(self, input_dim, r, A_init, b_init, mu_init):
        super().__init__()
        self.A = nn.Parameter(torch.tensor(A_init, dtype=torch.float32))  # (r, d)
        self.b = nn.Parameter(torch.tensor(b_init, dtype=torch.float32))  # (r,)
        self.mu = nn.Parameter(torch.tensor(mu_init, dtype=torch.float32))  # (d,)

    def encoder(self, x):
        x_centered = x - self.mu  # (batch, d)
        return x_centered @ self.A.T + self.b  # (batch, r)

    def decoder(self, z):
        return z @ self.A + self.mu  # (batch, d)

    def forward(self, x):
        z = self.encoder(x)
        return self.decoder(z)


In [7]:


# 3. Modify train_autoencoder to accept loaders instead of raw tensor
def train_autoencoder(model, train_loader, val_loader, num_epochs=300, lr=1e-3):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0
        for batch in train_loader:
            batch = batch.to(device)
            optimizer.zero_grad()
            recon = model(batch)
            loss = criterion(recon, batch)
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item() * batch.size(0)

        avg_train_loss = total_train_loss / len(train_loader.dataset)
        train_losses.append(avg_train_loss)

        model.eval()
        total_val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                batch = batch.to(device)
                recon = model(batch)
                loss = criterion(recon, batch)
                total_val_loss += loss.item() * batch.size(0)

        avg_val_loss = total_val_loss / len(val_loader.dataset)
        val_losses.append(avg_val_loss)


    return model, train_losses, val_losses









In [8]:
# validation function only code for the optimal affine autoencoder
def valOnlyOptimalAffineAutoencoder(model, val_loader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    
    model.eval()
    total_loss = 0
    criterion = nn.MSELoss()

    with torch.no_grad():
        for batch in val_loader:
            batch = batch.to(device)
            recon = model(batch)
            loss = criterion(recon, batch)
            total_loss += loss.item() * batch.size(0)

    avg_loss = total_loss / len(val_loader.dataset)
    return avg_loss 

In [21]:
# get data ready
X_df = pd.read_csv("assetReturns_garch.csv")
X_np = X_df.to_numpy().astype(np.float32)
X_tensor = torch.tensor(X_np)

print(f"Data shape: {X_np.shape}")

# set dims and latent space size 
input_dim = X_np.shape[1]
r = 3  # bottleneck dimension

# compute the optimal params (only once, outside the loop)
A, b, mu = compute_optimal_A_b_mu(X_np, r)

# Initialize results storage
results = {
    'classic_mse': [],
    'optimal_mse': [],
    'trainable_mse': [],
    'classic_train_loss': [],
    'classic_val_loss': [],
    'trainable_train_loss': [],
    'trainable_val_loss': [],
    'classic_factors': [],
    'optimal_factors': [],
    'trainable_factors': [],
    'classic_train_histories': [],
    'trainable_train_histories': [],
    'classic_val_histories': [],
    'trainable_val_histories': []
}

print("Starting 100-run experiment...")

# Run 100 experiments
for run in range(100):
    print(f"Run {run + 1}/100", end="")
    
    # Set different seed for each run
    torch.manual_seed(run)
    np.random.seed(run)
    
    # split data into training and validation with current seed
    val_split = 0.2
    total_size = len(X_tensor)
    val_size = int(val_split * total_size)
    train_size = total_size - val_size
    
    train_data, val_data = random_split(X_tensor, [train_size, val_size])
    
    # create the data loaders 
    batch_size = 64
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
    
    # initialize models for this run
    model_classic = ClassicAffineAutoencoder(input_dim, r)
    model_optimal = OptimalAffineAutoencoder(input_dim, r, A, b, mu)
    model_optimal_trainable = OptimalTrainableAffineAutoencoder(input_dim, r, A, b, mu)
    
    # Train classic model
    model_classic, losses_classic_train, losses_classic_val = train_autoencoder(
        model_classic, 
        train_loader,
        val_loader,
        70,
        0.001
    )
    
    # Validation only on optimal model
    losses_optimal_val = valOnlyOptimalAffineAutoencoder(model_optimal, val_loader)
    
    # Train optimal trainable model
    model_optimal_trainable, losses_trainable_train, losses_trainable_val = train_autoencoder(
        model_optimal_trainable,
        train_loader,
        val_loader,
        20,
        0.001
    )
    
    # Store results for this run
    results['classic_mse'].append(min(losses_classic_val))
    results['optimal_mse'].append(losses_optimal_val)
    results['trainable_mse'].append(min(losses_trainable_val))

    results['classic_train_loss'].append(losses_classic_train[-1])
    results['classic_val_loss'].append(losses_classic_val[-1])
    results['trainable_train_loss'].append(losses_trainable_train[-1])
    results['trainable_val_loss'].append(losses_trainable_val[-1])
    
    # Store full training histories
    results['classic_train_histories'].append(losses_classic_train)
    results['classic_val_histories'].append(losses_classic_val)
    results['trainable_train_histories'].append(losses_trainable_train)
    results['trainable_val_histories'].append(losses_trainable_val)

        # --------------------------------------------
    # PCA BASELINE: Fit on training data, evaluate on val
    # --------------------------------------------

    # Fit PCA on training data (NumPy)
    X_train_np = X_tensor[train_data.indices].numpy()
    X_val_np = val_tensor.numpy()  # already selected earlier

    pca = PCA(n_components=r)
    pca.fit(X_train_np)

    # Transform validation data
    Z_pca = pca.transform(X_val_np)

    # Align to true latent factors using Procrustes
    F = F_true_tensor.numpy()
    R_pca, _ = orthogonal_procrustes(Z_pca, F)
    Z_pca_aligned = Z_pca @ R_pca

    # Compute correlation per factor
    corr_pca = np.abs([
        np.corrcoef(Z_pca_aligned[:, i], F[:, i])[0, 1]
        for i in range(F.shape[1])
    ])
    
    # Compute reconstruction MSE on validation data
    X_val_reconstructed = pca.inverse_transform(Z_pca)
    mse_pca = np.mean((X_val_np - X_val_reconstructed) ** 2)

    # Store results
    results.setdefault('pca_mse', []).append(mse_pca)
    results.setdefault('pca_factors', []).append(corr_pca)

    # --------------------------------------------
    # FACTOR ANALYSIS: Procrustes-aligned correlations
    # --------------------------------------------

    # Load ground truth latent factors (full set) and slice val rows
    F_true_full = pd.read_csv("latentFactors_garch.csv").to_numpy().astype(np.float32)
    F_true_tensor = torch.tensor(F_true_full[val_data.indices])

    # Get validation data as tensor
    val_tensor = X_tensor[val_data.indices]

    # Helper function to align & compute correlation
    def aligned_corr(model, X_val, F_true_val):
        with torch.no_grad():
            Z = model.encoder(X_val).cpu().numpy()
            F = F_true_val.cpu().numpy()
        R, _ = orthogonal_procrustes(Z, F)
        Z_aligned = Z @ R
        return np.abs([
            np.corrcoef(Z_aligned[:, i], F[:, i])[0, 1]
            for i in range(F.shape[1])
        ])

    # Compute and store factor correlations
    results['classic_factors'].append(
        aligned_corr(model_classic, val_tensor, F_true_tensor)
    )
    results['optimal_factors'].append(
        aligned_corr(model_optimal, val_tensor, F_true_tensor)
    )
    results['trainable_factors'].append(
        aligned_corr(model_optimal_trainable, val_tensor, F_true_tensor)
    )



Data shape: (2000, 10)
Starting 100-run experiment...
Run 1/100Run 2/100Run 3/100Run 4/100Run 5/100Run 6/100Run 7/100Run 8/100Run 9/100Run 10/100Run 11/100Run 12/100Run 13/100Run 14/100Run 15/100Run 16/100Run 17/100Run 18/100Run 19/100Run 20/100Run 21/100Run 22/100Run 23/100Run 24/100Run 25/100Run 26/100Run 27/100Run 28/100Run 29/100Run 30/100Run 31/100Run 32/100Run 33/100Run 34/100Run 35/100Run 36/100Run 37/100Run 38/100Run 39/100Run 40/100Run 41/100Run 42/100Run 43/100Run 44/100Run 45/100Run 46/100Run 47/100Run 48/100Run 49/100Run 50/100Run 51/100Run 52/100Run 53/100Run 54/100Run 55/100Run 56/100Run 57/100Run 58/100Run 59/100Run 60/100Run 61/100Run 62/100Run 63/100Run 64/100Run 65/100Run 66/100Run 67/100Run 68/100Run 69/100Run 70/100Run 71/100Run 72/100Run 73/100Run 74/100Run 75/100Run 76/100Run 77/100Run 78/100Run 79/100Run 80/100Run 81/100Run 82/100Run 83/100Run 84/100Run 85/100Run 86/100Run 87/100Run 88/100Run 89/100Run 90/100Run 91/100Run 92/100Run 93/100Run 94/100Run 95/100Run 9

In [22]:
def print_results_summary(results):
    # MSE Results (Validation Loss)
    print("\n VALIDATION MSE RESULTS")
    
    classic_mse = np.array(results['classic_mse'])
    optimal_mse = np.array(results['optimal_mse'])
    trainable_mse = np.array(results['trainable_mse'])
    pca_mse = np.array(results['pca_mse'])  # NEW

    print(f"Classic Autoencoder:")
    print(f"  Mean MSE: {classic_mse.mean():.6f} ± {classic_mse.std():.6f}")
    print(f"  Median:   {np.median(classic_mse):.6f}")
    print(f"  Min/Max:  {classic_mse.min():.6f}  {classic_mse.max():.6f}")
    
    print(f"\nOptimal Autoencoder:")
    print(f"  Mean MSE: {optimal_mse.mean():.6f} ± {optimal_mse.std():.6f}")
    print(f"  Median:   {np.median(optimal_mse):.6f}")
    print(f"  Min/Max:  {optimal_mse.min():.6f}  {optimal_mse.max():.6f}")
    
    print(f"\nTrainable Optimal Autoencoder:")
    print(f"  Mean MSE: {trainable_mse.mean():.6f} ± {trainable_mse.std():.6f}")
    print(f"  Median:   {np.median(trainable_mse):.6f}")
    print(f"  Min/Max:  {trainable_mse.min():.6f}  {trainable_mse.max():.6f}")

    print(f"\nPCA Baseline:")
    print(f"  Mean MSE: {pca_mse.mean():.6f} ± {pca_mse.std():.6f}")
    print(f"  Median:   {np.median(pca_mse):.6f}")
    print(f"  Min/Max:  {pca_mse.min():.6f}  {pca_mse.max():.6f}")
    
    # Performance Comparison
    print(f"\n PERFORMANCE COMPARISON")
    
    classic_vs_optimal = (classic_mse.mean() - optimal_mse.mean()) / classic_mse.mean() * 100
    classic_vs_trainable = (classic_mse.mean() - trainable_mse.mean()) / classic_mse.mean() * 100
    optimal_vs_trainable = (optimal_mse.mean() - trainable_mse.mean()) / optimal_mse.mean() * 100
    pca_vs_classic = (classic_mse.mean() - pca_mse.mean()) / classic_mse.mean() * 100
    pca_vs_optimal = (optimal_mse.mean() - pca_mse.mean()) / optimal_mse.mean() * 100
    pca_vs_trainable = (trainable_mse.mean() - pca_mse.mean()) / trainable_mse.mean() * 100
    
    print(f"Optimal vs Classic:     {classic_vs_optimal:+.2f}% improvement")
    print(f"Trainable vs Classic:   {classic_vs_trainable:+.2f}% improvement")
    print(f"Trainable vs Optimal:   {optimal_vs_trainable:+.2f}% improvement")
    print(f"PCA vs Classic:         {pca_vs_classic:+.2f}% improvement")
    print(f"PCA vs Optimal:         {pca_vs_optimal:+.2f}% improvement")
    print(f"PCA vs Trainable:       {pca_vs_trainable:+.2f}% improvement")
    
    # Final Training/Validation Losses
    print(f"\n FINAL TRAINING/VALIDATION LOSSES")
    
    classic_train_final = np.array(results['classic_train_loss'])
    classic_val_final = np.array(results['classic_val_loss'])
    trainable_train_final = np.array(results['trainable_train_loss'])
    trainable_val_final = np.array(results['trainable_val_loss'])
    
    print(f"Classic Autoencoder:")
    print(f"  Final Train Loss: {classic_train_final.mean():.6f} ± {classic_train_final.std():.6f}")
    print(f"  Final Val Loss:   {classic_val_final.mean():.6f} ± {classic_val_final.std():.6f}")
    print(f"  Train/Val Ratio:  {(classic_train_final.mean()/classic_val_final.mean()):.3f}")
    
    print(f"\nTrainable Optimal Autoencoder:")
    print(f"  Final Train Loss: {trainable_train_final.mean():.6f} ± {trainable_train_final.std():.6f}")
    print(f"  Final Val Loss:   {trainable_val_final.mean():.6f} ± {trainable_val_final.std():.6f}")
    print(f"  Train/Val Ratio:  {(trainable_train_final.mean()/trainable_val_final.mean()):.3f}")
    
    # Factor Analysis Results
    print(f"\n FACTOR RECOVERY ANALYSIS")

    classic_factors = np.array(results['classic_factors'])
    optimal_factors = np.array(results['optimal_factors'])
    trainable_factors = np.array(results['trainable_factors'])
    pca_factors = np.array(results['pca_factors'])  # NEW
    
    print(f"Factor Correlations (mean ± std across all factors and runs):")
    print(f"  Classic:           {classic_factors.mean():.4f} ± {classic_factors.std():.4f}")
    print(f"  Optimal:           {optimal_factors.mean():.4f} ± {optimal_factors.std():.4f}")
    print(f"  Trainable Optimal: {trainable_factors.mean():.4f} ± {trainable_factors.std():.4f}")
    print(f"  PCA:               {pca_factors.mean():.4f} ± {pca_factors.std():.4f}")
    
    print(f"\nPer-Factor Correlations:")
    print(f"{'Factor':<8} {'Classic':<10} {'Optimal':<10} {'Trainable':<10} {'PCA':<10}")
    
    for i in range(classic_factors.shape[1]):
        factor_classic = classic_factors[:, i].mean()
        factor_optimal = optimal_factors[:, i].mean()
        factor_trainable = trainable_factors[:, i].mean()
        factor_pca = pca_factors[:, i].mean()
        print(f"{i+1:<8} {factor_classic:<10.4f} {factor_optimal:<10.4f} {factor_trainable:<10.4f} {factor_pca:<10.4f}")
     
    # Factor correlation tests
    classic_factors_flat = classic_factors.flatten()
    optimal_factors_flat = optimal_factors.flatten()
    trainable_factors_flat = trainable_factors.flatten()
    pca_factors_flat = pca_factors.flatten()
 
    # Summary Table
    print(f"\n SUMMARY TABLE")
    
    summary_df = pd.DataFrame({
        'Model': ['Classic', 'Optimal', 'Trainable', 'PCA'],
        'MSE_Mean': [
            classic_mse.mean(), 
            optimal_mse.mean(), 
            trainable_mse.mean(), 
            pca_mse.mean()
        ],
        'MSE_Std': [
            classic_mse.std(), 
            optimal_mse.std(), 
            trainable_mse.std(), 
            pca_mse.std()
        ],
        'Factor_Corr_Mean': [
            classic_factors.mean(), 
            optimal_factors.mean(), 
            trainable_factors.mean(),
            pca_factors.mean()
        ],
        'Factor_Corr_Std': [
            classic_factors.std(), 
            optimal_factors.std(), 
            trainable_factors.std(),
            pca_factors.std()
        ]
    })
    
    print(summary_df.round(6))
print_results_summary(results)  


 VALIDATION MSE RESULTS
Classic Autoencoder:
  Mean MSE: 1.409329 ± 0.046787
  Median:   1.401503
  Min/Max:  1.322890  1.604322

Optimal Autoencoder:
  Mean MSE: 1.381532 ± 0.033371
  Median:   1.380051
  Min/Max:  1.304812  1.493806

Trainable Optimal Autoencoder:
  Mean MSE: 1.383313 ± 0.033396
  Median:   1.381695
  Min/Max:  1.307124  1.495925

PCA Baseline:
  Mean MSE: 1.374383 ± 0.093616
  Median:   1.381638
  Min/Max:  0.503196  1.493646

 PERFORMANCE COMPARISON
Optimal vs Classic:     +1.97% improvement
Trainable vs Classic:   +1.85% improvement
Trainable vs Optimal:   -0.13% improvement
PCA vs Classic:         +2.48% improvement
PCA vs Optimal:         +0.52% improvement
PCA vs Trainable:       +0.65% improvement

 FINAL TRAINING/VALIDATION LOSSES
Classic Autoencoder:
  Final Train Loss: 1.397694 ± 0.036883
  Final Val Loss:   1.409364 ± 0.046766
  Train/Val Ratio:  0.992

Trainable Optimal Autoencoder:
  Final Train Loss: 1.382268 ± 0.008342
  Final Val Loss:   1.391408 ± 0

In [12]:
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
from scipy.linalg import orthogonal_procrustes


def compare_latent_factors(model, X_tensor, F_true_path, val_indices=None):
    model.eval()
    with torch.no_grad():
        Z_learned = model.encoder(X_tensor).detach().cpu().numpy()
    Z_true_full = pd.read_csv(F_true_path).to_numpy()
    
    if val_indices is not None:
        Z_true = Z_true_full[val_indices, :]
    else:
        Z_true = Z_true_full
    
    min_samples = min(Z_learned.shape[0], Z_true.shape[0])
    min_factors = min(Z_learned.shape[1], Z_true.shape[1])
    
    Z_learned = Z_learned[:min_samples, :min_factors]
    Z_true = Z_true[:min_samples, :min_factors]
    
    Z_learned_centered = Z_learned - Z_learned.mean(axis=0)
    Z_true_centered = Z_true - Z_true.mean(axis=0)
    
    R, _ = orthogonal_procrustes(Z_learned_centered, Z_true_centered)
    Z_aligned = Z_learned_centered @ R
    
    print("Latent Factor Correlations (after Procrustes alignment):")
    correlations = []
    for i in range(Z_aligned.shape[1]):
        corr, _ = pearsonr(Z_aligned[:, i], Z_true_centered[:, i])
        correlations.append(corr)
        print(f"  Factor {i+1}: correlation = {corr:.4f}")
    
    return Z_aligned, Z_true_centered, correlations

def plot_training_curves(losses_train_baseline, losses_val_baseline, 
                         losses_val_optimal, losses_train_trainable, 
                         losses_val_trainable):
    plt.figure(figsize=(12, 5))
    
    # Plot 1: Training curves
    plt.subplot(1, 2, 1)
    plt.plot(losses_train_baseline, label='Classic AE - Train', linestyle='-', color='teal')
    plt.plot(losses_val_baseline, label='Classic AE - Val', linestyle='--', color='teal')
    plt.plot(losses_train_trainable, label='Trainable AE - Train', linestyle='-', color='magenta')
    plt.plot(losses_val_trainable, label='Trainable AE - Val', linestyle='--', color='magenta')
 
    plt.xlabel("Epoch")
    plt.ylabel("Loss (MSE)")
    plt.title("Training vs Validation Loss")
    plt.legend()
    plt.grid(True)
    
    # Plot 2: Final comparison
    plt.subplot(1, 2, 2)
    models = ['Classic', 'Optimal', 'Trainable']
    train_final = [losses_train_baseline[-1], 0, losses_train_trainable[-1]]
    val_final = [losses_val_baseline[-1], losses_val_optimal, losses_val_trainable[-1]]
    
    x = np.arange(len(models))
    width = 0.35
    
    plt.bar(x - width/2, train_final, width, label='Train', alpha=0.8, color='pink')
    plt.bar(x + width/2, val_final, width, label='Validation', alpha=0.8, color='lightblue')
    plt.xlabel('Model Type')
    plt.ylabel('Final Loss')
    plt.title('Final Loss Comparison')
    plt.xticks(x, models)
    plt.legend()
    plt.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()

def evaluate_models(model_baseline, model_optimal, model_trainable, X_tensor, A_opt, b_opt, 
                   F_true_path, losses_baseline_train, losses_baseline_val,
                   losses_optimal_val, losses_trainable_train, losses_trainable_val,
                   val_indices=None):

    # 2. Latent Factor Correlation
    print("LATENT FACTOR ANALYSIS:")
    print("\nClassic AE Factors:")
    try:
        _, _, corr_base = compare_latent_factors(model_baseline, X_tensor, F_true_path, val_indices)
        avg_corr_base = np.mean(np.abs(corr_base))
    except Exception as e:
        print(f"   Error in factor analysis: {e}")
        avg_corr_base = 0
    
    print(f"   Average |correlation|: {avg_corr_base:.4f}")
    
    print("\nOptimal AE Factors:")
    try:
        _, _, corr_opt = compare_latent_factors(model_optimal, X_tensor, F_true_path, val_indices)
        avg_corr_opt = np.mean(np.abs(corr_opt))
    except Exception as e:
        print(f"   Error in factor analysis: {e}")
        avg_corr_opt = 0
    
    print(f"   Average |correlation|: {avg_corr_opt:.4f}")

    print("\nTrainable AE Factors:")
    try:
        _, _, corr_train = compare_latent_factors(model_trainable, X_tensor, F_true_path, val_indices)
        avg_corr_train = np.mean(np.abs(corr_train))
    except Exception as e:
        print(f"   Error in factor analysis: {e}")
        avg_corr_train = 0

    print(f"   Average |correlation|: {avg_corr_train:.4f}")
  
    # 5. Plot training curves
    print("PLOTTING TRAINING CURVES...")
    plot_training_curves(losses_baseline_train, losses_val_baseline=losses_baseline_val,
                         losses_val_optimal=losses_optimal_val,
                         losses_train_trainable=losses_trainable_train,
                         losses_val_trainable=losses_trainable_val)


Theoretical autoencoder always outperforms on reconstruction MSE indicating a true optimal solution 
- theoretical has no training and classic has 300 epochs of training
- however i think that in order to best learn super noisy financial data, i think you need to train the optimal one and that will tailor it to your noisy dataset -> i gave the optimal solution ~50 epochs and its latent factor predictions were better than the classic autoencoder 
Question: I have tried training the optimal for about 20-50 epochs and it showed better performance than a classic that was trained for 300 epochs for nonlinear and GARCH data. Wondering if this is useful for CT data? should I try denoising in the latent space? because the latent factors are slightly worse than the classic but im guessing thats because we let the classic train and the optimal has no training at all. 