# Imports

In [107]:
import torch
from torch import nn, optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np

tensor_transform = transforms.ToTensor()
dataset = datasets.MNIST(root="./data", train=True, download=True, transform=tensor_transform)
loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=32, shuffle=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


# Part 1: Data of Known Distribution

### Data Info

In [108]:
numSamples = 1000 # must be large for X to be SPD
dim = 784 # dimension of the data (chosen based on the 28x28 MNIST dataset)
r = 100 # maximum rank of the mapping

### Theoretical Results

In [109]:
X = np.random.randn(numSamples, dim)
# mu = 0 by default in numpy Gaussian distribution

gamma_X = np.eye(dim)  # known covariance for N(0, I)
L_X = np.linalg.cholesky(gamma_X)  # Cholesky decomposition

U, Sigma, V_T = np.linalg.svd(L_X)  # SVD
U_r = U[:, :r]  # first r left singular vectors of L_X

A_opt = U_r @ U_r.T  # Eckart-Young solution to the transformed minimization problem

# Minimized expected error
error_opt = np.linalg.norm((A_opt - np.eye(dim)) @ L_X, ord='fro')**2


### Computational, Autoencoder Results

In [110]:
X_tensor = torch.tensor(X, dtype=torch.float32)

# Create the autoencoder class with one hidden layer
class LinearAutoencoder(nn.Module):
    def __init__(self, input_dim, bottleneck_dim):
        super().__init__()
        self.encoder = nn.Linear(input_dim, bottleneck_dim, bias=False)
        self.decoder = nn.Linear(bottleneck_dim, input_dim, bias=False)

    def forward(self, x):
        z = self.encoder(x)
        x_recon = self.decoder(z)
        return x_recon
    
model = LinearAutoencoder(input_dim=dim, bottleneck_dim=r)

# Training
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

X_loader = torch.utils.data.DataLoader(X_tensor, batch_size=128, shuffle=True)

num_epochs = 50
for epoch in range(num_epochs):
    total_loss = 0
    for batch in X_loader:
        optimizer.zero_grad()
        recon = model(batch)
        loss = criterion(recon, batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * batch.size(0)
    avg_loss = total_loss / numSamples
    #if epoch % 10 == 0:
        #print(f"Epoch {epoch:02d}, MSE: {avg_loss:.6f}")

# Reconstruct learned projection matrix A
W = model.decoder.weight @ model.encoder.weight  # A_learned = W_decoder * W_encoder
A_learned = W.detach().numpy()  # shape: (784, 784)

error_learned = np.linalg.norm((A_learned - np.eye(dim)) @ L_X, ord='fro')**2


### Results

In [111]:
print(f"Optimal A error: {error_opt:.3f}")
print(f"Learned A error: {error_learned:.3f}")

Optimal A error: 684.000
Learned A error: 686.677


# Part 2: Data from MNIST

### Data Info

In [112]:
numSamples = 1000 # must be large for X to be SPD
dim = 784 # dimension of the data (chosen based on the 28x28 MNIST dataset)
r = 100 # maximum rank of the mapping

### Theoretical Results

In [113]:
flatten = True

X_raw = torch.stack([dataset[i][0] for i in range(numSamples)]) # grab numSamples images from MNIST
X = X_raw.view(numSamples, -1) if flatten else X_raw
mu = X.mean(dim=0, keepdim=True) # must now calculate the sample mean
X = X - mu

gamma_X = (X.T @ X) / (numSamples - 1) # unbiased second moment (covariance matrix in this case??)
gamma_X += 1e-5*np.eye(gamma_X.shape[0]) # add the regularization term to ensure SPD
L_X = np.linalg.cholesky(gamma_X) # Cholesky decomposition

U, Sigma, V_T = np.linalg.svd(L_X) # SVD
U_r = U[:, :r] # first r left singular vectors of L_X

A_opt = U_r @ U_r.T # Echard-Young solution to the transformed minimization problem

# Minimized expected error
error_opt = np.linalg.norm((A_opt - np.eye(dim)) @ L_X,ord='fro')**2

  gamma_X += 1e-5*np.eye(gamma_X.shape[0]) # add the regularization term to ensure SPD
  error_opt = np.linalg.norm((A_opt - np.eye(dim)) @ L_X,ord='fro')**2


### Computational, Autoencoder Results

In [None]:
X_tensor = torch.tensor(X, dtype=torch.float32) # updates X_tensor to account for the new data

model = LinearAutoencoder(input_dim=dim, bottleneck_dim=r)

# Training
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

X_loader = torch.utils.data.DataLoader(X_tensor, batch_size=128, shuffle=True)

num_epochs = 50
for epoch in range(num_epochs):
    total_loss = 0
    for batch in X_loader:
        optimizer.zero_grad()
        recon = model(batch)
        loss = criterion(recon, batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * batch.size(0)
    avg_loss = total_loss / numSamples
    #if epoch % 10 == 0:
        #print(f"Epoch {epoch:02d}, MSE: {avg_loss:.6f}")

# Reconstruct learned projection matrix A
W = model.decoder.weight @ model.encoder.weight  # A_learned = W_decoder * W_encoder
A_learned = W.detach().numpy()  # shape: (784, 784)
L_X = L_X.detach().numpy() # L_X is a tensor since above we made X a tensor, thus we must turn it back into a ndarray

error_learned = np.linalg.norm((A_learned - np.eye(dim)) @ L_X, ord='fro')**2

  X_tensor = torch.tensor(X, dtype=torch.float32) # updates X_tensor to account for the new data


### Results

In [115]:
print(f"Optimal A error: {error_opt:.3f}")
print(f"Learned A error: {error_learned:.3f}")

Optimal A error: 3.667
Learned A error: 5.077
