### Implementing Optimal AutoEncoders (Linear, Afine Linear, Noisy Linear) for CT Scan Data

In [1]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset
from torchvision import datasets, transforms
import tifffile as tiff
import matplotlib.pyplot as plt
import numpy as np
import os
import scipy as sp


In [19]:
# define a class for the dataset (according to pyTorch format) 

class CTScanDataset(Dataset):
    def __init__(self, img_dir, length, mode, transform=None):
        self.mode = mode
        self.img_dir = img_dir
        self.transform = transform
       # self.target_transform = target_transform
        self.len = length
    
    def __len__(self):
        return self.len
    
    def __getitem__(self, idx): 
        slice_name = f"slice{idx:05d}"
        img_path = os.path.join(self.img_dir, slice_name, self.mode, 'reconstruction.tif')
        img_np = tiff.imread(img_path).astype(np.float32)
        img_tensor = torch.from_numpy(img_np)
        if self.transform:
            img_tensor = self.transform(img_tensor)
        return img_tensor

In [14]:
# create dataset
img_dir = r'C:\Users\alexr\OneDrive\CMDS REU\Data\CT-Scan-6.9GB'
mode = 'mode1'
length = 1000
dataset = CTScanDataset(img_dir, length, mode)

# grab samples from the dataset
flatten = True
num_samples = 10
X_raw = torch.stack([dataset[i] for i in range(1,num_samples)]) # grab numSamples images from MNIST -> shape of (1000, 1, 28, 28)
X = X_raw.view(num_samples-1, -1) if flatten else X_raw

r = 100

#### Optimal AutoEncoder with Linear Map, assuming noiseless data

In [15]:
# sample mean estimation method (same as SVD of X)
X_np = X.detach().numpy().T
U, Sigma, V_t = np.linalg.svd(X_np, full_matrices=False)  # SVD of X
U_r = U[:, :r] / num_samples
A = U_r @ U_r.T

''' reconstructed_X_sampleMean = A @ X_np
diff_sampleMean = (reconstructed_X_sampleMean - X_np) / sp.linalg.norm(reconstructed_X_sampleMean, ord='fro') # normalized error
error_sampleMean = np.mean(sp.linalg.norm(diff_sampleMean, axis=1, ord=2)**2) '''

MemoryError: Unable to allocate 4.00 TiB for an array with shape (1048576, 1048576) and data type float32

In [None]:
print(A.shape)

In [None]:
# second moment estimation method (SVD of L_x)

gamma_X = (X_np @ X_np.T) # unbiased second moment
gamma_X += 1e-5*np.eye(gamma_X.shape[0]) # add the regularization term to ensure SPD
L_x = np.linalg.cholesky(gamma_X) # Cholesky decomposition

U, Sigma, V_t = np.linalg.svd(L_x) # SVD
U_r = U[:, :r] # first r left singular vectors of L_X
A = U_r @ U_r.T # Echard-Young solution to the transformed minimization problem

reconstructed_X_covariance = A @ X_np
diff_covariance = (reconstructed_X_covariance - X_np) / sp.linalg.norm(reconstructed_X_sampleMean, ord='fro') # normalized error
error_covariance = np.mean(sp.linalg.norm(diff_sampleMean, axis=1, ord=2)**2)

#### Optimal AutoEncoder with Afine Linear Map, assuming noiseless data

In [None]:
# covariance estimation method 

Sigma_X = np.cov(X_np)
mu_X = np.mean(X_np.T, axis=0)
Sigma_X += 1e-5 * np.eye(Sigma_X.shape[0]) # add a regularization term to ensure SPD
K_x = np.linalg.cholesky(Sigma_X)
U, D, V_t = np.linalg.svd(K_x)
U_r = U[:, :r]
A = U_r @ U_r.T
b = (np.eye(A.shape[0]) - A) @ mu_X
b = np.tile(b[:, np.newaxis], (1, 999))

reconstructed_X_afine = A @ X_np + b
diff_afine = (reconstructed_X_afine - X_np) / sp.linalg.norm(reconstructed_X_sampleMean, ord='fro') # normalized error
error_afine = np.mean(sp.linalg.norm(diff_afine, axis=1, ord=2)**2)

#### Optimal AutoEncoder with Linear Map, assuming Noisy Data

In [None]:
# assume that the noise is Gaussian, represented by eps. 
noise_amp = 1e-3
eps = noise_amp * np.random.randn(X_np.shape[0], X_np.shape[1]) # generate noise with the same shape as X_np
X_true = X_np - eps # approximate the true image by subtracting the noise

gamma_X = (X_np @ X_np.T) # unbiased second moment
gamma_X += 1e-5*np.eye(gamma_X.shape[0]) # add the regularization term to ensure SPD
gamma_eps = (eps @ eps.T) + (noise_amp/2) # calculate gamma_eps

M = gamma_X @ np.linalg.inv(gamma_X @ gamma_eps)
U, Sigma, V_t = np.linalg.svd(M)
U_r = U[: , :r]
A = U_r @ U_r.T

reconstructed_X_noisy = A @ X_np 
diff_noisy = (reconstructed_X_noisy - X_np) / sp.linalg.norm(reconstructed_X_sampleMean, ord='fro') # normalized error
error_noisy = np.mean(sp.linalg.norm(diff_noisy, axis=1, ord=2)**2)


##### Visualizing Reconstructions

In [None]:
fig, ax = plt.subplots(5, 5)
size = (1024, 1024) # common size for CT scan images

for i in range(5):
    img_original = X_np[:, i]
    img_linear_sampleMean = reconstructed_X_sampleMean[:, i]
   ''' img_linear_covariance = reconstructed_X_covariance[:, i]
    img_noisy = reconstructed_X_noisy[:, i]
    img_afine = reconstructed_X_afine[:, i] '''

    ax[0,i].imshow(img_original.reshape(size), cmap='gray')
    ax[0,i].axis('off')
    ax[1,i].imshow(img_linear_sampleMean.reshape(size), cmap='gray')
    ax[1,i].axis('off')
   ''' ax[2,i].imshow(img_linear_covariance.reshape(size), cmap='gray')
    ax[2,i].axis('off')
    ax[3,i].imshow(img_noisy.reshape(size), cmap='gray')
    ax[3,i].axis('off')
    ax[4,i].imshow(img_afine.reshape(size), cmap='gray')
    ax[4,i].axis('off') '''

print(f'Error with Linear Map, sample mean Estimation: {error_sampleMean}')
print(f'Error with Linear Map, second moment Estimation: {error_covariance}')
print(f'Error with Affine Linear Map: {error_afine}')
print(f'Error with Linear Map, nosiy data: {error_noisy}')
# plt.tight_layout()
plt.show()
fig.set_size_inches(15, 5)