In [None]:
#imports
import matplotlib.pyplot as plt
import scipy.io
import numpy as np
!pip install torch-summary
import torchsummary
import torch
import torchvision 
import torch.nn as nn
import torch.functional as f
import torch.optim as optim


In [None]:
# Frey faces dataset, published at https://cs.nyu.edu/~roweis/data.html
# Read the .mat file, take its "ff" entry, transpose it and view it as a stack of 28 x 20 frames.
images = scipy.io.loadmat(
    "/kaggle/input/frey-rawface/frey_rawface_k.mat",
    squeeze_me=True,
    struct_as_record=False,
)["ff"].T.reshape((-1, 28, 20))

In [None]:
# Sanity check on the loaded array: the reshape to (-1, 28, 20) fixes the trailing axes at 28 x 20.
images.shape

In [None]:
# Preview one face. The frames are single-channel, so draw them with a gray colormap
# rather than matplotlib's default false-colour map, and label the figure.
fig, ax = plt.subplots()
ax.imshow(images[115], cmap="gray")
ax.set_title("Frey face #115")
ax.axis("off");  # trailing ';' keeps the return value out of the cell output

In [None]:
# Seed the random number generators so reruns are repeatable. The layer initialisation
# and the latent noise are drawn by torch, so seeding numpy alone is not enough.
np.random.seed(40)
torch.manual_seed(40)

n_pixels = 28 * 20  # number of features per flattened frame

# First 1800 frames are used for training; frames 1865..1964 are held out for validation
# (frames 1800..1864 are not used by either split).
X_train = images[:1800]
X_val = images[1865:1965]

# Convert to float32 and divide by 255 to rescale the pixel values.
X_train = X_train.astype('float32') / 255.
X_val = X_val.astype('float32') / 255.

# Flatten every frame: (number of samples, 28, 20) -> (number of samples, n_pixels).
X_train = X_train.reshape((len(X_train), n_pixels))
X_val = X_val.reshape((len(X_val), n_pixels))

# Hand the arrays to torch for the model code below.
X_train = torch.tensor(X_train)
X_val = torch.tensor(X_val)

In [None]:
 # here the preprocessing ends
 # and The Model construction starts!   


In [None]:
class Encoder(nn.Module):
    """MLP encoder that maps an input x to the parameters of q(z|x).

    One tanh hidden layer (``fc1``) feeds two parallel linear heads:
    ``fc2`` produces the latent mean and ``fc3`` the latent log-variance.
    Follows the MLP layout of appendix C.2 of the paper.
    """

    def __init__(self, input_dim, intermediate_dim, latent_dim):
        super().__init__()
        # Shared hidden layer that compresses the high-dimensional image.
        self.fc1 = nn.Linear(in_features=input_dim, out_features=intermediate_dim)
        # Two heads reading the same hidden activation.
        self.fc2 = nn.Linear(in_features=intermediate_dim, out_features=latent_dim)
        self.fc3 = nn.Linear(in_features=intermediate_dim, out_features=latent_dim)

    def forward(self, x):
        """Return ``(z_mean, z_log_var)`` for the input ``x``."""
        hidden = self.fc1(x).tanh()
        return self.fc2(hidden), self.fc3(hidden)

# Network sizes: flattened image width, hidden layer width, latent space width.
input_dim, intermediate_dim, latent_dim = n_pixels, 256, 2
encoder = Encoder(
    input_dim=input_dim,
    intermediate_dim=intermediate_dim,
    latent_dim=latent_dim,
)
    

In [None]:
#We need to sample z from the q_phi(z|x)
def sampling(z_mean, z_log_var, n_samples=100):
    """Draw latent codes with the reparameterisation trick: z = mean + std * eps.

    ``eps`` is standard normal noise and ``std = exp(0.5 * z_log_var)``, so the
    result stays differentiable with respect to ``z_mean`` and ``z_log_var``.

    Args:
        z_mean: tensor whose last axis is the latent dimension.
        z_log_var: log-variance tensor, same shape as ``z_mean``.
        n_samples: number of noise rows to draw. The noise has shape
            ``(n_samples, latent)`` and is broadcast against the inputs, so a
            1-D input yields ``n_samples`` codes and a 2-D input must have
            ``n_samples`` rows (or one row). Pass ``None`` to draw exactly one
            noise value per element of the input instead.

    Returns:
        Tensor of sampled latent codes.
    """
    std = torch.exp(0.5 * z_log_var)
    if n_samples is None:
        eps = torch.randn_like(std)
    else:
        # Take the width, dtype and device from the input rather than from
        # notebook-level globals so the function works for any latent size.
        eps = torch.randn(
            n_samples, z_mean.shape[-1], dtype=z_mean.dtype, device=z_mean.device
        )
    return eps * std + z_mean


In [None]:
class Decoder(nn.Module):
    """MLP decoder that maps a latent code z back to pixel space.

    One tanh hidden layer (``h1``) followed by a sigmoid output layer (``h2``),
    so every output value lies between 0 and 1.

    Args:
        z_dim: width of the latent code. Defaults to the notebook-level ``latent_dim``.
        hidden_dim: width of the hidden layer. Defaults to the notebook-level
            ``intermediate_dim``.
        output_dim: number of output features. Defaults to the notebook-level ``n_pixels``.
    """

    def __init__(self, z_dim=None, hidden_dim=None, output_dim=None):
        super().__init__()
        # Fall back to the notebook-level settings so that ``Decoder()`` keeps working;
        # explicit sizes let the decoder be built to match any encoder.
        if z_dim is None:
            z_dim = latent_dim
        if hidden_dim is None:
            hidden_dim = intermediate_dim
        if output_dim is None:
            output_dim = n_pixels
        self.h1 = nn.Linear(z_dim, hidden_dim)
        self.h2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, z):
        """Decode latent codes ``z`` into reconstructions."""
        hidden = torch.tanh(self.h1(z))  # tanh hidden layer, as in the paper
        return torch.sigmoid(self.h2(hidden))



In [None]:
#now the VAE
class VAE(nn.Module):
    """Variational auto-encoder: encode, sample a latent code, decode.

    The constructor arguments size the encoder. The decoder is created with
    ``Decoder()`` (no arguments), so it is not sized from these arguments.
    """

    def __init__(self,n_pixels, intermediate_dim, latent_dim):
        super(VAE, self).__init__()
        self.encoder = Encoder(n_pixels, intermediate_dim, latent_dim)
        self.decoder = Decoder()

    def forward(self, x):
        """Return ``(reconstruction, z_mean, z_log_var)`` for the input ``x``."""
        posterior_mean, posterior_log_var = self.encoder(x)
        reconstruction = self.decoder(sampling(posterior_mean, posterior_log_var))
        return reconstruction, posterior_mean, posterior_log_var
        

In [None]:
# Build the full model from the sizes chosen earlier, then show torchsummary's report for it.
vae = VAE(n_pixels=input_dim, intermediate_dim=intermediate_dim, latent_dim=latent_dim)
torchsummary.summary(vae)

In [None]:
class VAELoss(nn.Module):
    """Negative evidence lower bound: reconstruction error plus KL regulariser.

    Both terms are summed over the feature axis of each sample and the result
    is averaged over any leading batch axes, so the returned value is always a
    scalar and the two terms are on the same per-sample scale.
    """

    def forward(self, x, x_decoded, z_mean, z_log_var):
        """Compute the loss.

        Args:
            x: original inputs, last axis = features.
            x_decoded: reconstructions, broadcastable against ``x``.
            z_mean: latent means, last axis = latent dimension.
            z_log_var: latent log-variances, same shape as ``z_mean``.

        Returns:
            0-dim tensor holding the mean per-sample loss.
        """
        # Squared error summed over the pixels. A mean over pixels would shrink
        # this term relative to the KL term, which is summed over latent dims.
        reconstruction = (x_decoded - x).pow(2).sum(dim=-1)
        # Closed-form KL(q(z|x) || N(0, I)) for a diagonal Gaussian posterior.
        kl_regu = -0.5 * torch.sum(1. + z_log_var - z_mean.pow(2) - z_log_var.exp(), dim=-1)
        # Average over the batch so that .backward() / .item() work on batched input.
        return (reconstruction + kl_regu).mean()


In [None]:
# Loss module, then an Adam optimiser (library defaults) over every parameter of the VAE.
loss_fn = VAELoss()
optimizer = optim.Adam(params=vae.parameters())

In [None]:
# Mini-batch training.
# The model draws its latent noise through sampling(), which produces 100 noise rows per
# call, so every batch handed to the model must contain exactly BATCH_SIZE = 100 rows.
# Only full batches are used for that reason (1800 training / 100 validation rows split evenly).
BATCH_SIZE = 100
N_EPOCHS = 100

for epoch in range(N_EPOCHS):
    vae.train()  # once per epoch is enough; no need to re-enter train mode every step
    order = torch.randperm(len(X_train))  # fresh shuffle each epoch
    for start in range(0, len(X_train) - BATCH_SIZE + 1, BATCH_SIZE):
        batch = X_train[order[start:start + BATCH_SIZE]]

        # Forward pass: every row gets its own latent draw and its own reconstruction.
        x_decoded, z_mean, z_log_var = vae(batch)
        # .mean() reduces a per-sample loss vector to a scalar (no-op on a scalar loss).
        loss = loss_fn(batch, x_decoded, z_mean, z_log_var).mean()

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    vae.eval()
    with torch.no_grad():
        val_losses = []
        for start in range(0, len(X_val) - BATCH_SIZE + 1, BATCH_SIZE):
            inputs = X_val[start:start + BATCH_SIZE]
            outputs, z_mean_val, z_log_var_val = vae(inputs)
            val_losses.append(loss_fn(inputs, outputs, z_mean_val, z_log_var_val).mean().item())
        # max(..., 1) avoids a division by zero if the validation set is smaller than one batch.
        avg_val_loss = sum(val_losses) / max(len(val_losses), 1)
        print("Epoch {}: validation loss = {:.4f}".format(epoch+1, avg_val_loss))

In [None]:
'''
Notes:
I used the Gaussian MLP from appendix C.2 for my Encoder and Decoder classes.
The paper is: https://arxiv.org/pdf/1312.6114.pdf
This is a straightforward implementation of the paper, so it still needs hyperparameter tuning; I mostly followed the values
suggested by the authors.

If you want to improve the implementation (train on GPU, hyperparameter tuning, choice of loss function, ...), I would love to help.

Thank you for following up

'''