# Autoencoder

In [35]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
drivepath = '/content/gdrive/MyDrive/Spring_2021/11785_Intro_to_Deep_Learning/DL_Group_Project/Dataset/Hw1p2_frames'

In [36]:
phoneme = "SIL"

Reference: https://medium.com/pytorch/implementing-an-autoencoder-in-pytorch-19baa22647d1


In [None]:
!pip install torch

In [9]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [10]:
class AE(nn.Module):
    def __init__(self, in_features):
        super().__init__()
        self.encoder_hidden_layer = nn.Linear(
            in_features=in_features, out_features=128
        )
        self.encoder_output_layer = nn.Linear(
            in_features=128, out_features=128
        )
        self.decoder_hidden_layer = nn.Linear(
            in_features=128, out_features=128
        )
        self.decoder_output_layer = nn.Linear(
            in_features=128, out_features=in_features
        )

    def forward(self, features):
        activation = self.encoder_hidden_layer(features)
        activation = torch.relu(activation)

        code = self.encoder_output_layer(activation)
        code = torch.relu(code)

        activation = self.decoder_hidden_layer(code)
        activation = torch.relu(activation)

        activation = self.decoder_output_layer(activation)
        reconstructed = torch.relu(activation)
        
        return reconstructed

Explaining some of the components in the code snippet above,

*   The torch.nn.Linear layer creates a linear function (θx + b), with its parameters initialized (by default) with He/Kaiming uniform initialization, as it can be confirmed here. This means we will call an activation/non-linearity for such layers.
*   The in_features parameter dictates the feature size of the input tensor to a particular layer, e.g. in self.encoder_hidden_layer, it accepts an input tensor with the size of [N, input_shape] where N is the number of examples, and input_shape is the number of features in one example.
*   The out_features parameter dictates the feature size of the output tensor of a particular layer. Hence, in the self.decoder_output_layer, the feature size is kwargs["input_shape"], denoting that it reconstructs the original data input.
*   The forward() function defines the forward pass for a model, similar to call in tf.keras.Model. This is the function invoked when we pass input tensors to an instantiated object of a torch.nn.Module class.



In [13]:
#  use gpu if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# create a model from `AE` autoencoder class
# load it to the specified device, either gpu or cpu
model = AE(in_features=40).to(device)

# create an optimizer object
# Adam optimizer with learning rate 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# mean-squared error loss
criterion = nn.MSELoss()

In [14]:
class PhonemeDataset(Dataset):
    def __init__(self, x_path):
        # shapes of x in x_path: (timestep, 40)
        self.X = np.load(x_path, allow_pickle=True)

    # number of rows in the dataset
    def __len__(self):
        return len(self.X)
 
    # get a row at an index
    def __getitem__(self, index):
        x = torch.Tensor(self.X[index])
    
        return x

In [37]:
train_dataset = PhonemeDataset(f"{drivepath}/{phoneme}.npy")  # TODO: rename to train_{phoneme}.npy
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=1, shuffle=True, num_workers=4, pin_memory=True
)

  cpuset_checked))


In [None]:
test_dataset = PhonemeDataset(f"{drivepath}/test_{phoneme}.npy")
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=32, shuffle=False, num_workers=4
)

In [38]:
epochs = 50
print(len(train_loader))

264389


In [None]:
for epoch in range(epochs):
    loss = 0
    for batch_features in train_loader:
        # load it to the active device
        batch_features = batch_features.view(-1, 40).to(device)
        
        # reset the gradients back to zero
        # PyTorch accumulates gradients on subsequent backward passes
        optimizer.zero_grad()
        
        # compute reconstructions
        outputs = model(batch_features)
        
        # compute training reconstruction loss
        train_loss = criterion(outputs, batch_features)
        
        # compute accumulated gradients
        train_loss.backward()
        
        # perform parameter update based on current gradients
        optimizer.step()
        
        # add the mini-batch training loss to epoch loss
        loss += train_loss.item()
    
    # compute the epoch training loss
    loss = loss / len(train_loader)
    
    # display the epoch training loss
    print("epoch : {}/{}, loss = {:.6f}".format(epoch + 1, epochs, loss))

  cpuset_checked))


epoch : 1/50, loss = 88.116689
epoch : 2/50, loss = 88.102177
epoch : 3/50, loss = 88.100266
epoch : 4/50, loss = 88.099162
epoch : 5/50, loss = 88.098444
