In [0]:
%matplotlib inline
import torch
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
import numpy as np
from PIL import Image
import copy
import time


### Downloading Dataset

In [0]:
# Training split of Fashion-MNIST, stored under ./fmnist/ (downloaded on first
# use) and served in shuffled mini-batches of 1024 images. ToTensor() converts
# each image to a tensor before batching.
trainLoader = torch.utils.data.DataLoader(
    datasets.FashionMNIST('./fmnist/', train=True, download=True,
                          transform=transforms.Compose([
                              transforms.ToTensor()
                          ])),
    batch_size=1024, shuffle=True, num_workers=1
)

# Test split, loaded with the same preprocessing and batch size.
# download=True lets this loader be built even when the files are not on disk
# yet; the worker count must be passed as `num_workers` (any other spelling is
# rejected by DataLoader with a TypeError).
testLoader = torch.utils.data.DataLoader(
    datasets.FashionMNIST('./fmnist/', train=False, download=True,
                          transform=transforms.Compose([
                              transforms.ToTensor()
                          ])),
    batch_size=1024, shuffle=True, num_workers=1
)


In [0]:
# Report how many samples each split contains.
n_train = len(trainLoader.dataset)
n_test = len(testLoader.dataset)
print("Number of samples in train set: " + str(n_train))
print("Number of samples in test set: " + str(n_test))


In [0]:
# Preview the first training sample: each dataset item is an (image, label)
# pair, and only the image is needed here.
img = trainLoader.dataset[0][0]
# Drop the leading axis so imshow receives a 2-D array.
img_np = img.numpy().squeeze(0)
plt.imshow(img_np, cmap='gray')


In [0]:
# Record once whether CUDA is usable; later cells check this flag before
# moving the model or tensors to the GPU with .cuda().
use_gpu = torch.cuda.is_available()


### Define the Autoencoder class

In [0]:
class autoencoder(nn.Module):
    """Fully connected autoencoder for flattened 28x28 images.

    The encoder maps a 784-value input to a 100-value code through two
    Linear+ReLU stages; the decoder mirrors it back to 784 values. Both halves
    are ``nn.Sequential`` containers exposed as ``self.encoder`` and
    ``self.decoder`` so individual layers can be indexed (e.g. ``encoder[0]``).
    """

    def __init__(self):
        super(autoencoder, self).__init__()
        # 784 -> 100 -> 100
        self.encoder = nn.Sequential(
            nn.Linear(28*28, 100),
            nn.ReLU(),
            nn.Linear(100, 100),
            nn.ReLU()
        )
        # 100 -> 100 -> 784; the final ReLU keeps reconstructions non-negative.
        self.decoder = nn.Sequential(
            nn.Linear(100, 100),
            nn.ReLU(),
            nn.Linear(100, 28*28),
            nn.ReLU()
        )

    def forward(self, x):
        """Encode then decode ``x``.

        Args:
            x: tensor whose last dimension has 28*28 entries.

        Returns:
            Reconstruction with the same shape as ``x``.
        """
        x = self.encoder(x)
        x = self.decoder(x)
        return x


### Defining function for training the network for reconstruction

In [0]:
# Training routine for the reconstruction (autoencoder) objective
def train_model_recon(model, criterion, optimizer, num_epochs, data_loader=None):
    """Train ``model`` to reproduce its own flattened inputs.

    Each batch is flattened to (batch, features), passed through ``model`` and
    compared against itself with ``criterion``; dataset labels are ignored.
    After every epoch the loss curve is redrawn and saved to
    ``aeRecon_lossPlot.png``.

    Args:
        model: network whose output has the same shape as its flattened input.
        criterion: loss called as ``criterion(outputs, targets)``.
        optimizer: optimizer already constructed over ``model``'s parameters.
        num_epochs: number of passes over the data.
        data_loader: iterable of ``(inputs, labels)`` batches. Defaults to the
            notebook-level ``trainLoader``.

    Returns:
        The same ``model`` object, updated in place.
    """
    if data_loader is None:
        data_loader = trainLoader

    # Send batches wherever the model lives, so the function works on CPU and
    # GPU without consulting a global flag.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    start = time.time()
    # Mean per-sample loss of every finished epoch
    train_loss = []

    for epoch in range(num_epochs):
        epochStartTime = time.time()
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))

        running_loss = 0.0
        samples_seen = 0

        for inputs, _ in data_loader:
            # Flatten each image to a vector; the input is also the target.
            inputs = inputs.view(inputs.size(0), -1).to(device)
            batch_size = inputs.size(0)

            # Initializing model gradients to zero
            model.zero_grad()
            # Data feed-forward through the network
            outputs = model(inputs)
            # Reconstruction error against the input itself
            loss = criterion(outputs, inputs)

            # Backpropagating the loss and updating the model parameters
            loss.backward()
            optimizer.step()

            # .item() yields a plain float, so no autograd graph is kept alive.
            # Weighting by batch size assumes `criterion` returns a per-batch
            # mean (the default reduction of nn.MSELoss).
            running_loss += loss.item() * batch_size
            samples_seen += batch_size

        # Mean loss per sample over the epoch (guarded for an empty loader)
        epoch_loss = running_loss / max(samples_seen, 1)
        train_loss.append(epoch_loss)

        print('Epoch loss: {:.6f}'.format(epoch_loss))
        epochTimeEnd = time.time() - epochStartTime
        print('Epoch complete in {:.0f}m {:.0f}s'.format(
            epochTimeEnd // 60, epochTimeEnd % 60
        ))
        print('-' * 25)

        # Redraw the loss curve from scratch so earlier epochs' lines do not
        # pile up on the figure, then refresh the saved image.
        fig1 = plt.figure(1)
        fig1.clf()
        plt.plot(range(epoch + 1), train_loss, 'r--', label='train')
        plt.legend(loc='upper left')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        fig1.savefig('aeRecon_lossPlot.png')

    time_elapsed = time.time() - start
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60)
    )
    return model


### Model Initialization and Training

In [0]:
# Build a fresh autoencoder and print its layer layout.
net = autoencoder()
print(net)
# Move the model to the GPU only when one is available.
net = net.cuda() if use_gpu else net
# Keep an independent copy of the first encoder layer's weights as they are
# before any training.
init_weights = net.encoder[0].weight.data.clone()


In [0]:
# Reconstruction objective: mean squared error, optimised with SGD + momentum.
criterion = nn.MSELoss()
optimizer = optim.SGD(params=net.parameters(), lr=0.5, momentum=0.9)
# Run two epochs of reconstruction training and keep the returned model.
net = train_model_recon(
    model=net,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=2,
)


### Reconstruction using trained Autoencoder

In [0]:
# First image of the test split; dataset items are (image, label) pairs.
TestImg = testLoader.dataset[0][0]

# Flatten to one row per leading-axis entry, matching the network's input.
flatInput = TestImg.view(TestImg.size(0), -1)
if use_gpu:
    flatInput = flatInput.cuda()

# Reshape the reconstruction back to image form and bring it to the CPU so it
# can be converted to NumPy for plotting. `.data` drops the autograd history.
outputImg = net(flatInput)
outputImg = outputImg.data.view(-1, 28, 28).cpu()

# Show input and reconstruction side by side; each panel is titled for what it
# actually displays.
fig = plt.figure()
plot = fig.add_subplot(1, 2, 1)
img = np.array(TestImg.numpy())[0]
plot.set_title('Original Image')
imgplot = plt.imshow(img, cmap='gray')

plot = fig.add_subplot(1, 2, 2)
img = np.array(outputImg.numpy())[0]
plot.set_title('Reconstructed Image')
imgplot = plt.imshow(img, cmap='gray')
plt.show()


### Visualization of weights of encoder

In [0]:
# Independent copy of the first encoder layer's weights after reconstruction
# training, for comparison with the pre-training snapshot.
trained_weights = net.encoder[0].weight.data.clone()

In [0]:
# Map weights linearly so that -1 -> 0 and +1 -> 255, then quantise to uint8.
# Values are clamped first so anything outside that range saturates rather
# than wrapping around in the byte cast. Results go to new names, leaving the
# raw snapshots untouched so this cell can be re-run safely. `.cpu()` is a
# no-op for tensors already on the CPU, so no GPU/CPU branch is needed.
# NOTE(review): view(-1, 280, 280) reinterprets the flat weight buffer as one
# 280x280 image; it does not arrange per-unit filters into tiles - confirm
# this layout is what is wanted.
init_weights_img = ((1 + init_weights) * 127.5).clamp(0, 255).view(-1, 280, 280).byte().cpu()
trained_weights_img = ((1 + trained_weights) * 127.5).clamp(0, 255).view(-1, 280, 280).byte().cpu()

# Subtract in a signed dtype: uint8 arithmetic wraps modulo 256, which would
# turn every negative change into a large positive value.
d_weights = init_weights_img.short() - trained_weights_img.short()

# One row of three panels: before, after, and the change.
fig = plt.figure()
panels = (
    ('Initial Weights', init_weights_img),
    ('Trained Weights', trained_weights_img),
    ('Weight Update', d_weights),
)
for position, (title, image_stack) in enumerate(panels, start=1):
    plot = fig.add_subplot(1, 3, position)
    plot.set_title(title)
    img = np.array(image_stack.numpy())[0]
    imgplot = plt.imshow(img, cmap='gray')
plt.show()


### Autoencoder for Classification

In [0]:
# Keep every child module of the current network except the last one and wrap
# the remainder in a new Sequential. For the autoencoder that drops the
# decoder; if this cell is re-run it drops the previously attached classifier,
# which is then replaced by a fresh one below.
new_classifier = nn.Sequential(*list(net.children())[:-1])
net = new_classifier
# Classification head: 100-value code -> 10 class log-probabilities.
# dim=1 normalises across classes for (batch, classes) outputs; leaving `dim`
# unset relies on a deprecated implicit choice.
net.add_module('classifier', nn.Sequential(nn.Linear(100, 10), nn.LogSoftmax(dim=1)))
print(net)

if use_gpu:
    net = net.cuda()
# Snapshots taken before fine-tuning: the first layer of the retained encoder
# and the new classifier's linear layer.
cll_weights = copy.deepcopy(net[0][0].weight.data)
init_classifier_weights = copy.deepcopy(net.classifier[0].weight.data)


### Defining functions for training the network for classification

In [0]:
# Training routine for the classification objective
def train_model_clasif(model, criterion, optimizer, num_epochs, data_loader=None):
    """Train ``model`` as a classifier on flattened inputs.

    Each batch is flattened to (batch, features); the predicted class is the
    index of the largest model output. Mean loss and accuracy are printed after
    every epoch.

    Args:
        model: network producing one score per class for each input row.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer already constructed over ``model``'s parameters.
        num_epochs: number of passes over the data.
        data_loader: iterable of ``(inputs, labels)`` batches. Defaults to the
            notebook-level ``trainLoader``.

    Returns:
        The same ``model`` object, updated in place.
    """
    if data_loader is None:
        data_loader = trainLoader

    # Send batches wherever the model lives, so inputs and labels always share
    # a device with the parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    start = time.time()

    for epoch in range(num_epochs):
        epochStartTime = time.time()
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))

        running_loss = 0.0
        running_corrects = 0
        samples_seen = 0

        for inputs, labels in data_loader:
            inputs = inputs.view(inputs.size(0), -1).to(device)
            labels = labels.to(device)
            batch_size = inputs.size(0)

            # Initializing model gradients to zero
            model.zero_grad()
            # Data feed-forward through the network
            outputs = model(inputs)
            # Classification loss for this batch
            loss = criterion(outputs, labels)

            # Backpropagating the loss and updating the model parameters
            loss.backward()
            optimizer.step()

            # .item() yields plain Python numbers. Weighting the loss by batch
            # size assumes `criterion` returns a per-batch mean (the default
            # reduction of nn.NLLLoss).
            running_loss += loss.item() * batch_size
            preds = outputs.argmax(dim=1)
            running_corrects += (preds == labels).sum().item()
            samples_seen += batch_size

        # Per-sample averages over the epoch (guarded for an empty loader)
        epoch_loss = running_loss / max(samples_seen, 1)
        epoch_acc = running_corrects / float(max(samples_seen, 1))

        print('Epoch loss: {:.6f}, Epoch accuracy: {:.6f}'.format(epoch_loss, epoch_acc))
        epochTimeEnd = time.time() - epochStartTime
        print('Epoch complete in {:.0f}m {:.0f}s'.format(
            epochTimeEnd // 60, epochTimeEnd % 60
        ))
        print('-' * 25)

    time_elapsed = time.time() - start
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60)
    )
    return model

### Defining loss function and training the network

In [0]:
# Classification objective: negative log-likelihood on the network's
# log-probability outputs, optimised with SGD + momentum.
criterion = nn.NLLLoss()
optimizer = optim.SGD(params=net.parameters(), lr=0.1, momentum=0.9)
# Fine-tune for ten epochs and keep the returned model.
net = train_model_clasif(
    model=net,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10,
)

### Performance evaluation of trained network

In [0]:
# Finding testing accuracy: count correct predictions over the whole test
# split, then divide by the number of test samples.
running_corrects = 0
# No gradients are needed for evaluation, so skip building autograd graphs.
with torch.no_grad():
    for tsData in testLoader:
        inputs, labels = tsData
        # Flatten each image to a vector, as the network expects.
        inputs = inputs.view(inputs.size(0), -1)
        if use_gpu:
            # Labels must sit on the same device as the predictions they are
            # compared with.
            inputs, labels = inputs.cuda(), labels.cuda()
        # Feedforward test data batch through model
        output = net(inputs)
        # Predicted class is the one with maximum output
        preds = output.argmax(dim=1)
        running_corrects += (preds == labels).sum().item()

# Fraction of correct predictions, as a plain Python float
ts_acc = running_corrects / float(len(testLoader.dataset))
print('Accuracy on test set = ' + str(ts_acc))

### Visualization of weights of encoder

In [0]:
# Independent copy of the first encoder layer's weights after classification
# fine-tuning.
cll_weights_ft = net[0][0].weight.data.clone()

In [0]:
# Map weights linearly so that -1 -> 0 and +1 -> 255, then quantise to uint8.
# Values are clamped first so anything outside that range saturates rather
# than wrapping around in the byte cast. Results go to new names, leaving the
# raw snapshots untouched so this cell can be re-run safely. `.cpu()` is a
# no-op for tensors already on the CPU, so no GPU/CPU branch is needed.
# NOTE(review): view(-1, 280, 280) reinterprets the flat weight buffer as one
# 280x280 image; it does not arrange per-unit filters into tiles - confirm
# this layout is what is wanted.
cll_weights_img = ((1 + cll_weights) * 127.5).clamp(0, 255).view(-1, 280, 280).byte().cpu()
cll_weights_ft_img = ((1 + cll_weights_ft) * 127.5).clamp(0, 255).view(-1, 280, 280).byte().cpu()

# Subtract in a signed dtype: uint8 arithmetic wraps modulo 256, which would
# turn every negative change into a large positive value.
d_weights = cll_weights_img.short() - cll_weights_ft_img.short()

# One row of three panels: before fine-tuning, after, and the change.
fig = plt.figure()
panels = (
    ('Encoder Weights', cll_weights_img),
    ('Finetuned Weights', cll_weights_ft_img),
    ('Weight Update', d_weights),
)
for position, (title, image_stack) in enumerate(panels, start=1):
    plot = fig.add_subplot(1, 3, position)
    plot.set_title(title)
    img = np.array(image_stack.numpy())[0]
    imgplot = plt.imshow(img, cmap='gray')
plt.show()


### Variation of weights of Classifier

In [0]:
# Independent copy of the classifier's linear-layer weights after fine-tuning.
trained_classifier_weights = net.classifier[0].weight.data.clone()

In [0]:
# Map weights linearly so that -1 -> 0 and +1 -> 255, then quantise to uint8.
# The factor is 127.5: with 255 every positive weight would land above 255 and
# overflow the byte cast. Values are clamped so out-of-range weights saturate.
# Results go to new names, leaving the raw snapshots untouched so this cell
# can be re-run safely. `.cpu()` is a no-op for tensors already on the CPU.
# NOTE(review): view(-1, 40, 25) reinterprets the flat weight buffer as one
# 40x25 image - confirm this layout is what is wanted.
init_classifier_img = ((1 + init_classifier_weights) * 127.5).clamp(0, 255).view(-1, 40, 25).byte().cpu()
trained_classifier_img = ((1 + trained_classifier_weights) * 127.5).clamp(0, 255).view(-1, 40, 25).byte().cpu()

# Subtract in a signed dtype: uint8 arithmetic wraps modulo 256, which would
# turn every negative change into a large positive value.
d_weights = init_classifier_img.short() - trained_classifier_img.short()

# One figure with three side-by-side panels, matching the encoder-weight
# plots: before, after, and the change.
fig = plt.figure()
panels = (
    ('Initial Weights', init_classifier_img),
    ('Trained Weights', trained_classifier_img),
    ('Weight Update', d_weights),
)
for position, (title, image_stack) in enumerate(panels, start=1):
    plot = fig.add_subplot(1, 3, position)
    plot.set_title(title)
    img = np.array(image_stack.numpy())[0]
    imgplot = plt.imshow(img, cmap='gray')
plt.show()
