In [1]:
import torch
import numpy as np
from torch import nn
import torchvision
from torchvision import transforms, datasets
from torch import optim
from torch.utils.data.sampler import SubsetRandomSampler

from matplotlib import pyplot as plt

import torch.nn.functional as F

In [2]:
transform = transforms.Compose([transforms.ToTensor()])

In [3]:
trainset= datasets.MNIST('~/.pytorch/MNIST/', train=True, download=True, transform=transform)

validset= datasets.MNIST('~/.pytorch/MNIST/', train=True, download=True, transform=transform)

testset= datasets.MNIST('~/.pytorch/MNIST/', train=False, download=True,transform=transform)

In [4]:
batch_size = 100
pin_memory = 1
num_workers = 1

In [5]:
num_train = len(trainset)
num_test = len(testset)
indices = list(range(num_train))
split = int(np.floor(0.3 * num_train))

np.random.seed(42)
np.random.shuffle(indices)

In [6]:
train_idx, valid_idx = indices[split:], indices[:split]
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)
train_loader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, sampler=train_sampler,
    num_workers=num_workers, pin_memory=pin_memory,
)
valid_loader = torch.utils.data.DataLoader(
    validset, batch_size=batch_size, sampler=valid_sampler,
    num_workers=num_workers, pin_memory=pin_memory,
)

test_loader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, num_workers=num_workers, 
    pin_memory=pin_memory, shuffle=True
)

In [7]:
for imgs,labels in train_loader:
    print(imgs.shape,labels.shape)
    break

torch.Size([100, 1, 28, 28]) torch.Size([100])


In [8]:
class MLPClassifier(nn.Module):
    def __init__(self, hidden_channels=128):
        super().__init__()
        self.dense1 = nn.Linear(784,500)
        self.dense2 = nn.Linear(500,100)
        self.dense3 = nn.Linear(100,100)
        self.dense4 = nn.Linear(100,10)
        
    def forward(self,x):
        out = x.view(x.shape[0],-1)
        out = F.relu(self.dense1(out))
        out = F.relu(self.dense2(out))
        out = F.relu(self.dense3(out))
        out = self.dense4(out)
        
        return out
        

In [9]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [41]:
def test(model):
    total = 0
    correct = 0
    for imgs,labels in valid_loader:
        imgs = imgs.to(device)
        labels = labels.to(device)

        preds = model(imgs)
        outputs = torch.argmax(preds.data,1)

        total += imgs.shape[0]

        correct += 100.0*(outputs.cpu() == labels.cpu()).sum()
    accuracy = correct/total
    return accuracy.numpy()


In [15]:
def XavierUniformInit(model):
    if 'module' in model._modules.keys():
        # in case of DataParallel
        for key,val in model._modules['module']._modules.items():
            if hasattr(val,'weight'):
                try:
                    nn.init.xavier_uniform_(val.weight.data)
                    nn.init.zeros_(val.bias.data)
                except:
                    print('Couldn\'t initialize for {}'.format(key))
    else:
        for key,val in model._modules.items():
            if hasattr(val,'weight'):
                try:
                    nn.init.xavier_uniform_(val.weight.data)
                    nn.init.zeros_(val.bias.data)
                except:
                    print('Couldn\'t initialize for {}'.format(key))

In [18]:
def UniformInit(model):
    if 'module' in model._modules.keys():
        # in case of DataParallel
        for key,val in model._modules['module']._modules.items():
            if hasattr(val,'weight'):
                try:
                    nn.init.uniform_(val.weight.data)
                    nn.init.zeros_(val.bias.data)
                except:
                    print('Couldn\'t initialize for {}'.format(key))
    else:
        for key,val in model._modules.items():
            if hasattr(val,'weight'):
                try:
                    nn.init.uniform_(val.weight.data)
                    nn.init.zeros_(val.bias.data)
                except:
                    print('Couldn\'t initialize for {}'.format(key))

In [28]:
def NormalInit(model):
    if 'module' in model._modules.keys():
        # in case of DataParallel
        for key,val in model._modules['module']._modules.items():
            if hasattr(val,'weight'):
                try:
                    nn.init.normal_(val.weight.data)
                    nn.init.zeros_(val.bias.data)
                except:
                    print('Couldn\'t initialize for {}'.format(key))
    else:
        for key,val in model._modules.items():
            if hasattr(val,'weight'):
                try:
                    nn.init.normal_(val.weight.data)
                    nn.init.zeros_(val.bias.data)
                except:
                    print('Couldn\'t initialize for {}'.format(key))

In [29]:
def ZeroInit(model):
    if 'module' in model._modules.keys():
        # in case of DataParallel
        for key,val in model._modules['module']._modules.items():
            if hasattr(val,'weight'):
                try:
                    nn.init.normal_(val.weight.data)
                    nn.init.zeros_(val.bias.data)
                except:
                    print('Couldn\'t initialize for {}'.format(key))
    else:
        for key,val in model._modules.items():
            if hasattr(val,'weight'):
                try:
                    nn.init.zeros_(val.weight.data)
                    nn.init.zeros_(val.bias.data)
                except:
                    print('Couldn\'t initialize for {}'.format(key))

In [50]:
def DefaultInit(model):
    pass

## Adam

In [56]:
for func in [DefaultInit,XavierUniformInit,UniformInit,NormalInit]:
    model = MLPClassifier().to(device)
    func(model)

    lr = 1e-4
    optimizer = optim.Adam(model.parameters(),lr= lr,weight_decay=1e-5)
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.995)
    epochs = 25
    iter_n = 0
    for e in range(epochs):
        iter_n = 0
        cumu_loss = 0
    #     print("Epoch %d of %d"%(e+1,epochs))
        for imgs,labels in train_loader:

            imgs = imgs.to(device)
            labels = labels.to(device)
    #         print(imgs.shape)
            optimizer.zero_grad()

            preds = model(imgs)
            loss = criterion(preds,labels)

            loss.backward()
            optimizer.step()
            cumu_loss += loss.item()

            iter_n+=1
    #         print(iter_n)
            if iter_n % 420 == 0:
                total = 0
                correct = 0

                for imgs,labels in valid_loader:
                    imgs = imgs.to(device)
                    labels = labels.to(device)

                    preds = model(imgs)
                    outputs = torch.argmax(preds.data,1)

                    total += imgs.shape[0]

                    correct += 100.0*(outputs.cpu() == labels.cpu()).sum()

                    if total>= 50:
                        break
                accuracy = correct/total
                print('\rEpoch {} of {}. Iteration: {}. Train Loss: {:.6f}. Test Loss: {:.6f}. Test Accuracy: {:.2f}'.format(e+1, epochs, iter_n, cumu_loss/iter_n, loss.item(), accuracy), end='')
        scheduler.step()

    print('\nInitialization: {} .Final Validation Accuracy: {}'.format(func.__name__,test(model)))

Epoch 25 of 25. Iteration: 420. Train Loss: 0.040975. Test Loss: 0.060895. Test Accuracy: 95.00
Initialization: DefaultInit .Final Validation Accuracy: 97.28888702392578
Epoch 25 of 25. Iteration: 420. Train Loss: 0.007937. Test Loss: 0.025502. Test Accuracy: 97.000
Initialization: XavierUniformInit .Final Validation Accuracy: 97.55000305175781
Epoch 25 of 25. Iteration: 420. Train Loss: 89.795143. Test Loss: 68.250000. Test Accuracy: 73.000000.00
Initialization: UniformInit .Final Validation Accuracy: 67.22777557373047
Epoch 25 of 25. Iteration: 420. Train Loss: 80.914796. Test Loss: 132.954391. Test Accuracy: 84.0000
Initialization: NormalInit .Final Validation Accuracy: 88.69999694824219


## RMSprop

In [59]:
for func in [DefaultInit,XavierUniformInit,UniformInit,NormalInit]:
    model = MLPClassifier().to(device)
    func(model)

    lr = 1e-4
    optimizer = optim.RMSprop(model.parameters(),lr= lr,weight_decay=1e-5)
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.995)
    epochs = 25
    iter_n = 0
    for e in range(epochs):
        iter_n = 0
        cumu_loss = 0
    #     print("Epoch %d of %d"%(e+1,epochs))
        for imgs,labels in train_loader:

            imgs = imgs.to(device)
            labels = labels.to(device)
    #         print(imgs.shape)
            optimizer.zero_grad()

            preds = model(imgs)
            loss = criterion(preds,labels)

            loss.backward()
            optimizer.step()
            cumu_loss += loss.item()

            iter_n+=1
    #         print(iter_n)
            if iter_n % 420 == 0:
                total = 0
                correct = 0

                for imgs,labels in valid_loader:
                    imgs = imgs.to(device)
                    labels = labels.to(device)

                    preds = model(imgs)
                    outputs = torch.argmax(preds.data,1)

                    total += imgs.shape[0]

                    correct += 100.0*(outputs.cpu() == labels.cpu()).sum()

                    if total>= 50:
                        break
                accuracy = correct/total
                print('\rEpoch {} of {}. Iteration: {}. Train Loss: {:.6f}. Test Loss: {:.6f}. Test Accuracy: {:.2f}'.format(e+1, epochs, iter_n, cumu_loss/iter_n, loss.item(), accuracy), end='')
        scheduler.step()

    print('\nInitialization: {} .Final Validation Accuracy: {}'.format(func.__name__,test(model)))

Epoch 2 of 25. Iteration: 420. Train Loss: 0.341955. Test Loss: 0.341190. Test Accuracy: 91.00

KeyboardInterrupt: 

## SGD with momentum

In [60]:
for func in [DefaultInit,XavierUniformInit,UniformInit,NormalInit]:
    model = MLPClassifier().to(device)
    func(model)

    lr = 0.1
    optimizer = optim.SGD(model.parameters(),lr= lr,weight_decay=1e-5,momentum=0.7)
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.995)
    epochs = 25
    iter_n = 0
    for e in range(epochs):
        iter_n = 0
        cumu_loss = 0
    #     print("Epoch %d of %d"%(e+1,epochs))
        for imgs,labels in train_loader:

            imgs = imgs.to(device)
            labels = labels.to(device)
    #         print(imgs.shape)
            optimizer.zero_grad()

            preds = model(imgs)
            loss = criterion(preds,labels)

            loss.backward()
            optimizer.step()
            cumu_loss += loss.item()

            iter_n+=1
    #         print(iter_n)
            if iter_n % 420 == 0:
                total = 0
                correct = 0

                for imgs,labels in valid_loader:
                    imgs = imgs.to(device)
                    labels = labels.to(device)

                    preds = model(imgs)
                    outputs = torch.argmax(preds.data,1)

                    total += imgs.shape[0]

                    correct += 100.0*(outputs.cpu() == labels.cpu()).sum()

                    if total>= 50:
                        break
                accuracy = correct/total
                print('\rEpoch {} of {}. Iteration: {}. Train Loss: {:.6f}. Test Loss: {:.6f}. Test Accuracy: {:.2f}'.format(e+1, epochs, iter_n, cumu_loss/iter_n, loss.item(), accuracy), end='')
        scheduler.step()

    print('\nInitialization: {} .Final Validation Accuracy: {}'.format(func.__name__,test(model)))

Epoch 3 of 25. Iteration: 420. Train Loss: 0.090023. Test Loss: 0.073090. Test Accuracy: 97.00

KeyboardInterrupt: 

## SGD without momentum

In [None]:
for func in [DefaultInit,XavierUniformInit,UniformInit,NormalInit]:
    model = MLPClassifier().to(device)
    func(model)

    lr = 0.2
    optimizer = optim.SGD(model.parameters(),lr= lr,weight_decay=1e-5)
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.995)
    epochs = 25
    iter_n = 0
    for e in range(epochs):
        iter_n = 0
        cumu_loss = 0
    #     print("Epoch %d of %d"%(e+1,epochs))
        for imgs,labels in train_loader:

            imgs = imgs.to(device)
            labels = labels.to(device)
    #         print(imgs.shape)
            optimizer.zero_grad()

            preds = model(imgs)
            loss = criterion(preds,labels)

            loss.backward()
            optimizer.step()
            cumu_loss += loss.item()

            iter_n+=1
    #         print(iter_n)
            if iter_n % 420 == 0:
                total = 0
                correct = 0

                for imgs,labels in valid_loader:
                    imgs = imgs.to(device)
                    labels = labels.to(device)

                    preds = model(imgs)
                    outputs = torch.argmax(preds.data,1)

                    total += imgs.shape[0]

                    correct += 100.0*(outputs.cpu() == labels.cpu()).sum()

                    if total>= 50:
                        break
                accuracy = correct/total
                print('\rEpoch {} of {}. Iteration: {}. Train Loss: {:.6f}. Test Loss: {:.6f}. Test Accuracy: {:.2f}'.format(e+1, epochs, iter_n, cumu_loss/iter_n, loss.item(), accuracy), end='')
        scheduler.step()

    print('\nInitialization: {} .Final Validation Accuracy: {}'.format(func.__name__,test(model)))

## Part 2

In [67]:
class AutoEncoder(nn.Module):
    def __init__(self, hidden_channels=128):
        super().__init__()
        self.cnn1 = nn.Conv2d(1,32,kernel_size=3,padding=1)
        self.cnn2 = nn.Conv2d(32,64,kernel_size=3,padding=1)
        self.cnn3 = nn.Conv2d(64,hidden_channels,kernel_size=7)

        self.pool = nn.MaxPool2d(2,return_indices=True)
        
        self.unpool = nn.MaxUnpool2d(2)
        
        self.t_cnn1 = nn.ConvTranspose2d(hidden_channels,64,kernel_size=7)
        self.t_cnn2 = nn.ConvTranspose2d(64,32,kernel_size=3,padding=1)
        self.t_cnn3 = nn.ConvTranspose2d(32,1,kernel_size=3,padding=1)
        
    def forward(self,x):
        out = F.relu(self.cnn1(x))
        out,ind1 = self.pool(out)
        
        out = F.relu(self.cnn2(out))
        out,ind2 = self.pool(out)

        out = self.cnn3(out)
        
        out = F.relu(self.t_cnn1(out))
        out = self.unpool(out,ind2)

        out = F.relu(self.t_cnn2(out))
        out = self.unpool(out,ind1)

        out = torch.sigmoid(self.t_cnn3(out))
        
        return out
    def encoder(self,x):
        out = F.relu(self.cnn1(x))
        out,ind1 = self.pool(out)
        
        out = F.relu(self.cnn2(out))
        out,ind2 = self.pool(out)

        out = self.cnn3(out)
        return out

In [72]:
ae = AutoEncoder(hidden_channels=64).to(device)
ae.load_state_dict(torch.load('q2.pth'))
ae.eval()

AutoEncoder(
  (cnn1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (cnn2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (cnn3): Conv2d(64, 64, kernel_size=(7, 7), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (unpool): MaxUnpool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0))
  (t_cnn1): ConvTranspose2d(64, 64, kernel_size=(7, 7), stride=(1, 1))
  (t_cnn2): ConvTranspose2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (t_cnn3): ConvTranspose2d(32, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)

In [80]:
class MLP_encoded(nn.Module):
    def __init__(self,ae,hidden_channels=64):
        super().__init__()
        self.ae = ae
        ae.eval()
        self.dense1 = nn.Linear(64,32)
        self.dense2 = nn.Linear(32,10)
    def forward(self,x):
        out = ae.encoder(x)
        out = out.view(out.shape[0],-1)
        out = F.relu(self.dense1(out))
        out = self.dense2(out)
        return out

In [81]:
model = MLP_encoded(ae,hidden_channels=64).to(device)

In [82]:

lr = 1e-4
optimizer = optim.RMSprop(model.parameters(),lr= lr,weight_decay=1e-5)
criterion = nn.CrossEntropyLoss()
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.995)
epochs = 25
iter_n = 0
for e in range(epochs):
    iter_n = 0
    cumu_loss = 0
#     print("Epoch %d of %d"%(e+1,epochs))
    for imgs,labels in train_loader:

        imgs = imgs.to(device)
        labels = labels.to(device)
#         print(imgs.shape)
        optimizer.zero_grad()

        preds = model(imgs)
        loss = criterion(preds,labels)

        loss.backward()
        optimizer.step()
        cumu_loss += loss.item()

        iter_n+=1
#         print(iter_n)
        if iter_n % 420 == 0:
            total = 0
            correct = 0

            for imgs,labels in valid_loader:
                imgs = imgs.to(device)
                labels = labels.to(device)

                preds = model(imgs)
                outputs = torch.argmax(preds.data,1)

                total += imgs.shape[0]

                correct += 100.0*(outputs.cpu() == labels.cpu()).sum()

                if total>= 50:
                    break
            accuracy = correct/total
            print('\rEpoch {} of {}. Iteration: {}. Train Loss: {:.6f}. Test Loss: {:.6f}. Test Accuracy: {:.2f}'.format(e+1, epochs, iter_n, cumu_loss/iter_n, loss.item(), accuracy), end='')
    scheduler.step()

print('\nFinal Validation Accuracy: {}'.format(test(model)))

Epoch 25 of 25. Iteration: 420. Train Loss: 0.034926. Test Loss: 0.034584. Test Accuracy: 97.000Final Validation Accuracy: 98.2388916015625
