In [None]:
from sklearn.datasets import load_digits
import matplotlib.pyplot as plt 
import numpy as np
%matplotlib inline
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

In [None]:
# Load the handwritten-digits dataset that ships with scikit-learn
digits = load_digits()
x, y = digits.data, digits.target
n_samples, n_features = x.shape

print("data shape: ",x.shape)
print("class shape: ",y.shape)

# Keep the first 80% of the samples (in dataset order) for training and
# hold out the remainder for validation.
N_train = int(0.8 * n_samples)
x_train, x_val = x[:N_train, :], x[N_train:, :]
y_train, y_val = y[:N_train], y[N_train:]

# Add the bias term: append a constant column of ones to each feature matrix
x_train = np.concatenate([x_train, np.ones((len(x_train), 1))], axis=1)
x_val = np.concatenate([x_val, np.ones((len(x_val), 1))], axis=1)

# Display the first image as a quick visual sanity check
plt.matshow(digits.images[0])
plt.show()

In [None]:
# Convert the numpy splits to tensors: float features for the linear layers,
# long (int64) class indices for CrossEntropyLoss.
X_train, X_val = (torch.tensor(arr, dtype=torch.float) for arr in (x_train, x_val))
Y_train, Y_val = (torch.tensor(arr, dtype=torch.long) for arr in (y_train, y_val))

In [None]:
# create Dataset object to support batch training
class Dataset(torch.utils.data.Dataset):
    """Pairs a collection of features with a collection of labels by position.

    Indexing returns a ``(features[idx], labels[idx])`` tuple, and the length
    is the number of labels, which is what ``DataLoader`` needs for batching.
    """

    def __init__(self, features, labels):
        self.features, self.labels = features, labels

    def __len__(self):
        # The label collection defines how many samples there are
        return len(self.labels)

    def __getitem__(self, idx):
        sample = self.features[idx]
        target = self.labels[idx]
        return (sample, target)

In [None]:
class simpleMLP(nn.Module):
    """Single-hidden-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        n_inputs: number of input features per sample.
        n_classes: number of output units (one raw score per class).
        hidden_size: width of the hidden layer.
    """

    def __init__(self, n_inputs,n_classes,hidden_size):
        super().__init__()
        # input features -> hidden representation
        self.fc1 = nn.Linear(n_inputs, hidden_size)
        # hidden representation -> per-class scores
        self.output = nn.Linear(hidden_size, n_classes)

    def forward(self, x):
        """Return the unnormalised class scores for the batch ``x``."""
        hidden = torch.relu(self.fc1(x))
        return self.output(hidden)

In [None]:
class complexMLP(nn.Module):
    """Two-hidden-layer MLP with batch normalisation and dropout.

    Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout(0.2),
    followed by a final Linear layer that emits one raw score per class.

    Args:
        n_inputs: number of input features per sample.
        n_classes: number of output units (one per class).
        hidden_size: width of the first hidden layer; ``None`` (the default)
            selects 100 units. The second hidden layer always has 32 units.
    """

    def __init__(self, n_inputs,n_classes,hidden_size=None):
        super(complexMLP, self).__init__()
        # Honour an explicit width, falling back to 100 units when none is given
        if hidden_size is None:
            hidden_size = 100
        dropout = 0.2
        hidden_size_2 = 32

        # First fully connected stage: batch norm and dropout of 0.2
        self.fc1 = nn.Linear(n_inputs, hidden_size)
        self.batchnormfc1 = nn.BatchNorm1d(hidden_size)
        self.dropoutfc1 = nn.Dropout(dropout)

        # Second fully connected stage (32 hidden units): batch norm and dropout of 0.2
        self.fc2 = nn.Linear(hidden_size, hidden_size_2)
        self.batchnormfc2 = nn.BatchNorm1d(hidden_size_2)
        self.dropoutfc2 = nn.Dropout(dropout)

        # Size the output layer from n_classes rather than a fixed 10
        self.output = nn.Linear(hidden_size_2, n_classes)

    def forward(self, X):
        """Return the unnormalised class scores for the batch ``X``."""
        X = self.fc1(X)
        X = self.batchnormfc1(X)
        X = torch.nn.functional.relu(X)
        X = self.dropoutfc1(X)

        X = self.fc2(X)
        X = self.batchnormfc2(X)
        X = torch.nn.functional.relu(X)
        X = self.dropoutfc2(X)

        return self.output(X)

In [None]:
def get_predictions(model,data):
    """Return the predicted class index for every row of ``data``.

    The model is run with gradient tracking disabled, so this works whether
    or not the caller is already inside ``torch.no_grad()``. The scores are
    moved to the CPU before being converted to numpy.

    Args:
        model: callable mapping a batch tensor to per-class scores, with the
            class scores along axis 1 of its output.
        data: input batch tensor passed to ``model``.

    Returns:
        numpy array holding, for each row, the index of the highest score.
    """
    with torch.no_grad():
        scores = model(data)
    # detach/cpu make the numpy conversion valid regardless of device
    return scores.detach().cpu().numpy().argmax(axis=1)

def compute_accuracy(predictions,ground_truth):
    """Return the fraction of predictions that equal the true labels.

    Args:
        predictions: array of predicted class indices.
        ground_truth: tensor of true class indices (converted with ``.numpy()``).
    """
    truth = ground_truth.numpy()
    is_correct = predictions == truth  # element-wise boolean mask
    return is_correct.mean()

In [None]:
"""
In PyTorch, regularization is easy to implement by adding a penalty term to the
loss. After computing the loss (whatever the loss function is), we iterate over the
parameters of the model, sum their squares (for L2) or absolute values (for L1),
scale the sum by the regularization strength, add it to the loss, and backpropagate.
"""
def l2_regularizer(model,l2_lambda):
    """Step 9: L2 penalty, i.e. ``l2_lambda`` times the sum of squared parameters.

    Every tensor yielded by ``model.parameters()`` contributes to the sum.
    """
    squared_total = 0
    for param in model.parameters():
        squared_total = squared_total + param.pow(2.0).sum()
    return l2_lambda * squared_total
def l1_regularizer(model,l1_lambda):
    """Step 9: L1 penalty, i.e. ``l1_lambda`` times the sum of absolute parameter values.

    Every tensor yielded by ``model.parameters()`` contributes to the sum.
    """
    abs_total = 0
    for param in model.parameters():
        abs_total = abs_total + param.abs().sum()
    return l1_lambda * abs_total

In [None]:
def run_model(func,optimizer,learning_rate,num_epochs,hidden_size=None,regularization=None,
              batch_size=16,reg_lambda=0.001,seed=None):
    """Train a freshly built model on the training tensors and track validation accuracy.

    Reads the module-level ``X_train``/``Y_train`` for training and
    ``X_val``/``Y_val`` for evaluation.

    Args:
        func: model class or factory, called as ``func(n_inputs, n_classes, hidden_size)``.
        optimizer: optimizer class (e.g. ``torch.optim.Adam``), called as
            ``optimizer(model.parameters(), lr=learning_rate)``.
        learning_rate: learning rate handed to the optimizer.
        num_epochs: number of passes over the training set.
        hidden_size: forwarded unchanged to ``func``.
        regularization: ``None`` for no penalty, ``"l1"`` or ``"l2"`` to add
            the corresponding penalty to every batch loss.
        batch_size: mini-batch size used by the training ``DataLoader``.
        reg_lambda: multiplier applied to the L1/L2 penalty.
        seed: if not ``None``, passed to ``torch.manual_seed`` before the model is built.

    Returns:
        numpy array of length ``num_epochs`` with the validation accuracy
        measured after each epoch.

    Raises:
        ValueError: if ``regularization`` is not ``None``, ``"l1"`` or ``"l2"``.
    """
    # Fail loudly on a typo such as "L2" instead of silently training unregularised
    if regularization not in (None, "l1", "l2"):
        raise ValueError(
            "regularization must be None, 'l1' or 'l2', got {!r}".format(regularization)
        )
    if seed is not None:
        torch.manual_seed(seed)

    # Number of classes = number of distinct labels present in the training set
    n_classes = len(set(Y_train.tolist()))
    model = func(X_train.shape[1], n_classes, hidden_size)
    criterion = torch.nn.CrossEntropyLoss()
    opt = optimizer(model.parameters(), lr=learning_rate)

    # enable batching of training data
    dataloader = DataLoader(Dataset(X_train, Y_train),
                            batch_size=batch_size,
                            shuffle=True)

    # keep the validation accuracy measured after each epoch
    val_accs = np.zeros(num_epochs)

    for i_epoch in range(num_epochs):
        model.train()
        for X_batch, Y_batch in dataloader:
            model.zero_grad()  # reset model gradients
            output = model(X_batch)  # conduct forward pass

            loss = criterion(output, Y_batch)
            if regularization == "l2":
                loss = loss + l2_regularizer(model, reg_lambda)
            elif regularization == "l1":
                loss = loss + l1_regularizer(model, reg_lambda)

            loss.backward()  # backpropagate loss to calculate gradients
            opt.step()  # update model weights

        # Only validation accuracy is reported, so the training-set pass is skipped
        model.eval()
        with torch.no_grad():  # no need to calculate gradients when assessing accuracy
            pred_val = get_predictions(model, X_val)
            val_accs[i_epoch] = compute_accuracy(pred_val, Y_val)

    return val_accs

In [None]:
# Baseline: simpleMLP with 32 hidden units, Adam at lr=0.001, 50 epochs
optimizer = torch.optim.Adam
val_acc = run_model(
    simpleMLP,
    optimizer,
    learning_rate=0.001,
    num_epochs=50,
    hidden_size=32,
)

# Validation accuracy per epoch
plt.plot(val_acc)
plt.gca().set(xlabel='Epochs', ylabel='Val Accuracy')
plt.show()

In [None]:
# Learning-rate sweep: train the baseline simpleMLP once per learning rate and
# overlay the validation-accuracy curves.
optimizer = torch.optim.Adam
lrs = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5]
labels = ['lr_' + str(lr) for lr in lrs]

fig, ax = plt.subplots()
for lr, label in zip(lrs, labels):
    val_acc = run_model(simpleMLP, optimizer, learning_rate=lr, num_epochs=50, hidden_size=32)
    # The default colour cycle gives each curve its own colour; no need to
    # reach into the private (and, since Matplotlib 3.8, removed) prop_cycler.
    ax.plot(val_acc, label=label)

ax.set_xlabel('Epochs')
ax.set_ylabel('Val Accuracy')
ax.set_title('Validation accuracy by learning rate')
ax.legend(loc='best')
plt.show()

In [None]:
# Hidden-size sweep: train simpleMLP once per hidden-layer width and overlay
# the validation-accuracy curves.
optimizer = torch.optim.Adam
hss = [16, 32, 64, 128, 256, 512]
# 'hs_' = hidden size (the previous 'bs_' prefix read as batch size)
labels = ['hs_' + str(hs) for hs in hss]

fig, ax = plt.subplots()
for hs, label in zip(hss, labels):
    val_acc = run_model(simpleMLP, optimizer, learning_rate=0.001, num_epochs=50, hidden_size=hs)
    # The default colour cycle gives each curve its own colour; no need to
    # reach into the private (and, since Matplotlib 3.8, removed) prop_cycler.
    ax.plot(val_acc, label=label)

ax.set_xlabel('Epochs')
ax.set_ylabel('Val Accuracy')
ax.set_title('Validation accuracy by hidden size')
ax.legend(loc='best')
plt.show()

In [None]:
# Optimizer sweep: train simpleMLP (256 hidden units) once per optimizer class,
# all at the same learning rate, and overlay the validation-accuracy curves.
optimizers = [torch.optim.Adam, torch.optim.SGD, torch.optim.Adagrad, torch.optim.Adadelta, torch.optim.RMSprop]
# Use the bare class name ("Adam") rather than the "<class '...'>" repr
labels = [opt.__name__ for opt in optimizers]

fig, ax = plt.subplots()
for opt, label in zip(optimizers, labels):
    val_acc = run_model(simpleMLP, opt, learning_rate=0.001, num_epochs=50, hidden_size=256)
    # The default colour cycle gives each curve its own colour; no need to
    # reach into the private (and, since Matplotlib 3.8, removed) prop_cycler.
    ax.plot(val_acc, label=label)

ax.set_xlabel('Epochs')
ax.set_ylabel('Val Accuracy')
ax.set_title('Validation accuracy by optimizer')
ax.legend(loc='best')
plt.show()

In [None]:
# For step 9: simpleMLP trained with the L2 penalty added to the loss.
# Bind the optimizer here so the cell does not depend on a value left over
# from an earlier sweep cell.
optimizer = torch.optim.Adam
val_acc = run_model(simpleMLP, optimizer, learning_rate=0.001, num_epochs=50, hidden_size=32, regularization="l2")
plt.plot(val_acc)
plt.xlabel('Epochs')
plt.ylabel('Val Accuracy')
plt.title('simpleMLP with L2 regularization')
plt.show()

In [None]:
# For step 9: simpleMLP trained with the L1 penalty added to the loss.
# Bind the optimizer here so the cell does not depend on a value left over
# from an earlier sweep cell.
optimizer = torch.optim.Adam
val_acc = run_model(simpleMLP, optimizer, learning_rate=0.001, num_epochs=50, hidden_size=32, regularization="l1")
plt.plot(val_acc)
plt.xlabel('Epochs')
plt.ylabel('Val Accuracy')
plt.title('simpleMLP with L1 regularization')
plt.show()

In [None]:
# For step 10: the deeper complexMLP (batch norm + dropout) with its default layer sizes.
# Bind the optimizer here so the cell does not depend on a value left over
# from an earlier sweep cell.
optimizer = torch.optim.Adam
val_acc = run_model(complexMLP, optimizer, learning_rate=0.001, num_epochs=50, hidden_size=None)
plt.plot(val_acc)
plt.xlabel('Epochs')
plt.ylabel('Val Accuracy')
plt.title('complexMLP')
plt.show()