In [1]:
import torch
import torchvision
import torchvision.datasets as datasets
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import os

# Parameters
learning_rate = 0.00001
num_epochs = 100
batch_size = 32
dropout_rate = 0.5
reuse_model = True

#Comment this out for L2 regularization
regularization = 'l1'
l1 = 0.9

device = ('cuda:0' if torch.cuda.is_available() else 'cpu:0')

filename = "adam_model-softmax-100-{}".format(regularization)

# Compute accuracy
def Accuracy(dataLoader):
    total_samples, score = 0, 0
    for i, data in enumerate(dataLoader):
        with torch.no_grad():
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            outputs = torch.argmax(outputs, dim=1)
            correct = sum(outputs == labels).data.to('cpu').numpy()

            total_samples = total_samples + batch_size
            score = score + correct

    accuracy = score * 1.0 / total_samples
    return accuracy

class Net(nn.Module):

    # Defining the network
    def __init__(self):
        super(Net, self).__init__()
        
        # 28*28 input image channel, 10 output channels
        self.input_size = 28*28
        self.hidden1_size = 100
        self.hidden2_size = 100
        self.output_size = 10
        
        # Defining the layers and activation functions
        self.layer1 = nn.Linear(self.input_size, self.hidden1_size)
        self.layer2 = nn.Linear(self.hidden1_size, self.hidden2_size)
        self.layer3 = nn.Linear(self.hidden2_size, self.output_size)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.softmax = nn.Softmax()
        
    # Forward propagation
    def forward(self, x):
        x = x.view(-1, self.input_size) # Reshaping to a vector
        
        # Activation functions
        x = F.relu(self.layer1(x))
        x = self.dropout(x)
        x = F.relu(self.layer2(x))
        x = self.dropout(x)
        x = self.softmax(self.layer3(x))
        return x

# To transform to tensor
transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

# Dataset for training, validation and test sets as tensors
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transforms)
mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=transforms)
mnist_trainset, mnist_validationset = torch.utils.data.random_split(mnist_trainset, [50000, 10000])

num_training_samples = mnist_trainset.__len__()
l1 = (l1 * 1.0) / num_training_samples

# Data loader for train, test and validation sets
trainloader = torch.utils.data.DataLoader(mnist_trainset, batch_size=batch_size, num_workers=2, shuffle=True)
testloader = torch.utils.data.DataLoader(mnist_testset, batch_size=batch_size, num_workers=2, shuffle=True)
validationloader = torch.utils.data.DataLoader(mnist_validationset, batch_size=batch_size, num_workers=2, shuffle=True)

# Use pretrained model or train new
model = Net()
if reuse_model == True:
    if os.path.exists(filename):
        model.load_state_dict(torch.load(f=filename))
    else:
        print("No pre-trained model detected. Starting fresh model training.")

model.to(device)

# Defining optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=learning_rate) # Adam
loss_func = nn.CrossEntropyLoss() # Cross Entropy

# Starting Training
for epoch in range (0, num_epochs):
    epoch_loss = 0
    for i, data in enumerate(trainloader):
        x, label = data
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)
        output = model(inputs)

        model.zero_grad()
        loss = loss_func(output, labels)
        
        # For L1 regularization, SGD already performs L2 by default
        if regularization == "l1":
            for param in model.parameters():
                loss = loss + l1 * torch.sum(torch.abs(param)).data.to('cpu').numpy()
        
        loss.backward()
        optimizer.step()
        
        epoch_loss = epoch_loss + loss
        
    print("Epoch {}. Loss/Training Cost = {}".format(epoch, "%.2f" % epoch_loss))

    # Validation accuracy every 10 epochs
    if (epoch) % 10 == 0:
        print("Epoch {}. Validation Accuracy = {}".format(epoch, "%.2f" % Accuracy(validationloader)))
    
    # Save the model every ten epochs
    if epoch % 10 == 0:
        torch.save(model.state_dict(), f=filename)
        print()

# Test set accuracy
print("Test Accuracy = {}".format("%.2f" % Accuracy(testloader)))

No pre-trained model detected. Starting fresh model training.




Epoch 0. Loss/Training Cost = 3636.24
Epoch 0. Validation Accuracy = 0.28

Epoch 1. Loss/Training Cost = 3523.90
Epoch 2. Loss/Training Cost = 3364.16
Epoch 3. Loss/Training Cost = 3226.62
Epoch 4. Loss/Training Cost = 3136.25
Epoch 5. Loss/Training Cost = 3074.48
Epoch 6. Loss/Training Cost = 3029.39
Epoch 7. Loss/Training Cost = 2988.72
Epoch 8. Loss/Training Cost = 2954.31
Epoch 9. Loss/Training Cost = 2922.89
Epoch 10. Loss/Training Cost = 2900.52
Epoch 10. Validation Accuracy = 0.70

Epoch 11. Loss/Training Cost = 2879.51
Epoch 12. Loss/Training Cost = 2857.55
Epoch 13. Loss/Training Cost = 2838.86
Epoch 14. Loss/Training Cost = 2823.90
Epoch 15. Loss/Training Cost = 2811.66
Epoch 16. Loss/Training Cost = 2798.65
Epoch 17. Loss/Training Cost = 2787.59
Epoch 18. Loss/Training Cost = 2779.26
Epoch 19. Loss/Training Cost = 2767.85
Epoch 20. Loss/Training Cost = 2758.99
Epoch 20. Validation Accuracy = 0.76

Epoch 21. Loss/Training Cost = 2753.81
Epoch 22. Loss/Training Cost = 2744.53


KeyboardInterrupt: 