# Regularization

In [1]:
import torch
from torchvision.datasets import mnist
from torch.utils.data import DataLoader
import numpy as np
import torch.nn.functional as F
from torch import nn
import matplotlib.pyplot as plt

# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

def data_transform(x):
    x = np.array(x, dtype = 'float32') / 255
    x = x.reshape((-1, ))
    x = torch.from_numpy(x)
    return x

trainset = mnist.MNIST('./dataset/mnist', train=True, transform=data_transform, download=True)
testset = mnist.MNIST('./dataset/mnist', train = False, transform=data_transform, download=True)

train_data = DataLoader(trainset, batch_size=64, shuffle=True)
test_data = DataLoader(testset, batch_size=128, shuffle=False)

class DNN(nn.Module):
    def __init__(self):
        super(DNN, self).__init__()
        self.fc1 = nn.Linear(28*28, 500)
        self.fc2 = nn.Linear(500, 250)
        self.fc3 = nn.Linear(250, 125)
        self.fc4 = nn.Linear(125, 10)
        

        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

# build model
model = DNN().to(device)

# define cost
criterion = nn.CrossEntropyLoss() 


# add L2 regularization 
optimizer = torch.optim.SGD(model.parameters(), 0.1, weight_decay=1e-5)
#optimizer = torch.optim.RMSprop(model.parameters(), 0.01, alpha=0.9)
#optimizer = torch.optim.Adam(model.parameters(), 0.01, betas=(0.9, 0.99))

losses = []
acces = []
eval_losses = []
eval_acces = []


for epoch in range(5):
    train_loss = 0
    train_acc = 0
    model.train()       
    for im, label in train_data:
        
        im = im.to(device)
        label = label.to(device)
        pred = model(im)
        
        loss = criterion(pred, label)
        
        # zero the parameter gradients
        optimizer.zero_grad()
        
        loss.backward()
        
        optimizer.step()
        
        train_loss += loss.item()
        

        _, pred = pred.max(1)
        num_correct = (pred == label).sum().item()
        acc = num_correct / im.shape[0]
        train_acc += acc
        
    losses.append(train_loss / len(train_data))
    acces.append(train_acc / len(train_data))
    

    eval_loss = 0
    eval_acc = 0
    model.eval()
    
    for im, label in test_data:

        im = im.to(device)
        label = label.to(device)
        
        pred = model(im)
        loss = criterion(pred, label)

        
        eval_loss += loss.item()

        
        _, pred = pred.max(1)
        num_correct = (pred == label).sum().item()
        acc = num_correct / im.shape[0]
        eval_acc += acc
    
    eval_losses.append(eval_loss / len(test_data))
    eval_acces.append(eval_acc / len(test_data))
    print('epoch: {}, Train Loss: {:.6f}, Train Acc: {:.6f}, Eval Loss: {:.6f}, Eval Acc: {:.6f}'
          .format(epoch, train_loss / len(train_data), train_acc / len(train_data), eval_loss / len(test_data), eval_acc / len(test_data)))

Using cpu device
epoch: 0, Train Loss: 0.600836, Train Acc: 0.820879, Eval Loss: 0.184837, Eval Acc: 0.945510
epoch: 1, Train Loss: 0.154109, Train Acc: 0.953925, Eval Loss: 0.116627, Eval Acc: 0.963212
epoch: 2, Train Loss: 0.099203, Train Acc: 0.969133, Eval Loss: 0.175936, Eval Acc: 0.943038
epoch: 3, Train Loss: 0.072271, Train Acc: 0.978628, Eval Loss: 0.085943, Eval Acc: 0.972013
epoch: 4, Train Loss: 0.054961, Train Acc: 0.983292, Eval Loss: 0.080018, Eval Acc: 0.975277


# Dropout

In [2]:
import torch
from torchvision.datasets import mnist
from torch.utils.data import DataLoader
import numpy as np
import torch.nn.functional as F
from torch import nn
import matplotlib.pyplot as plt

# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

def data_transform(x):
    x = np.array(x, dtype = 'float32') / 255
    x = x.reshape((-1, ))
    x = torch.from_numpy(x)
    return x

trainset = mnist.MNIST('./dataset/mnist', train=True, transform=data_transform, download=True)
testset = mnist.MNIST('./dataset/mnist', train = False, transform=data_transform, download=True)

train_data = DataLoader(trainset, batch_size=64, shuffle=True)
test_data = DataLoader(testset, batch_size=128, shuffle=False)

class DNN(nn.Module):
    def __init__(self):
        super(DNN, self).__init__()
        self.fc1 = nn.Linear(28*28, 500)
        self.fc2 = nn.Linear(500, 250)
        self.fc3 = nn.Linear(250, 125)
        self.fc4 = nn.Linear(125, 10)
        
        # Define proportion or neurons to dropout
        self.dropout = nn.Dropout(0.25)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = F.relu(self.fc3(x))
        x = self.dropout(x)
        x = self.fc4(x)
        return x

# build model
model = DNN().to(device)

# define cost
criterion = nn.CrossEntropyLoss() 


# optimization
optimizer = torch.optim.SGD(model.parameters(), 0.1)
#optimizer = torch.optim.RMSprop(model.parameters(), 0.01, alpha=0.9)
#optimizer = torch.optim.Adam(model.parameters(), 0.01, betas=(0.9, 0.99))

losses = []
acces = []
eval_losses = []
eval_acces = []


for epoch in range(5):
    train_loss = 0
    train_acc = 0
    model.train()       
    for im, label in train_data:
        
        im = im.to(device)
        label = label.to(device)
        pred = model(im)
        
        loss = criterion(pred, label)
        
        # zero the parameter gradients
        optimizer.zero_grad()
        
        loss.backward()
        
        optimizer.step()
        
        train_loss += loss.item()
        

        _, pred = pred.max(1)
        num_correct = (pred == label).sum().item()
        acc = num_correct / im.shape[0]
        train_acc += acc
        
    losses.append(train_loss / len(train_data))
    acces.append(train_acc / len(train_data))
    

    eval_loss = 0
    eval_acc = 0
    model.eval()
    
    for im, label in test_data:

        im = im.to(device)
        label = label.to(device)
        
        pred = model(im)
        loss = criterion(pred, label)

        
        eval_loss += loss.item()

        
        _, pred = pred.max(1)
        num_correct = (pred == label).sum().item()
        acc = num_correct / im.shape[0]
        eval_acc += acc
    
    eval_losses.append(eval_loss / len(test_data))
    eval_acces.append(eval_acc / len(test_data))
    print('epoch: {}, Train Loss: {:.6f}, Train Acc: {:.6f}, Eval Loss: {:.6f}, Eval Acc: {:.6f}'
          .format(epoch, train_loss / len(train_data), train_acc / len(train_data), eval_loss / len(test_data), eval_acc / len(test_data)))

Using cpu device
epoch: 0, Train Loss: 0.668981, Train Acc: 0.796942, Eval Loss: 0.198941, Eval Acc: 0.937006
epoch: 1, Train Loss: 0.197619, Train Acc: 0.942547, Eval Loss: 0.126229, Eval Acc: 0.959454
epoch: 2, Train Loss: 0.141570, Train Acc: 0.959322, Eval Loss: 0.100606, Eval Acc: 0.968651
epoch: 3, Train Loss: 0.107955, Train Acc: 0.968217, Eval Loss: 0.096801, Eval Acc: 0.970233
epoch: 4, Train Loss: 0.090512, Train Acc: 0.973398, Eval Loss: 0.079454, Eval Acc: 0.976365


# Batch Normalization

In [3]:
import torch
from torchvision.datasets import mnist
from torch.utils.data import DataLoader
import numpy as np
import torch.nn.functional as F
from torch import nn
import matplotlib.pyplot as plt

# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

def data_transform(x):
    x = np.array(x, dtype = 'float32') / 255
    x = x.reshape((-1, ))
    x = torch.from_numpy(x)
    return x

trainset = mnist.MNIST('./dataset/mnist', train=True, transform=data_transform, download=True)
testset = mnist.MNIST('./dataset/mnist', train = False, transform=data_transform, download=True)

train_data = DataLoader(trainset, batch_size=64, shuffle=True)
test_data = DataLoader(testset, batch_size=128, shuffle=False)

class DNN(nn.Module):
    def __init__(self):
        super(DNN, self).__init__()
        self.fc1 = nn.Linear(28*28, 500)
        self.bn1 = nn.BatchNorm1d(500)
        self.fc2 = nn.Linear(500, 250)
        self.bn2 = nn.BatchNorm1d(250)
        self.fc3 = nn.Linear(250, 125)
        self.bn3 = nn.BatchNorm1d(125)

        self.fc4 = nn.Linear(125, 10)
        

        
    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = F.relu(x)

        
        x = self.fc2(x)
        x = self.bn2(x)
        x = F.relu(x)
        
        x = self.fc3(x)
        x = self.bn3(x)
        x = F.relu(x)        
        
        x = self.fc4(x)
        return x

# build model
model = DNN().to(device)

# define cost
criterion = nn.CrossEntropyLoss() 


# optimization
optimizer = torch.optim.SGD(model.parameters(), 0.1)
#optimizer = torch.optim.RMSprop(model.parameters(), 0.01, alpha=0.9)
#optimizer = torch.optim.Adam(model.parameters(), 0.01, betas=(0.9, 0.99))

losses = []
acces = []
eval_losses = []
eval_acces = []


for epoch in range(5):
    train_loss = 0
    train_acc = 0
    model.train()       
    for im, label in train_data:
        
        im = im.to(device)
        label = label.to(device)
        pred = model(im)
        
        loss = criterion(pred, label)
        
        # zero the parameter gradients
        optimizer.zero_grad()
        
        loss.backward()
        
        optimizer.step()
        
        train_loss += loss.item()
        

        _, pred = pred.max(1)
        num_correct = (pred == label).sum().item()
        acc = num_correct / im.shape[0]
        train_acc += acc
        
    losses.append(train_loss / len(train_data))
    acces.append(train_acc / len(train_data))
    

    eval_loss = 0
    eval_acc = 0
    model.eval()
    
    for im, label in test_data:

        im = im.to(device)
        label = label.to(device)
        
        pred = model(im)
        loss = criterion(pred, label)

        
        eval_loss += loss.item()

        
        _, pred = pred.max(1)
        num_correct = (pred == label).sum().item()
        acc = num_correct / im.shape[0]
        eval_acc += acc
    
    eval_losses.append(eval_loss / len(test_data))
    eval_acces.append(eval_acc / len(test_data))
    print('epoch: {}, Train Loss: {:.6f}, Train Acc: {:.6f}, Eval Loss: {:.6f}, Eval Acc: {:.6f}'
          .format(epoch, train_loss / len(train_data), train_acc / len(train_data), eval_loss / len(test_data), eval_acc / len(test_data)))

Using cpu device
epoch: 0, Train Loss: 0.188029, Train Acc: 0.946129, Eval Loss: 0.090995, Eval Acc: 0.970530
epoch: 1, Train Loss: 0.078610, Train Acc: 0.976079, Eval Loss: 0.080730, Eval Acc: 0.973497
epoch: 2, Train Loss: 0.053348, Train Acc: 0.983209, Eval Loss: 0.063259, Eval Acc: 0.980518
epoch: 3, Train Loss: 0.037977, Train Acc: 0.988223, Eval Loss: 0.057493, Eval Acc: 0.983683
epoch: 4, Train Loss: 0.027409, Train Acc: 0.991771, Eval Loss: 0.059569, Eval Acc: 0.981507
