In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import random as rand
import matplotlib.pyplot as plt 
%matplotlib inline


torch.manual_seed(24)    



<torch._C.Generator at 0x7f5e54a15b10>

In [2]:
'''
STEP 1: LOADING DATASET
'''

dataset = dsets.CIFAR10('./data', train=True, download=True, transform=transforms.ToTensor())
test_dataset = dsets.CIFAR10('./data', train=False, download=True, transform=transforms.ToTensor())
CLASSES = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [3]:
'''
STEP 2: SPLIT DATA TO TRAINING AND VALIDATION
'''
#validation set size 5000 
val_size = 5000
train_size = len(dataset) - val_size

#creating training & validation set using random_split
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])
len(train_dataset), len(val_dataset)

(45000, 5000)

In [4]:
'''
STEP 3: MAKING DATASET ITERABLE
'''
#Creating data loader to load data in batches
batch_size=124
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size*2, num_workers=4, pin_memory=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size*2, num_workers=4, pin_memory=True)


In [5]:
'''
Step 4 : Creat Model class
'''

def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

class CIFAR10Model(nn.Module):
    def __init__(self):
        super().__init__()
        # hidden layer
        self.linear1 = nn.Linear(input_size, 1536)
        self.linear1_drop = nn.Dropout(0.2)
        self.linear2 = nn.Linear(1536, 768)
        self.linear2_drop = nn.Dropout(0.2)
        self.linear3 = nn.Linear(768, 576)
        self.linear3_drop = nn.Dropout(0.1)
        # output layer
        self.linear4 = nn.Linear(576, output_size)
        
    def forward(self, x):
        # Flatten images into vectors
        out = x.view(x.size(0), -1)
        # Apply layers & activation functions
        out = self.linear1(out)
        # Apply activation function
        out = torch.nn.functional.relu(out)
        # Apply Dropout
        out = self.linear1_drop(out)

        out = self.linear2(out)
        # Apply activation function
        out = torch.nn.functional.relu(out)
        # Apply Dropout
        out = self.linear2_drop(out)
        
        out = self.linear3(out)
        # Apply activation function
        out = torch.nn.functional.relu(out)
        # Apply Dropout
        out = self.linear3_drop(out)
        # Get predictions using output layer
        out = self.linear4(out)
        
        return out
    
    def training_step(self, batch):
        images, labels = batch 
        out = self(images)                  # Generate predictions
        loss = torch.nn.functional.cross_entropy(out, labels) # Calculate loss
        return loss
    
    def validation_step(self, batch):
        images, labels = batch 
        out = self(images)                    # Generate predictions
        loss = torch.nn.functional.cross_entropy(out, labels)   # Calculate loss
        acc = accuracy(out, labels)           # Calculate accuracy
        return {'val_loss': loss, 'val_acc': acc}
    
    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()   # Combine losses
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()      # Combine accuracies
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}
    
    def epoch_end(self, epoch, result):
        print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(epoch, result['val_loss'], result['val_acc']))

input_size = 3*32*32
output_size = 10
model = CIFAR10Model()
log_interval=100

In [6]:
lambda2 = 0.001 # regularization hyperparameters
lambda1 = 0
def evaluate(model, val_loader):
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

# we train with l1 and l2 regularization
def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        # Training Phase 
        for batch_idx,batch in enumerate(train_loader):
            loss = model.training_step(batch)
            l2_regularization = 0
            l1_regularization = 0
            for param in model.parameters():
              l2_regularization += torch.norm(param)**2
              l1_regularization += torch.sum(torch.abs(param))

            loss = loss + lambda2*l2_regularization + lambda1*l1_regularization

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            if batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(batch[0]), len(train_dataset),
                    loss.item()))
        # Validation phase
        result = evaluate(model, val_loader)
        model.epoch_end(epoch, result)
        history.append(result)
    return history

In [7]:
'''
Step 5: Move model and data to GPU if availble 
'''

def to_device(data, device):
    """Move tensor(s) to chosen device"""
    if isinstance(data, (list,tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)
class DeviceDataLoader():
    """Wrap a dataloader to move data to a device"""
    def __init__(self, dl, device):
        self.dl = dl
        self.device = device
        
    def __iter__(self):
        """Yield a batch of data after moving it to device"""
        for b in self.dl: 
            yield to_device(b, self.device)
def __len__(self):
        """Number of batches"""
        return len(self.dl)
    
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
    
train_loader = DeviceDataLoader(train_loader, device)
val_loader = DeviceDataLoader(val_loader, device)
test_loader = DeviceDataLoader(test_loader, device)

model = to_device(model, device)
l2_regularization = torch.tensor(0)
l2_regularization = to_device(l2_regularization, device).float()
l1_regularization = torch.tensor(0)
l1_regularization = to_device(l1_regularization, device).float()

In [8]:
'''
Step 5: Train Model
'''
history = [evaluate(model, val_loader)]
history # initial Loss and accuracy

[{'val_acc': 0.10526114702224731, 'val_loss': 2.3032896518707275}]

In [9]:
#history += fit(10, 0.05, model, train_loader, val_loader)


In [10]:
def plot_accuracies(history):
    accuracies = [x['val_acc'] for x in history]
    plt.plot(accuracies, '-x')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.title('Accuracy vs. No. of epochs');


In [11]:
def plot_losses(history):
    losses = [x['val_loss'] for x in history]
    plt.plot(losses, '-x')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.title('Loss vs. No. of epochs');
    

In [None]:
history = [evaluate(model, val_loader)]
#RMSProp 
model = CIFAR10Model()
model = to_device(model, device)

history += fit(25, 0.06, model, train_loader, val_loader,opt_func=torch.optim.RMSprop)
plot_losses(history)


Train Epoch: 0 [0/45000]	Loss: 3.268550
Train Epoch: 0 [12400/45000]	Loss: 1604.262573
Train Epoch: 0 [24800/45000]	Loss: 1488.864990
Train Epoch: 0 [37200/45000]	Loss: 1408.262329
Epoch [0], val_loss: 17.9242, val_acc: 0.0993
Train Epoch: 1 [0/45000]	Loss: 1396.497681
Train Epoch: 1 [12400/45000]	Loss: 1296.700562
Train Epoch: 1 [24800/45000]	Loss: 1237.309326
Train Epoch: 1 [37200/45000]	Loss: 1184.994507
Epoch [1], val_loss: 2.9440, val_acc: 0.0989
Train Epoch: 2 [0/45000]	Loss: 1144.161987
Train Epoch: 2 [12400/45000]	Loss: 1079.814209


In [None]:
plot_accuracies(history)


In [None]:
evaluate(model, test_loader)


In [None]:
history = [evaluate(model, val_loader)]
#Adadelta
model = CIFAR10Model()
model = to_device(model, device)

history += fit(25, 0.06, model, train_loader, val_loader,opt_func=torch.optim.Adadelta)
plot_losses(history)




In [None]:
plot_accuracies(history)


In [None]:
evaluate(model, test_loader)

In [None]:
history = [evaluate(model, val_loader)]
#Adafrad
model = CIFAR10Model()
model = to_device(model, device)

history += fit(25, 0.06, model, train_loader, val_loader,opt_func=torch.optim.Adagrad)
plot_losses(history)




In [None]:
plot_accuracies(history)


In [None]:
evaluate(model, test_loader)

In [None]:
history = [evaluate(model, val_loader)]
#Adam
model = CIFAR10Model()
model = to_device(model, device)

history += fit(25, 0.06, model, train_loader, val_loader,opt_func=torch.optim.Adam)
plot_losses(history)




In [None]:
plot_accuracies(history)


In [None]:
evaluate(model, test_loader)

In [None]:
history = [evaluate(model, val_loader)]
#SGD
model = CIFAR10Model()
model = to_device(model, device)

history += fit(25, 0.06, model, train_loader, val_loader,opt_func=torch.optim.SGD)
plot_losses(history)



In [None]:
plot_accuracies(history)


In [None]:
evaluate(model, test_loader)

In [None]:
# best model found was 
history += fit(25, 0.06, model, train_loader, val_loader,opt_func=torch.optim.SGD)


In [None]:
"""
def plot_losses(history):
    losses = [x['val_loss'] for x in history]
    plt.plot(losses, '-x')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.title('Loss vs. No. of epochs');
    
plot_losses(history)
"""

In [None]:
"""
def plot_accuracies(history):
    accuracies = [x['val_acc'] for x in history]
    plt.plot(accuracies, '-x')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.title('Accuracy vs. No. of epochs');
    
plot_accuracies(history)
"""

In [None]:
evaluate(model, test_loader) # its displayed as validation loss and accuracy, but this is the test loss

###Confusion Matrix

In [None]:
import numpy as np
cm = np.zeros((10, 10), int)
model = to_device(model, "cpu")
for i in range(len(test_dataset)):
  data = test_dataset[i][0]
  label = test_dataset[i][1]
  data = data.view(1,data.shape[0],data.shape[1],data.shape[2])
  predict = model.forward(data)
  target = label
  cm[predict.argmax(), target] += 1
print(cm)
    #print(data)



### Hyper Parameter Optimization

In [None]:
"""
from hyperopt import hp, tpe, fmin
def run_model(learning_rate):
  model = CIFAR10Model()
  model = to_device(model, device)
  error = fit(1, learning_rate, model, train_loader, val_loader)
  return error[-1]["val_loss"]


best = fmin(fn=lambda x: run_model(x),
            space=hp.uniform('x', 0.01, 1),
            algo=tpe.suggest, 
            max_evals=100)
print("Best Learning Rate",best)
"""

In [None]:
"""
def run_model(batch_size):
  model = CIFAR10Model()
  batch_size=batch_size
  train_loader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, num_workers=4, pin_memory=True)
  val_loader = torch.utils.data.DataLoader(val_dataset, batch_size*2, num_workers=4, pin_memory=True)
  test_loader = torch.utils.data.DataLoader(test_dataset, batch_size*2, num_workers=4, pin_memory=True)
  train_loader = DeviceDataLoader(train_loader, device)
  val_loader = DeviceDataLoader(val_loader, device)
  test_loader = DeviceDataLoader(test_loader, device)
  model = to_device(model, device)
  error = fit(1, 0.06, model, train_loader, val_loader)
  return error[-1]["val_loss"]


best = fmin(fn=lambda x: run_model(x),
            space=hp.choice('x',[1,32,64,124,256,512,1024]),
            algo=tpe.suggest, 
            max_evals=10)
print("Best Batch Size",best)
"""