<a href="https://colab.research.google.com/github/jackyjack00/Machine_Learning_Deep_Learning_LAB/blob/main/Es5_Alexnet_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torchvision
from torchvision import transforms 


Create the data loaders for the CIFAR-10 dataset

In [10]:
def get_data(batch_size, test_batch_size=256):
  """Build the CIFAR10 training, validation and test data loaders.

  Every image is resized to 227x227, converted to a tensor and normalized
  with mean 0.5 and std 0.5. The official training set is randomly split in
  two parts: one sample more than half of it is used for training, the rest
  for validation. The datasets are downloaded to ``./data`` when missing.

  Args:
    batch_size: batch size of the (shuffled) training loader.
    test_batch_size: batch size of the (unshuffled) validation and test loaders.

  Returns:
    The tuple ``(train_loader, val_loader, test_loader)``.
  """
  # Preprocessing pipeline, applied in order to every image
  preprocessing = transforms.Compose([
      # upscale the images to the 227x227 input size used by the network
      transforms.Resize((227, 227)),
      # convert the image to a PyTorch tensor
      transforms.ToTensor(),
      # shift and scale with mean 0.5 / std 0.5 (maps [0, 1] to [-1, 1])
      transforms.Normalize(mean=[0.5], std=[0.5]),
  ])

  # Official CIFAR10 train / test sets, both with the same preprocessing
  full_training_data = torchvision.datasets.CIFAR10(
      './data', train=True, transform=preprocessing, download=True)
  test_data = torchvision.datasets.CIFAR10(
      './data', train=False, transform=preprocessing, download=True)

  # Sizes of the random training / validation split
  num_samples = len(full_training_data)
  training_samples = int(num_samples*0.5+1)
  split_sizes = [training_samples, num_samples - training_samples]
  training_data, validation_data = torch.utils.data.random_split(full_training_data, split_sizes)

  # Only the training loader reshuffles its samples at every epoch
  train_loader = torch.utils.data.DataLoader(training_data, batch_size=batch_size, shuffle=True)
  val_loader = torch.utils.data.DataLoader(validation_data, batch_size=test_batch_size, shuffle=False)
  test_loader = torch.utils.data.DataLoader(test_data, batch_size=test_batch_size, shuffle=False)

  return train_loader, val_loader, test_loader

Build the AlexNet architecture from scratch

In [46]:
class LeNet(torch.nn.Module):
  """AlexNet-style convolutional network.

  Despite the class name, the layout is the AlexNet one: five convolutional
  stages (convolution, batch normalization, ReLU and, for stages 1, 2 and 5,
  max pooling) followed by three fully connected layers. The first linear
  layer takes 9216 features, i.e. the 256 x 6 x 6 feature maps obtained from
  3 x 227 x 227 inputs.

  Args:
    num_classes: number of output scores produced by the last linear layer.
  """

  @staticmethod
  def _conv_stage(in_channels, out_channels, kernel_size, stride, padding, pool):
    """Return Conv2d -> BatchNorm2d -> ReLU, optionally followed by a 3x3/stride-2 max pooling."""
    modules = [
        torch.nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding),
        torch.nn.BatchNorm2d(out_channels),
        torch.nn.ReLU(),
    ]
    if pool:
      modules.append(torch.nn.MaxPool2d(kernel_size=3, stride=2))
    return torch.nn.Sequential(*modules)

  @staticmethod
  def _dense_stage(in_features, out_features):
    """Return Dropout(0.5) -> Linear -> ReLU."""
    return torch.nn.Sequential(
        torch.nn.Dropout(0.5),
        torch.nn.Linear(in_features, out_features),
        torch.nn.ReLU())

  def __init__(self , num_classes):
    super().__init__()
    self.num_classes = num_classes

    # Convolutional feature extractor
    self.layer1 = self._conv_stage(3, 96, kernel_size=11, stride=4, padding=0, pool=True)
    self.layer2 = self._conv_stage(96, 256, kernel_size=5, stride=1, padding=2, pool=True)
    self.layer3 = self._conv_stage(256, 384, kernel_size=3, stride=1, padding=1, pool=False)
    self.layer4 = self._conv_stage(384, 384, kernel_size=3, stride=1, padding=1, pool=False)
    self.layer5 = self._conv_stage(384, 256, kernel_size=3, stride=1, padding=1, pool=True)

    # Fully connected classifier
    self.fc = self._dense_stage(9216, 4096)
    self.fc1 = self._dense_stage(4096, 4096)
    self.fc2 = torch.nn.Sequential(
        torch.nn.Linear(4096, num_classes))

  def forward(self, x):
    """Return the class scores, one row per sample of the batch ``x``."""
    features = x
    # convolutional stages, applied one after the other
    for stage in (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5):
      features = stage(features)

    # flatten each sample's feature maps into one vector for the linear layers
    flat = features.reshape(features.shape[0], -1)

    # fully connected stages
    hidden = self.fc1(self.fc(flat))
    return self.fc2(hidden)

Define helper functions that create the loss function and the optimizer

In [4]:
# Stochastic gradient descent is used to update the weights
def get_optimizer(net, lr, wd, momentum):
  """Return an SGD optimizer over all the parameters of ``net``.

  Args:
    net: module whose parameters are optimized.
    lr: learning rate.
    wd: weight decay coefficient.
    momentum: momentum factor.
  """
  return torch.optim.SGD(net.parameters(), lr=lr, momentum=momentum, weight_decay=wd)

# Cross entropy is the loss minimized during training
def get_loss_function():
  """Return a new ``torch.nn.CrossEntropyLoss`` built with its default arguments."""
  return torch.nn.CrossEntropyLoss()

Hyperparameter settings

In [5]:
# Number of samples per training batch
batch_size=128
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU
# so that the notebook also runs on machines (or Colab runtimes) without a GPU
device='cuda:0' if torch.cuda.is_available() else 'cpu'
# SGD settings
learning_rate=0.01
weight_decay=0.000001
momentum=0.9
# Number of passes over the training split
epochs=50

Load Data

In [11]:
# Build the three loaders; validation and test keep the default evaluation batch size
train_loader, val_loader, test_loader = get_data(batch_size=batch_size)

Files already downloaded and verified
Files already downloaded and verified


Define the train and test operations

In [42]:
def train(net, data_loader, optimizer, loss_function, device="cuda:0"):
  """Train ``net`` for one epoch and report the average loss and accuracy.

  Args:
    net: the model to train; it is moved to ``device`` and set to training mode.
    data_loader: iterable yielding ``(inputs, targets)`` batches.
    optimizer: optimizer holding the parameters of ``net``.
    loss_function: callable ``loss_function(outputs, targets)`` returning a scalar loss.
    device: device on which the model and the batches are placed.

  Returns:
    The tuple ``(average loss per sample, accuracy in percent)`` measured on
    the batches while the weights were being updated.

  Raises:
    ZeroDivisionError: if ``data_loader`` yields no samples.
  """
  samples = 0
  cumulative_loss = 0.
  cumulative_accuracy = 0.

  # The model only has to be moved once, not at every batch
  net = net.to(device)
  # Strictly needed if the network contains layers that behave differently
  # between training and evaluation (e.g. dropout, batch normalization)
  net.train()

  # A mean-reduced loss is an average over the batch: it has to be weighted by
  # the batch size before being summed, otherwise dividing the total by the
  # number of samples under-reports the loss by about a factor batch_size.
  # Criteria without a `reduction` attribute are assumed to be mean-reduced.
  mean_reduced = getattr(loss_function, "reduction", "mean") == "mean"

  for inputs, targets in data_loader:
    # Load the batch on the target device
    inputs = inputs.to(device)
    targets = targets.to(device)

    # Clear the gradients first so that nothing left over from a previous
    # backward pass can leak into this update
    optimizer.zero_grad()

    # Forward pass
    outputs = net(inputs)

    # Apply the loss
    loss = loss_function(outputs, targets)

    # Backward pass
    loss.backward()

    # Update parameters
    optimizer.step()

    batch_samples = inputs.shape[0]
    samples += batch_samples
    if mean_reduced:
      cumulative_loss += loss.item() * batch_samples
    else:
      cumulative_loss += loss.item()
    # The predicted class is the one with the highest score
    _, predicted = outputs.max(1)
    cumulative_accuracy += predicted.eq(targets).sum().item()

  return cumulative_loss/samples, cumulative_accuracy/samples*100

def test(net, data_loader, cost_function, device="cuda:0"):
  samples = 0.
  cumulative_loss = 0.
  cumulative_accuracy = 0.
  # Strictly needed if network contains layers which has different behaviours between train and test
  net.train() 
  # memory efficient trick
  with torch.no_grad():
    for batch_idx, (inputs, targets) in enumerate(data_loader):
      # Load data into GPU 
      net = net.to(device)
      inputs = inputs.to(device)
      targets = targets.to(device)
      # Forward pass
      outputs = net(inputs)
      _, predicted = outputs.max(1)
      samples+=inputs.shape[0]
      cumulative_accuracy += predicted.eq(targets).sum().item()
  return cumulative_loss/samples, cumulative_accuracy/samples*100

Main training and validation loop

In [13]:
# train_loader.dataset is the training split returned by random_split; its own
# `.dataset` is the full CIFAR10 training set, so the shape shown here is the
# one of all the raw training images, not only of the training split
train_loader.dataset.dataset.data.shape

(50000, 32, 32, 3)

In [None]:
# Build the network and place it on the configured device before the optimizer is created
net = LeNet( num_classes = 10 ).to(device)
optimizer = get_optimizer(net, learning_rate, weight_decay, momentum)
loss_function = get_loss_function()
for e in range(epochs):
  # Pass the configured device explicitly: otherwise train/test silently fall
  # back to their own "cuda:0" default and the `device` setting is ignored
  train_loss, train_accuracy = train(net, train_loader, optimizer, loss_function, device=device)
  val_loss, val_accuracy = test(net, val_loader, loss_function, device=device)
  print('Epoch: {:d}'.format(e+1))
  print('\t Training loss {:.5f}, Training accuracy {:.2f}'.format(train_loss,  train_accuracy))
  print('\t Validation loss {:.5f}, Validation accuracy {:.2f}'.format(val_loss,  val_accuracy))
  print("-----------------------------------------------------")

Output dimensions of layer 1: [128, 96, 27, 27]
*   128 samples in the batch
*   96 output channels of layer1
*   27×27 spatial dimensions of each feature map after the convolution and the max pooling

AFTER TRAINING RESULTS

In [None]:
  # Final evaluation of the trained network on the three splits.
  # The configured device is passed explicitly so that the `device` setting is
  # honoured instead of the "cuda:0" default of test()
  print("After training:")
  train_loss, train_accuracy = test(net, train_loader, loss_function, device=device)
  val_loss, val_accuracy = test(net, val_loader, loss_function, device=device)
  test_loss, test_accuracy = test(net, test_loader, loss_function, device=device)
  print("\t Training loss {:.5f}, Training accuracy {:.2f}".format(train_loss,  train_accuracy))
  print('\t Validation loss {:.5f}, Validation accuracy {:.2f}'.format(val_loss,  val_accuracy))
  print('\t Test loss {:.5f}, Test accuracy {:.2f}'.format(test_loss, test_accuracy))
  print('-----------------------------------------------------')