# FineTune_LR_scheduler - S5_v6


# Target:

1. FineTune LR scheduler. Set LR=0.1 as before but updated StepSize = 12 and Gamma = 0.2

# Results:

1. Parameters: 7,612
2. Best Train Accuracy: 99.41
3. Best Test Accuracy: 99.49

# Analysis:
1. To arrive at the best combination, StepSize = 12 and Gamma = 0.2, we ran many trials over these two values.
2. The intuition behind these values: we observed that the accuracy increases gradually until around epoch 10 and then stalls, so we want to update the LR at around epochs 10-12.
3. We tried the StepSize and Gamma combinations (10, 0.1), (11, 0.1) and (12, 0.1), but they did not help reach the target accuracy consistently in the last few epochs.
4. So we decided to increase the speed a little after epochs 10-12 by setting Gamma = 0.2, and tried the StepSize and Gamma combinations (10, 0.2), (11, 0.2) and (12, 0.2). Finally, StepSize = 12 and Gamma = 0.2 gave the best consistency, with >=99.4% in the last 3 epochs and a maximum of 99.49%, using fewer than 8000 parameters.


# Import Libraries

In [None]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
import matplotlib.pyplot as plt
import numpy as np
import random
import time 

In [None]:
# Mount Google Drive in the Colab runtime at /content/drive (the log file below is written there).
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [None]:
import logging

# Use the root logger (empty name) for every message emitted by this notebook.
logger = logging.getLogger("")
#logging.basicConfig(level=logging.DEBUG)
# Log file on the mounted Drive; the current timestamp (spaces replaced by
# underscores) is appended so that each run writes to its own file.
filename = '/content/drive/My Drive/Final_GridSearch_S_E_EVA5_S5_v6_FineTune_LR_scheduler_final_S6_L1&L2_BN_v3'+time.ctime().replace(' ','_')+'.txt'
# Send records of level DEBUG and above to that file.
logging.basicConfig(level = logging.DEBUG, filename = filename)
# Examples of the available log levels:
# logger.debug('Loging %s lewel', 'DEBUG')
# logger.info('Loging %s lewel', 'INFO')
# logger.warning('Loging %s lewel', 'WARN')
# logger.error('Loging %s lewel', 'ERROR')
# logger.critical('Loging %s lewel', 'CRITICAL')

In [None]:
# Preview the raw timestamp string that the log file name is built from.
time.ctime()

'Thu Aug 27 16:56:45 2020'

In [None]:
# Preview the timestamp with spaces replaced by underscores, as appended to the log file name.
time.ctime().replace(' ','_')

'Thu_Aug_27_16:56:47_2020'

## Data Transformations

We start by defining our data transformations. We need to think about what our data is and how we can augment it to correctly represent images that the model might not otherwise see.


In [None]:
# Training pipeline: random-rotation augmentation, then tensor conversion and normalisation.
train_transforms = transforms.Compose([
    # Rotate by a random angle in [-7, 7] degrees; areas uncovered by the rotation are filled with value 1.
    transforms.RandomRotation((-7.0, 7.0), fill=(1,)),
    transforms.ToTensor(),
    # Single-channel mean/std — presumably the MNIST training-set statistics; TODO confirm.
    transforms.Normalize((0.1307,), (0.3081,))
])

# Test pipeline: same tensor conversion and normalisation as training, but no augmentation.
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Dataset and Creating Train/Test Split

In [None]:
# MNIST train/test splits, downloaded to ./data if missing; each split gets its own transform pipeline.
train = datasets.MNIST('./data', train=True, download=True, transform=train_transforms)
test = datasets.MNIST('./data', train=False, download=True, transform=test_transforms)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


# Dataloader Arguments & Test/Train Dataloaders


In [None]:
SEED = 1

# CUDA?
cuda = torch.cuda.is_available()

# For reproducibility: seed every RNG this notebook draws from. The
# hyper-parameter search samples with `np.random` and `random`, so seeding
# torch alone leaves the sampled regularisation values different on every run.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

if cuda:
    torch.cuda.manual_seed(SEED)

# Dataloader arguments (these would typically come from the command line):
# larger batches, worker processes and pinned memory only when a GPU is available.
dataloader_args = dict(shuffle=True, batch_size=128, num_workers=4, pin_memory=True) if cuda else dict(shuffle=True, batch_size=64)

# train dataloader
train_loader = torch.utils.data.DataLoader(train, **dataloader_args)

# test dataloader: evaluation sums over the whole split, so shuffling adds
# nothing and only consumes random numbers.
test_loader = torch.utils.data.DataLoader(test, **{**dataloader_args, "shuffle": False})

# The model
Let's start with the model we first saw

In [None]:
class BatchNorm(nn.BatchNorm2d):
    """``nn.BatchNorm2d`` with an explicit identity initialisation and optionally frozen affine parameters.

    The scale is set to 1 and the shift to 0; ``weight`` / ``bias`` decide
    whether the scale / shift receive gradients.
    """

    def __init__(self, num_features, eps=1e-05, momentum=0.1, weight=True, bias=True):
        super().__init__(num_features, eps=eps, momentum=momentum)
        affine_setup = (
            (self.weight, 1.0, weight),
            (self.bias, 0.0, bias),
        )
        for parameter, initial_value, trainable in affine_setup:
            parameter.data.fill_(initial_value)
            parameter.requires_grad = trainable


class GhostBatchNorm(BatchNorm):
    """Batch norm that, while training, normalises ``num_splits`` sub-batches of each batch separately.

    Running statistics are kept once per (split, channel) pair, i.e. in
    buffers of length ``num_features * num_splits``, and are averaged over
    the splits when the module switches from training to evaluation.
    """

    def __init__(self, num_features, num_splits, **kw):
        # Remaining keyword arguments (eps, momentum, weight, bias) go to BatchNorm.
        super().__init__(num_features, **kw)
        self.num_splits = num_splits
        # Re-register the running statistics with one slot per split and channel.
        self.register_buffer('running_mean', torch.zeros(num_features * self.num_splits))
        self.register_buffer('running_var', torch.ones(num_features * self.num_splits))

    def train(self, mode=True):
        """Switch mode; on a train -> eval transition, average the running stats over the splits first."""
        if (self.training is True) and (mode is False):  # lazily collate stats when we are going to use them
            # View the buffers as (num_splits, num_features), average over the
            # splits, then repeat so the buffers keep their original length.
            self.running_mean = torch.mean(self.running_mean.view(self.num_splits, self.num_features), dim=0).repeat(
                self.num_splits)
            self.running_var = torch.mean(self.running_var.view(self.num_splits, self.num_features), dim=0).repeat(
                self.num_splits)
        return super().train(mode)

    def forward(self, input):
        """Normalise ``input`` of shape (N, C, H, W); per split in training, with the shared stats otherwise."""
        N, C, H, W = input.shape
        if self.training or not self.track_running_stats:
            # Fold groups of `num_splits` samples into the channel dimension so
            # that batch_norm computes separate statistics per split, then
            # restore the original shape. Weight and bias are repeated to match.
            # NOTE(review): the view needs N to be a multiple of num_splits and
            # presumably a contiguous input — confirm against callers.
            return F.batch_norm(
                input.view(-1, C * self.num_splits, H, W), self.running_mean, self.running_var,
                self.weight.repeat(self.num_splits), self.bias.repeat(self.num_splits),
                True, self.momentum, self.eps).view(N, C, H, W)
        else:
            # Evaluation: use the first `num_features` entries of the running statistics.
            return F.batch_norm(
                input, self.running_mean[:self.num_features], self.running_var[:self.num_features],
                self.weight, self.bias, False, self.momentum, self.eps)

In [None]:
class Net(nn.Module):
    """Small all-convolutional classifier for 1x28x28 inputs producing 10 log-probabilities.

    Every convolution stage is an unpadded, bias-free Conv2d followed by ReLU
    and BatchNorm2d; a 2x2 max-pool halves the resolution after the second
    stage and an 8x8 average pool reduces the final map to one value per class.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels, kernel_size):
        """Build one Conv2d -> ReLU -> BatchNorm2d stage (no padding, no bias)."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=kernel_size, padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(out_channels),
        )

    def __init__(self):
        super(Net, self).__init__()
        # Input block: 28 -> 26
        self.convblock1 = self._conv_stage(1, 8, (3, 3))
        # Convolution block 1: 26 -> 24
        self.convblock2 = self._conv_stage(8, 16, (3, 3))
        # Transition block 1: max-pool 24 -> 12, then a 1x1 channel reduction
        self.pool1 = nn.MaxPool2d(2, 2)
        self.convblock3 = self._conv_stage(16, 8, (1, 1))
        # Convolution block 2: 12 -> 10 -> 8
        self.convblock4 = self._conv_stage(8, 16, (3, 3))
        self.convblock5 = self._conv_stage(16, 32, (3, 3))
        # Output block: 1x1 convolution to 10 channels, then global average pooling 8 -> 1
        self.convblock6 = self._conv_stage(32, 10, (1, 1))
        self.gap = nn.Sequential(
            nn.AvgPool2d(kernel_size=8)
        )

    def forward(self, x):
        """Run all stages in order and return log-softmax scores of shape (batch, 10)."""
        pipeline = (
            self.convblock1,
            self.convblock2,
            self.pool1,
            self.convblock3,
            self.convblock4,
            self.convblock5,
            self.convblock6,
            self.gap,
        )
        for stage in pipeline:
            x = stage(x)
        flattened = x.view(-1, 10)
        return F.log_softmax(flattened, dim=-1)

# Model Params
Can't emphasize enough how important viewing the model summary is.
Unfortunately, there is no built-in model visualizer, so we have to rely on an external package.

In [None]:
# Install torchsummary (notebook shell command); it prints a per-layer table of output shapes and parameter counts.
!pip install torchsummary
from torchsummary import summary
# Use the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
# logger.info(device)
logger.info(f"Device : {device}")
# Build the network on the selected device and summarise it for a single 1x28x28 input.
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              72
              ReLU-2            [-1, 8, 26, 26]               0
       BatchNorm2d-3            [-1, 8, 26, 26]              16
            Conv2d-4           [-1, 16, 24, 24]           1,152
              ReLU-5           [-1, 16, 24, 24]               0
       BatchNorm2d-6           [-1, 16, 24, 24]              32
         MaxPool2d-7           [-1, 16, 12, 12]               0
            Conv2d-8            [-1, 8, 12, 12]             128
              ReLU-9            [-1, 8, 12, 12]               0
      BatchNorm2d-10            [-1, 8, 12, 12]              16
           Conv2d-11           [-1, 16, 10, 10]           1,152
             ReLU-12           [-1, 16, 10, 10]               0
      BatchNorm2d-13           [-1, 16, 10, 10]              32
           Conv2d-14             [-1, 3

In [None]:
# for i in model.parameters():
#   logger.info(i)
#   break

# Training and Testing

Looking at logs can be boring, so we'll introduce **tqdm** progressbar to get cooler logs. 

Let's write train and test functions

In [None]:
def get_current_train_acc(model, train_loader):
  """Measure loss and accuracy of ``model`` over every batch of ``train_loader``.

  The model is put into evaluation mode and no gradients are tracked.
  Batches are moved to the module-level ``device``. The result is logged and
  returned as ``(accuracy_in_percent, mean_loss_per_sample)``.
  """
  model.eval()
  loss_sum = 0
  hits = 0
  with torch.no_grad():
    for inputs, labels in train_loader:
      inputs = inputs.to(device)
      labels = labels.to(device)
      log_probs = model(inputs)
      # Sum (not mean) per batch so that dividing by the dataset size below
      # yields the per-sample average.
      loss_sum += F.nll_loss(log_probs, labels, reduction='sum').item()
      # Index of the largest log-probability is the predicted class.
      predicted = log_probs.argmax(dim=1, keepdim=True)
      hits += predicted.eq(labels.view_as(predicted)).sum().item()

  dataset_size = len(train_loader.dataset)
  mean_loss = loss_sum / dataset_size
  accuracy = 100. * hits / dataset_size

  logger.info('\nTrain set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
      mean_loss, hits, dataset_size, accuracy))

  return accuracy, mean_loss

In [None]:
from tqdm import tqdm

# train_losses = []
# test_losses = []
# train_acc = []
# test_acc = []

def train(model, device, train_loader, optimizer, lambda_l1=0, train_acc=None, train_losses=None):
  """Train ``model`` for one epoch, then record its accuracy and loss on the training set.

  Args:
    model: network to optimise; switched to training mode for the pass.
    device: device every batch is moved to before the forward pass.
    train_loader: iterable of ``(data, target)`` batches.
    optimizer: optimizer stepped once per batch.
    lambda_l1: weight of the L1 penalty (sum of absolute values of all model
      parameters) added to the NLL loss; ``0`` disables the penalty.
    train_acc: list that this epoch's accuracy is appended to. A new list is
      created when omitted.
    train_losses: list that this epoch's loss is appended to. A new list is
      created when omitted.

  Returns:
    ``(train_acc, train_losses)`` with the values measured by
    ``get_current_train_acc`` after the epoch appended.
  """
  # Create the history lists per call: a mutable default would be shared by
  # every call that omits the argument and silently accumulate old epochs.
  if train_acc is None:
    train_acc = []
  if train_losses is None:
    train_losses = []

  model.train()
  pbar = tqdm(train_loader)
  correct = 0
  processed = 0
  for batch_idx, (data, target) in enumerate(pbar):
    # get samples
    data, target = data.to(device), target.to(device)

    # PyTorch accumulates gradients over successive backward passes, so they
    # must be cleared before each new batch.
    optimizer.zero_grad()

    # Predict
    y_pred = model(data)

    # Calculate loss
    loss = F.nll_loss(y_pred, target)

    # L1 regularisation. With a zero weight the term contributes nothing, so
    # the pass over all parameters is skipped entirely.
    if lambda_l1:
      l1_penalty = sum(p.abs().sum() for p in model.parameters())
      loss = loss + lambda_l1 * l1_penalty

    # Backpropagation
    loss.backward()
    optimizer.step()

    # Running accuracy over the batches seen so far, shown in the progress bar.
    pred = y_pred.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
    correct += pred.eq(target.view_as(pred)).sum().item()
    processed += len(data)

    pbar.set_description(desc= f'Loss={loss.item()} Batch_id={batch_idx} Current_train_batch_accuracy={100*correct/processed:0.2f}')

  # Re-evaluate on the full training set with the end-of-epoch weights.
  current_train_acc, current_train_loss = get_current_train_acc(model, train_loader)
  train_acc.append(current_train_acc)
  train_losses.append(current_train_loss)
  return train_acc, train_losses

def test(model, device, test_loader, test_acc=None, test_losses=None):
    """Evaluate ``model`` on ``test_loader`` and record the accuracy and mean loss.

    Args:
        model: network to evaluate; switched to evaluation mode.
        device: device every batch is moved to before the forward pass.
        test_loader: iterable of ``(data, target)`` batches exposing ``.dataset``.
        test_acc: list that the accuracy (in percent) is appended to. A new
            list is created when omitted.
        test_losses: list that the per-sample mean NLL loss is appended to.
            A new list is created when omitted.

    Returns:
        ``(test_acc, test_losses)`` with this evaluation's values appended.
    """
    # Create the history lists per call: a mutable default would be shared by
    # every call that omits the argument and silently accumulate old results.
    if test_acc is None:
        test_acc = []
    if test_losses is None:
        test_losses = []

    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    dataset_size = len(test_loader.dataset)
    test_loss /= dataset_size
    accuracy = 100. * correct / dataset_size

    logger.info('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, dataset_size, accuracy))

    test_losses.append(test_loss)
    test_acc.append(accuracy)
    return test_acc, test_losses


# Let's Train and test our model

In [None]:
# def save_best_model(epochs, model, device, train_loader, optimizer, lambda_l1=0.0, scheduler):
#   for epoch in range(EPOCHS):
#     logger.info(f" ***** EPOCH:{epoch} ***** ")
#     train(model, device, train_loader, optimizer, epoch, lambda_l1)
#     scheduler.step()
#     test(model, device, test_loader)


In [None]:
def get_best_train_test_acc(train_acc=None, test_acc=None, overfit_gap=1):
  """Pick the epoch with the highest test accuracy that is not clearly overfit.

  An epoch counts as overfit when its train accuracy exceeds its test
  accuracy by ``overfit_gap`` points or more. Overfit epochs are ignored; if
  every epoch is overfit, the best test accuracy over all epochs is returned
  instead of failing. Ties are resolved in favour of the earliest epoch. The
  input sequences are not modified.

  Args:
    train_acc: per-epoch train accuracies.
    test_acc: per-epoch test accuracies, paired with ``train_acc`` by position.
    overfit_gap: minimum ``train - test`` difference that marks an epoch as overfit.

  Returns:
    ``((train_accuracy, test_accuracy), epoch)`` where ``epoch`` is 1-based.

  Raises:
    ValueError: if there is no epoch to choose from.

  Example:
  train_acc_1=[96.5,98.7,99.2,99.3];test_acc_1=[97.2,98.5, 99.25, 99.2]
  assert get_best_train_test_acc(train_acc_1, test_acc_1)==((99.2, 99.25), 3)
  """
  train_acc = () if train_acc is None else train_acc
  test_acc = () if test_acc is None else test_acc
  # Number the pairs from 1 so the position doubles as the epoch number.
  numbered_pairs = list(enumerate(zip(train_acc, test_acc), start=1))
  if not numbered_pairs:
    raise ValueError("train_acc and test_acc must contain at least one epoch")

  def is_overfit(pair):
    tr_acc, te_acc = pair
    return tr_acc > te_acc and tr_acc - te_acc >= overfit_gap

  # Filter into a new list: removing from a list while iterating over it
  # skips the element after each removal and lets overfit epochs slip through.
  candidates = [item for item in numbered_pairs if not is_overfit(item[1])]
  if not candidates:
    candidates = numbered_pairs

  # max() returns the first maximal element, i.e. the earliest epoch on ties.
  epoch, best_pair = max(candidates, key=lambda item: item[1][1])
  return best_pair, epoch


In [None]:
def save_model(model, PATH='./test_model.pickle'):
  """Write the model's ``state_dict`` to ``PATH`` and log where it was saved."""
  weights = model.state_dict()
  torch.save(weights, PATH)
  logger.info(f"Model saved at {PATH}")

In [None]:
# Sanity check for get_best_train_test_acc: epoch 3 has the highest test
# accuracy (99.25), so that pair and its 1-based epoch number are expected.
train_acc_1=[96.5,98.7,99.2,99.3];test_acc_1=[97.2,98.5, 99.25, 99.2]
get_best_train_test_acc(train_acc_1, test_acc_1)

assert get_best_train_test_acc(train_acc_1, test_acc_1)==((99.2, 99.25),3)


In [None]:
def fit_model(epochs, model, device, train_loader, test_loader, optimizer, lambda_l1, scheduler):
  """Train and evaluate ``model`` for ``epochs`` epochs.

  Args:
    epochs: number of epochs to run.
    model: network to train.
    device: device the batches are moved to.
    train_loader: loader passed to ``train``.
    test_loader: loader passed to ``test``.
    optimizer: optimizer used by ``train``.
    lambda_l1: L1 penalty weight forwarded to ``train``.
    scheduler: learning-rate scheduler stepped once per epoch after training;
      ``None`` keeps the learning rate unchanged.

  Returns:
    ``(train_acc, train_losses, test_acc, test_losses)``, one entry per epoch.
  """
  train_acc = []
  train_losses = []
  test_acc = []
  test_losses = []
  # Honour the `epochs` argument rather than the notebook-level EPOCHS global.
  for epoch in range(epochs):
    logger.info(f"[EPOCH:{epoch}]")
    train_acc, train_losses = train(model, device, train_loader, optimizer, lambda_l1, train_acc, train_losses)
    # Callers may legitimately pass no scheduler.
    if scheduler is not None:
      scheduler.step()
    test_acc, test_losses = test(model, device, test_loader, test_acc, test_losses)
  return train_acc, train_losses, test_acc, test_losses


In [None]:
      # train_acc, train_losses, test_acc, test_losses = fit_model(epochs, model, device, train_loader, test_loader, optimizer, para, scheduler)
      # (best_train_acc, best_test_acc), epoch = get_best_train_test_acc(train_acc, test_acc)
      # logger.info(f"For L1 lambda parameter {para} Best train Accuracy {best_train_acc}% and Best Test Accuracy {best_test_acc}% at Epoch {epoch}")
      # all_lambdal1_train_test_acc_from_best_epoch.append((best_train_acc, best_test_acc, para))
      # temp_best_train_acc_list.append(best_train_acc)
      # temp_best_test_acc_list.append(best_test_acc)

In [None]:
def best_tr_te_acc_from_epoch(epochs, model, device, train_loader, test_loader, optimizer, lambda_l1=0, lambda_l2=0, scheduler=None):
  """Fit the model once and summarise its best epoch.

  Runs ``fit_model``, selects the best (train, test) accuracy pair with
  ``get_best_train_test_acc`` and logs it together with the regularisation
  settings. ``lambda_l2`` is only reported here; it is not applied by this
  function.

  Returns:
    Three single-element lists: ``[best_train_acc]``, ``[best_test_acc]`` and
    ``[(best_train_acc, best_test_acc, lambda_l1, lambda_l2)]``.
  """
  epoch_train_acc, _, epoch_test_acc, _ = fit_model(epochs, model, device, train_loader, test_loader, optimizer, lambda_l1=lambda_l1, scheduler=scheduler)
  best_pair, epoch = get_best_train_test_acc(epoch_train_acc, epoch_test_acc)
  best_train_acc, best_test_acc = best_pair
  logger.info(f"\n===================> For L1 lambda parameter {lambda_l1}, For L2 lambda parameter {lambda_l2}, Best train Accuracy {best_train_acc}% and Best Test Accuracy {best_test_acc}% at Epoch {epoch} <===================\n")
  summary_record = (best_train_acc, best_test_acc, lambda_l1, lambda_l2 )
  return [best_train_acc], [best_test_acc], [summary_record]

In [None]:
def _run_regularisation_trial(epochs, device, train_loader, test_loader, l1_value, l2_value):
  """Train a freshly initialised ``Net`` with the given L1/L2 strengths and return ``best_tr_te_acc_from_epoch``'s result."""
  model = Net().to(device)
  # L2 regularisation is applied through the optimizer's weight decay.
  optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=l2_value)
  scheduler = StepLR(optimizer, step_size=12, gamma=0.2)
  return best_tr_te_acc_from_epoch(epochs, model, device, train_loader, test_loader, optimizer, lambda_l1=l1_value, lambda_l2=l2_value, scheduler=scheduler)


def my_grid_search(epochs, model, device, train_loader, test_loader, optimizer, scheduler, lambda_l1_range=None, lambda_l2_range=None, size=20, l1_l2_trails=0):
  """Random search over L1 and/or L2 regularisation strengths.

  Candidate values are drawn uniformly from the given ``[min, max]`` ranges.
  Every trial trains a new ``Net`` with SGD (lr=0.1, momentum=0.9) and
  ``StepLR(step_size=12, gamma=0.2)``. The results of all trials are
  collected and the best one is chosen with ``get_best_train_test_acc``.

  Args:
    epochs: epochs per trial.
    model, optimizer, scheduler: not used; accepted so existing calls keep
      working. Each trial builds its own model, optimizer and scheduler.
    device: device to train on.
    train_loader: training data loader.
    test_loader: test data loader.
    lambda_l1_range: ``[min, max]`` for the L1 weight, or empty/None to skip L1.
    lambda_l2_range: ``[min, max]`` for the L2 weight, or empty/None to skip L2.
    size: number of candidate values drawn per range. When only one range is
      given, one trial is run per candidate.
    l1_l2_trails: number of trials when both ranges are given; each trial
      picks one L1 and one L2 candidate at random.

  Returns:
    ``(best_train_acc, best_test_acc, best_lambda_l1, best_lambda_l2)`` of the
    selected trial.

  Raises:
    ValueError: if no range is given, a range has ``min > max``, or no trial
      was run.
  """
  search_l1 = bool(lambda_l1_range)
  search_l2 = bool(lambda_l2_range)
  if not (search_l1 or search_l2):
    raise ValueError("Select at least one parameter to search its mathematical space")

  # Results are accumulated over ALL trials; keeping only the latest trial's
  # lists would make the final selection always return the last trial.
  trial_train_acc, trial_test_acc, trial_records = [], [], []
  collected = (trial_train_acc, trial_test_acc, trial_records)

  if search_l1 and search_l2:
    if lambda_l1_range[0]>lambda_l1_range[1] or lambda_l2_range[0]>lambda_l2_range[1]:
      raise ValueError("It should be => min<max")
    options_l1 = np.random.uniform(low=lambda_l1_range[0], high=lambda_l1_range[1], size=size)
    options_l2 = np.random.uniform(low=lambda_l2_range[0], high=lambda_l2_range[1], size=size)
    for i in range(l1_l2_trails):
      l1_value = random.choice(options_l1)
      l2_value = random.choice(options_l2)
      logger.info(f"\n L1&L2 Trail:{i+1} - Model is getting trained with L1 regularisation parameter {l1_value} and L2 regularisation parameter {l2_value}\n")
      outcome = _run_regularisation_trial(epochs, device, train_loader, test_loader, l1_value, l2_value)
      for bucket, values in zip(collected, outcome):
        bucket.extend(values)
  elif search_l1:
    if lambda_l1_range[0]>lambda_l1_range[1]:
      raise ValueError("It should be => lambda_l1_range[0]<lambda_l1_range[1]")
    options = np.random.uniform(low=lambda_l1_range[0], high=lambda_l1_range[1], size=size)
    for i, para in enumerate(options):
      logger.info(f"\n L1 Trail:{i+1} - Model is getting trained with L1 regularisation parameter {para}\n")
      outcome = _run_regularisation_trial(epochs, device, train_loader, test_loader, para, 0)
      for bucket, values in zip(collected, outcome):
        bucket.extend(values)
  else:
    if lambda_l2_range[0]>lambda_l2_range[1]:
      raise ValueError("It should be => lambda_l2_range[0]<lambda_l2_range[1]")
    options = np.random.uniform(low=lambda_l2_range[0], high=lambda_l2_range[1], size=size)
    for i, para in enumerate(options):
      logger.info(f"\n L2 Trail:{i+1} - Model is getting trained with L2 regularisation parameter {para}\n")
      outcome = _run_regularisation_trial(epochs, device, train_loader, test_loader, 0, para)
      for bucket, values in zip(collected, outcome):
        bucket.extend(values)

  if not trial_records:
    raise ValueError("No trial was run; size and l1_l2_trails must allow at least one trial")

  # The helper reports a 1-based position within the lists it was given.
  _, best_position = get_best_train_test_acc(trial_train_acc, trial_test_acc)
  final_best_train_acc, final_best_test_acc, final_best_lambda_l1, final_best_lambda_l2 = trial_records[best_position - 1]

  if search_l1 and search_l2:
    logger.info(f"\n===================> final_best_train_acc: {final_best_train_acc}, final_best_test_acc: {final_best_test_acc}, final_best_lambda_l1: {final_best_lambda_l1} , final_best_lambda_l2: {final_best_lambda_l2} <===================\n")
  elif search_l1:
    logger.info(f"\n===================> final_best_train_acc: {final_best_train_acc}, final_best_test_acc: {final_best_test_acc}, final_best_lambda_l1: {final_best_lambda_l1} <===================\n")
  else:
    logger.info(f"\n===================> final_best_train_acc: {final_best_train_acc}, final_best_test_acc: {final_best_test_acc}, final_best_lambda_l2: {final_best_lambda_l2} <===================\n")
  return final_best_train_acc, final_best_test_acc, final_best_lambda_l1, final_best_lambda_l2


In [None]:
# Objects handed to my_grid_search below. The search builds its own model,
# optimizer and scheduler for every trial, using these same hyper-parameters.
model =  Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
EPOCHS = 15
# Multiply the learning rate by 0.2 every 12 epochs.
scheduler = StepLR(optimizer, step_size=12, gamma=0.2)
# lambda_l1=0
# Number of trials per range when L1 and L2 are searched together.
l1_l2_trails=50

# [min, max] ranges from which regularisation weights are sampled; alternative grids are kept for reference.
# para_grid_lambda = [[0,0.1],[0,0.01],[0,0.001],[0,0.0001]]
#para_grid_lambda = [[0,0.0001], [0,0.001], [0,0.01],[0,0.1]]
para_grid_lambda = [[0,0.0001], [0,0.001]]
# para_grid_lambda = [[0,0.0001], [0,0.1]]
# One result list per search mode; each entry is (train_acc, test_acc, lambda_l1, lambda_l2).
results_lambda_l1 = []
results_lambda_l2 = []
results_lambda_l1_l2 = []
size = 20 # Number of random choices in the given range

In [None]:

# ## L1&L2 regularisation hyper parameter search

# L1 and L2 weights are sampled from the same range here; separate ranges
# could be supported with slightly more elaborate logic.
for para_range in para_grid_lambda:
  logger.info(f"\n===================> Started - Trail on L1 & L2 reg parameters range - {para_range}, Number of para_ranges - {size}, , Number of trails per para_range - {l1_l2_trails}, Number of Epochs - {EPOCHS}<===================\n")
  # Run the combined search for this range and keep its selected result.
  final_best_train_acc, final_best_test_acc, final_best_lambda_l1, final_best_lambda_l2 = my_grid_search(EPOCHS, model, device, train_loader, test_loader, optimizer, scheduler, lambda_l1_range = para_range, lambda_l2_range = para_range, size = size, l1_l2_trails=l1_l2_trails)
  results_lambda_l1_l2.append((final_best_train_acc, final_best_test_acc, final_best_lambda_l1, final_best_lambda_l2))
  logger.info(f"\n===================> current results_lambda_l1_l2 - {results_lambda_l1_l2} <===================\n")
  logger.info(f"\n===================> Completed - Trail on L1 and L2 reg parameters range - {para_range} <===================\n")

# Final report: one line per searched range.
logger.info(f"\n===================> L1 & L2 - Results of Coarse/finer grid search in various ranges - {para_grid_lambda} <===================\n")
for final_best_train_acc, final_best_test_acc, final_best_lambda_l1, final_best_lambda_l2 in results_lambda_l1_l2:
  logger.info(f"L1 reg parameter: {final_best_lambda_l1}, L2 reg parameter: {final_best_lambda_l2}, Train_acc: {final_best_train_acc}, Test_acc: {final_best_test_acc}")

Loss=0.12660039961338043 Batch_id=468 Current_train_batch_accuracy=94.01: 100%|██████████| 469/469 [00:14<00:00, 31.42it/s]
Loss=0.10420891642570496 Batch_id=468 Current_train_batch_accuracy=97.94: 100%|██████████| 469/469 [00:14<00:00, 31.95it/s]
Loss=0.08869346976280212 Batch_id=468 Current_train_batch_accuracy=98.28: 100%|██████████| 469/469 [00:15<00:00, 31.15it/s]
Loss=0.10045590996742249 Batch_id=468 Current_train_batch_accuracy=98.46: 100%|██████████| 469/469 [00:14<00:00, 31.28it/s]
Loss=0.0811336413025856 Batch_id=468 Current_train_batch_accuracy=98.48: 100%|██████████| 469/469 [00:14<00:00, 31.36it/s]
Loss=0.06163523718714714 Batch_id=468 Current_train_batch_accuracy=98.64: 100%|██████████| 469/469 [00:15<00:00, 31.12it/s]
Loss=0.08621945977210999 Batch_id=468 Current_train_batch_accuracy=98.63: 100%|██████████| 469/469 [00:14<00:00, 31.33it/s]
Loss=0.06197027489542961 Batch_id=468 Current_train_batch_accuracy=98.66: 100%|██████████| 469/469 [00:14<00:00, 31.27it/s]
Loss=0.07

In [None]:
"""model =  Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
EPOCHS = 15
scheduler = StepLR(optimizer, step_size=12, gamma=0.2)
# lambda_l1=0
l1_l2_trails=50

# para_grid_lambda = [[0,0.1],[0,0.01],[0,0.001],[0,0.0001]]
para_grid_lambda = [[0,0.0001], [0,0.001], [0,0.01],[0,0.1]]
# para_grid_lambda = [[0,0.0001], [0,0.1]]
results_lambda_l1 = []
results_lambda_l2 = []
results_lambda_l1_l2 = []
size = 20 # Number of random choices in the given range


## L1 regularisation hyper parameter search

for para_range in para_grid_lambda:
  logger.info(f"\n===================> Started - Trail on L1 reg parameters range - {para_range}, Number of trails - {size}, Number of Epochs - {EPOCHS} <===================\n")
  final_best_train_acc, final_best_test_acc, final_best_lambda_l1, final_best_lambda_l2 = my_grid_search(EPOCHS, model, device, train_loader, test_loader, optimizer, scheduler, lambda_l1_range = para_range, lambda_l2_range = [], size = size)
  results_lambda_l1.append((final_best_train_acc, final_best_test_acc, final_best_lambda_l1, final_best_lambda_l2))
  logger.info(f"\n===================> current results_lambda_l1 - {results_lambda_l1} <===================\n")
  logger.info(f"\n===================> Completed - Trail on L1 reg parameters range - {para_range} <===================\n")

logger.info(f"\n===================> L1 - Results of Coarse/finer grid search in various ranges - {para_grid_lambda}<===================\n")
for final_best_train_acc, final_best_test_acc, final_best_lambda_l1, final_best_lambda_l2 in results_lambda_l1:
  logger.info(f"L1 reg parameter: {final_best_lambda_l1}, L2 reg parameter: {final_best_lambda_l2}, Train_acc: {final_best_train_acc}, Test_acc: {final_best_test_acc}")

# ## L2 regularisation hyper parameter search

# for para_range in para_grid_lambda:
#   logger.info(f"\n===================> Started - Trail on L2 reg parameters range - {para_range}, Number of trails - {size}, Number of Epochs - {EPOCHS}<===================\n")
#   final_best_train_acc, final_best_test_acc, final_best_lambda_l1, final_best_lambda_l2 = my_grid_search(EPOCHS, model, device, train_loader, test_loader, optimizer, scheduler, lambda_l1_range = [], lambda_l2_range = para_range, size = size)
#   results_lambda_l2.append((final_best_train_acc, final_best_test_acc, final_best_lambda_l1, final_best_lambda_l2))
#   logger.info(f"\n===================> current results_lambda_l2 - {results_lambda_l2} <===================\n")
#   logger.info(f"\n===================> Completed - Trail on L2 reg parameters range - {para_range} <===================\n")

# logger.info(f"\n===================> L2 - Results of Coarse/finer grid search in various ranges - {para_grid_lambda}<===================\n")
# for final_best_train_acc, final_best_test_acc, final_best_lambda_l1, final_best_lambda_l2 in results_lambda_l2:
#   logger.info(f"L1 reg parameter: {final_best_lambda_l1}, L2 reg parameter: {final_best_lambda_l2}, Train_acc: {final_best_train_acc}, Test_acc: {final_best_test_acc}")

# ## L1&L2 regularisation hyper parameter search

# # l1 and l2 reg paras in same range given but can be given different ranges by writing little more sophisticated logic
# for para_range in para_grid_lambda:
#   logger.info(f"\n===================> Started - Trail on L1 & L2 reg parameters range - {para_range}, Number of para_ranges - {size}, , Number of trails per para_range - {l1_l2_trails}, Number of Epochs - {EPOCHS}<===================\n")
#   final_best_train_acc, final_best_test_acc, final_best_lambda_l1, final_best_lambda_l2 = my_grid_search(EPOCHS, model, device, train_loader, test_loader, optimizer, scheduler, lambda_l1_range = para_range, lambda_l2_range = para_range, size = size, l1_l2_trails=l1_l2_trails)
#   results_lambda_l1_l2.append((final_best_train_acc, final_best_test_acc, final_best_lambda_l1, final_best_lambda_l2))
#   logger.info(f"\n===================> current results_lambda_l1_l2 - {results_lambda_l1_l2} <===================\n")
#   logger.info(f"\n===================> Completed - Trail on L1 and L2 reg parameters range - {para_range} <===================\n")

# logger.info(f"\n===================> L1 & L2 - Results of Coarse/finer grid search in various ranges - {para_grid_lambda} <===================\n")
# for final_best_train_acc, final_best_test_acc, final_best_lambda_l1, final_best_lambda_l2 in results_lambda_l1_l2:
#   logger.info(f"L1 reg parameter: {final_best_lambda_l1}, L2 reg parameter: {final_best_lambda_l2}, Train_acc: {final_best_train_acc}, Test_acc: {final_best_test_acc}")"""


Loss=0.12899385392665863 Batch_id=468 Current_train_batch_accuracy=94.00: 100%|██████████| 469/469 [00:14<00:00, 31.27it/s]
Loss=0.11819936335086823 Batch_id=468 Current_train_batch_accuracy=97.92: 100%|██████████| 469/469 [00:14<00:00, 31.53it/s]
Loss=0.12428629398345947 Batch_id=468 Current_train_batch_accuracy=98.31: 100%|██████████| 469/469 [00:14<00:00, 31.65it/s]
Loss=0.09569384157657623 Batch_id=468 Current_train_batch_accuracy=98.42: 100%|██████████| 469/469 [00:14<00:00, 31.46it/s]
Loss=0.1254483014345169 Batch_id=468 Current_train_batch_accuracy=98.47: 100%|██████████| 469/469 [00:14<00:00, 31.45it/s]
Loss=0.08706681430339813 Batch_id=468 Current_train_batch_accuracy=98.56: 100%|██████████| 469/469 [00:14<00:00, 31.69it/s]
Loss=0.1018138974905014 Batch_id=468 Current_train_batch_accuracy=98.65: 100%|██████████| 469/469 [00:14<00:00, 31.34it/s]
Loss=0.0737680196762085 Batch_id=468 Current_train_batch_accuracy=98.66: 100%|██████████| 469/469 [00:14<00:00, 31.36it/s]
Loss=0.1044