##The Resnet Research paper can be accessed from here https://arxiv.org/pdf/1512.03385v1.pdf

In [None]:
import torch
import numpy as np
import os
import torch.optim as optim
import copy

# Probe once for a usable CUDA device; later cells read this flag
train_on_gpu = torch.cuda.is_available()

print('CUDA is available!  Training on GPU ...' if train_on_gpu
      else 'CUDA is not available.  Training on CPU ...')

CUDA is available!  Training on GPU ...


In [None]:
# Mount Google Drive at /content/drive (Colab-only helper)
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Base directory (relative to the working directory) under which the log file,
# model checkpoints and trial results are written
DRIVE_PATH = './drive/MyDrive/DynamicResNet'

# Log record layout. NOTE(review): the logging cells below repeat this literal
# instead of referencing log_format.
log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

In [None]:
import logging
import logging.handlers


# Configure the root logger.
# Notebook kernels (Colab included) usually attach a root handler before user
# code runs; in that case a plain basicConfig() call silently does nothing,
# which is why the recorded output shows neither the INFO record nor the
# requested format. force=True (Python 3.8+) removes pre-existing root handlers
# so the level, format and handler below really take effect.
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    handlers=[logging.StreamHandler()],
                    force=True)


# Smoke test: both records should now be emitted with the format above
logging.info("test message")
logging.error("test message")

ERROR:root:test message


In [None]:
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Re-running this cell must not stack duplicate handlers on the logger
# (each duplicate would write every record once more).
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

# File handler that performs the actual writes to the log file
file_handler = logging.FileHandler(f'{DRIVE_PATH}/output.log')
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))

# Memory handler buffering up to 1000 records. It needs a target: without one,
# flush() does nothing and the buffer grows without bound. Records are forwarded
# to file_handler when the buffer is full, when a record of level ERROR or
# higher arrives, or when mem_handler.flush() is called explicitly.
mem_handler = logging.handlers.MemoryHandler(capacity=1000, target=file_handler)

# Only the memory handler is attached to the logger; attaching file_handler as
# well would write every record twice.
logger.addHandler(mem_handler)

In [None]:
# Log some messages
# Smoke-test the module logger with two DEBUG records
for i in range(2):
    logger.debug(f"Test Message {i}")

# Ask the memory handler to flush its buffer (records are only forwarded if the
# handler has a target handler configured)
mem_handler.flush()

# Close the file handler's underlying stream
file_handler.close()

DEBUG:__main__:Test Message 0
DEBUG:__main__:Test Message 1


In [None]:
# Save the initial resnet model (THESE ARE THE WEIGHTS USED TO INITALIZE ALL RESNET18 MODELS!)
INITIAL_NET_PATH = os.path.join(DRIVE_PATH,"ResNet18_initial.pth")
def save_initial_net(initial_net_model):
  """Persist the shared initial weights once.

  Writes ``initial_net_model.state_dict()`` to ``INITIAL_NET_PATH`` unless a
  file already exists there, so an earlier snapshot is never overwritten.

  :param initial_net_model: module whose ``state_dict()`` is saved
  """
  # The existence check and the write use the same path constant so they can
  # never drift apart.
  if os.path.exists(INITIAL_NET_PATH):
    return
  torch.save(initial_net_model.state_dict(), INITIAL_NET_PATH)

#**Downloading the CIFAR10 dataset, loading the data in normalized form as torch.FloatTensor datatype, and generating a validation set by dividing the training set in an 80-20 ratio**
#**CIFAR10**
The CIFAR10 and CIFAR-100 are labeled subsets of the 80 million tiny images dataset. They were collected by Alex Krizhevsky, Vinod Nair, and Geoffrey Hinton.

The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images.

The dataset is divided into five training batches and one test batch, each with 10000 images. The test batch contains exactly 1000 randomly-selected images from each class. The training batches contain the remaining images in random order, but some training batches may contain more images from one class than another. Between them, the training batches contain exactly 5000 images from each class.

Here are the classes in the dataset:
1. airplane
2. automobile
3. bird
4. cat
5. deer
6. dog
7. frog
8. horse
9. ship
10. truck

The classes are completely mutually exclusive. There is no overlap between automobiles and trucks. "Automobile" includes sedans, SUVs, things of that sort. "Truck" includes only big trucks. Neither includes pickup trucks.

More can be read from their page at https://www.cs.toronto.edu/~kriz/cifar.html

#**Image Augmentation**
In this cell, we perform some simple data augmentation by randomly flipping and cropping the given image data. We do this by defining a torchvision transform, and you can learn about all the transforms that are used to pre-process and augment data from the [PyTorch documentation](https://pytorch.org/docs/stable/torchvision/transforms.html)

In [None]:
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler

# number of subprocesses to use for data loading (0 = load in the main process)
num_workers = 0
# how many samples per batch to load
batch_size = 20

# convert data to a normalized torch.FloatTensor
print('==> Preparing data..')
# Training transform: random 32x32 crop after 4-pixel padding and random
# horizontal flip for augmentation, then tensor conversion and per-channel
# normalization (mean/std values are presumably the CIFAR-10 channel
# statistics -- TODO confirm)
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
# Test transform: only tensor conversion and normalization, no augmentation
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# choose the training and test datasets (downloaded into ./data if missing)
train_data = datasets.CIFAR10('data', train=True,
                              download=True, transform=transform_train)
test_data = datasets.CIFAR10('data', train=False,
                             download=True, transform=transform_test)

# Shuffle the training indices with a fixed seed.
# NOTE(review): no validation subset is carved out here -- every shuffled index
# is kept for training, although the surrounding text mentions an 80-20 split.
num_train = len(train_data)
indices = list(range(num_train))
np.random.seed(42)
np.random.shuffle(indices)
train_idx = indices

# sampler that draws the training indices in random order
train_sampler = SubsetRandomSampler(train_idx)

# prepare data loaders (combine dataset and sampler); shuffle stays False for
# the training loader because the sampler is what randomizes the order
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers,shuffle=False)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, 
    num_workers=num_workers)

# specify the image classes (list position is used as the class index when reporting accuracy)
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']


==> Preparing data..
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:12<00:00, 13232642.24it/s]


Extracting data/cifar-10-python.tar.gz to data
Files already downloaded and verified


#**Defining the Network Architecture**
In this section the entire Research Paper is implemented to define the Residual Network approach taken by the researchers

NOTE:

Output volume for a convolutional layer
To compute the output size of a given convolutional layer we can perform the following calculation (taken from Stanford's cs231n course):

We can compute the spatial size of the output volume as a function of the input volume size (W), the kernel/filter size (F), the stride with which they are applied (S), and the amount of zero padding used (P) on the border. The correct formula for calculating how many neurons define the output_W is given by (W−F+2P)/S+1.

For example for a 7x7 input and a 3x3 filter with stride 1 and pad 0 we would get a 5x5 output. With stride 2 we would get a 3x3 output.

In [None]:
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Dropout

class BasicBlock(nn.Module):
  """Residual block made of two 3x3 convolutions plus an additive shortcut.

  The shortcut is the identity unless the block changes the spatial stride or
  the channel count; then a strided 1x1 convolution followed by batch norm
  projects the input to the output shape.
  """
  expansion = 1

  def __init__(self, in_planes, planes, stride=1):
    super().__init__()
    out_planes = self.expansion * planes

    # Main path: conv-bn-relu followed by conv-bn
    self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)

    # Shortcut path: empty Sequential (identity) or 1x1 projection
    projection = []
    if stride != 1 or in_planes != out_planes:
      projection = [
          nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
          nn.BatchNorm2d(out_planes),
      ]
    self.shortcut = nn.Sequential(*projection)

  def forward(self, x):
    """Return relu(main_path(x) + shortcut(x))."""
    main = self.conv1(x)
    main = F.relu(self.bn1(main))
    main = self.bn2(self.conv2(main))
    main += self.shortcut(x)
    return F.relu(main)

class BottleNeck(nn.Module):
  """Bottleneck residual block: 1x1 reduce, 3x3 (strided), 1x1 expand.

  The output has ``expansion * planes`` channels. The shortcut is the identity
  unless stride or channel count changes, in which case a strided 1x1
  convolution with batch norm projects the input.
  """
  expansion = 4

  def __init__(self, in_planes, planes, stride=1):
    super().__init__()
    out_planes = self.expansion * planes

    # Main path
    self.conv1 = nn.Conv2d(in_planes , planes, kernel_size=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)
    self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
    self.bn3 = nn.BatchNorm2d(out_planes)

    # Shortcut path: empty Sequential (identity) or 1x1 projection
    projection = []
    if stride != 1 or in_planes != out_planes:
      projection = [
          nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
          nn.BatchNorm2d(out_planes),
      ]
    self.shortcut = nn.Sequential(*projection)

  def forward(self, x):
    """Return relu(main_path(x) + shortcut(x))."""
    main = F.relu(self.bn1(self.conv1(x)))
    main = F.relu(self.bn2(self.conv2(main)))
    main = self.bn3(self.conv3(main))
    main += self.shortcut(x)
    return F.relu(main)

class ResNet(nn.Module):
  """ResNet backbone with an optional Dropout layer appended to selected stages.

  :param block: residual block class; must expose an ``expansion`` attribute
      and accept ``(in_planes, planes, stride)``
  :param num_blocks: sequence of four ints, the number of blocks per stage
  :param dropout_rate: probability passed to every inserted ``Dropout``
  :param dropout_location: ``"all"`` appends dropout to all four stages,
      ``"middle"`` to stage 3 only; any other value (intended: ``"last"``)
      appends it to stage 4 only
  :param model_architecture: label used to build ``model_props``
  :param num_classes: size of the final linear layer
  :param curr_epoch: stored on the instance as ``curr_epoch``
  :param valid_loss_min: accepted for interface compatibility; not used.
      The default is ``np.inf`` because the ``np.Inf`` alias was removed in
      NumPy 2.0 and would make this class definition raise there.
  """
  def __init__(self, block, num_blocks, dropout_rate, dropout_location, model_architecture, num_classes=10, curr_epoch=0, valid_loss_min=np.inf):
    super(ResNet, self).__init__()
    self.in_planes = 64

    self.dropout_rate = dropout_rate
    self.dropout_location = dropout_location
    self.curr_epoch = curr_epoch

    # Human-readable identifier used in log lines
    self.model_props = f"{model_architecture}.{dropout_location}.{dropout_rate}"

    self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(64)

    # One flag per stage (layer1..layer4): does the stage end with Dropout?
    if dropout_location == 'all':
      dropout_flags = (True, True, True, True)
    elif dropout_location == 'middle':
      dropout_flags = (False, False, True, False)
    else:  # 'last' (also the fallback for any other value)
      dropout_flags = (False, False, False, True)

    # Stages are created in order so that self.in_planes is threaded correctly
    # and parameter names/initialization order match earlier checkpoints.
    self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1, is_dropout=dropout_flags[0])
    self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2, is_dropout=dropout_flags[1])
    self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2, is_dropout=dropout_flags[2])
    self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2, is_dropout=dropout_flags[3])
    self.linear = nn.Linear(512*block.expansion, num_classes)

  def get_dropout_rate(self):
      """Return the dropout probability this model was built with."""
      return self.dropout_rate

  def _make_layer(self, block, planes, num_blocks, stride, is_dropout):
    """Build one stage of ``num_blocks`` blocks.

    Only the first block uses ``stride``; the remaining ones use stride 1.
    ``self.in_planes`` is updated so the next block/stage sees the right
    input width. When ``is_dropout`` is true a ``Dropout(self.dropout_rate)``
    is appended as the last module of the stage.
    """
    layers = []
    for block_stride in [stride] + [1]*(num_blocks-1):
      layers.append(block(self.in_planes, planes, block_stride))
      self.in_planes = planes * block.expansion
    if is_dropout:
      layers.append(Dropout(self.dropout_rate))
    return nn.Sequential(*layers)

  def forward(self, x):
    """Compute class scores; the fixed 4x4 average pool followed by the linear
    layer sized ``512*expansion`` presumes a 4x4 final feature map, i.e. 32x32
    inputs."""
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = self.layer4(out)
    out = F.avg_pool2d(out, 4)
    out = out.view(out.size(0), -1)
    out = self.linear(out)
    return out

def ResNet18(rate, location):
    """Build a ResNet-18 (BasicBlock, [2, 2, 2, 2]) with the given dropout rate and location."""
    return ResNet(BasicBlock, [2, 2, 2, 2], model_architecture="ResNet18", dropout_rate=rate, dropout_location=location)

def ResNet34(rate, location):
    """Build a ResNet-34 (BasicBlock, [3, 4, 6, 3]) with the given dropout rate and location."""
    return ResNet(BasicBlock, [3, 4, 6, 3], model_architecture="ResNet34", dropout_rate=rate, dropout_location=location)

# The bottleneck class in this file is spelled `BottleNeck`; the factories below
# previously referenced an undefined `Bottleneck` and raised NameError on call.
def ResNet50(rate, location):
    """Build a ResNet-50 (BottleNeck, [3, 4, 6, 3]) with the given dropout rate and location."""
    return ResNet(BottleNeck, [3, 4, 6, 3], model_architecture="ResNet50", dropout_rate=rate, dropout_location=location)

def ResNet101(rate, location):
    """Build a ResNet-101 (BottleNeck, [3, 4, 23, 3]) with the given dropout rate and location."""
    return ResNet(BottleNeck, [3, 4, 23, 3], model_architecture="ResNet101", dropout_rate=rate, dropout_location=location)

def ResNet152(rate, location):
    """Build a ResNet-152 (BottleNeck, [3, 8, 36, 3]) with the given dropout rate and location."""
    return ResNet(BottleNeck, [3, 8, 36, 3], model_architecture="ResNet152", dropout_rate=rate, dropout_location=location)

In [None]:
# DataParallel wrapper that also carries the wrapped model's log label
class LoggerDataParallel(torch.nn.DataParallel):
    """``torch.nn.DataParallel`` with an extra ``model_props`` attribute.

    The wrapper does not forward arbitrary attribute access to the wrapped
    module, so the descriptive label is stored on the wrapper itself.

    :param module: module to parallelize
    :param model_props: label stored as ``self.model_props`` (default ``None``)
    :param args, kwargs: passed through to ``torch.nn.DataParallel``
    """
    def __init__(self, module, model_props=None, *args, **kwargs):
        super().__init__(module, *args, **kwargs)
        self.model_props = model_props

In [None]:
dropout_values = [0.0,0.1,0.2,0.3,0.4,0.5]
droput_locations = ['last','middle']

# Every (location, rate) pair; the rate varies slowest, the location fastest
combinations = [
    (drop_loc, drop_val)
    for drop_val in dropout_values
    for drop_loc in droput_locations
]

combinations

[('last', 0.0),
 ('middle', 0.0),
 ('last', 0.1),
 ('middle', 0.1),
 ('last', 0.2),
 ('middle', 0.2),
 ('last', 0.3),
 ('middle', 0.3),
 ('last', 0.4),
 ('middle', 0.4),
 ('last', 0.5),
 ('middle', 0.5)]

#**Training Loop**
Here we train the architecture on the training data, check its validation loss using the validation set, and save the model only if there is an improvement, i.e., a decrease in the validation loss.

In [None]:
def save_checkpoint(epoch, model, optimizer, dropout_rate, dropout_location, train_loss_min ,suffix):
    """Save a training checkpoint under ``DRIVE_PATH/<dropout_location>/``.

    The file is named ``ResNet18_<dropout_rate>_<suffix>.pth`` and holds the
    epoch, the model and optimizer state dicts, the dropout rate and the best
    training loss seen so far.

    :param epoch: epoch index stored in the checkpoint
    :param model: module whose ``state_dict()`` is saved
    :param optimizer: optimizer whose ``state_dict()`` is saved
    :param dropout_rate: stored under ``'dropout'`` and used in the file name
    :param dropout_location: sub-directory name
    :param train_loss_min: stored under ``'train_loss_min'``
    :param suffix: file-name suffix (the callers use "last" / "best_valid")
    """
    # torch.save does not create missing directories; make sure the
    # per-location folder exists so a fresh run does not fail after training
    # a full epoch.
    os.makedirs(os.path.join(DRIVE_PATH, dropout_location), exist_ok=True)
    torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'dropout':dropout_rate,
                'train_loss_min': train_loss_min,
                }, f'{DRIVE_PATH}/{dropout_location}/ResNet18_{dropout_rate}_{suffix}.pth')

In [None]:
def print_accuracy_info(class_total, class_correct, prefix):
    """Log per-class and overall accuracy.

    :param class_total: per-class number of examples seen
    :param class_correct: per-class number of correct predictions
    :param prefix: label placed at the start of every line (e.g. "Train")

    Class names are taken from the module-level ``classes`` list by position.
    """
    # Iterate over the counters actually supplied instead of a hard-coded 10
    for idx, (n_total, n_correct) in enumerate(zip(class_total, class_correct)):
        if n_total > 0:
            message = '%s Accuracy of %5s: %2d%% (%2d/%2d)' % (
                prefix, classes[idx], 100 * n_correct / n_total,
                n_correct, n_total)
        else:
            message = '%s Accuracy of %5s: N/A (no training examples)' % (prefix, classes[idx])
        # All lines go through the logger so they also reach the log file
        logger.info(message)

    overall_correct = np.sum(class_correct)
    overall_total = np.sum(class_total)
    if overall_total > 0:
        message = '%s Accuracy (Overall): %2d%% (%2d/%2d)' % (prefix,
            100. * overall_correct / overall_total,
            overall_correct, overall_total)
    else:
        # 0/0 would yield NaN, which '%d' cannot format
        message = '%s Accuracy (Overall): N/A (no examples)' % prefix
    logger.info(message)

In [None]:
def run_train_loop(curr_net, curr_optimizer, criterion, dropout_location, dropout_rate, curr_epoch = 0, train_loss_min = np.inf, n_epochs = 50):
  """Train ``curr_net`` on ``train_loader`` and checkpoint after every epoch.

  :param curr_net: model to train; must expose ``model_props`` for logging
  :param curr_optimizer: optimizer stepping ``curr_net``'s parameters
  :param criterion: loss function called as ``criterion(output, target)``
  :param dropout_location: forwarded to ``save_checkpoint``
  :param dropout_rate: forwarded to ``save_checkpoint``
  :param curr_epoch: first epoch index to run
  :param train_loss_min: best (lowest) average training loss seen so far
  :param n_epochs: last epoch index to run (inclusive); defaults to 50

  NOTE(review): checkpoints store the index of the epoch that just finished,
  and resuming passes that same index back as ``curr_epoch``, so a resumed run
  repeats the stored epoch (a finished model restarts at epoch 50). Confirm
  whether that is intended before changing the epoch bookkeeping.
  """
  for epoch in range(curr_epoch, n_epochs+1):
    # Per-epoch accumulators; resetting them here makes the logged accuracy
    # describe this epoch only rather than a running total over all epochs.
    train_loss = 0.0
    class_correct_train = [0.0] * len(classes)
    class_total_train = [0.0] * len(classes)

    ###################
    # train the model #
    ###################
    curr_net.train()
    for batch_idx, (data, target) in enumerate(train_loader):
      # move tensors to GPU if CUDA is available
      if train_on_gpu:
        data, target = data.cuda(), target.cuda()
      # clear the gradients of all optimized variables
      curr_optimizer.zero_grad()
      # forward pass: compute predicted outputs by passing inputs to the model
      output = curr_net(data)
      # calculate the batch loss
      loss = criterion(output, target)
      # backward pass: compute gradient of the loss with respect to model parameters
      loss.backward()
      # perform a single optimization step (parameter update)
      curr_optimizer.step()
      # update training loss (loss is a batch mean, so weight by batch size)
      train_loss += loss.item()*data.size(0)

      # predicted class = index of the largest output score
      _, pred = torch.max(output, 1)
      hits = pred.eq(target.view_as(pred))
      # Walk the samples actually present in this batch; the final batch may
      # be smaller than batch_size (or hold a single sample).
      for label, hit in zip(target.tolist(), hits.tolist()):
        class_correct_train[label] += hit
        class_total_train[label] += 1

    # average loss per training sample
    train_loss = train_loss/len(train_loader.sampler)

    epoch_info = 'Epoch: {} \tTraining Loss: {:.6f} \t'.format(epoch, train_loss)
    logger.info(f"{curr_net.model_props}, {epoch_info}")

    print_accuracy_info(class_total_train, class_correct_train, "Train")
    if train_loss < train_loss_min:
      train_loss_min = train_loss
      # This line used to be a bare tuple expression, so the best checkpoint
      # was never written; it is now an actual save_checkpoint call.
      save_checkpoint(epoch, curr_net, curr_optimizer, dropout_rate, dropout_location, train_loss_min, "best_valid")
    save_checkpoint(epoch, curr_net, curr_optimizer, dropout_rate, dropout_location, train_loss_min ,"last")

    # Flush the memory handler to the file
    mem_handler.flush()

    # Close the file handler
    file_handler.close()

In [None]:
# Build a ResNet18 with dropout rate 0.0 in the 'last' location
net = ResNet18(0.0, 'last')
if train_on_gpu:
  # net = torch.nn.DataParallel(net).cuda()
  # Wrap for data-parallel execution; model_props is copied onto the wrapper
  # because it is read from the wrapper when logging
  net = LoggerDataParallel(net, model_props = net.model_props)
  cudnn.benchmark = True
# net.get_dropout_rate()

In [None]:
#### Load model checkpoint
def load_checkpoint(path, dropout_location):
  """Rebuild a ResNet18 from a checkpoint written by ``save_checkpoint``.

  :param path: checkpoint file path
  :param dropout_location: dropout placement for the rebuilt model
  :return: ``(net, curr_epoch, train_loss_min)``; ``(None, 0, np.inf)`` when
      the file is missing or cannot be loaded

  State-dict keys saved from a DataParallel wrapper carry a leading
  ``module.`` prefix. The prefix is stripped where present and keys without
  it are kept unchanged. Previously un-prefixed keys were dropped, which made
  loading fail and silently fall back to ``None``.
  """
  if not os.path.exists(path):
    print(f"No checkpoint found at {path}")
    return None, 0, np.inf
  try:
    # map_location='cpu' lets a checkpoint saved on GPU load on a CPU-only
    # machine; the weights are copied into a CPU-built model below anyway.
    checkpoint = torch.load(path, map_location='cpu')

    prefix = "module."
    model_state_dict = {}
    for full_key, tensor in checkpoint["model_state_dict"].items():
      key = full_key
      # Strip every leading wrapper prefix (nested wrappers add one each)
      while key.startswith(prefix):
        key = key[len(prefix):]
      model_state_dict[key] = tensor

    curr_epoch = checkpoint.get('epoch', 0)
    dropout_rate = checkpoint.get('dropout', 0.0)
    train_loss_min = checkpoint.get('train_loss_min', np.inf)
    net = ResNet18(dropout_rate, dropout_location)
    #### Load model state dictionary (optimizer state is not restored here)
    net.load_state_dict(model_state_dict)
    return net , curr_epoch, train_loss_min
  except Exception as e:
    # Best effort: report the problem and let the caller fall back to a
    # freshly initialized model.
    print(f"Could not load checkpoint {path}: {e}")
    return None, 0, np.inf

In [None]:
logger_path = f"{DRIVE_PATH}"
initial_net = ResNet18(0.0, 'last')
# Reuse the persisted initial weights when they exist so every session starts
# all models from the same initialization. Previously the saved file was never
# read back, so each new session used a different random "initial" network.
if os.path.exists(INITIAL_NET_PATH):
  initial_net.load_state_dict(torch.load(INITIAL_NET_PATH, map_location='cpu'))
else:
  save_initial_net(initial_net)
state_dict = initial_net.state_dict()

In [None]:
# "<location>_<rate>" -> (model, checkpoint epoch, best training loss)
model_map = {}
for combination in combinations:
  dropout_location, dropout_rate = combination

  # Resume from the most recent checkpoint of this configuration if it loads
  net_path = os.path.join(DRIVE_PATH, dropout_location, f"ResNet18_{dropout_rate}_last.pth")
  net, curr_epoch, train_loss_min = load_checkpoint(net_path, dropout_location=dropout_location)

  if net is not None:
    print("loaded model from disk")
    cudnn.benchmark = True
  else:
    # No usable checkpoint: start from a copy of the shared initial weights
    net = ResNet18(dropout_rate, dropout_location)
    net.load_state_dict(copy.deepcopy(state_dict))
    print(f"Created the model using initial weights: {net.model_props}")

  model_key = f"{dropout_location}_{dropout_rate}"
  model_map[model_key] = (net, curr_epoch, train_loss_min)

loaded model from disk
loaded model from disk
loaded model from disk
loaded model from disk
loaded model from disk
loaded model from disk
loaded model from disk
loaded model from disk
loaded model from disk
loaded model from disk
loaded model from disk
loaded model from disk


In [None]:
def get_model_for_training(dropout_location, dropout_rate):
  """Look up the model registered for a (location, rate) configuration.

  :param dropout_location: dropout placement part of the key
  :param dropout_rate: dropout rate part of the key
  :return: ``(net, curr_epoch, train_loss_min)`` as stored in ``model_map``
  :raises KeyError: if no model is registered under
      ``"<dropout_location>_<dropout_rate>"``
  """
  model_key = f"{dropout_location}_{dropout_rate}"
  # Fail with a readable message instead of "cannot unpack NoneType"
  if model_key not in model_map:
    raise KeyError(f"No model registered for '{model_key}'; available keys: {sorted(model_map)}")
  net, curr_epoch, train_loss_min = model_map[model_key]
  print(f"the following model {net.model_props} is starting from epoch : {curr_epoch}")
  return net, curr_epoch, train_loss_min

In [None]:
# Train every (location, rate) configuration in turn
for combination in combinations:
  dropout_location, dropout_rate = combination
  net, curr_epoch, train_loss_min = get_model_for_training(dropout_location, dropout_rate)

  # A model at epoch 0 must still carry the shared initial weights
  if curr_epoch == 0:
    assert (initial_net.state_dict()['conv1.weight'] == net.state_dict()['conv1.weight']).all()

  if train_on_gpu:
    net = LoggerDataParallel(net, model_props = net.model_props)
    cudnn.benchmark = True

  logger.info(f"{net.model_props} Starting Training")

  # loss: categorical cross-entropy; optimizer: SGD with momentum and weight decay
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0001)

  run_train_loop(net, optimizer, criterion, dropout_location, dropout_rate,
                 curr_epoch=curr_epoch, train_loss_min=train_loss_min)

INFO:__main__:ResNet18.last.0.0 Starting Training


the following model ResNet18.last.0.0 is starting from epoch : 50


KeyboardInterrupt: ignored

In [None]:
# Flush the memory handler to the file
# Ask the memory handler to flush whatever it has buffered (useful after the
# training cell was interrupted)
mem_handler.flush()

# Close the file handler's underlying stream
file_handler.close()

#**Loading the Best Model**

In [None]:
def enable_dropout(mod: nn.Module):
    """Switch ``mod`` to training mode if it is an ``nn.Dropout``; otherwise do nothing.

    Meant for ``model.apply(enable_dropout)`` after ``model.eval()`` so that
    dropout stays active while the other layers remain in eval mode.
    """
    if not isinstance(mod, nn.Dropout):
        return
    mod.train()

In [None]:
# List the registered configurations
print(model_map.keys())

# Fetch one configuration ('middle' placement, rate 0.5) for inspection
net, curr_epoch, train_loss_min = get_model_for_training('middle', 0.5)
print(curr_epoch)
criterion = nn.CrossEntropyLoss()


dict_keys(['last_0.0', 'middle_0.0', 'last_0.1', 'middle_0.1', 'last_0.2', 'middle_0.2', 'last_0.3', 'middle_0.3', 'last_0.4', 'middle_0.4', 'last_0.5', 'middle_0.5'])
the following model ResNet18.middle.0.5 is starting from epoch : 50
50


#**Testing Loop**
The real test of the model architecture: how well does the model recognize the images, and what is its accuracy on the test data?

In [None]:
# track test loss
def run_test_data(net, optimizer, criterion, keep_dropout = True, is_train_loader = False):
  """Evaluate ``net`` on the test loader (or the train loader) and log accuracy.

  :param net: model to evaluate; must expose ``model_props``
  :param optimizer: unused; kept for interface compatibility
  :param criterion: loss function called as ``criterion(output, target)``
  :param keep_dropout: if true, Dropout modules are put back into training
      mode after ``net.eval()`` so dropout stays active during evaluation
  :param is_train_loader: evaluate on ``train_loader`` instead of
      ``test_loader`` (note that ``train_loader`` applies random augmentation)
  :return: overall accuracy, correct / total
  """
  curr_loader = train_loader if is_train_loader else test_loader
  log_label = "Train" if is_train_loader else "Test"

  test_loss = 0.0
  class_correct = [0.0] * len(classes)
  class_total = [0.0] * len(classes)

  net.eval()
  if keep_dropout:
    net.apply(enable_dropout)

  # Callers may pass an already wrapped model; wrapping again would only nest
  # DataParallel wrappers.
  if train_on_gpu and not isinstance(net, torch.nn.DataParallel):
      net = LoggerDataParallel(net, model_props = net.model_props)

  # Evaluation needs no gradients; skipping autograd saves memory and time
  with torch.no_grad():
    for batch_idx, (data, target) in enumerate(curr_loader):
      # move tensors to GPU if CUDA is available
      if train_on_gpu:
        data, target = data.cuda(), target.cuda()
      # forward pass: compute predicted outputs by passing inputs to the model
      output = net(data)
      # batch loss is a mean, so weight it by the batch size
      loss = criterion(output, target)
      test_loss += loss.item()*data.size(0)
      # predicted class = index of the largest output score
      _, pred = torch.max(output, 1)
      hits = pred.eq(target.view_as(pred))
      # Walk the samples actually present; the last batch may be short
      for label, hit in zip(target.tolist(), hits.tolist()):
        class_correct[label] += hit
        class_total[label] += 1

  # Average over the loader that was actually evaluated (the old code always
  # divided by the test-set size and then discarded the value).
  test_loss = test_loss/len(curr_loader.sampler)
  logger.info('%s Loss: %.6f' % (log_label, test_loss))
  print_accuracy_info(class_total, class_correct, log_label)

  return np.sum(class_correct) / np.sum(class_total)

In [None]:
# JSON file under DRIVE_PATH that stores the accumulated trial results
results_dict_location = os.path.join(DRIVE_PATH, 'trial_results.json')

In [None]:
import json
def save_trials_dict(trial_result_dict):
  """Write ``trial_result_dict`` as JSON to ``results_dict_location``, replacing any previous content."""
  with open(results_dict_location, 'w') as results_file:
    json.dump(trial_result_dict, results_file)

def load_trials_dict():
  """Return the parsed JSON stored at ``results_dict_location``, or ``None`` if the file does not exist."""
  if not os.path.exists(results_dict_location):
    return None
  with open(results_dict_location, 'r') as results_file:
    return json.load(results_file)


In [None]:
# Number of evaluation trials to collect per (configuration, test-time rate)
TRIALS_NUMBER = 20

# Start from the results already on disk, if any, so finished trials are kept
loaded_dict = load_trials_dict()
if loaded_dict is None:
  trial_result_dict = {}
else:
  trial_result_dict = copy.deepcopy(loaded_dict)

In [None]:
# Show which "<rate>_<location>" configurations already have stored results
trial_result_dict.keys()

dict_keys(['0.0_last', '0.0_middle', '0.1_last', '0.1_middle', '0.2_last', '0.2_middle', '0.3_last', '0.3_middle', '0.4_last', '0.4_middle', '0.5_last', '0.5_middle'])

General schema of the results
```
{

  "0.4_last":{
  
  "0.0_train": [1,2,3,.....],

  "0.0_test": [1,2,3,.....]

  },


}
```

In [None]:
# For every trained configuration, copy its weights into models built with
# each test-time dropout rate and record train/test accuracy over repeated
# trials. Results are saved after every trial so an interrupted run resumes
# where it stopped.
for combination in combinations:
  dropout_location, dropout_rate = combination
  trial_dict_key = f"{dropout_rate}_{dropout_location}"

  if trial_result_dict.get(trial_dict_key) is None:
    trial_result_dict[trial_dict_key]={}

  net, curr_epoch, train_loss_min = get_model_for_training(dropout_location, dropout_rate)
  original_dict = copy.deepcopy(net.state_dict())
  print(f"Current Loss {train_loss_min} ")
  # NOTE: np.arange accumulates float error (e.g. 0.30000000000000004); the
  # values are kept as they are because they form the keys already stored in
  # the results file.
  for rate in np.arange(0, 1.1, 0.1):
    print(f"Loading into a new model with {rate} dropout in {dropout_location} location")
    net = ResNet18(rate, dropout_location)
    net.load_state_dict(original_dict)

    trial_test_key = f"{rate}_test"
    trial_train_key = f"{rate}_train"

    if trial_result_dict.get(trial_dict_key).get(trial_test_key) is None:
      trial_result_dict[trial_dict_key][trial_test_key]= []

    if trial_result_dict.get(trial_dict_key).get(trial_train_key) is None:
      trial_result_dict[trial_dict_key][trial_train_key] = []

    # Resume after the trials already recorded for this rate
    start_idx = len(trial_result_dict[trial_dict_key][trial_train_key])
    if start_idx >= TRIALS_NUMBER:
      continue

    # Wrap the model once per rate. Doing this inside the trial loop wrapped
    # the previous wrapper again on every trial, nesting up to TRIALS_NUMBER
    # DataParallel layers.
    if train_on_gpu:
      net = LoggerDataParallel(net, model_props = net.model_props)
      cudnn.benchmark = True

    # specify loss function (categorical cross-entropy)
    criterion = nn.CrossEntropyLoss()

    # the optimizer is only passed through to run_test_data's signature
    optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0001)

    for trial in range(start_idx, TRIALS_NUMBER):
      print(f"Current trial : {trial}")

      #run_test_data(net, optimizer, criterion, False)
      total_overall_accuracy_on_train = run_test_data(net, optimizer, criterion, True, True)
      total_overall_accuracy_on_test = run_test_data(net, optimizer, criterion, True, False)

      trial_result_dict[trial_dict_key][trial_test_key].append(total_overall_accuracy_on_test)
      trial_result_dict[trial_dict_key][trial_train_key].append(total_overall_accuracy_on_train)
      save_trials_dict(trial_result_dict)

the following model ResNet18.last.0.0 is starting from epoch : 50
Current Loss 0.0868239977533929 
Loading into a new model with 0.0 dropout in last location
Loading into a new model with 0.1 dropout in last location
Loading into a new model with 0.2 dropout in last location
Loading into a new model with 0.30000000000000004 dropout in last location
Loading into a new model with 0.4 dropout in last location
Loading into a new model with 0.5 dropout in last location
Loading into a new model with 0.6000000000000001 dropout in last location
Loading into a new model with 0.7000000000000001 dropout in last location
Loading into a new model with 0.8 dropout in last location
Loading into a new model with 0.9 dropout in last location
Loading into a new model with 1.0 dropout in last location
the following model ResNet18.middle.0.0 is starting from epoch : 50
Current Loss 0.08586716080633923 
Loading into a new model with 0.0 dropout in middle location
Loading into a new model with 0.1 dropout i