<a href="https://colab.research.google.com/github/AsimZz/Electo-Store/blob/master/cnn_models_and_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Initial Setup

In [None]:
!pip install torch==1.6.0 torchvision==0.7.0

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from matplotlib import image as mp_image
import seaborn as sns
from tqdm import tqdm

%matplotlib inline

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

# Data Preprocessing

In [None]:
data_origin = '/content/gdrive/MyDrive/Graduation Project/ds'

In [None]:
!ls '/content/gdrive/MyDrive/Graduation Project/ds' 

In [None]:
import torch
import torchvision
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torch.utils.data import Dataset
import glob
import tifffile as tiff
from PIL import Image

In [None]:
import time
import math
import random

In [None]:
def get_default_device():
    """Return the CUDA device when CUDA is available, otherwise the CPU device."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)

In [None]:
device = get_default_device()
device

## Akram's Suggested Method

In [None]:
from torch.utils.data import Dataset

class EuroSatDataset(Dataset):
    """Dataset that pairs an indexable collection of images with parallel labels."""

    def __init__(self, images, labels):
        # Both collections are stored as given (no copy) and indexed in parallel.
        self.images, self.labels = images, labels

    def __len__(self):
        """Number of samples, taken from the label collection."""
        return len(self.labels)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``."""
        image = self.images[index]
        label = self.labels[index]
        return (image, label)

In [None]:
def tiff_loader(filename):
  """Read a TIFF file and return its contents as a float32 tensor on the default device."""
  pixels = tiff.imread(filename).astype(np.float32)
  return torch.tensor(pixels, device=get_default_device())

In [None]:
# NOTE(review): `data_transforms` is built here but is not passed to the
# ImageFolder below, so it has no effect on the samples that are loaded.
data_transforms = transforms.Compose([transforms.ToTensor()])
# Every file under `data_origin` is read through `tiff_loader`.
# Presumably there is one sub-folder per class — verify against the Drive layout.
image_datasets = ImageFolder(root= data_origin,loader=tiff_loader)

In [None]:
## run this cell only once if you didn't save the EuroSatDataset class object
# Load every (image, label) pair from the ImageFolder into memory so later
# epochs do not have to re-read the TIFF files.
images = []
labels = []

# Iterate the ImageFolder built above. Wrapping the (still empty) lists in
# EuroSatDataset *before* this loop would make the loop iterate over nothing.
for image, label in tqdm(image_datasets):
  images.append(image)
  labels.append(label)  # store the sample's label, not the list itself

# Only now replace the lazy ImageFolder with the in-memory dataset.
image_datasets = EuroSatDataset(images, labels)


In [None]:
import pickle

save_path = '/content/gdrive/MyDrive/Graduation Project/'

In [None]:
## then we save the in-memory dataset (not a model) in drive
# Written to '<save_path>image_datasets.pkl'; any cell that reloads it must
# use the same file name.

with open(save_path + 'image_datasets.pkl', 'wb') as handle:
    pickle.dump(image_datasets, handle)

In [None]:
# Reload the pickled dataset. The file name must match the one the save cell
# writes ('image_datasets.pkl'); the previous 'images_dataset.pkl' never exists.
# NOTE: pickle.load can execute arbitrary code — only load files you created.
with open(save_path + 'image_datasets.pkl', 'rb') as handle:
    image_datasets = pickle.load(handle)

## Split The Dataset 

In [None]:
len(image_datasets)

In [None]:
# split the dataset to train, test and validations sets
# use random_split from pytorch.dataset module
from torch.utils.data.dataset import random_split

In [None]:
# First split: 6000 samples are held out for validation, 21000 remain
# (they are split again into train/test in the next cell).
# NOTE(review): `lengths` (an 80/20 split) is computed but never used; the
# sizes passed to random_split are hard-coded and must sum to
# len(image_datasets). The next cell's hard-coded sizes assume 21000 here.
lengths = [math.floor(len(image_datasets)*0.8),math.ceil(len(image_datasets)*0.2)]
train_data, val_data = random_split(image_datasets,[21000,6000])

In [None]:
# Second split: take 6000 samples out of the remaining training data as the
# *test* set (the validation set was already split off in the previous cell).
# NOTE(review): `lengths` is computed but never used; the hard-coded sizes
# must sum to len(train_data).
lengths = [math.floor(len(train_data)*0.8),math.ceil(len(train_data)*0.2)]
train_data, test_data = random_split(train_data,[15000,6000])

In [None]:
print('Train Length = ' + str(len(train_data)))
print('Validation Length = ' + str(len(val_data)))
print('Test Length = ' + str(len(test_data)))

In [None]:
device = get_default_device()
batch_size = 64
# NOTE(review): `device` is a torch.device, and comparing it with the string
# 'cuda' looks like it is always False, so `kw` would always be {} — confirm.
# Before changing this to `device.type == 'cuda'`, note that `tiff_loader`
# creates the sample tensors directly on the default device; worker processes
# and pinned memory presumably expect CPU tensors, so enabling them may fail.
kw = {'num_workers': 8, 'pin_memory': True} if device == 'cuda' else {}

# One loader per split; all three shuffle their samples on every pass.
train_loaders = DataLoader(train_data, batch_size = batch_size, shuffle = True, **kw)
val_loaders = DataLoader(val_data, batch_size = batch_size, shuffle = True, **kw)
test_loaders = DataLoader(test_data, batch_size = batch_size, shuffle = True, **kw)

# Model Architectures

In [None]:
import torch.nn as nn
import torch.nn.functional as F

def accuracy(outputs, labels):
    """Return, as a 0-dim tensor, the fraction of rows whose arg-max over dim 1 equals the label."""
    predicted = torch.max(outputs, dim=1)[1]
    hits = torch.sum(predicted == labels).item()
    return torch.tensor(hits / len(predicted))

class ImageClassifier(nn.Module):
    """Base module providing the training/validation helpers shared by the classifiers.

    Subclasses supply ``forward``; batches are ``(images, labels)`` pairs.
    """

    def training_step(self, batch):
        """Return the cross-entropy loss of the model on one batch."""
        inputs, targets = batch
        return F.cross_entropy(self(inputs), targets)

    def validation_step(self, batch):
        """Return the detached loss and the accuracy of the model on one batch."""
        inputs, targets = batch
        logits = self(inputs)
        batch_loss = F.cross_entropy(logits, targets)
        batch_acc = accuracy(logits, targets)
        return {'val_loss': batch_loss.detach(), 'val_acc': batch_acc}

    def validation_epoch_end(self, outputs):
        """Average the per-batch losses and accuracies into plain Python floats."""
        mean_loss = torch.stack([step['val_loss'] for step in outputs]).mean()
        mean_acc = torch.stack([step['val_acc'] for step in outputs]).mean()
        return {'val_loss': mean_loss.item(), 'val_acc': mean_acc.item()}

    def epoch_end(self, epoch, result):
        """Print a one-line summary of the epoch (last LR, train loss, val loss, val acc)."""
        template = "Epoch [{}], last_lr: {:.5f}, train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}"
        last_lr = result['lrs'][-1]
        print(template.format(
            epoch, last_lr, result['train_loss'], result['val_loss'], result['val_acc']))

## ResNet-152 Model

Residual networks are built from residual blocks. ResNet has been shown to perform very well on image classification tasks.
The architecture of the network is shown in the figure below:

![alt text](https://www.researchgate.net/profile/Dongyun-Lin/publication/324961229/figure/fig2/AS:633700479954944@1528097376059/The-basic-architecture-of-Resnet152.png)

In [None]:
class Block(nn.Module):
    """Bottleneck residual block: 1x1 conv -> 3x3 conv -> 1x1 conv plus a skip connection.

    Args:
        in_channels: channels of the block input.
        out_channels: width of the two inner convolutions; the block outputs
            ``out_channels * expansion`` channels.
        indentity_downsample: optional module applied to the input before it
            is added to the convolution branch (needed when the input shape
            differs from the branch output). The spelling is kept because it
            is part of the public signature.
        stride: stride of the 3x3 convolution.
    """

    def __init__(self, in_channels, out_channels, indentity_downsample=None, stride=1):
        super(Block, self).__init__()
        self.expansion = 4
        self.conv1 = nn.Conv2d(in_channels, out_channels,
                               kernel_size=1, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(out_channels)

        self.conv2 = nn.Conv2d(out_channels, out_channels,
                               kernel_size=3, stride=stride, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(out_channels, out_channels*self.expansion,
                               kernel_size=1, stride=1, padding=0)
        self.bn3 = nn.BatchNorm2d(out_channels*self.expansion)
        self.relu = nn.ReLU()
        self.indentity_downsample = indentity_downsample

    def forward(self, x):
        """Return ``relu(branch(x) + shortcut(x))``."""
        identity = x

        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))
        # No activation after bn3: the ReLU belongs *after* the residual add.
        # The previous extra ReLU here also meant the in-place `x += identity`
        # overwrote a ReLU output that autograd needs for the backward pass.
        x = self.bn3(self.conv3(x))

        if self.indentity_downsample is not None:
            identity = self.indentity_downsample(identity)

        # Out-of-place add keeps every tensor saved for backward intact.
        x = x + identity
        return self.relu(x)

"""
  Here we define the ResNet, which starts with non-residual layers as follows:
  a Conv with a kernel size of 7 x 7 ---> Batch normalization ---> ReLU function
  ---> Max pooling.
  After that come the residual layers: 4 stages whose block is repeated
  (3, 8, 36, 3) times respectively. The block architecture is implemented in the Block class above.
"""

class ResNet(ImageClassifier):
    """ResNet classifier: conv/bn/relu/max-pool stem, four residual stages, pooled linear head.

    Args:
        block: class used to build each residual block.
        layers: number of blocks in each of the four stages.
        image_channels: channels of the input images.
        num_classes: size of the output layer.
    """

    def __init__(self, block, layers, image_channels, num_classes):
        super(ResNet, self).__init__()
        # Running channel count, updated by _make_layer as stages are built.
        self.in_channels = 64

        # Stem
        self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages layer1..layer4: (inner width, stride of the first block)
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for position, (width, stride) in enumerate(stage_specs):
            stage = self._make_layer(
                block, layers[position], out_channels=width, stride=stride)
            setattr(self, 'layer{}'.format(position + 1), stage)

        # Head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512*4, num_classes)

    def forward(self, x):
        """Run the stem, the four stages and the classification head."""
        for stem_op in (self.conv1, self.bn1, self.relu, self.maxpool):
            x = stem_op(x)

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        pooled = self.avgpool(x)
        flat = pooled.reshape(pooled.shape[0], -1)
        return self.fc(flat)

    def _make_layer(self, block, num_residual_blocks, out_channels, stride):
        """Build one stage of ``num_residual_blocks`` blocks and update ``self.in_channels``."""
        expanded = out_channels*4
        shortcut = None

        # The first block needs a projection shortcut whenever it changes the
        # spatial size (stride != 1) or the channel count.
        if stride != 1 or self.in_channels != expanded:
            shortcut = nn.Sequential(
                nn.Conv2d(self.in_channels, expanded, kernel_size=1, stride=stride),
                nn.BatchNorm2d(expanded),
            )

        stage_blocks = [block(self.in_channels, out_channels, shortcut, stride)]
        self.in_channels = expanded

        # The remaining blocks keep shape, so they use the plain identity shortcut.
        remaining = num_residual_blocks-1
        while remaining > 0:
            stage_blocks.append(block(self.in_channels, out_channels))
            remaining -= 1
        return nn.Sequential(*stage_blocks)


"""
"""

def ResNet152(image_channels, num_classes):
    """Build a ``ResNet`` of ``Block`` units with stage depths (3, 8, 36, 3)."""
    stage_depths = [3, 8, 36, 3]
    return ResNet(
        Block,
        stage_depths,
        image_channels=image_channels,
        num_classes=num_classes,
    )

In [None]:
resnet_model = ResNet152(13,10)
resnet_model

## GoogleNet (InceptionNet) Model

<img src="https://www.researchgate.net/profile/Bo-Zhao-67/publication/312515254/figure/fig3/AS:489373281067012@1493687090916/nception-module-of-GoogLeNet-This-figure-is-from-the-original-paper-10.png" width=500>

In [None]:
class GoogleNet(ImageClassifier):
    """GoogLeNet-style classifier: conv stem, nine inception blocks, pooled dropout + linear head.

    Args:
        in_channels: channels of the input images.
        num_classes: size of the output layer.
    """

    def __init__(self, in_channels, num_classes):
        super(GoogleNet, self).__init__()

        def downsample():
            # Every pooling stage uses the same 3x3, stride-2 max-pool.
            return nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Stem
        self.conv1 = ConvBlock(in_channels=in_channels, out_channels=64,
                               kernel_size=7, stride=2, padding=3)
        self.maxpool1 = downsample()
        self.conv2 = ConvBlock(in_channels=64, out_channels=192,
                               kernel_size=3, stride=1, padding=1)
        self.maxpool2 = downsample()

        # InceptionBlock arguments, in order:
        # in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool
        self.inception3a = InceptionBlock(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = InceptionBlock(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = downsample()

        self.inception4a = InceptionBlock(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = InceptionBlock(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = InceptionBlock(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = InceptionBlock(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = InceptionBlock(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = downsample()

        self.inception5a = InceptionBlock(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = InceptionBlock(832, 384, 192, 384, 48, 128, 128)

        # Head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(p=0.4)
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, input):
        """Apply the stages in definition order and return the class scores."""
        feature_stages = (
            self.conv1, self.maxpool1, self.conv2, self.maxpool2,
            self.inception3a, self.inception3b, self.maxpool3,
            self.inception4a, self.inception4b, self.inception4c,
            self.inception4d, self.inception4e, self.maxpool4,
            self.inception5a, self.inception5b,
            self.avgpool,
        )
        features = input
        for stage in feature_stages:
            features = stage(features)

        flat = features.reshape(features.shape[0], -1)
        return self.fc(self.dropout(flat))


class InceptionBlock(nn.Module):
    """Four parallel branches (1x1, 1x1->3x3, 1x1->5x5, pool->1x1) concatenated along dim 1.

    The ``red_*`` arguments are the widths of the 1x1 convolutions that
    precede the 3x3 and 5x5 convolutions.
    """

    def __init__(self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool):
        super(InceptionBlock, self).__init__()

        # Branch 1: a single 1x1 convolution.
        self.branch1 = ConvBlock(in_channels=in_channels, out_channels=out_1x1, kernel_size=1)

        # Branch 2: 1x1 reduction followed by a padded 3x3 convolution.
        reduce_3x3 = ConvBlock(in_channels=in_channels, out_channels=red_3x3, kernel_size=1)
        conv_3x3 = ConvBlock(red_3x3, out_3x3, kernel_size=3, stride=1, padding=1)
        self.branch2 = nn.Sequential(reduce_3x3, conv_3x3)

        # Branch 3: 1x1 reduction followed by a padded 5x5 convolution.
        reduce_5x5 = ConvBlock(in_channels=in_channels, out_channels=red_5x5, kernel_size=1)
        conv_5x5 = ConvBlock(red_5x5, out_5x5, kernel_size=5, padding=2)
        self.branch3 = nn.Sequential(reduce_5x5, conv_5x5)

        # Branch 4: stride-1 max-pool followed by a 1x1 convolution.
        pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        pool_proj = ConvBlock(in_channels=in_channels, out_channels=out_1x1pool, kernel_size=1)
        self.branch4 = nn.Sequential(pool, pool_proj)

    def forward(self, input):
        """Run every branch on the same input and concatenate the results on dim 1."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        branch_outputs = [branch(input) for branch in branches]
        return torch.cat(branch_outputs, 1)


class ConvBlock(nn.Module):
    """Conv2d -> BatchNorm2d -> ReLU; extra keyword arguments go to ``nn.Conv2d``."""

    def __init__(self, in_channels, out_channels, **kwargs):
        super(ConvBlock, self).__init__()
        self.relu = nn.ReLU()
        conv_options = dict(kwargs, in_channels=in_channels, out_channels=out_channels)
        self.conv = nn.Conv2d(**conv_options)
        self.batch_norm = nn.BatchNorm2d(out_channels)

    def forward(self, input):
        """Apply convolution, batch normalisation and ReLU, in that order."""
        normalized = self.batch_norm(self.conv(input))
        return self.relu(normalized)

In [None]:
googleNet_model = GoogleNet(13,10)
googleNet_model

# EfficientNet Model


In [None]:
import torch
import torch.nn as nn
from math import ceil

# Per-stage configuration of the inverted-residual stack; each row is
# unpacked by EfficientNet.create_features in the order given below.
base_model = [
    # expand_ratio, channels, repeats, stride, kernel_size
    [1, 16, 1, 1, 3],
    [6, 24, 2, 2, 3],
    [6, 40, 2, 2, 5],
    [6, 80, 3, 2, 3],
    [6, 112, 3, 1, 5],
    [6, 192, 4, 2, 5],
    [6, 320, 1, 1, 3],
]

# Scaling settings per EfficientNet version, looked up by
# EfficientNet.calculate_factors. Only phi and drop_rate are used there;
# the resolution entry is unpacked but not used anywhere in this file.
phi_values = {
    # tuple of: (phi_value, resolution, drop_rate)
    "b0": (0, 224, 0.2),  # alpha, beta, gamma, depth = alpha ** phi
    "b1": (0.5, 240, 0.2),
    "b2": (1, 260, 0.3),
    "b3": (2, 300, 0.3),
    "b4": (3, 380, 0.4),
    "b5": (4, 456, 0.4),
    "b6": (5, 528, 0.5),
    "b7": (6, 600, 0.5),
}

class CNNBlock(nn.Module):
    """Bias-free Conv2d -> BatchNorm2d -> SiLU.

    ``groups`` is forwarded to the convolution (``groups == in_channels``
    gives a depthwise convolution).

    NOTE(review): ``nn.SiLU`` is, as far as I know, only available from
    PyTorch 1.7, while the setup cell pins torch==1.6.0 — confirm.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, groups=1):
        super(CNNBlock, self).__init__()
        conv_options = dict(
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=False,
        )
        self.cnn = nn.Conv2d(in_channels, out_channels, **conv_options)
        self.bn = nn.BatchNorm2d(out_channels)
        self.silu = nn.SiLU()  # SiLU is also known as Swish

    def forward(self, x):
        """Apply convolution, batch normalisation and SiLU, in that order."""
        normalized = self.bn(self.cnn(x))
        return self.silu(normalized)

class SqueezeExcitation(nn.Module):
    """Channel gating: scale the input by per-channel weights in (0, 1).

    The weights come from global average pooling followed by two 1x1
    convolutions (SiLU between them) and a sigmoid.
    """

    def __init__(self, in_channels, reduced_dim):
        super(SqueezeExcitation, self).__init__()
        gate_layers = [
            nn.AdaptiveAvgPool2d(1),                 # squeeze: pool each channel to 1 x 1
            nn.Conv2d(in_channels, reduced_dim, 1),  # reduce to the bottleneck width
            nn.SiLU(),
            nn.Conv2d(reduced_dim, in_channels, 1),  # restore the channel count
            nn.Sigmoid(),
        ]
        self.se = nn.Sequential(*gate_layers)

    def forward(self, x):
        """Return ``x`` multiplied by its channel gate."""
        gate = self.se(x)
        return x * gate

class InvertedResidualBlock(nn.Module):
    """Inverted residual block: optional expansion -> depthwise conv -> squeeze-excitation -> 1x1 projection.

    Args:
        in_channels: channels of the block input.
        out_channels: channels of the block output.
        kernel_size: kernel size of the depthwise convolution.
        stride: stride of the depthwise convolution.
        padding: padding of the depthwise convolution.
        expand_ratio: the hidden width is ``in_channels * expand_ratio``; the
            expansion convolution is skipped when this leaves the width unchanged.
        reduction: the squeeze-excitation bottleneck is ``in_channels / reduction``.
        survival_prob: probability of keeping the residual branch for a sample
            during training (stochastic depth).
    """

    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            expand_ratio,
            reduction=4, # squeeze excitation
            survival_prob=0.8, # for stochastic depth
    ):
        super(InvertedResidualBlock, self).__init__()
        # Honour the caller's value; it used to be overwritten with a literal 0.8.
        self.survival_prob = survival_prob
        # A skip connection is only possible when the block preserves the shape.
        self.use_residual = in_channels == out_channels and stride == 1
        hidden_dim = in_channels * expand_ratio
        self.expand = in_channels != hidden_dim
        reduced_dim = int(in_channels / reduction)

        if self.expand:
            self.expand_conv = CNNBlock(
                in_channels, hidden_dim, kernel_size=3, stride=1, padding=1,
            )

        self.conv = nn.Sequential(
            # groups=hidden_dim makes this a depthwise convolution
            CNNBlock(
                hidden_dim, hidden_dim, kernel_size, stride, padding, groups=hidden_dim,
            ),
            SqueezeExcitation(hidden_dim, reduced_dim),
            nn.Conv2d(hidden_dim, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels),
        )

    def stochastic_depth(self, x):
        """Randomly zero whole samples of ``x`` while training; identity in eval mode.

        Kept samples are divided by ``survival_prob`` so the expected value is unchanged.
        """
        if not self.training:
            return x

        # One keep/drop decision per sample, broadcast over the other dims.
        binary_tensor = torch.rand(x.shape[0], 1, 1, 1, device=x.device) < self.survival_prob
        return torch.div(x, self.survival_prob) * binary_tensor

    def forward(self, inputs):
        """Apply the block, adding the skip connection when the shape is preserved."""
        x = self.expand_conv(inputs) if self.expand else inputs

        if self.use_residual:
            return self.stochastic_depth(self.conv(x)) + inputs
        return self.conv(x)


class EfficientNet(ImageClassifier):
    """EfficientNet classifier assembled from ``base_model`` and scaled per ``phi_values``.

    Args:
        version: key of ``phi_values`` (e.g. ``"b0"``).
        num_classes: size of the output layer.
        image_channels: channels of the input images. Defaults to 13, the
            value that used to be hard-coded in the first convolution.

    Raises:
        KeyError: if ``version`` is not a key of ``phi_values``.
    """

    def __init__(self, version, num_classes, image_channels=13):
        super(EfficientNet, self).__init__()
        width_factor, depth_factor, dropout_rate = self.calculate_factors(version)
        last_channels = ceil(1280 * width_factor)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.features = self.create_features(
            width_factor, depth_factor, last_channels, image_channels)
        self.classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(last_channels, num_classes),
        )

    def calculate_factors(self, version, alpha=1.2, beta=1.1):
        """Return ``(width_factor, depth_factor, drop_rate)`` for ``version``.

        Depth scales as ``alpha ** phi`` and width as ``beta ** phi``; the
        resolution stored in ``phi_values`` is not used.
        """
        phi, _resolution, drop_rate = phi_values[version]
        depth_factor = alpha ** phi
        width_factor = beta ** phi
        return width_factor, depth_factor, drop_rate

    def create_features(self, width_factor, depth_factor, last_channels, image_channels=13):
        """Build the feature extractor: stem conv, scaled block stages, final 1x1 conv."""
        stem_channels = int(32 * width_factor)
        features = [CNNBlock(image_channels, stem_channels, 3, stride=2, padding=1)]
        in_channels = stem_channels

        for expand_ratio, channels, repeats, stride, kernel_size in base_model:
            # Round the scaled width up to a multiple of 4.
            out_channels = 4*ceil(int(channels*width_factor) / 4)
            layers_repeats = ceil(repeats * depth_factor)

            for layer in range(layers_repeats):
                features.append(
                    InvertedResidualBlock(
                        in_channels,
                        out_channels,
                        expand_ratio=expand_ratio,
                        # only the first block of a stage changes the resolution
                        stride=stride if layer == 0 else 1,
                        kernel_size=kernel_size,
                        padding=kernel_size//2, # if k=1:pad=0, k=3:pad=1, k=5:pad=2
                    )
                )
                in_channels = out_channels

        features.append(
            CNNBlock(in_channels, last_channels, kernel_size=1, stride=1, padding=0)
        )

        return nn.Sequential(*features)

    def forward(self, x):
        """Extract features, pool them globally and classify."""
        x = self.pool(self.features(x))
        return self.classifier(x.view(x.shape[0], -1))


def build_model():
    """Create a 10-class EfficientNet-b0 and move it to CUDA when available, else CPU.

    The unused locals of the previous version were removed; one of them read
    the ``train_data``/``val_data``/``test_data`` globals, which made building
    the model fail whenever the data cells had not been run.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    version = "b0"
    num_classes = 10
    model = EfficientNet(
        version=version,
        num_classes=num_classes,
    ).to(device)

    return model

enet_model=build_model()

# Using GPU for Training
To seamlessly use a **GPU**, if one is available, we define a couple of helper functions (`get_default_device` & `to_device`) and a helper class `DeviceDataLoader` to move our model & data to the **GPU as required**.

In [None]:
def to_device(data, device):
    """Move ``data`` to ``device``; lists and tuples are moved element-wise and returned as lists."""
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)

    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceDataLoader():
    """Wrapper around a data loader that moves every batch to a device while iterating."""

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __len__(self):
        """Number of batches in the wrapped loader."""
        return len(self.dl)

    def __iter__(self):
        """Yield the wrapped loader's batches, each moved to ``self.device``."""
        yield from (to_device(batch, self.device) for batch in self.dl)

In [None]:

device = get_default_device()
device

In [None]:
torch.cuda.empty_cache()

# Accuracy Calculation

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score

In [None]:
def check_accuracy(loader, model):
    """Return a summary string with the model's accuracy over ``loader``.

    Fixes over the previous version: correct predictions are actually
    counted, ``num_samples`` counts samples rather than batches (so the
    accuracy is not skewed by a smaller last batch), and the model's
    train/eval mode is restored afterwards (the old ``model.train()`` sat
    after the ``return`` and never ran).

    Args:
        loader: iterable of ``(inputs, targets)`` batches.
        model: module mapping inputs to per-class scores.

    Returns:
        ``'Got <correct> / <samples> with accuracy <ratio>'``; the ratio is
        0.0 when the loader yields no samples.
    """
    num_correct = 0
    num_samples = 0

    # Move batches to wherever the model lives; skip the move for a
    # parameter-free model.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for inputs, targets in loader:
                if target_device is not None:
                    inputs = inputs.to(device=target_device)
                    targets = targets.to(device=target_device)

                _, predictions = model(inputs).max(1)
                num_correct += (predictions == targets).sum().item()
                num_samples += targets.size(0)
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)

    acc = num_correct / num_samples if num_samples else 0.0
    return f'Got {num_correct} / {num_samples} with accuracy {acc}'

# Training The ResNet model

In [None]:
resnet_model = to_device(resnet_model, device)

In [None]:
import time

@torch.no_grad()
def evaluate(model, val_loaders):
    """Put ``model`` in eval mode and aggregate its validation steps over ``val_loaders``."""
    model.eval()
    step_outputs = []
    for batch in val_loaders:
        step_outputs.append(model.validation_step(batch))
    return model.validation_epoch_end(step_outputs)
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group (None if it has none)."""
    first_group = next(iter(optimizer.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']

def fit_one_cycle(epochs, max_lr, model, train_loader, val_loader, 
                  weight_decay=0, grad_clip=None, opt_func=torch.optim.SGD):
    """Train ``model`` with a one-cycle LR schedule, then checkpoint it as the ResNet model.

    Args:
        epochs: number of passes over ``train_loader``.
        max_lr: peak learning rate of the one-cycle schedule.
        model: module exposing ``training_step`` and ``epoch_end``.
        train_loader: iterable of training batches with a length.
        val_loader: iterable of validation batches.
        weight_decay: weight decay passed to the optimizer.
        grad_clip: if truthy, gradients are clipped to ``[-grad_clip, grad_clip]``.
        opt_func: optimizer constructor.

    Returns:
        A list with one result dict per epoch (validation metrics,
        ``train_loss`` and the learning rates used in that epoch).
    """
    torch.cuda.empty_cache()
    history = []
    
    # Set up custom optimizer with weight decay
    optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay)
    # Set up one-cycle learning rate scheduler
    sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs=epochs, 
                                                steps_per_epoch=len(train_loader))
    
    for epoch in range(epochs):
        # Training Phase 
        model.train()
        train_losses = []
        lrs = []
        for batch in train_loader:
            loss = model.training_step(batch)
            # Keep only the value: storing the attached loss would keep every
            # batch's autograd graph alive until the end of the epoch.
            train_losses.append(loss.detach())
            loss.backward()

            # Gradient clipping
            if grad_clip: 
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)
            optimizer.step()
            optimizer.zero_grad()

            # Record & update learning rate
            lrs.append(get_lr(optimizer))
            sched.step()

        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs
        model.epoch_end(epoch, result)
        history.append(result)

    # Save the model parameters. `lrs` and `train_losses` hold the values of
    # the last epoch only.
    # NOTE(review): the scheduler object itself is stored rather than its
    # state_dict — confirm that is intended.
    torch.save({
            'epochs': epochs,
            'resnet_model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'weight_decay': weight_decay,
            'scheduler': sched,
            'learning_rates': lrs,
            'grad_clip': grad_clip,
            'train_losses': train_losses,
        }, save_path+'saved_models/resnet_model/resnet_model.pth')
    with open(save_path + 'saved_models/resnet_model/resnet_file.pkl', 'wb') as handle:
        pickle.dump(history, handle)
    return history

In [None]:
train_loaders = DeviceDataLoader(train_loaders, device)
val_loaders = DeviceDataLoader(val_loaders, device)

In [None]:
history = [evaluate(resnet_model, val_loaders=val_loaders)]
history

In [None]:
epochs = 20
max_lr = 0.01
grad_clip = 0.1
weight_decay = 1e-4
opt_func = torch.optim.Adam

In [None]:
%%time
history += fit_one_cycle(epochs, max_lr, resnet_model, train_loaders, val_loaders, 
                             grad_clip=grad_clip, 
                             weight_decay=weight_decay, 
                             opt_func=opt_func)

In [None]:
saved_model = torch.load(save_path+'saved_models/resnet_model/resnet_model.pth')

In [None]:
saved_resnet_model = ResNet152(13,10)
saved_resnet_model = to_device(saved_resnet_model, device)

In [None]:
saved_resnet_model.load_state_dict(saved_model['resnet_model'])

In [None]:
check_accuracy(train_loaders,resnet_model)

# Training The GoogleNet model

In [None]:
googleNet_model = to_device(googleNet_model, device)

In [None]:
import time

@torch.no_grad()
def evaluate(model, val_loaders):
    """Switch ``model`` to eval mode, run one validation step per batch and combine the results."""
    model.eval()
    per_batch = list(map(model.validation_step, val_loaders))
    return model.validation_epoch_end(per_batch)
def get_lr(optimizer):
    """Return the ``'lr'`` entry of the first parameter group, or None when there are no groups."""
    groups = iter(optimizer.param_groups)
    head = next(groups, None)
    return None if head is None else head['lr']

def fit_one_cycle(epochs, max_lr, model, train_loader, val_loader, 
                  weight_decay=0, grad_clip=None, opt_func=torch.optim.SGD):
    """Train ``model`` with a one-cycle LR schedule, then checkpoint it as the GoogleNet model.

    Args:
        epochs: number of passes over ``train_loader``.
        max_lr: peak learning rate of the one-cycle schedule.
        model: module exposing ``training_step`` and ``epoch_end``.
        train_loader: iterable of training batches with a length.
        val_loader: iterable of validation batches.
        weight_decay: weight decay passed to the optimizer.
        grad_clip: if truthy, gradients are clipped to ``[-grad_clip, grad_clip]``.
        opt_func: optimizer constructor.

    Returns:
        A list with one result dict per epoch (validation metrics,
        ``train_loss`` and the learning rates used in that epoch).
    """
    torch.cuda.empty_cache()
    history = []
    
    # Set up custom optimizer with weight decay
    optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay)
    # Set up one-cycle learning rate scheduler
    sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs=epochs, 
                                                steps_per_epoch=len(train_loader))
    
    for epoch in range(epochs):
        # Training Phase 
        model.train()
        train_losses = []
        lrs = []
        for batch in train_loader:
            loss = model.training_step(batch)
            # Keep only the value: storing the attached loss would keep every
            # batch's autograd graph alive until the end of the epoch.
            train_losses.append(loss.detach())
            loss.backward()

            # Gradient clipping
            if grad_clip: 
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)
            optimizer.step()
            optimizer.zero_grad()

            # Record & update learning rate
            lrs.append(get_lr(optimizer))
            sched.step()

        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs
        model.epoch_end(epoch, result)
        history.append(result)

    # Save the model parameters. `lrs` and `train_losses` hold the values of
    # the last epoch only.
    # NOTE(review): the scheduler object itself is stored rather than its
    # state_dict — confirm that is intended.
    torch.save({
            'epochs': epochs,
            'googleNet_model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'weight_decay': weight_decay,
            'scheduler': sched,
            'learning_rates': lrs,
            'grad_clip': grad_clip,
            'train_losses': train_losses,
        }, save_path+'saved_models/googleNet_model/googleNet_model.pth')
    with open(save_path + 'saved_models/googleNet_model/googleNet_file.pkl', 'wb') as handle:
        pickle.dump(history, handle)
    return history

In [None]:
train_loaders = DeviceDataLoader(train_loaders, device)
val_loaders = DeviceDataLoader(val_loaders, device)

In [None]:
history = [evaluate(googleNet_model, val_loaders=val_loaders)]
history

In [None]:
epochs = 20
max_lr = 0.01
grad_clip = 0.1
weight_decay = 1e-4
opt_func = torch.optim.Adam

In [None]:
%%time
history += fit_one_cycle(epochs, max_lr, googleNet_model, train_loaders, val_loaders, 
                             grad_clip=grad_clip, 
                             weight_decay=weight_decay, 
                             opt_func=opt_func)

In [None]:
saved_model = torch.load(save_path+'saved_models/googleNet_model/googleNet_model.pth')

In [None]:
saved_googleNet_model = GoogleNet(13,10)
saved_googleNet_model = to_device(saved_googleNet_model, device)

In [None]:
saved_googleNet_model.load_state_dict(saved_model['googleNet_model'])

In [None]:
check_accuracy(train_loaders,googleNet_model)

# Training The EffNet model

In [None]:
enet_model = to_device(enet_model, device)
enet_model

In [None]:
import time

@torch.no_grad()
def evaluate(model, val_loaders):
    """Run validation: eval mode, one ``validation_step`` per batch, then ``validation_epoch_end``."""
    model.eval()
    collected = []
    for batch in val_loaders:
        collected += [model.validation_step(batch)]
    return model.validation_epoch_end(collected)
def get_lr(optimizer):
    """Learning rate of the first parameter group; None when the optimizer has no groups."""
    rates = (group['lr'] for group in optimizer.param_groups)
    return next(rates, None)

def fit_one_cycle(epochs, max_lr, model, train_loader, val_loader, 
                  weight_decay=0, grad_clip=None, opt_func=torch.optim.SGD):
    """Train ``model`` with a one-cycle LR schedule, then checkpoint it as the EfficientNet model.

    Args:
        epochs: number of passes over ``train_loader``.
        max_lr: peak learning rate of the one-cycle schedule.
        model: module exposing ``training_step`` and ``epoch_end``.
        train_loader: iterable of training batches with a length.
        val_loader: iterable of validation batches.
        weight_decay: weight decay passed to the optimizer.
        grad_clip: if truthy, gradients are clipped to ``[-grad_clip, grad_clip]``.
        opt_func: optimizer constructor.

    Returns:
        A list with one result dict per epoch (validation metrics,
        ``train_loss`` and the learning rates used in that epoch).
    """
    torch.cuda.empty_cache()
    history = []
    
    # Set up custom optimizer with weight decay
    optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay)
    # Set up one-cycle learning rate scheduler
    sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs=epochs, 
                                                steps_per_epoch=len(train_loader))
    
    for epoch in range(epochs):
        # Training Phase 
        model.train()
        train_losses = []
        lrs = []
        for batch in train_loader:
            loss = model.training_step(batch)
            # Keep only the value: storing the attached loss would keep every
            # batch's autograd graph alive until the end of the epoch.
            train_losses.append(loss.detach())
            loss.backward()

            # Gradient clipping
            if grad_clip: 
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)
            optimizer.step()
            optimizer.zero_grad()

            # Record & update learning rate
            lrs.append(get_lr(optimizer))
            sched.step()

        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs
        model.epoch_end(epoch, result)
        history.append(result)

    # Save the model parameters. The weights are stored under 'effNet_model',
    # the key the notebook reads back after torch.load (it used to be
    # 'googleNet_model', which made that load cell raise KeyError).
    # `lrs` and `train_losses` hold the values of the last epoch only.
    # NOTE(review): the scheduler object itself is stored rather than its
    # state_dict — confirm that is intended.
    torch.save({
            'epochs': epochs,
            'effNet_model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'weight_decay': weight_decay,
            'scheduler': sched,
            'learning_rates': lrs,
            'grad_clip': grad_clip,
            'train_losses': train_losses,
        }, save_path+'saved_models/effNet_model/effNet_model.pth')
    with open(save_path + 'saved_models/effNet_model/effNet_file.pkl', 'wb') as handle:
        pickle.dump(history, handle)
    return history

In [None]:
train_loaders = DeviceDataLoader(train_loaders, device)
val_loaders = DeviceDataLoader(val_loaders, device)

In [None]:
history = [evaluate(enet_model, val_loaders=val_loaders)]
history

In [None]:
epochs = 20
max_lr = 0.01
grad_clip = 0.1
weight_decay = 1e-4
opt_func = torch.optim.Adam

In [None]:
%%time
history += fit_one_cycle(epochs, max_lr, enet_model, train_loaders, val_loaders, 
                             grad_clip=grad_clip, 
                             weight_decay=weight_decay, 
                             opt_func=opt_func)

In [None]:
saved_model = torch.load(save_path+'saved_models/effNet_model/effNet_model.pth')

In [None]:
saved_effNet_model = EfficientNet('b0',10)
saved_effNet_model = to_device(saved_effNet_model, device)

In [None]:
saved_effNet_model.load_state_dict(saved_model['effNet_model'])

In [None]:
check_accuracy(train_loaders,saved_effNet_model)

# Other Metrics

In [None]:
from sklearn import metrics

In [None]:
def check_metric(loader, model,metric,average,score):
    """Return a summary string with ``metric`` computed over every sample in ``loader``.

    Fixes over the previous version: the metric is computed once over all
    targets and predictions instead of averaging per-batch values (averaged
    metrics such as macro precision/recall are distorted when a batch lacks
    some classes), and the model's train/eval mode is restored afterwards
    (the old ``model.train()`` sat after the ``return`` and never ran).

    Args:
        loader: iterable of ``(inputs, targets)`` batches.
        model: module mapping inputs to per-class scores.
        metric: callable ``metric(targets, predictions, average=...)``.
        average: forwarded to ``metric`` as the ``average`` keyword.
        score: display name of the metric used in the returned string.

    Returns:
        ``'Got <score> Score <value>'``.

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    # Move batches to wherever the model lives; skip the move for a
    # parameter-free model.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    all_targets = []
    all_predictions = []

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for inputs, targets in loader:
                if target_device is not None:
                    inputs = inputs.to(device=target_device)
                    targets = targets.to(device=target_device)
                _, predictions = model(inputs).max(1)
                all_targets.append(targets.cpu())
                all_predictions.append(predictions.cpu())
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)

    if not all_targets:
        raise ValueError('loader yielded no batches')

    value = metric(torch.cat(all_targets), torch.cat(all_predictions), average=average)
    return f'Got {score} Score {value}'

## Precision, Recall and F1 Score For GoogleNet

In [None]:
check_metric(test_loaders,googleNet_model,metrics.recall_score,'macro','Recall')

In [None]:
check_metric(test_loaders,googleNet_model,metrics.precision_score,'macro','Precision')

In [None]:
check_metric(test_loaders,googleNet_model,metrics.f1_score,'macro','F1')

# Precision, Recall and F1 score for ResNet152

In [None]:
check_metric(test_loaders,saved_resnet_model,metrics.recall_score,'macro','Recall')

In [None]:
check_metric(test_loaders,saved_resnet_model,metrics.precision_score,'macro','Precision')

In [None]:
check_metric(test_loaders,saved_resnet_model,metrics.f1_score,'macro','F1')