![Fixel Algorithms](https://fixelalgorithms.co/images/CCExt.png)

# <center> Deep Learning Methods </center>
## <center> Exercise 3: Transfer Learning </center>
### <center> Dogs VS. Cats </center>

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/FixelAlgorithmsTeam/FixelCourses/blob/master/DeepLearningMethods/Exercises/Ex3/MainDogsVsCats.ipynb)

In [135]:
import numpy as np
import time
import torch

import torchvision
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
import matplotlib

import torch
import torch.nn               as nn
import torch.optim            as optim

import os
from   google.colab import files

import torch.optim.lr_scheduler as lr_scheduler
from torch.optim.lr_scheduler import OneCycleLR

In [136]:
#-- Classification accuracy:
def Accuracy(mScore, vY):
    vHatY = mScore.detach().argmax(dim=1)
    return (vHatY == vY).float().mean().item()

#--------------------------------------------------------------------------------#
#--------------------------------------------------------------------------------#
from sklearn.metrics import r2_score

def R2Score(vHatY, vY):
    vY    = vY   .detach().cpu().view(-1)
    vHatY = vHatY.detach().cpu().view(-1)
    return r2_score(vY, vHatY)


#--------------------------------------------------------------------------------#
#--------------------------------------------------------------------------------#
def Epoch(oModel, oDataDL, Loss, Metric, oOptim=None, oScheduler=None, bTrain=True):

    epochLoss   = 0
    epochMetric = 0
    count       = 0
    nIter       = len(oDataDL)
    vLR         = np.full(nIter, np.nan)
    DEVICE      = next(oModel.parameters()).device #-- CPU\GPU


    oModel.train(bTrain) #-- train or test

    #-- Iterate over the mini-batches:
    for ii, (mX, vY) in enumerate(oDataDL):
        #-- Move to device (CPU\GPU):
        mX = mX.to(DEVICE)
        vY = vY.to(DEVICE)

        #-- Forward:
        if bTrain == True:
            #-- Store computational graph:
            mZ   = oModel(mX)
            loss = Loss(mZ, vY)
        else:
            with torch.no_grad():
                #-- Do not store computational graph:
                mZ   = oModel(mX)
                loss = Loss(mZ, vY)

        #-- Backward:
        if bTrain == True:
            oOptim.zero_grad() #-- set gradients to zeros
            loss.backward()    #-- backward
            oOptim.step()      #-- update parameters
            if oScheduler is not None:
                vLR[ii] = oScheduler.get_last_lr()[0]
                oScheduler.step() #-- update learning rate

        Nb           = vY.shape[0]
        count       += Nb
        epochLoss   += Nb * loss.item()
        epochMetric += Nb * Metric(mZ, vY)
        print(f'\r{"Train" if bTrain else "Val"} - Iteration: {ii:3d} ({nIter}): loss = {loss:2.6f}', end='')

    print('', end='\r')
    epochLoss   /= count
    epochMetric /= count

    return epochLoss, epochMetric, vLR

#--------------------------------------------------------------------------------#
#--------------------------------------------------------------------------------#
def TrainClassficationModel(oModel, oTrainData, oValData, Loss, nEpochs, oOptim, oScheduler=None):

    vTrainLoss = np.full(nEpochs, np.nan)
    vTrainAcc  = np.full(nEpochs, np.nan)
    vValLoss   = np.full(nEpochs, np.nan)
    vValAcc    = np.full(nEpochs, np.nan)
    vLR        = np.full(0,       np.nan)
    bestAcc    = 0

    for epoch in range(nEpochs):
        startTime                 = time.time()
        trainLoss, trainAcc, vLRi = Epoch(oModel, oTrainData, Loss, Accuracy, oOptim, oScheduler, bTrain=True ) #-- train
        valLoss,   valAcc,   _    = Epoch(oModel, oValData,   Loss, Accuracy,                     bTrain=False) #-- validate
        epochTime                 = time.time() - startTime

        #-- Display:
        print('Epoch '              f'{epoch    :03d}:',   end='')
        print(' | Train loss: '     f'{trainLoss:6.3f}',   end='')
        print(' | Val loss: '       f'{valLoss  :6.3f}',   end='')
        print(' | Train Accuracy: ' f'{trainAcc :6.3f}',   end='')
        print(' | Val Accuracy: '   f'{valAcc   :6.3f}',   end='')
        print(' | epoch time: '     f'{epochTime:6.3f} |', end='')

        vTrainLoss[epoch] = trainLoss
        vTrainAcc [epoch] = trainAcc
        vValLoss  [epoch] = valLoss
        vValAcc   [epoch] = valAcc
        vLR               = np.concatenate([vLR, vLRi])

        #-- Save best model (early stopping):
        if bestAcc < valAcc:
            bestAcc = valAcc
            try:
                torch.save(oModel.state_dict(), 'BestModel.pt')
            except:
                pass
            print(' <-- Checkpoint!')
        else:
            print('')

    #-- Load best model (early stopping):
    oModel.load_state_dict(torch.load('BestModel.pt'))

    return vTrainLoss, vTrainAcc, vValLoss, vValAcc, vLR

#--------------------------------------------------------------------------------#
#--------------------------------------------------------------------------------#
def TrainRegressionModel(oModel, oTrainData, oValData, Loss, nEpochs, oOptim, oScheduler=None):

    vTrainLoss = np.full(nEpochs, np.nan)
    vTrainR2   = np.full(nEpochs, np.nan)
    vValLoss   = np.full(nEpochs, np.nan)
    vValR2     = np.full(nEpochs, np.nan)
    vLR        = np.full(0,       np.nan)
    bestR2     = -np.inf

    for epoch in range(nEpochs):
        startTime                = time.time()
        trainLoss, trainR2, vLRi = Epoch(oModel, oTrainData, Loss, R2Score, oOptim, oScheduler, bTrain=True ) #-- train
        valLoss,   valR2,   _    = Epoch(oModel, oValData,   Loss, R2Score,                     bTrain=False) #-- validate
        epochTime                = time.time() - startTime

        #-- Display:
        print('Epoch '          f'{epoch    :03d}:',   end='')
        print(' | Train loss: ' f'{trainLoss:.5f}' ,   end='')
        print(' | Val loss: '   f'{valLoss  :.5f}' ,   end='')
        print(' | Train R2: '   f'{trainR2 :+2.4f}',   end='')
        print(' | Val R2: '     f'{valR2   :+2.4f}',   end='')
        print(' | epoch time: ' f'{epochTime:6.3f} |', end='')

        vTrainLoss[epoch] = trainLoss
        vTrainR2  [epoch] = trainR2
        vValLoss  [epoch] = valLoss
        vValR2    [epoch] = valR2
        vLR               = np.concatenate([vLR, vLRi])

        #-- Save best model (early stopping):
        if bestR2 < valR2:
            bestR2 = valR2
            try:
                torch.save(oModel.state_dict(), 'BestModel.pt')
            except:
                pass
            print(' <-- Checkpoint!')
        else:
            print('')

    #-- Load best model (early stopping):
    oModel.load_state_dict(torch.load('BestModel.pt'))

    return vTrainLoss, vTrainR2, vValLoss, vValR2, vLR
#--------------------------------------------------------------------------------#
#--------------------------------------------------------------------------------#

### Dogs vs. Cats:
* Download the data from:  
https://www.kaggle.com/shaunthesheep/microsoft-catsvsdogs-dataset
* Remove `Cat\666.jpg` and `Dog\11702.jpg` (bad files)
* Split your data into $22,500$ training images and $2,498$ validation images.
* Obtain at least $98\%$ classification accuracy.  
Try:
    * Your own network (e.g. a ResNet based architecture).
    * A pretrained network (using transfer learning).
    

**Tips:**  
* Load the data:
    * Dataset -- `torchvision.datasets.ImageFolder` and `torchvision.transforms`
    * Split -- `torch.utils.data.random_split`
    * Data loader -- `torch.utils.data.DataLoader`
* Model -- `torch.nn` or `torchvision.models`
* Set:
    * Device
    * Loss function (binary classification)
    * Optimizer
    * Scheduler (optional)
* Train you model!

In [137]:
! pip install -q kaggle

In [138]:
files.upload()

Saving kaggle.json to kaggle (1).json


{'kaggle.json': b'{"username":"avdaniel","key":"db7784e5cf498fca677dd8a59d309110"}'}

In [139]:
! mkdir ~/.kaggle

mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [140]:
! cp kaggle.json ~/.kaggle/

In [141]:
 ! chmod 600 ~/.kaggle/kaggle.json

In [142]:
! kaggle datasets download -d shaunthesheep/microsoft-catsvsdogs-dataset

microsoft-catsvsdogs-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)


In [143]:
! mkdir PetImages

mkdir: cannot create directory ‘PetImages’: File exists


In [145]:
! rm PetImages/PetImages/Dog/11702.jpg

rm: cannot remove 'PetImages/PetImages/Dog/11702.jpg': No such file or directory


In [146]:
! rm PetImages/PetImages/Cat/666.jpg

rm: cannot remove 'PetImages/PetImages/Cat/666.jpg': No such file or directory


In [147]:
count = 0

dir_path = 'PetImages/PetImages/Cat'
for path in os.listdir(dir_path):
    # check if current path is a file
    if os.path.isfile(os.path.join(dir_path, path)):
        count += 1
print('File count:', count)

count = 0

dir_path = 'PetImages/PetImages/Dog'
for path in os.listdir(dir_path):
    # check if current path is a file
    if os.path.isfile(os.path.join(dir_path, path)):
        count += 1
print('File count:', count)

File count: 12500
File count: 12500


In [148]:

oTransforms = transforms.Compose([
    transforms.Resize    (256),
    transforms.CenterCrop(224),
    transforms.ToTensor  ()
])

DATA_PATH = 'PetImages/PetImages'
DataSet  = torchvision.datasets.ImageFolder(root=DATA_PATH, transform=oTransforms)
Classes  = DataSet.classes
Classes

['Cat', 'Dog']

In [149]:
N                   = len(DataSet)
Train               = 22500
Test                = 2498
TrainSet, TestSet   = torch.utils.data.random_split(DataSet, [Train, Test], generator=torch.Generator().manual_seed(42))

len(TrainSet), len(TestSet)

(22500, 2498)

In [150]:
batchSize = 256
oTrainDL  = torch.utils.data.DataLoader(TrainSet, shuffle=True,  batch_size=1*batchSize, num_workers=0, persistent_workers=False)
oTestDL   = torch.utils.data.DataLoader(TestSet,  shuffle=False, batch_size=2*batchSize, num_workers=0, persistent_workers=False)

In [151]:
def GetModel():
    Model = torchvision.models.resnet50(weights=True)
    for Param in Model.parameters():
        Param.requires_grad = False
        
    In = Model.fc.in_features
    Model.fc = nn.Sequential(
        nn.Linear(In, 128), nn.ReLU(),
        nn.Linear(128, 2)
    )
    
    return Model

In [152]:
Loss   = nn.CrossEntropyLoss()
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 
DEVICE

device(type='cuda', index=0)

In [153]:
NumOfEpochs    = 20
NumOfIter           = NumOfEpochs * len(oTrainDL)

Model     = GetModel().to(DEVICE)
Optim     = optim.AdamW(Model.parameters(), lr=0, betas=(0.9, 0.99), weight_decay=2e-4)
Scheduler = OneCycleLR (Optim, max_lr=1e-3, total_steps=NumOfIter)

History   = TrainClassficationModel(Model, oTrainDL, oTestDL, Loss, NumOfEpochs, Optim, Scheduler)

Epoch 000: | Train loss:  0.342 | Val loss:  0.119 | Train Accuracy:  0.932 | Val Accuracy:  0.974 | epoch time: 204.696 | <-- Checkpoint!
Epoch 001: | Train loss:  0.080 | Val loss:  0.065 | Train Accuracy:  0.977 | Val Accuracy:  0.977 | epoch time: 203.908 | <-- Checkpoint!
Epoch 002: | Train loss:  0.051 | Val loss:  0.058 | Train Accuracy:  0.981 | Val Accuracy:  0.979 | epoch time: 202.841 | <-- Checkpoint!
Epoch 003: | Train loss:  0.050 | Val loss:  0.051 | Train Accuracy:  0.982 | Val Accuracy:  0.982 | epoch time: 203.464 | <-- Checkpoint!
Epoch 004: | Train loss:  0.042 | Val loss:  0.047 | Train Accuracy:  0.985 | Val Accuracy:  0.982 | epoch time: 203.657 |
Epoch 005: | Train loss:  0.041 | Val loss:  0.053 | Train Accuracy:  0.985 | Val Accuracy:  0.981 | epoch time: 203.856 |
Epoch 006: | Train loss:  0.049 | Val loss:  0.044 | Train Accuracy:  0.981 | Val Accuracy:  0.985 | epoch time: 203.110 | <-- Checkpoint!
Epoch 007: | Train loss:  0.042 | Val loss:  0.050 | Train 