<a href="https://colab.research.google.com/github/eteral00/UNT_CSCE5218_Project_DogsBreeds/blob/main/Inception_Resnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Mounting GoogleDrive**
You will need to add a shortcut to this Google shared folder to your own Google Drive to be able to access the dataset, the saved models, and the saved output statistics: \\
https://drive.google.com/drive/folders/1IypaLVub6Mk9A0AO_hN3wv5yEY04JNin

You will also need to open each of the shared folders listed below so that your Drive recognizes that you have access to them (they are all shared as "has-link-can-view", but your Google Drive still needs to register that you have the access): \\
https://drive.google.com/drive/folders/18QH_EOzL4halbKdHtDBil-De9Ia_Ys1d

https://drive.google.com/drive/folders/1Q2AhR6p28aD6J57ICYipuwvPgCgLoj1t

https://drive.google.com/drive/folders/1O75reXnX9LOT6LusUM1KUN6X8DlDor6S

In [1]:
# Mount Google Drive at /content/drive; the dataset archives read by the
# unzip cells below live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **Handling data**

## Dataset: Tsinghua Dogs:
Main link: https://cg.cs.tsinghua.edu.cn/ThuDogs/ \\
Lower resolution version (~2.5GBs): https://cloud.tsinghua.edu.cn/f/80013ef29c5f42728fc8/

## Decompress

In [2]:
# Extract the low-resolution Tsinghua Dogs archive from Drive into /content (creates /content/low-resolution/<class>/...).
!unzip "/content/drive/MyDrive/CSCE5218/Dog_Breed_classification/Dog_Breed_dataset/Tshihua/low-resolution.zip" -d "/content/"

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/low-resolution/7449-n000128-teddy/n165759.jpeg  
  inflating: /content/low-resolution/7449-n000128-teddy/n165760.jpeg  
  inflating: /content/low-resolution/7449-n000128-teddy/n165761.jpeg  
  inflating: /content/low-resolution/7449-n000128-teddy/n165762.jpeg  
  inflating: /content/low-resolution/7449-n000128-teddy/n165763.jpeg  
  inflating: /content/low-resolution/7449-n000128-teddy/n165764.jpeg  
  inflating: /content/low-resolution/7449-n000128-teddy/n165765.jpeg  
  inflating: /content/low-resolution/7449-n000128-teddy/n165766.jpeg  
  inflating: /content/low-resolution/7449-n000128-teddy/n165767.jpeg  
  inflating: /content/low-resolution/7449-n000128-teddy/n165768.jpeg  
  inflating: /content/low-resolution/7449-n000128-teddy/n165769.jpeg  
  inflating: /content/low-resolution/7449-n000128-teddy/n165770.jpeg  
  inflating: /content/low-resolution/7449-n000128-teddy/n165771.jpeg  
  inflating:

In [3]:
# Extract the train/validation list files (train.lst, validation.lst) into /content/TrainAndValList.
!unzip "/content/drive/MyDrive/CSCE5218/Dog_Breed_classification/Dog_Breed_dataset/Tshihua/TrainValSplit.zip" -d "/content/"

Archive:  /content/drive/MyDrive/CSCE5218/Dog_Breed_classification/Dog_Breed_dataset/Tshihua/TrainValSplit.zip
   creating: /content/TrainAndValList/
  inflating: /content/TrainAndValList/train.lst  
  inflating: /content/TrainAndValList/validation.lst  


## Split dataset to Train/Test sets

In [4]:
import argparse
import os
import shutil
import logging
import sys
import glob
from typing import Dict, Any, Set

def split_train_test(images_dir: str, output_dir: str, lst_path: str):
    """Copy the images named in a list file into per-class folders under ``output_dir``.

    ``images_dir`` is expected to contain one sub-directory per class, each
    holding image files. Every non-blank line of ``lst_path`` names one image;
    only the basename of the line is used to find it. Each listed image is
    copied to ``output_dir/<class directory name>/``.

    Args:
        images_dir: Root directory with one sub-directory per class.
        output_dir: Destination root; class sub-directories are created on demand.
        lst_path: Text file with one image path (or file name) per line.

    Raises:
        FileNotFoundError: If any listed image is absent from every class
            directory. The check runs before anything is copied.
    """
    with open(lst_path, "r") as f:
        # Blank lines are skipped: an empty basename would otherwise match the
        # class directories themselves instead of an image file.
        wanted: Set[str] = {os.path.basename(line.strip()) for line in f if line.strip()}

    # Index the tree once (file name -> full path) instead of running one
    # filesystem glob per listed image.
    path_by_name: Dict[str, str] = {}
    for class_dir in sorted(glob.glob(os.path.join(images_dir, "*"))):
        if not os.path.isdir(class_dir):
            continue
        for file_name in os.listdir(class_dir):
            # Keep the first occurrence if the same file name exists in several classes.
            path_by_name.setdefault(file_name, os.path.join(class_dir, file_name))

    missing = sorted(name for name in wanted if name not in path_by_name)
    if missing:
        raise FileNotFoundError(
            f"{len(missing)} image(s) listed in {lst_path} were not found under "
            f"{images_dir}, e.g. {missing[:5]}"
        )

    for image_name in wanted:
        image_path: str = path_by_name[image_name]
        class_name: str = os.path.basename(os.path.dirname(image_path))

        dest_dir: str = os.path.join(output_dir, class_name)
        os.makedirs(dest_dir, exist_ok=True)

        logging.info(f"Copying {image_path} to {dest_dir}")
        shutil.copy(image_path, dest_dir)

In [5]:
# Project folder on the mounted Google Drive. In the visible part of this
# notebook it is only referenced by the commented-out "data on Drive" alternatives.
TSINGHUA_DOGS_ROOT_DIR = "/content/drive/MyDrive/CSCE5218/Dog_Breed_classification"
# Root of the temporary Colab runtime storage where the archives were unzipped.
content_root = "/content"
# images_dir: str = os.path.join(TSINGHUA_DOGS_ROOT_DIR, "low-resolution")
# Extracted images, one sub-directory per class.
images_dir: str = os.path.join(content_root, "low-resolution")
# List files naming the images that belong to the train split and to the
# validation split (the latter is used as the test set in this notebook).
train_lst_path: str = os.path.join(content_root, "TrainAndValList", "train.lst")
test_lst_path: str = os.path.join(content_root, "TrainAndValList", "validation.lst")

In [None]:
## Only run this once if putting data in shared folder on ggdrive

# #Prepare train set
# train_dir: str = os.path.join(TSINGHUA_DOGS_ROOT_DIR, "train")
# os.makedirs(train_dir, exist_ok=True)
# split_train_test(images_dir, output_dir=train_dir, lst_path=train_lst_path)

# #Prepare validation set
# test_dir: str = os.path.join(TSINGHUA_DOGS_ROOT_DIR, "test")
# os.makedirs(test_dir, exist_ok=True)
# split_train_test(images_dir, output_dir=test_dir, lst_path=test_lst_path)


In [6]:
##If putting data in temporary runtime directory, need to rerun this every time

##Prepare train set
# Copies every image named in train.lst into /content/train/<class>/.
train_dir: str = os.path.join(content_root, "train")
os.makedirs(train_dir, exist_ok=True)
split_train_test(images_dir, output_dir=train_dir, lst_path=train_lst_path)

##Prepare test set
# Copies every image named in validation.lst into /content/test/<class>/.
test_dir: str = os.path.join(content_root, "test")
os.makedirs(test_dir, exist_ok=True)
split_train_test(images_dir, output_dir=test_dir, lst_path=test_lst_path)

# **Defining Neural Network**

## Inception-Resnet (Inception Net with Residual connections)

### An Inception-Resnet implementation without a pretrained model, following as closely as possible the Inception-Resnet-v2 design in the original paper that introduced it -- "Inception-v4, Inception-ResNet and the impact of residual connections on learning," by C. Szegedy and his team at Google, 2016.


In [7]:
from __future__ import print_function, division, absolute_import
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import os
import sys



class BasicConv2d(nn.Module):
    """Conv2d -> BatchNorm2d -> ReLU unit used as the basic layer of every block below."""

    def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
        super().__init__()
        # The convolution has no bias term (original author's note: verify bias=false).
        self.conv = nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
        )
        ## 'eps' uses the value found in tensorflow, which is used for the
        ## implementation of IRN in the original paper;
        ## 'momentum' uses the default pytorch value.
        self.bn = nn.BatchNorm2d(out_planes, eps=0.001, momentum=0.1, affine=True)
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        """Apply convolution, batch normalization and ReLU, in that order."""
        return self.relu(self.bn(self.conv(x)))



class BlockStem(nn.Module):
    """Stem of the Inception-Resnet v2: 3 input channels -> 384 output channels.

    There are 2 versions of the stem. The 1st one, as in the original paper,
    uses the Stem of Inception v4; the 2nd one uses the Stem of
    Inception-Resnet v1 and adds an extra Stem-to-A block. This class has no
    Stem-to-A block, so it appears to be the 1st version.
    """

    def __init__(self):
        super().__init__()
        # Plain convolutional prefix: 3 -> 32 -> 32 -> 64 channels.
        self.conv_1 = BasicConv2d(3, 32, kernel_size=3, stride=2)
        self.conv_2 = BasicConv2d(32, 32, kernel_size=3, stride=1)
        self.conv_3 = BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1)

        # Stage 4: two parallel stride-2 paths (pooling and convolution).
        self.maxpool_4 = nn.MaxPool2d(3, stride=2)
        self.conv_4 = BasicConv2d(64, 96, kernel_size=3, stride=2)

        # Stage 5: two parallel branches over the 160-channel concatenation.
        self.branch_5_0 = nn.Sequential(
            BasicConv2d(160, 64, kernel_size=1, stride=1),
            BasicConv2d(64, 96, kernel_size=3, stride=1),
        )
        self.branch_5_1 = nn.Sequential(
            BasicConv2d(160, 64, kernel_size=1, stride=1),
            BasicConv2d(64, 64, kernel_size=(1, 7), stride=1, padding=(0, 3)),
            BasicConv2d(64, 64, kernel_size=(7, 1), stride=1, padding=(3, 0)),
            BasicConv2d(64, 96, kernel_size=(3, 3), stride=1),
        )

        # Stage 6: two parallel stride-2 paths (convolution and pooling).
        self.conv_6 = BasicConv2d(192, 192, kernel_size=3, stride=2)
        self.maxpool_6 = nn.MaxPool2d(3, stride=2)

    def forward(self, x):
        """Run the stem; channel counts are given in the trailing comments."""
        x = self.conv_3(self.conv_2(self.conv_1(x)))  # channels: 3 -> 32 -> 32 -> 64

        stage_4 = torch.cat((self.maxpool_4(x), self.conv_4(x)), 1)  # 64 + 96 = 160

        stage_5 = torch.cat(
            (self.branch_5_0(stage_4), self.branch_5_1(stage_4)), 1
        )  # 96 + 96 = 192

        stage_6 = torch.cat(
            (self.conv_6(stage_5), self.maxpool_6(stage_5)), 1
        )  # 192 + 192 = 384

        return stage_6



class BlockSToA(nn.Module):
    """Block between the Stem and Inception-Resnet-A: 192 -> 384 channels.

    Used with the Stem of IRNv1 to build the 2nd version of the Stem for IRNv2.
    """

    def __init__(self):
        super().__init__()

        # Branch 0: single 1x1 projection.
        self.branch0 = BasicConv2d(192, 96, kernel_size=1, stride=1)

        # Branch 1: 1x1 reduction followed by a padded 5x5.
        self.branch1 = nn.Sequential(
            BasicConv2d(192, 48, kernel_size=1, stride=1),
            BasicConv2d(48, 96, kernel_size=5, stride=1, padding=2),
        )

        # Branch 2: 1x1 reduction followed by two padded 3x3 convolutions.
        self.branch2 = nn.Sequential(
            BasicConv2d(192, 64, kernel_size=1, stride=1),
            BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1),
            BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1),
        )

        # Branch 3: stride-1 average pooling followed by a 1x1 projection.
        self.branch3 = nn.Sequential(
            nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
            BasicConv2d(192, 96, kernel_size=1, stride=1),
        )

    def forward(self, x):
        """Concatenate the four 96-channel branch outputs along the channel axis."""
        branches = (self.branch0, self.branch1, self.branch2, self.branch3)
        # Each branch maps 192 -> 96 channels, so the result has 4 * 96 = 384.
        return torch.cat([branch(x) for branch in branches], 1)



class BlockStemV2(nn.Module):
    """Alternative Stem for the Inception-Resnet v2: 3 -> 384 channels.

    There are 2 versions of the stem. The 1st one, as in the original paper,
    uses the Stem of Inception v4. This class is the 2nd one: the Stem of
    Inception-Resnet v1 followed by an extra Stem-to-A block (``BlockSToA``).
    """

    def __init__(self):
        super().__init__()
        self.conv_1 = BasicConv2d(3, 32, kernel_size=3, stride=2)
        self.conv_2 = BasicConv2d(32, 32, kernel_size=3, stride=1)
        self.conv_3 = BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.maxpool_4 = nn.MaxPool2d(3, stride=2)
        self.conv_5 = BasicConv2d(64, 80, kernel_size=1, stride=1)
        self.conv_6 = BasicConv2d(80, 192, kernel_size=3, stride=1)
        self.maxpool_7 = nn.MaxPool2d(3, stride=2)
        self.block_sta = BlockSToA()

    def forward(self, x):
        """Apply the layers strictly in sequence."""
        # Channel progression: 3 -> 32 -> 32 -> 64 -> 64 -> 80 -> 192 -> 192 -> 384.
        pipeline = (
            self.conv_1,
            self.conv_2,
            self.conv_3,
            self.maxpool_4,
            self.conv_5,
            self.conv_6,
            self.maxpool_7,
            self.block_sta,
        )
        for layer in pipeline:
            x = layer(x)

        return x



class BlockA(nn.Module):
    """Inception-Resnet-A residual block (35x35 grid): 384 -> 384 channels.

    Args:
        scale: Factor applied to the block's residual branch before it is
            added back to the input.
    """

    def __init__(self, scale=1.0):
        super().__init__()

        self.scale = scale

        # Branch 0: 1x1.
        self.branch0 = BasicConv2d(384, 32, kernel_size=1, stride=1)

        # Branch 1: 1x1 then padded 3x3.
        self.branch1 = nn.Sequential(
            BasicConv2d(384, 32, kernel_size=1, stride=1),
            BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1),
        )

        # Branch 2: 1x1 then two padded 3x3 convolutions.
        self.branch2 = nn.Sequential(
            BasicConv2d(384, 32, kernel_size=1, stride=1),
            BasicConv2d(32, 48, kernel_size=3, stride=1, padding=1),
            BasicConv2d(48, 64, kernel_size=3, stride=1, padding=1),
        )

        # Plain 1x1 convolution (no batch norm / ReLU) restoring 384 channels.
        self.conv2d = nn.Conv2d(128, 384, kernel_size=1, stride=1)
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        """Return ``relu(x + scale * conv(concat(branches(x))))``."""
        branches = (self.branch0, self.branch1, self.branch2)
        mixed = torch.cat([branch(x) for branch in branches], 1)  # 32 + 32 + 64 = 128
        residual = self.conv2d(mixed) * self.scale  # 128 -> 384, scaled

        return self.relu(residual + x)  # 384



class BlockRA(nn.Module):
    """Reduction-A (35x35 -> 17x17): 384 -> 1152 channels via three stride-2 branches."""

    def __init__(self):
        super().__init__()

        # Branch 0: max pooling keeps the 384 input channels.
        self.branch0 = nn.MaxPool2d(3, stride=2)

        # Branch 1: a single stride-2 3x3 convolution.
        self.branch1 = BasicConv2d(384, 384, kernel_size=3, stride=2)

        # Branch 2: 1x1, padded 3x3, then a stride-2 3x3.
        self.branch2 = nn.Sequential(
            BasicConv2d(384, 256, kernel_size=1, stride=1),
            BasicConv2d(256, 256, kernel_size=3, stride=1, padding=1),
            BasicConv2d(256, 384, kernel_size=3, stride=2),
        )

    def forward(self, x):
        """Concatenate the three branch outputs along the channel axis."""
        branches = (self.branch0, self.branch1, self.branch2)
        # 384 + 384 + 384 = 1152 channels.
        return torch.cat([branch(x) for branch in branches], 1)



class BlockB(nn.Module):
    """Inception-Resnet-B residual block (17x17 grid): 1152 -> 1152 channels.

    Args:
        scale: Factor applied to the block's residual branch before it is
            added back to the input.
    """

    def __init__(self, scale=1.0):
        super().__init__()

        self.scale = scale

        # Branch 0: 1x1.
        self.branch0 = BasicConv2d(1152, 192, kernel_size=1, stride=1)

        # Branch 1: 1x1 followed by a factorized 7x7 (1x7 then 7x1).
        self.branch1 = nn.Sequential(
            BasicConv2d(1152, 128, kernel_size=1, stride=1),
            BasicConv2d(128, 160, kernel_size=(1, 7), stride=1, padding=(0, 3)),
            BasicConv2d(160, 192, kernel_size=(7, 1), stride=1, padding=(3, 0)),
        )

        # Plain 1x1 convolution (no batch norm / ReLU) restoring 1152 channels.
        self.conv2d = nn.Conv2d(384, 1152, kernel_size=1, stride=1)
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        """Return ``relu(x + scale * conv(concat(branches(x))))``."""
        mixed = torch.cat((self.branch0(x), self.branch1(x)), 1)  # 192 + 192 = 384
        residual = self.conv2d(mixed) * self.scale  # 384 -> 1152, scaled

        return self.relu(residual + x)  # 1152



class BlockRB(nn.Module):
    """Reduction-B (17x17 -> 8x8): 1152 -> 2144 channels via four stride-2 branches."""

    def __init__(self):
        super().__init__()

        # Branch 0: max pooling keeps the 1152 input channels.
        self.branch0 = nn.MaxPool2d(3, stride=2)

        # Branch 1: 1x1 then a stride-2 3x3 (-> 384).
        self.branch1 = nn.Sequential(
            BasicConv2d(1152, 256, kernel_size=1, stride=1),
            BasicConv2d(256, 384, kernel_size=3, stride=2),
        )

        # Branch 2: 1x1 then a stride-2 3x3 (-> 288).
        self.branch2 = nn.Sequential(
            BasicConv2d(1152, 256, kernel_size=1, stride=1),
            BasicConv2d(256, 288, kernel_size=3, stride=2),
        )

        # Branch 3: 1x1, padded 3x3, then a stride-2 3x3 (-> 320).
        self.branch3 = nn.Sequential(
            BasicConv2d(1152, 256, kernel_size=1, stride=1),
            BasicConv2d(256, 288, kernel_size=3, stride=1, padding=1),
            BasicConv2d(288, 320, kernel_size=3, stride=2),
        )

    def forward(self, x):
        """Concatenate the four branch outputs along the channel axis."""
        branches = (self.branch0, self.branch1, self.branch2, self.branch3)
        # 1152 + 384 + 288 + 320 = 2144 channels.
        return torch.cat([branch(x) for branch in branches], 1)



class BlockC(nn.Module):
    """Inception-Resnet-C residual block (8x8 grid): 2144 -> 2144 channels.

    Args:
        scale: Factor applied to the block's residual branch before it is
            added back to the input.
        noReLU: When True, no ReLU module is created and the residual sum is
            returned without activation.
    """

    def __init__(self, scale=1.0, noReLU=False):
        super().__init__()

        self.scale = scale
        self.noReLU = noReLU

        # Branch 0: 1x1.
        self.branch0 = BasicConv2d(2144, 192, kernel_size=1, stride=1)

        # Branch 1: 1x1 followed by a factorized 3x3 (1x3 then 3x1).
        self.branch1 = nn.Sequential(
            BasicConv2d(2144, 192, kernel_size=1, stride=1),
            BasicConv2d(192, 224, kernel_size=(1, 3), stride=1, padding=(0, 1)),
            BasicConv2d(224, 256, kernel_size=(3, 1), stride=1, padding=(1, 0)),
        )

        # Plain 1x1 convolution (no batch norm / ReLU) restoring 2144 channels.
        self.conv2d = nn.Conv2d(448, 2144, kernel_size=1, stride=1)
        # The activation only exists on instances that will actually use it.
        if not noReLU:
            self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        """Return ``x + scale * conv(concat(branches(x)))``, with ReLU unless ``noReLU``."""
        mixed = torch.cat((self.branch0(x), self.branch1(x)), 1)  # 192 + 256 = 448
        out = self.conv2d(mixed) * self.scale + x  # 448 -> 2144, scaled, plus skip

        if self.noReLU:
            return out
        return self.relu(out)  # 2144



class InceptionResNet(nn.Module):
    """Inception-Resnet version 2 image classifier.

    Layout: Stem -> 10x Inception-Resnet-A -> Reduction-A ->
    20x Inception-Resnet-B -> Reduction-B -> 9x Inception-Resnet-C ->
    one final Inception-Resnet-C without ReLU -> 1x1 conv ->
    global average pooling -> dropout -> linear classifier.

    The pooling layer is a global adaptive average pool. For the nominal
    299x299 input the final feature grid is 8x8 and the result is identical to
    the previous fixed ``AvgPool2d(8)``. For other resolutions (e.g. the
    448x448 crops produced by this notebook's transforms, which give a 12x12
    grid) the whole grid is now averaged instead of only its top-left 8x8
    window, and grids of 16x16 or larger no longer break the flatten/linear step.
    NOTE(review): the pooling layer has no weights, so existing checkpoints
    still load; a model trained at 448x448 with the old head will see slightly
    different pooled features and may benefit from brief fine-tuning.

    Args:
        num_classes: Size of the output layer.
    """

    def __init__(self, num_classes=1000):
        super(InceptionResNet, self).__init__()
        # Special attributes (descriptive metadata; not read by the forward pass here)
        self.input_space = "RGB"
        self.input_size = (299, 299, 3)
        self.mean = [0.5, 0.5, 0.5]
        self.std = [0.5, 0.5, 0.5]

        ##Modules
        ##The Stem block
        self.block_stem = BlockStem()

        ##Inception-Resnet-A x10
        ##Note: activation scaling is required for stability, as explained in the original paper
        ##Scale is arbitrarily chosen between 0.1--0.3
        self.repeat_a = nn.Sequential(*[BlockA(scale=0.22) for _ in range(10)])

        ##Reduction-A
        self.block_ra = BlockRA()

        ##Inception-Resnet-B x20
        self.repeat_b = nn.Sequential(*[BlockB(scale=0.15) for _ in range(20)])

        ##Reduction-B
        self.block_rb = BlockRB()

        ##Inception-Resnet-C x9
        self.repeat_c = nn.Sequential(*[BlockC(scale=0.22) for _ in range(9)])

        ##last BlockC without ReLU,
        ##and, instead, followed by a 1x1 BasicConv2d (conv + batch norm + ReLU)
        self.last_blockC = BlockC(scale=0.22, noReLU=True)
        self.blockC_conv2d = BasicConv2d(2144, 2144, kernel_size=1, stride=1)

        ##global average pooling (resolution independent) and dropout
        self.last_avgpool = nn.AdaptiveAvgPool2d(1)
        self.last_dropout = nn.Dropout(p=0.2)

        ##last linear layer, i.e., the actual classifier
        self.linear_cls = nn.Linear(2144, num_classes)

    def features(self, input):
        """Map an image batch (3 channels) to the final 2144-channel feature map."""
        x = self.block_stem(input)  # channels: 3 -> 384

        x = self.repeat_a(x)  # 384 -> 384
        x = self.block_ra(x)  # 384 -> 1152

        x = self.repeat_b(x)  # 1152 -> 1152
        x = self.block_rb(x)  # 1152 -> 2144
        x = self.repeat_c(x)  # 2144 -> 2144

        x = self.last_blockC(x)  # 2144 -> 2144
        x = self.blockC_conv2d(x)  # 2144 -> 2144

        return x

    def logits(self, features):
        """Pool the feature map to one vector per sample and classify it."""
        x = self.last_avgpool(features)  # (N, 2144, H, W) -> (N, 2144, 1, 1)
        x = self.last_dropout(x)
        x = x.view(x.size(0), -1)  # flatten to (N, 2144)
        x = self.linear_cls(x)  # 2144 -> number of classes

        return x

    def forward(self, input):
        """Return class scores of shape (N, num_classes)."""
        x = self.features(input)
        x = self.logits(x)

        return x



# **Define Training Function & Utilities**

## Utilities Methods

In [8]:
import numpy as np
import random
import torch
import torch.nn.functional as F
import torchvision
from torchvision import transforms, models
from torch.autograd import Variable



def cosine_anneal_schedule(t, nb_epoch, lr):
    """Cosine-annealed learning rate for step ``t`` of a cycle of ``nb_epoch`` steps.

    Returns ``lr / 2 * (cos(pi * (t % nb_epoch) / nb_epoch) + 1)`` as a float,
    i.e. ``lr`` at the start of each cycle, decaying towards 0 at its end.
    ``t`` is 0-based (use ``t - 1`` when ``t`` has 1-based indexing).
    """
    phase = np.pi * (t % nb_epoch) / nb_epoch
    return float(lr / 2 * (np.cos(phase) + 1))


def build_model(require_grad=True, num_classes=0):
    """Create a fresh (untrained) ``InceptionResNet``.

    Args:
        require_grad: Value assigned to ``requires_grad`` on every parameter.
        num_classes: Number of output classes; a falsy value (default 0) keeps
            the network's own default of 1000 classes.

    Returns:
        The newly built network.
    """
    print("==> Building model: " + "Inception-Resnet v2" + "..")
    if num_classes:
        nnet = InceptionResNet(num_classes)
    else:
        nnet = InceptionResNet()  ## default IRNet: 1000 classes

    for param in nnet.parameters():
        param.requires_grad = require_grad
    # The separator after "model" was missing, which printed "modelInception-Resnet v2".
    print("==> Completed building model " + "Inception-Resnet v2" + "!")

    return nnet


def model_info(model):
    """Print a line-by-line description of a PyTorch model's parameters.

    One row is printed per named parameter (index, name, requires_grad,
    element count, shape, mean, standard deviation), followed by a summary
    line with the number of parameter tensors, total elements, and elements
    that require gradients. A model without parameters prints an empty table
    and a summary of zeros.
    """
    named_params = list(model.named_parameters())
    n_p = sum(p.numel() for _, p in named_params)  # number parameters
    n_g = sum(p.numel() for _, p in named_params if p.requires_grad)  # number gradients
    print('\n%5s %50s %9s %12s %20s %12s %12s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
    for i, (name, p) in enumerate(named_params):
        name = name.replace('module_list.', '')
        print('%5g %50s %9s %12g %20s %12.3g %12.3g' % (
            i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
    # Count rows from the list itself: the loop variable is undefined when the
    # model has no parameters.
    print('Model Summary: %g layers, %g parameters, %g gradients\n' % (len(named_params), n_p, n_g))


def test(net, criterion, batch_size, testloader=None):
    net.eval()
    use_cuda = torch.cuda.is_available()
    test_loss = 0
    correct = 0
    correct_com = 0
    total = 0
    idx = 0
    device = torch.device("cuda:0")

    for batch_idx, (inputs, targets) in enumerate(testloader):
        idx = batch_idx
        if use_cuda: #gpu
            inputs, targets = inputs.to(device), targets.to(device) 
        inputs, targets = Variable(inputs, volatile=True), Variable(targets) #cpu

        output_p = net(inputs)
        loss = criterion(output_p, targets)

        test_loss += loss.item()
        _, predicted = torch.max(output_p.data, 1)

        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        if batch_idx % 50 == 0:
            print('Step: %d | Loss: %.3f | Acc: %.3f%% (%d/%d) \n' % 
                  (batch_idx, 
                   test_loss / (batch_idx + 1), 
                   100. * float(correct) / total, 
                   correct, 
                   total
                  )
            )

    test_acc = 100. * float(correct) / total
    test_loss = test_loss / (idx + 1)

    return test_acc, test_loss
    

## Training Function

In [9]:
from __future__ import print_function
import os
from PIL import Image

import logging
import random
import torch
import torch.optim as optim
import torch.backends.cudnn as cudnn



def _save_weights(net, device, *paths):
    """Save ``net``'s state dict from CPU to every path, then move ``net`` back to ``device``."""
    net.cpu()
    for path in paths:
        torch.save(net.state_dict(), path)
    net.to(device)


def train(nb_epoch, batch_size, store_name, resume=False, start_epoch=0, model_path=None, trainloader=None, testloader=None, batch_size_test=3, in_net=None):
    """Train the network with SGD and cosine-annealed learning rate, evaluating after every epoch.

    Per epoch: train over ``trainloader`` (batches smaller than ``batch_size``
    are skipped), append train metrics to ``results_train_irnetv2.txt``, run
    ``test`` on ``testloader``, append test metrics to
    ``results_test_irnetv2.txt``, and save the weights to
    ``model_irnetv2.pth``. When the validation accuracy matches or beats the
    best seen in this call, the weights are also saved to
    ``model_irnetv2_high.pth`` and the metrics appended to
    ``results_test_irnetv2_high.txt``. Weights are additionally saved every
    500 training steps. All files are written inside ``store_name``.

    Training runs on cuda:0 when CUDA is available and on CPU otherwise.

    Args:
        nb_epoch: Index one past the last epoch to run; also the cosine period.
        batch_size: Expected training batch size; smaller batches are skipped.
        store_name: Output directory (created if missing).
        resume: When True, load weights from ``model_path`` before training.
        start_epoch: Index of the first epoch to run.
        model_path: Path of the state dict to load when ``resume`` is True.
        trainloader: Iterable of ``(inputs, targets)`` training batches.
        testloader: Iterable of ``(inputs, targets)`` evaluation batches.
        batch_size_test: Forwarded to ``test``.
        in_net: Existing network to train; when None a 130-class model is built.

    Returns:
        ``(net, CELoss)``: the trained network and the loss module used.

    Raises:
        ValueError: If a loader is missing, or ``resume`` is set without ``model_path``.
    """
    # Fail fast instead of after building the model / finishing a full epoch.
    if trainloader is None or testloader is None:
        raise ValueError("trainloader and testloader must both be provided")
    if resume and model_path is None:
        raise ValueError("resume=True requires model_path")

    # setup output
    exp_dir = store_name
    os.makedirs(exp_dir, exist_ok=True)

    use_cuda = torch.cuda.is_available()
    print("GPU is available: ", use_cuda)

    # Model. Compare against None explicitly: container modules such as an
    # empty nn.Sequential are falsy.
    if in_net is not None:
        net = in_net
    else:
        net = build_model(num_classes=130)  ##dogs breeds dataset has 130 classes

    if resume:
        # Checkpoints are written from CPU by _save_weights; load them on CPU too.
        net.load_state_dict(torch.load(model_path, map_location="cpu"))

    # Device: fall back to CPU rather than crashing when no GPU is present.
    device = torch.device("cuda:0" if use_cuda else "cpu")
    netp = torch.nn.DataParallel(net, device_ids=[0]) if use_cuda else net
    net.to(device)

    CELoss = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.0002, momentum=0.9, weight_decay=5e-4)
    # NOTE(review): when resuming with start_epoch > 0 the scheduler restarts
    # at the initial learning rate and max_val_acc restarts at 0, so the first
    # resumed epoch overwrites model_irnetv2_high.pth -- confirm this is intended.
    lrScheduler1 = optim.lr_scheduler.CosineAnnealingLR(optimizer, nb_epoch)

    last_model_path = store_name + '/model_irnetv2.pth'
    best_model_path = store_name + '/model_irnetv2_high.pth'

    max_val_acc = 0
    for epoch in range(start_epoch, nb_epoch):
        print('\nEpoch: %d' % epoch)
        net.train()
        train_loss = 0.0
        correct = 0
        total = 0
        steps = 0  # batches actually trained on (skipped partial batches excluded)
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            if inputs.shape[0] < batch_size:
                continue
            inputs, targets = inputs.to(device), targets.to(device)

            # Step 1
            optimizer.zero_grad()
            output_p = netp(inputs)
            loss = CELoss(output_p, targets)
            loss.backward()
            optimizer.step()

            #  training log
            _, predicted = torch.max(output_p.data, 1)
            steps += 1
            total += targets.size(0)
            correct += predicted.eq(targets.data).sum().item()
            train_loss += loss.item()

            if batch_idx % 50 == 0:
                print(
                    'Step: %d | Loss: %.5f | Acc: %.3f%% (%d/%d)' %
                    (batch_idx,
                     train_loss / steps,
                     100. * float(correct) / total, correct, total)
                )
            if batch_idx % 500 == 0:
                ##save model every 500 steps
                _save_weights(net, device, last_model_path)

        lrScheduler1.step()  # update learning rate after each epoch

        # Average over trained batches only; guard against an epoch in which
        # every batch was skipped.
        train_acc = 100. * float(correct) / total if total else 0.0
        train_loss = train_loss / steps if steps else 0.0
        with open(exp_dir + '/results_train_irnetv2.txt', 'a') as file:
            file.write(
                'Iteration %d | train_acc = %.5f | train_loss = %.5f |\n' %
                (epoch,
                 train_acc,
                 train_loss,
                )
            )

        val_acc, val_loss = test(net, CELoss, batch_size=batch_size_test, testloader=testloader)
        if val_acc >= max_val_acc:
            max_val_acc = val_acc
            _save_weights(net, device, best_model_path, last_model_path)
            with open(exp_dir + '/results_test_irnetv2_high.txt', 'a') as file_h:
                file_h.write('Iteration %d, test_acc = %.5f, test_loss = %.6f\n' %
                           (epoch, val_acc, val_loss)
                           )
        else:
            _save_weights(net, device, last_model_path)

        with open(exp_dir + '/results_test_irnetv2.txt', 'a') as file:
            file.write('Iteration %d, test_acc = %.5f, test_loss = %.6f\n' %
                        (epoch, val_acc, val_loss)
            )

    return net, CELoss


## Dataloaders (train/test)

In [10]:
import torch
import torchvision
from torchvision import transforms, models

##Setting parameters
batchSize_train = 16  # batch size handed to the training DataLoader
batchSize_test = 3    # batch size handed to the test DataLoader

##Transform Data
print("==> Preparing data..")


def _make_pipeline(*crop_stages):
    """Build an image pipeline: resize to 550x550, apply `crop_stages`, then tensor + normalize.

    `crop_stages` are the transforms that differ between training and testing
    (the cropping / augmentation steps). Every pipeline ends by converting the
    image to a tensor and normalizing each channel with mean 0.5 and std 0.5.
    """
    return transforms.Compose([
        transforms.Resize((550, 550)),
        *crop_stages,
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])


# Training: random 448x448 crop (with 8 px padding) plus random horizontal flip.
transform_train = _make_pipeline(
    transforms.RandomCrop(448, padding=8),
    transforms.RandomHorizontalFlip(),
)

# Testing: deterministic 448x448 center crop, no augmentation.
transform_test = _make_pipeline(transforms.CenterCrop(448))


==> Preparing data..


In [11]:

# ##Data on shared folder on ggdrive
# rootPath = "/content/drive/MyDrive/CSCE5218/Dog_Breed_classification"

# trainset = torchvision.datasets.ImageFolder(root= rootPath + "/train", transform=transform_train)
# train_loader = torch.utils.data.DataLoader(trainset, batch_size=batchSize_train, shuffle=True, num_workers=4)

# testset = torchvision.datasets.ImageFolder(root= rootPath + "/test", transform=transform_test)
# test_loader = torch.utils.data.DataLoader(testset, batch_size=batchSize_test, shuffle=True, num_workers=4)

In [11]:

##Alternatively, data on temporary runtime directory
import os  # imported here so this cell does not depend on an earlier cell for the CPU count

rootPath = "/content"

# Cap the number of loader worker processes at the CPUs the runtime reports.
# A fixed value of 4 is more than small Colab runtimes provide, which makes
# PyTorch warn that the DataLoader may run slowly or even freeze.
numWorkers = min(4, os.cpu_count() or 1)

# Training images are read from <rootPath>/train and reshuffled by the loader.
trainset = torchvision.datasets.ImageFolder(root=rootPath + "/train", transform=transform_train)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batchSize_train, shuffle=True, num_workers=numWorkers)

# Test images are read from <rootPath>/test.
# NOTE(review): shuffle=True is kept to preserve the existing behaviour; if the
# evaluation routine consumes the whole loader, shuffle=False would give the same
# aggregate metrics with a reproducible order — confirm against test() before changing.
testset = torchvision.datasets.ImageFolder(root=rootPath + "/test", transform=transform_test)
test_loader = torch.utils.data.DataLoader(testset, batch_size=batchSize_test, shuffle=True, num_workers=numWorkers)

# **Training Section**

## Calling Training Function

### \*Clean GPU RAM 

In [12]:
# Free GPU memory before training starts, in case GPU memory is fragmented.
# Per the PyTorch documentation, empty_cache() releases the cached GPU memory
# that PyTorch is holding but not currently using.
torch.cuda.empty_cache() 

### Initial training
\* Use this for the first run, when there is no checkpoint to resume from. \\
\* The reserved test set is also evaluated after every epoch during training.

In [None]:
# #Initial train
# #path to store output model and test statistics
# store_path = '/content/drive/MyDrive/CSCE5218/Dog_Breed_classification/output'

# #temporary store on runtime directory
# store_path = '/content/output'

# #calling training
# trained_net, trained_loss = train(nb_epoch=20,             # number of epoch
#         batch_size=batchSize_train,         # batch size
#         store_name=store_path,     # folder for output
#         resume=False,          # resume training from checkpoint
#         start_epoch=0,         # the start epoch number when you resume the training
#         model_path=None,         # the saved model where you want to resume the training
#         trainloader=train_loader,
#         testloader=test_loader,
#         batch_size_test=batchSize_test
#         )

GPU is available:  True
==> Building model: Inception-Resnet v2..

Epoch: 0


  cpuset_checked))


Step: 0 | Loss: 6.96978 | Acc: 0.000% (0/16)
Step: 50 | Loss: 6.51758 | Acc: 7.966% (65/816)
Step: 100 | Loss: 6.07002 | Acc: 9.282% (150/1616)
Step: 150 | Loss: 5.72490 | Acc: 10.555% (255/2416)
Step: 200 | Loss: 5.46603 | Acc: 11.287% (363/3216)
Step: 250 | Loss: 5.30121 | Acc: 11.753% (472/4016)
Step: 300 | Loss: 5.17951 | Acc: 12.043% (580/4816)
Step: 350 | Loss: 5.06535 | Acc: 12.518% (703/5616)
Step: 400 | Loss: 4.97808 | Acc: 12.609% (809/6416)
Step: 450 | Loss: 4.91229 | Acc: 12.888% (930/7216)
Step: 500 | Loss: 4.83161 | Acc: 13.386% (1073/8016)
Step: 550 | Loss: 4.77936 | Acc: 13.373% (1179/8816)
Step: 600 | Loss: 4.72989 | Acc: 13.706% (1318/9616)
Step: 650 | Loss: 4.67309 | Acc: 13.988% (1457/10416)
Step: 700 | Loss: 4.63492 | Acc: 14.087% (1580/11216)
Step: 750 | Loss: 4.59517 | Acc: 14.248% (1712/12016)
Step: 800 | Loss: 4.55770 | Acc: 14.544% (1864/12816)
Step: 850 | Loss: 4.52923 | Acc: 14.791% (2014/13616)
Step: 900 | Loss: 4.49821 | Acc: 14.942% (2154/14416)
Step: 950

### Resumed training
\* Use this to continue a training run that was interrupted, resuming from the last saved checkpoint.

In [13]:
# Release cached GPU memory first, in case GPU memory is fragmented.
torch.cuda.empty_cache()

# Folder that receives the output model and the train/test statistics.
store_path = '/content/drive/MyDrive/CSCE5218/Dog_Breed_classification/output'
# Alternative: temporary store on the runtime directory.
# store_path = '/content/output'

# Saved model from the last run; passed to train() as `model_path`.
modelPath = store_path + "/model_irnetv2.pth"

# Last epoch from the previous run; passed to train() as `start_epoch`.
startEpoch = 0

# Build the network only. The saved weights are not loaded here because,
# per the original note, the reload is done inside the train function.
irnet_in = build_model(require_grad=True, num_classes=130)

# Arguments for the resumed run, gathered in one place for readability.
resume_args = dict(
    nb_epoch=20,                     # number of epochs
    batch_size=batchSize_train,      # training batch size
    store_name=store_path,           # folder for output
    resume=True,                     # resume training from checkpoint
    start_epoch=startEpoch,          # start epoch number when resuming
    model_path=modelPath,            # saved model to resume from
    trainloader=train_loader,
    testloader=test_loader,
    batch_size_test=batchSize_test,
    in_net=irnet_in,
)

trained_net, trained_loss = train(**resume_args)

==> Building model: Inception-Resnet v2..
==>Completed building modelInception-Resnet v2!
GPU is available:  True

Epoch: 0
Step: 0 | Loss: 2.38500 | Acc: 37.500% (6/16)
Step: 50 | Loss: 1.93691 | Acc: 49.142% (401/816)
Step: 100 | Loss: 1.95115 | Acc: 48.515% (784/1616)
Step: 150 | Loss: 1.97620 | Acc: 47.144% (1139/2416)
Step: 200 | Loss: 1.98080 | Acc: 46.859% (1507/3216)
Step: 250 | Loss: 1.97800 | Acc: 46.713% (1876/4016)
Step: 300 | Loss: 1.96709 | Acc: 47.093% (2268/4816)
Step: 350 | Loss: 1.96658 | Acc: 47.133% (2647/5616)
Step: 400 | Loss: 1.96619 | Acc: 47.257% (3032/6416)
Step: 450 | Loss: 1.96724 | Acc: 47.187% (3405/7216)
Step: 500 | Loss: 1.96032 | Acc: 47.517% (3809/8016)
Step: 550 | Loss: 1.97042 | Acc: 47.289% (4169/8816)
Step: 600 | Loss: 1.96954 | Acc: 47.411% (4559/9616)
Step: 650 | Loss: 1.96635 | Acc: 47.437% (4941/10416)
Step: 700 | Loss: 1.95812 | Acc: 47.575% (5336/11216)
Step: 750 | Loss: 1.95582 | Acc: 47.570% (5716/12016)
Step: 800 | Loss: 1.95733 | Acc: 47.



Step: 0 | Loss: 3.703 | Acc: 33.333% (1/3) 

Step: 50 | Loss: 2.843 | Acc: 27.451% (42/153) 

Step: 100 | Loss: 2.735 | Acc: 30.363% (92/303) 

Step: 150 | Loss: 2.722 | Acc: 28.918% (131/453) 

Step: 200 | Loss: 2.790 | Acc: 26.866% (162/603) 

Step: 250 | Loss: 2.748 | Acc: 27.756% (209/753) 

Step: 300 | Loss: 2.749 | Acc: 28.239% (255/903) 

Step: 350 | Loss: 2.737 | Acc: 28.110% (296/1053) 

Step: 400 | Loss: 2.754 | Acc: 28.180% (339/1203) 

Step: 450 | Loss: 2.746 | Acc: 28.677% (388/1353) 

Step: 500 | Loss: 2.733 | Acc: 28.876% (434/1503) 

Step: 550 | Loss: 2.736 | Acc: 29.220% (483/1653) 

Step: 600 | Loss: 2.727 | Acc: 29.118% (525/1803) 

Step: 650 | Loss: 2.709 | Acc: 29.135% (569/1953) 

Step: 700 | Loss: 2.728 | Acc: 28.768% (605/2103) 

Step: 750 | Loss: 2.742 | Acc: 28.362% (639/2253) 

Step: 800 | Loss: 2.744 | Acc: 28.381% (682/2403) 

Step: 850 | Loss: 2.744 | Acc: 28.320% (723/2553) 

Step: 900 | Loss: 2.754 | Acc: 28.265% (764/2703) 

Step: 950 | Loss: 2.745 | Ac

KeyboardInterrupt: ignored

# \* **Debug & adjustment**

In [None]:
# ##checking gpu:
# ##memory footprint support libraries/code
# !ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
# !pip install gputil
# !pip install psutil
# !pip install humanize

# import psutil
# import humanize
# import os
# import GPUtil as GPU

# GPUs = GPU.getGPUs()
# ##XXX: only one GPU on Colab and isn’t guaranteed
# gpu = GPUs[0]
# def printm():
#     process = psutil.Process(os.getpid())
#     print("Gen RAM Free: " + humanize.naturalsize(psutil.virtual_memory().available), " |     Proc size: " + humanize.naturalsize(process.memory_info().rss))
#     print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total     {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
# printm()

In [None]:
# ##model debug and adjustment 

# temp_net = build_model()
# temp_net.load_state_dict(torch.load(modelPath))
# linear_in_features = temp_net.linear_cls.weight.size()[1]
# linear_out_features = 130
# new_linear_cls = nn.Linear(linear_in_features, linear_out_features)
# new_linear_cls.weight.data = temp_net.linear_cls.weight.data[:linear_out_features]
# new_linear_cls.bias.data = temp_net.linear_cls.bias.data[:linear_out_features]
# temp_net.linear_cls = new_linear_cls

In [None]:
# storePath = '/content/drive/MyDrive/CSCE5218/Dog_Breed_classification/output'
# modelName = '/model_irnetv2_130cls.pth'
# modelPath = storePath + modelName
# temp_net.cpu()
# torch.save(temp_net.state_dict(), modelPath)

In [None]:

# #clean memory in case GPU memory is fragmented
# torch.cuda.empty_cache() 

# # ##
# trained_net, trained_loss = train(nb_epoch=20,             # number of epoch
#         batch_size=batchSize_train,         # batch size
#         store_name=store_path,     # folder for output
#         resume=True,          # resume training from checkpoint
#         start_epoch=startEpoch,         # the start epoch number when you resume the training
#         model_path=modelPath,         # the saved model where you want to resume the training
#         trainloader=train_loader,
#         testloader=test_loader,
#         batch_size_test=batchSize_test,
#         in_net=temp_net
#         )

