In [None]:
!nvidia-smi

Wed Sep 22 02:36:34 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.63.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   72C    P8    81W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
#!pip install pretrainedmodels
#import pretrainedmodels

In [None]:
import torch
import torchvision
from torchvision import transforms,models
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from skimage import io

import os

import numpy as np
from PIL import Image
import time
import matplotlib.pyplot as plt
from tqdm import tqdm
import copy
import shutil
import logging
import torchvision.utils as utils



from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, roc_curve, roc_auc_score
# Probe CUDA availability once, then derive the compute device from that flag.
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")

In [None]:
# Mount Google Drive at /content/gdrive (Colab only). The dataset and
# checkpoint paths configured later in this notebook live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
def get_loader(data_path,pin_memory=True,num_workers = 8,batch_size = 256):
    """
    Build the train, dev and test dataloaders and report the number of classes.

    The dataset root is expected to contain `train/`, `val/` and `test/`
    sub-folders laid out for `torchvision.datasets.ImageFolder`
    (one sub-directory per class).

    Args:
        data_path: (str) root folder of the dataset (trailing slash optional)
        pin_memory: (bool) forwarded to every DataLoader
        num_workers: (int) number of worker processes used for data loading
        batch_size: (int) batch size of the train and dev loaders
            (the test loader keeps its fixed batch size of 10)

    Returns:
        train_dataloader, dev_dataloader, test_dataloader: (DataLoader)
        num_classes: (int) number of classes found in the training folder
    """
    # Normalisation constants are shared by the training and evaluation pipelines.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    # Training pipeline with augmentation. The previous pipeline additionally
    # contained a second CenterCrop(224) and a RandomCrop(224, 224); both leave an
    # already 224x224 image unchanged, so they are dropped here.
    train_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
        transforms.ToTensor(),
        normalize,
    ])

    # Evaluation pipeline: deterministic, so dev/test metrics are not perturbed
    # by random flips or blur.
    eval_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])

    # os.path.join accepts data_path with or without a trailing separator.
    train_dataset = torchvision.datasets.ImageFolder(root=os.path.join(data_path, 'train'),
                                                     transform=train_transform)
    dev_dataset = torchvision.datasets.ImageFolder(root=os.path.join(data_path, 'val'),
                                                   transform=eval_transform)
    test_dataset = torchvision.datasets.ImageFolder(root=os.path.join(data_path, 'test'),
                                                    transform=eval_transform)

    # Only the training loader is shuffled; a fixed order for dev/test keeps
    # evaluation (and the first batch logged as an image) reproducible.
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                                   shuffle=True, num_workers=num_workers,
                                                   pin_memory=pin_memory)
    dev_dataloader = torch.utils.data.DataLoader(dev_dataset, batch_size=batch_size,
                                                 shuffle=False, num_workers=num_workers,
                                                 pin_memory=pin_memory)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=10,
                                                  shuffle=False, num_workers=num_workers,
                                                  pin_memory=pin_memory)

    return train_dataloader, dev_dataloader, test_dataloader, len(train_dataset.classes)

In [None]:
class MyVgg(nn.Module):
  '''
  Pretrained VGG19-BN backbone (intended as a fixed feature extractor) with a
  learned spatial attention mechanism on top.
  https://arxiv.org/pdf/1804.02391v2.pdf

  Args:
    num_classes: (int) size of the final classification layer (default 2)
  '''
  def __init__(self, num_classes=2):
    super(MyVgg,self).__init__()
    # Set by freeze(); read by train() to keep the backbone in eval mode.
    self._frozen = False
    vgg = models.vgg19_bn(pretrained=True)
    # The first two children of the VGG model are used as the backbone: the
    # layers of the first child are unpacked, the second child is appended.
    # fc_1 below expects this stack to output 512 x 7 x 7 features.
    vgg_lay = list(vgg.children())
    self.conv_feat = nn.Sequential(*vgg_lay[0],vgg_lay[1])
    self.attention_model = nn.Sequential(nn.Conv2d(512,512,kernel_size=3,padding=1),nn.Conv2d(512,512,kernel_size=1))
    self.fc_1 = nn.Linear(512*7*7,4096)
    # 4096 features from fc_1 concatenated with 512 globally pooled channels.
    self.fc_final = nn.Linear(4096 + 512,num_classes)

  def forward(self,x):
    '''
    Returns a list [logits, attention]: `logits` comes from fc_final and
    `attention` is the per-channel softmax map with the shape of the backbone output.
    '''
    f_mn = self.conv_feat(x)
    w_mn = self.attention_model(f_mn)
    b,c,h,w = w_mn.shape
    # Softmax over all spatial positions, independently for every channel.
    m_mn = F.softmax(w_mn.view(b,c,-1),dim=2).view(b,c,h,w)
    # Residual attention: original features plus attention-weighted features.
    f_att_all = f_mn + m_mn * f_mn
    x = torch.flatten(f_att_all, 1)
    x = self.fc_1(x)
    # Global average pooled descriptor, one value per channel.
    x1 = F.adaptive_avg_pool2d(f_att_all,output_size=1).view(b,c)
    x = torch.cat([x,x1],dim=1)
    x = self.fc_final(x)
    return [x,m_mn]

  def freeze(self):
    '''
    Freeze the backbone: disable gradients for its parameters and switch it to
    eval mode so its BatchNorm running statistics stop being updated.
    '''
    for p in self.conv_feat.parameters():
      p.requires_grad = False
    self._frozen = True
    self.conv_feat.eval()

  def train(self, mode=True):
    '''
    Same as nn.Module.train, except that a frozen backbone always stays in
    eval mode. Without this, every net.train() call would put the backbone's
    BatchNorm layers back into training mode and let their running statistics
    drift, even though the backbone's weights are frozen.
    '''
    super(MyVgg,self).train(mode)
    if getattr(self, "_frozen", False):
      self.conv_feat.eval()
    return self


In [None]:
class RunningAverage():
    """Accumulates values and reports their arithmetic mean on demand.

    Example:
    ```
    loss_avg = RunningAverage()
    loss_avg.update(2)
    loss_avg.update(4)
    loss_avg()  # -> 3.0
    ```
    """
    def __init__(self):
        # Sum of all values seen so far and how many there were.
        self.total = 0
        self.steps = 0

    def update(self, val):
        """Add one observation to the running sum."""
        self.total += val
        self.steps += 1

    def __call__(self):
        """Return the mean of all observations recorded so far."""
        count = float(self.steps)
        return self.total / count


def save_checkpoint(state, is_best, checkpoint):
    """Saves model and training parameters at checkpoint + 'last.pth.tar'. If is_best==True, also saves
    checkpoint + 'best.pth.tar'
    Args:
        state: (dict) contains model's state_dict, may contain other keys such as epoch, optimizer state_dict
        is_best: (bool) True if it is the best model seen till now
        checkpoint: (string) folder where parameters are to be saved; created
            (including missing parent folders) when it does not exist yet
    """
    filepath = os.path.join(checkpoint, 'last.pth.tar')
    if not os.path.exists(checkpoint):
        print("Checkpoint Directory does not exist! Making directory {}".format(checkpoint))
        # makedirs also creates missing parents; os.mkdir would fail on a nested path.
        os.makedirs(checkpoint, exist_ok=True)
    else:
        print("Checkpoint Directory exists! ")
    torch.save(state, filepath)
    if is_best:
        # The best checkpoint is a copy of the file that was just written.
        shutil.copyfile(filepath, os.path.join(checkpoint, 'best.pth.tar'))

def load_checkpoint(checkpoint, model, optimizer=None,scheduler=None):
    """Loads model parameters (state_dict) from a checkpoint file. If an optimizer and/or a scheduler
    is provided, their state is restored too, assuming it is present in the checkpoint.
    Args:
        checkpoint: (string) filename which needs to be loaded
        model: (torch.nn.Module) model for which the parameters are loaded
        optimizer: (torch.optim) optional: resume optimizer from the 'optimizer_state_dict' entry
        scheduler: optional: resume LR scheduler from the 'scheduler_save' entry
    Returns:
        the loaded checkpoint dictionary
    Raises:
        FileNotFoundError: if the checkpoint file does not exist
    """
    if not os.path.exists(checkpoint):
        # Raising a plain string is itself a TypeError; raise a real exception instead.
        raise FileNotFoundError("File doesn't exist {}".format(checkpoint))
    # map_location moves the stored tensors onto the device selected for this session.
    checkpoint = torch.load(checkpoint,map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])

    if optimizer:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    if scheduler:
        scheduler.load_state_dict(checkpoint["scheduler_save"])

    return checkpoint

def set_logger(log_path):
    """Configure the root logger to write INFO messages to `log_path` and to the terminal.

    The root logger level is always set to INFO. Handlers are attached only when
    the root logger has none yet, so repeated calls do not duplicate output.
    Example:
    ```
    logging.info("Starting training...")
    ```
    Args:
        log_path: (string) where to log
    """
    root = logging.getLogger()
    root.setLevel(logging.INFO)

    # Already configured: leave the existing handlers alone.
    if root.handlers:
        return

    # File sink: timestamped, with the level name.
    to_file = logging.FileHandler(log_path)
    to_file.setFormatter(logging.Formatter('%(asctime)s:%(levelname)s: %(message)s'))
    root.addHandler(to_file)

    # Console sink: bare message only.
    to_console = logging.StreamHandler()
    to_console.setFormatter(logging.Formatter('%(message)s'))
    root.addHandler(to_console)

In [None]:
def cross_entropy(output, labels):
    """
    Classification loss: thin wrapper around `F.cross_entropy` with its default settings.
    Args:
        output: (Tensor) predictions produced by the model for a batch
        labels: (Tensor) ground-truth labels for the same batch
    """
    loss = F.cross_entropy(input=output, target=labels)
    return loss

In [None]:
def one_epoch(epoch,net,loader,optimizer,images_disp):
  """
  Train `net` for one pass over `loader`.

  Args:
      epoch: (int) index of the current epoch (not used by the body)
      net: model whose forward returns (logits, attention)
      loader: (DataLoader) yields (inputs, labels) batches
      optimizer: optimizer stepped once per batch
      images_disp: (list) the first (up to) 36 inputs of the first batch are
          appended to it so the caller can visualise them

  Returns:
      avg_loss: (float) mean cross-entropy per sample over the epoch
      acc: (float) accuracy in percent
  """
  net.train()
  loss_sum = 0.0   # sum of per-sample losses
  correct = 0
  total = 0
  loss_avg = RunningAverage()  # per-batch mean, shown on the progress bar only
  with tqdm(total=len(loader)) as t:
    for i,(inputs,labels) in enumerate(loader):
      inputs = inputs.to(device)
      labels = labels.to(device)
      if i == 0:
        images_disp.append(inputs[0:36,:,:,:])

      optimizer.zero_grad()
      outputs,_ = net(inputs)
      loss = cross_entropy(outputs,labels)
      loss.backward()
      optimizer.step()

      batch_size = labels.size(0)
      batch_loss = loss.item()
      # cross_entropy returns the batch mean; weight it by the batch size so the
      # epoch average is a true per-sample mean (also when the last batch is smaller).
      loss_sum += batch_loss * batch_size
      _, predicted = torch.max(outputs.detach(), 1)
      total += batch_size
      correct += (predicted == labels).sum().item()

      # NOTE: the per-step torch.cuda.empty_cache() call was dropped; it does not
      # reduce the memory needed by training and only slows every iteration down.
      loss_avg.update(batch_loss)
      t.set_postfix(loss='{:05.3f}'.format(loss_avg()))
      t.update()
  avg_loss = loss_sum / total
  acc = correct / total *100
  return avg_loss,acc

In [None]:
def infer_classfication(net, loader,images_disp):
    """
    Evaluate `net` on `loader` without computing gradients.

    Args:
        net: model whose forward returns (logits, attention); it is switched to
            eval mode and left in that mode
        loader: (DataLoader) yields (inputs, labels) batches
        images_disp: (list) the first (up to) 36 inputs of the first batch are
            appended to it so the caller can visualise them

    Returns:
        avg_loss: (float) mean cross-entropy per sample
        acc: (float) accuracy in percent
    """
    net.eval()
    loss_sum = 0.0   # sum of per-sample losses
    correct = 0
    total = 0
    with torch.no_grad():
        for i, (inputs,labels) in enumerate(loader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            if i == 0:
              images_disp.append(inputs[0:36,:,:,:])
            outputs,_ = net(inputs)
            loss = cross_entropy(outputs,labels)

            batch_size = labels.size(0)
            # cross_entropy returns the batch mean; weight it by the batch size so
            # the result is a true per-sample mean, independent of the batch size.
            loss_sum += loss.item() * batch_size
            _, predicted = torch.max(outputs, 1)
            total += batch_size
            correct += (predicted == labels).sum().item()

    acc = correct / total * 100
    avg_loss = loss_sum / total
    return avg_loss, acc

In [None]:
def train_step(net,loader,dev_loader,optimizer,scheduler,epochs):
  """
  Full training loop: train, validate, log to TensorBoard and checkpoint every epoch.

  Relies on two module-level globals: `writer` (TensorBoard SummaryWriter) and
  `hyper_param` (its "ckpnt_training" entry is the checkpoint folder).

  Args:
      net: model to train
      loader: (DataLoader) training data
      dev_loader: (DataLoader) validation data
      optimizer: optimizer used by one_epoch
      scheduler: stepped with the validation loss after every epoch
          (ReduceLROnPlateau-style interface)
      epochs: (int) number of epochs to run

  Returns:
      train_losses, valid_losses, valid_acc: (list) one entry per epoch
  """
  net.train()
  train_losses = []
  valid_losses = []
  valid_acc = []

  best_acc = 0.0
  for epoch in range(epochs):
    images_disp = []
    for prarm_group in optimizer.param_groups:
      print("Current lr: \t{}".format(prarm_group["lr"]))
    # Log the learning rate once per epoch (first param group).
    writer.add_scalar('train/learning_rate', optimizer.param_groups[0]['lr'], epoch)

    avg_loss_t, acc_t = one_epoch(epoch, net, loader, optimizer,images_disp)

    print(f'\n\n*************\n')
    print('Epoch [%d], loss: %.8f, acc: %.4f' %
                (epoch + 1, avg_loss_t, acc_t))
    avg_loss_v, acc_v = infer_classfication(net, dev_loader,images_disp)
    writer.add_scalar('train/accuracy', acc_t, epoch)
    writer.add_scalar('test/accuracy', acc_v, epoch)
    writer.add_scalar('train/loss', avg_loss_t, epoch)
    writer.add_scalar('test/loss', avg_loss_v, epoch)

    # images_disp[0] is the first training batch, images_disp[1] the first dev batch.
    I_train = utils.make_grid(images_disp[0], nrow=6, normalize=True, scale_each=True)
    writer.add_image('train/image', I_train, epoch)
    if epoch == 0:
      I_test = utils.make_grid(images_disp[1], nrow=6, normalize=True, scale_each=True)
      writer.add_image('test/image', I_test, epoch)
    # NOTE: attention-map visualisation used to sit here as a disabled block; it
    # referenced helpers (visualize_attn_softmax / visualize_attn_sigmoid) that
    # are not defined in this notebook, so it has been removed.

    valid_losses.append(avg_loss_v)
    valid_acc.append(acc_v)
    print('[Classification valid] loss: %.8f, acc: %.4f\n\n' % (avg_loss_v, acc_v))

    # Re-evaluate "best" every epoch and remember the best accuracy so far, so
    # best.pth.tar is only overwritten by a genuinely better model.
    is_best = acc_v > best_acc
    if is_best:
      best_acc = acc_v

    #scheduler.step() # StepLR
    scheduler.step(avg_loss_v) # ReduceonPlateau
    train_losses.append(avg_loss_t)
    print('\n','='*20)

    save_checkpoint({'epoch': epoch + 1,
                     'model_state_dict': net.state_dict(),
                     'optimizer_state_dict': optimizer.state_dict(),
                     'scheduler_save' : scheduler.state_dict(),
                     'train_loss': train_losses,
                     'dev_loss':valid_losses,
                     'dev_acc': valid_acc,},
                    is_best=is_best,
                    checkpoint = hyper_param["ckpnt_training"]
                    )

  return train_losses, valid_losses,valid_acc

In [None]:
def main():
  """
  Entry point: build the dataloaders, the attention VGG model, the optimizer and
  the LR scheduler, then run the training loop.

  Reads its configuration from the module-level `hyper_param` dictionary
  ("data_path", "num_workers", "batch_size", "lr", "numEpochs").

  Returns:
      train_losses, valid_losses, valid_acc: (list) per-epoch history as
      returned by train_step
  """
  # Optional file logging:
  # set_logger(os.path.join('/content/gdrive/MyDrive/capstone/workings/Project_LocalMachine/checkpoint', 'train.log'))

  logging.info("Loading the datasets...")
  # num_classes is reported by the loader; the model below uses its own default.
  train_dataloader, dev_dataloader, test_dataloader, num_classes = get_loader(
      hyper_param["data_path"],
      num_workers=hyper_param["num_workers"],
      batch_size=hyper_param["batch_size"])
  logging.info("- done.")

  logging.info("Loading the pretrained VGG19-BN attention model for transfer learning")
  model = MyVgg()
  model.freeze()
  model.to(device)
  print(model)

  # Alternative baseline (disabled): plain torchvision VGG19-BN with a new last layer.
  # model = torchvision.models.vgg19_bn(pretrained=True)
  # for param in model.parameters():
  #   param.requires_grad = False
  # num_ftrs = model.classifier[6].in_features
  # model.classifier[6] = nn.Linear(num_ftrs, 2)
  # model = model.to(device)

  # SGD with momentum; the LR is reduced when the validation loss plateaus.
  #sgd_optimizer = torch.optim.Adam(model.parameters(),lr = hyper_param["lr"])
  sgd_optimizer = optim.SGD(model.parameters(), lr=hyper_param["lr"], momentum=0.9, weight_decay=5e-4)
  scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(sgd_optimizer,mode='min',patience=3)

  # Resuming from the last checkpoint (disabled):
  # checkpoint_path = hyper_param['ckpnt_training']+'/last.pth.tar'
  # loaded_checkpoint = load_checkpoint(checkpoint_path,model,sgd_optimizer,scheduler)
  # train_loss,dev_loss,d_acc = loaded_checkpoint["train_loss"],loaded_checkpoint["dev_loss"], loaded_checkpoint["dev_acc"]
  # print(train_loss,dev_loss,d_acc)

  logging.info(f"Starting training for {hyper_param['numEpochs']} epoch(s)")
  train_losses, valid_losses, valid_acc = train_step(model, train_dataloader, dev_dataloader,
                                                     sgd_optimizer,scheduler,hyper_param["numEpochs"])
  return train_losses, valid_losses, valid_acc

In [None]:
# TensorBoard writer. train_step reads this module-level `writer` to log
# scalars and image grids, so this cell must run before main() is called.
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()

In [None]:
if __name__ == "__main__":
  # Run configuration. `hyper_param` is deliberately module-level: main() and
  # train_step() read it as a global.
  # Alternative locations used previously:
  #   ckpnt_training: "/content/gdrive/MyDrive/capstone/checkpoint/newdata_checkpoint"
  #   data_path:      '/content/gdrive/MyDrive/capstone/kag_data/data/'
  hyper_param = dict(
    lr=1e-3,
    batch_size=64,
    numEpochs=100,
    num_workers=2,
    ckpnt_training="/content/gdrive/MyDrive/capstone/new_checkpoint",
    checkpoint="/content/gdrive/MyDrive/capstone/new_checkpoint/last.pth.tar",
    data_path="/content/gdrive/MyDrive/capstone/comprs_images_boom/",
  )
  main()

MyVgg(
  (conv_feat): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 

100%|██████████| 47/47 [00:48<00:00,  1.03s/it, loss=0.629]



*************

Epoch [1], loss: 0.01002757, acc: 67.3898





[Classification valid] loss: 0.00897197, acc: 77.1739



Checkpoint Directory exists! 
Current lr: 	0.001


100%|██████████| 47/47 [00:53<00:00,  1.14s/it, loss=0.571]



*************

Epoch [2], loss: 0.00909973, acc: 72.4746





[Classification valid] loss: 0.00841669, acc: 78.5326



Checkpoint Directory exists! 
Current lr: 	0.001


100%|██████████| 47/47 [00:54<00:00,  1.17s/it, loss=0.541]



*************

Epoch [3], loss: 0.00862523, acc: 74.7797





[Classification valid] loss: 0.00816805, acc: 77.7174



Checkpoint Directory exists! 
Current lr: 	0.001


100%|██████████| 47/47 [00:53<00:00,  1.14s/it, loss=0.528]



*************

Epoch [4], loss: 0.00841055, acc: 76.3729





[Classification valid] loss: 0.00831109, acc: 77.7174



Checkpoint Directory exists! 
Current lr: 	0.001


100%|██████████| 47/47 [00:53<00:00,  1.14s/it, loss=0.505]



*************

Epoch [5], loss: 0.00805112, acc: 76.6102





[Classification valid] loss: 0.00823471, acc: 78.8043



Checkpoint Directory exists! 
Current lr: 	0.001


100%|██████████| 47/47 [00:54<00:00,  1.16s/it, loss=0.506]



*************

Epoch [6], loss: 0.00806773, acc: 75.9661





[Classification valid] loss: 0.00835148, acc: 76.9022



Checkpoint Directory exists! 
Current lr: 	0.001


100%|██████████| 47/47 [00:56<00:00,  1.21s/it, loss=0.475]



*************

Epoch [7], loss: 0.00756738, acc: 77.7627





[Classification valid] loss: 0.00829376, acc: 76.9022



Checkpoint Directory exists! 
Current lr: 	0.0001


100%|██████████| 47/47 [00:59<00:00,  1.26s/it, loss=0.471]



*************

Epoch [8], loss: 0.00749993, acc: 79.4576





[Classification valid] loss: 0.00827265, acc: 76.3587



Checkpoint Directory exists! 
Current lr: 	0.0001


100%|██████████| 47/47 [00:56<00:00,  1.20s/it, loss=0.457]



*************

Epoch [9], loss: 0.00727825, acc: 79.8644





[Classification valid] loss: 0.00811094, acc: 77.9891



Checkpoint Directory exists! 


KeyboardInterrupt: ignored