In [None]:
!nvidia-smi

### Helper Functions

In [None]:
from collections import OrderedDict
import matplotlib.pyplot as plt
import numpy as np
import os
import shutil
import time

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

import torchvision
import torchvision.transforms as transforms
import torchvision.models as models

In [None]:
def accuracy(output, target, topk=(1,)):
  """Computes the accuracy over the k top predictions for the specified values of k.

  Args:
    output: 2-D score tensor; the top-k search runs along dim 1, so rows are
      samples and columns are classes.
    target: 1-D tensor of class indices, one entry per row of `output`.
    topk: iterable of k values to evaluate; `max(topk)` must not exceed the
      number of columns in `output`.

  Returns:
    A list with one 1-element float tensor per requested k, holding the
    percentage (0-100) of samples whose target is among the k best predictions.
  """
  with torch.no_grad():
    maxk = max(topk)
    batch_size = target.size(0)

    # pred becomes (maxk, batch): row r holds every sample's r-th best class.
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
      # reshape() instead of view(): `correct` is derived from a transposed
      # tensor and may be non-contiguous, in which case view(-1) raises.
      correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
      res.append(correct_k.mul_(100.0 / batch_size))
    return res

class ProgressMeter(object):
  """Prints a "[batch/total]" counter followed by the text form of each meter."""

  def __init__(self, num_batches, meters, prefix=""):
    self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
    self.meters = meters
    self.prefix = prefix

  def display(self, batch):
    """Print one tab-separated line: prefix + counter, then str() of every meter."""
    columns = [self.prefix + self.batch_fmtstr.format(batch)]
    columns.extend(str(m) for m in self.meters)
    print('\t'.join(columns))

  def _get_batch_fmtstr(self, num_batches):
    """Build the counter template, e.g. '[{:3d}/100]' for num_batches=100."""
    # Pad the running batch index to the width of the total.
    width = len(str(num_batches // 1))
    counter_fmt = '{:%dd}' % width
    total_text = counter_fmt.format(num_batches)
    return '[%s/%s]' % (counter_fmt, total_text)

class AverageMeter(object):
  """Keeps a resettable running average plus a cumulative (epoch) average.

  `sum`/`count`/`avg` are cleared by reset(); the `epoch_*` fields are only
  initialised in __init__ and keep accumulating across reset() calls.
  """

  def __init__(self, name, fmt=':f'):
    self.name = name
    self.fmt = fmt
    self.reset()
    # Cumulative statistics: reset() does not touch these.
    self.epoch_sum = 0
    self.epoch_count = 0
    self.epoch_avg = 0

  def reset(self):
    """Clear the running window; `val` is not assigned here (update() sets it)."""
    self.sum = 0
    self.count = 0
    self.avg = 0

  def update(self, val, n=1):
    """Record `val` with weight `n` in both the running and the epoch totals."""
    self.val = val

    # running window
    self.sum += val * n
    self.count += n
    self.avg = self.sum / self.count

    # cumulative totals
    self.epoch_sum += val * n
    self.epoch_count += n
    self.epoch_avg = self.epoch_sum / self.epoch_count

  def __str__(self):
    # Renders as "<name> <avg> (<epoch_avg>)", both numbers using self.fmt.
    template = ''.join(['{name} {avg', self.fmt, '} ({epoch_avg', self.fmt, '})'])
    return template.format(**vars(self))
  
def save_checkpoint(state, is_best, filename='checkpoint_conv.pth.tar',
                    best_filename='model_best_conv.pth.tar'):
  """Serialize `state` to `filename` and optionally keep a "best" copy.

  Args:
    state: object handed to `torch.save`.
    is_best: when truthy, the file just written is also copied to
      `best_filename`.
    filename: destination path of the checkpoint.
    best_filename: destination path of the copy made when `is_best` is truthy.
      Defaults to the name that was previously hard-coded.
  """
  torch.save(state, filename)
  if is_best:
    shutil.copyfile(filename, best_filename)
    
def imshow(img):
  """Undo the per-channel normalization of an image tensor and display it.

  Args:
    img: 3-channel image tensor laid out channel-first (it is transposed with
      axes (1, 2, 0) before plotting). Assumed to have been normalized with the
      `mean`/`std` constants below -- TODO confirm against the data pipeline.
  """
  mean = (0.4914, 0.4822, 0.4465)
  std = (0.247, 0.243, 0.261)
  # Normalize computes (x - m) / s; with m = -mean/std and s = 1/std that
  # evaluates to x * std + mean, i.e. the inverse of the forward normalization.
  unnormalize = transforms.Normalize(
      tuple(-m / s for m, s in zip(mean, std)),
      tuple(1 / s for s in std))
  # detach()/cpu(): .numpy() is rejected for tensors that require grad or live
  # on a CUDA device. clone(): keeps the caller's tensor untouched in case the
  # installed torchvision normalizes in place.
  img = unnormalize(img.detach().cpu().clone())
  npimg = img.numpy()
  plt.imshow(np.transpose(npimg, (1, 2, 0)))
  plt.show()

In [None]:
def summary(model, input_size, batch_size=-1, device="cuda"):
    """Print a per-layer table of output shapes and parameter counts.

    Forward hooks are attached to every sub-module except `nn.Sequential`,
    `nn.ModuleList` and `model` itself, one forward pass is run on random
    input, and the hooks are removed again (also when the forward pass raises).

    Args:
        model: the `nn.Module` to inspect. Its train/eval mode is not changed,
            so layers that update state during a training-mode forward pass
            (e.g. batch-norm running statistics) will see the random batch.
        input_size: shape of one sample without the batch dimension, as a
            tuple; or a list of such tuples for a model taking several inputs.
        batch_size: value shown in the batch position of the printed shapes and
            used for the size estimates (the probing pass itself always uses a
            batch of 2).
        device: "cuda" or "cpu" (case-insensitive). "cuda" silently falls back
            to CPU tensors when CUDA is unavailable.

    Raises:
        AssertionError: if `device` is neither "cuda" nor "cpu".
    """
    layer_info = OrderedDict()  # "<ClassName>-<index>" -> per-layer record
    hooks = []

    def hook(module, input, output):
        class_name = str(module.__class__).split(".")[-1].split("'")[0]
        m_key = "%s-%i" % (class_name, len(layer_info) + 1)

        entry = OrderedDict()
        layer_info[m_key] = entry
        entry["input_shape"] = list(input[0].size())
        if isinstance(output, (list, tuple)):
            # One shape per returned tensor, batch position replaced like below.
            entry["output_shape"] = [
                [batch_size] + list(o.size())[1:] for o in output
            ]
        else:
            entry["output_shape"] = list(output.size())
            entry["output_shape"][0] = batch_size

        # Count parameters as plain ints so the totals never depend on whether
        # any layer actually owns a weight tensor.
        params = 0
        weight = getattr(module, "weight", None)
        if hasattr(weight, "size"):
            params += weight.numel()
            entry["trainable"] = weight.requires_grad
        bias = getattr(module, "bias", None)
        if hasattr(bias, "size"):
            params += bias.numel()
        entry["nb_params"] = params

    def register_hook(module):
        is_container = isinstance(module, (nn.Sequential, nn.ModuleList))
        if not is_container and module is not model:
            hooks.append(module.register_forward_hook(hook))

    def num_elements(shape):
        # A nested list holds one shape per output tensor: add them up.
        if shape and isinstance(shape[0], list):
            return sum(num_elements(s) for s in shape)
        # abs(): the batch position is -1 when batch_size is left at default.
        return abs(int(np.prod(shape)))

    device = device.lower()
    assert device in [
        "cuda",
        "cpu",
    ], "Input device is not valid, please specify 'cuda' or 'cpu'"

    if device == "cuda" and torch.cuda.is_available():
        dtype = torch.cuda.FloatTensor
    else:
        dtype = torch.FloatTensor

    # multiple inputs to the network
    if isinstance(input_size, tuple):
        input_size = [input_size]

    # batch_size of 2 for batchnorm
    x = [torch.rand(2, *in_size).type(dtype) for in_size in input_size]
    print(x[0].shape)

    try:
        # register hook
        model.apply(register_hook)
        # make a forward pass; only shapes are needed, so skip autograd
        with torch.no_grad():
            model(*x)
    finally:
        # remove these hooks, even if the forward pass failed
        for h in hooks:
            h.remove()

    row_fmt = "{:>20}  {:>25} {:>15}"
    print("----------------------------------------------------------------")
    print(row_fmt.format("Layer (type)", "Output Shape", "Param #"))
    print("================================================================")
    total_params = 0
    total_output = 0
    trainable_params = 0
    for layer, info in layer_info.items():
        print(row_fmt.format(
            layer,
            str(info["output_shape"]),
            "{0:,}".format(info["nb_params"]),
        ))
        total_params += info["nb_params"]
        total_output += num_elements(info["output_shape"])
        if info.get("trainable", False):
            trainable_params += info["nb_params"]

    # assume 4 bytes/number (float on cuda).
    # Inputs are summed per tensor rather than multiplied across tensors.
    input_elements = sum(int(np.prod(shape)) for shape in input_size)
    total_input_size = abs(input_elements * batch_size * 4. / (1024 ** 2.))
    total_output_size = abs(2. * total_output * 4. / (1024 ** 2.))  # x2 for gradients
    total_params_size = abs(total_params * 4. / (1024 ** 2.))
    total_size = total_params_size + total_output_size + total_input_size

    print("================================================================")
    print("Total params: {0:,}".format(total_params))
    print("Trainable params: {0:,}".format(trainable_params))
    print("Non-trainable params: {0:,}".format(total_params - trainable_params))
    print("----------------------------------------------------------------")
    print("Input size (MB): %0.2f" % total_input_size)
    print("Forward/backward pass size (MB): %0.2f" % total_output_size)
    print("Params size (MB): %0.2f" % total_params_size)
    print("Estimated Total Size (MB): %0.2f" % total_size)
    print("----------------------------------------------------------------")
    # return summary

### FixRes

In [None]:
# !git clone https://github.com/facebookresearch/FixRes.git

In [None]:
%cd /workspace/FixRes
!ls

In [None]:
# !apt  update
# !apt install wget
# !wget https://dl.fbaipublicfiles.com/FixRes_data/FixRes_Pretrained_Models/ResNeXt_101_32x48d.pth

In [None]:
# Build the network that the following cells profile.
import torch
# NOTE(review): the wildcard import hides where names come from;
# `resnext101_32x8d_wsl` below is presumably provided by it -- confirm and
# consider importing it explicitly.
from imnet_evaluate.resnext_wsl import *

# presumably `progress=False` only silences a download progress bar -- TODO confirm
model = resnext101_32x8d_wsl(progress=False)

# Disabled: load a downloaded 32x48d checkpoint, mapping checkpoint keys that
# carry a 'module.' prefix onto this model's un-prefixed parameter names.
# pretrained_dict = torch.load('ResNeXt_101_32x48d.pth', map_location='cpu')['model']

# model_dict = model.state_dict()
# for k in model_dict.keys():
#   if(('module.'+k) in pretrained_dict.keys()):
#     model_dict[k] = pretrained_dict.get(('module.'+k))
    
# model.load_state_dict(model_dict)

In [None]:
###################################################
## Settings
# Number of samples per batch handed to the DataLoader below.
batch_size = 28
# val_ratio = 10000/50000
# NOTE(review): batch_print_freq and start_epoch are not referenced by the
# cells visible here -- presumably left over from a full training loop.
batch_print_freq = 500
start_epoch = 0
# epochs = 1

###################################################
## Load Data
# dataloaders = {}
# dataloaders['train'], dataloaders['val'] = get_train_val_loaders('./data', batch_size, val_ratio)
# trainloader, _ =  get_train_val_loaders('./data', batch_size, val_ratio)

# classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
from imnet_finetune.transforms import get_transforms
# `transformation` is indexed with 'val' below, so it is presumably a mapping
# keyed by the names passed in `need` -- TODO confirm against get_transforms.
transformation = get_transforms(input_size=320,test_size=320, kind='full', crop=True, need=('train', 'val'), backbone=None)
# NOTE(review): the training folder is loaded with the 'val' transform and the
# loader is created without a shuffle argument -- confirm this is intended for
# the timing runs. The dataset path is absolute and machine-specific.
trainset = torchvision.datasets.ImageFolder('/workspace/data/train', transform=transformation['val'])
trainloader = DataLoader(trainset, batch_size=batch_size, num_workers=2)
print(trainset)

###################################################
## Load Model
# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Define/load model
# num_ftrs = model.fc.in_features
# model.fc = nn.Linear(num_ftrs, 10)
# Send model to the selected device (`model` is created in an earlier cell)
model.to(device)

# Define loss function (criterion) and optimizer; the LR scheduler is disabled
criterion = nn.CrossEntropyLoss()  
# NOTE: define optimizer after sending model to GPU. May lead to error otherwise.
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9) 
#   lrscheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [None]:
%env CUDNN_LOGINFO_DBG=1
%env CUDNN_LOGDEST_DBG=/workspace/FixRes/logs/cudnn1.log

In [None]:
## Profiling Training on GPU
# Meters accumulate loss and top-1/top-5 accuracy over the timed batches.
losses = AverageMeter('Loss', ':.4e')
top1 = AverageMeter('Acc@1', ':6.2f')
top5 = AverageMeter('Acc@5', ':6.2f')

# set to train mode
model.train()

# batch times: wall-clock seconds per batch, covering the host-to-device copy,
# forward pass, backward pass and optimizer step
batchTimes = []

# Only synchronize when actually running on a GPU: `device` falls back to CPU
# when CUDA is unavailable, and torch.cuda.synchronize() would then fail.
use_cuda = device.type == 'cuda'

trainiter = iter(trainloader)
# number of batches to time
batches = 1
isProfile = False
for i in range(batches):
    # Built-in next(): the iterator protocol only guarantees __next__, not a
    # `.next()` method.
    images, target = next(trainiter)
    # Time: drain pending GPU work first so it is not billed to this batch
    if use_cuda:
        torch.cuda.synchronize()
    start = time.time()
    images = images.to(device)
    target = target.to(device)

#     if i == (batches-1):
#         isProfile = True

#     with torch.autograd.profiler.profile(enabled=isProfile,use_cuda=True) as prof:
    output = model(images)
    loss = criterion(output, target)
    # compute gradients and take one optimizer step
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # time: wait for the queued GPU work to finish before reading the clock
    if use_cuda:
        torch.cuda.synchronize()
    end1 = time.time()
    batchTimes.append(end1-start)

    # measure accuracy and record loss
    acc1, acc5 = accuracy(output, target, topk=(1, 5))
    losses.update(loss.item(), images.size(0))
    top1.update(acc1[0], images.size(0))
    top5.update(acc5[0], images.size(0))

    print(' * TRAIN: Acc@1 {top1.epoch_avg:.3f} Acc@5 {top5.epoch_avg:.3f}'.format(top1=top1, top5=top5))

# print(prof)
print(batchTimes)

In [None]:
# ResNeXt101_32x48d - batch 2 - avg time
# Mean of the per-batch wall-clock times collected in `batchTimes`.
# NOTE(review): this heading names a 32x48d model with batch size 2, while the
# cells above build the 32x8d model with batch_size = 28 -- presumably the
# figures recorded below come from an earlier configuration; confirm before
# re-running.
import pandas as pd
df = pd.DataFrame(batchTimes)
df.mean()
## Nvidia
# [0.540184736251831, 0.5099315643310547, 0.5100128650665283, 0.5107409954071045, 0.5070912837982178, 0.5088832378387451, 0.508690357208252, 0.5116229057312012, 0.509019136428833, 0.5115687847137451, 0.5111806392669678, 0.5110089778900146, 0.5101885795593262, 0.5084385871887207, 0.5120565891265869, 0.5119218826293945, 0.511404275894165, 0.512099027633667, 0.5103232860565186, 0.5099480152130127, 0.5124289989471436, 0.5113327503204346, 0.5095608234405518, 0.5098636150360107, 0.510570764541626, 0.511298418045044, 0.5104150772094727, 0.5117084980010986, 0.5108726024627686, 0.5109071731567383]
# 0.511509

In [None]:
# ResNeXt101_32x8d - batch 28 - avg time
# Mean of the per-batch wall-clock times collected in `batchTimes`.
# NOTE(review): same code as the previous cell; both read whatever the last
# profiling run left in `batchTimes`.
import pandas as pd
df = pd.DataFrame(batchTimes)
df.mean()
## Nvidia
# [0.6732978820800781, 0.6745915412902832, 0.6740171909332275, 0.6741843223571777, 0.6739389896392822, 0.6741132736206055, 0.6734766960144043, 0.6736109256744385, 0.6746206283569336, 0.6754834651947021, 0.6758608818054199, 0.674668550491333, 0.6746973991394043, 0.6754088401794434, 0.6756057739257812, 0.6759140491485596, 0.6750330924987793, 0.6748614311218262, 0.6749403476715088, 0.6774365901947021, 0.6748318672180176, 0.6770632266998291, 0.6754391193389893, 0.6777341365814209, 0.6746277809143066, 0.6782195568084717, 0.6738636493682861, 0.6777245998382568, 0.6749558448791504, 0.6749212741851807]
# 0.675171

In [None]:
# Print the text representation of the model built earlier in the notebook.
print(model)

In [None]:
# Per-layer shape/parameter table for a single 3x224x224 input; `device` is
# left at summary()'s default of "cuda".
# NOTE(review): the data pipeline above requests input_size=320 while this
# probe uses 224 -- confirm which resolution the size estimate should reflect.
summary(model, (3,224,224))