In [1]:
# Snapshot of the GPU (driver version, memory in use) before the model is loaded in the cells below.
!nvidia-smi

Fri Aug 30 11:51:05 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.67       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla V100-SXM2...  On   | 00000000:00:1E.0 Off |                    0 |
| N/A   38C    P0    21W / 300W |      0MiB / 16130MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

### Helper Functions

In [2]:
from collections import OrderedDict
import matplotlib.pyplot as plt
import numpy as np
import os
import shutil
import time

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

import torchvision
import torchvision.transforms as transforms
import torchvision.models as models

In [3]:
def accuracy(output, target, topk=(1,)):
  """Computes the accuracy over the k top predictions for the specified values of k.

  Args:
    output: 2-D tensor of per-class scores, one row per sample (the top-k is
      taken along dim 1).
    target: tensor with the correct class index of each sample; it is
      flattened to a single row for the comparison.
    topk: iterable of k values to evaluate.

  Returns:
    A list with one single-element tensor per entry of `topk`, holding the
    percentage (0-100) of samples whose target is among the k best classes.
  """
  with torch.no_grad():
    maxk = max(topk)
    batch_size = target.size(0)

    # After the transpose, row i of `pred` holds every sample's i-th best class.
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
      # reshape() rather than view(): `correct` is derived from a transposed
      # tensor and may not be contiguous, in which case view(-1) raises a
      # RuntimeError. reshape() copies only when it has to.
      correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
      res.append(correct_k.mul_(100.0 / batch_size))
    return res

class ProgressMeter(object):
  """Prints one tab-separated progress line: a '[batch/total]' counter, then every meter."""

  def __init__(self, num_batches, meters, prefix=""):
    self.prefix = prefix
    self.meters = meters
    self.batch_fmtstr = self._get_batch_fmtstr(num_batches)

  def display(self, batch):
    """Prints the prefix, the counter for `batch` and str() of each meter."""
    fields = [self.prefix + self.batch_fmtstr.format(batch)]
    fields.extend(str(m) for m in self.meters)
    print('\t'.join(fields))

  def _get_batch_fmtstr(self, num_batches):
    """Builds a template such as '[{:3d}/100]' whose slot is as wide as the total."""
    width = len(str(num_batches // 1))
    slot = '{:%dd}' % width
    total = slot.format(num_batches)
    return '[' + slot + '/' + total + ']'

class AverageMeter(object):
  """Running average of a metric, kept both since the last reset() and over the meter's lifetime."""

  def __init__(self, name, fmt=':f'):
    self.name = name
    self.fmt = fmt
    # Lifetime ("epoch_*") accumulators are created once; reset() never touches them.
    self.epoch_sum = 0
    self.epoch_count = 0
    self.epoch_avg = 0
    self.reset()

  def reset(self):
    """Clears the windowed statistics (sum, count, avg).

    `val` is not reset here: it keeps the most recent value passed to
    update(), and does not exist before the first update().
    """
    self.sum = 0
    self.count = 0
    self.avg = 0

  def update(self, val, n=1):
    """Records `val` as the mean of `n` new observations."""
    self.val = val
    weighted = val * n

    self.sum += weighted
    self.count += n
    self.avg = self.sum / self.count

    self.epoch_sum += weighted
    self.epoch_count += n
    self.epoch_avg = self.epoch_sum / self.epoch_count

  def __str__(self):
    # Renders as "<name> <avg> (<epoch_avg>)", applying self.fmt to both numbers.
    template = ''.join(['{name} {avg', self.fmt, '} ({epoch_avg', self.fmt, '})'])
    return template.format(**vars(self))
  
def save_checkpoint(state, is_best, filename='checkpoint_conv.pth.tar',
                    best_filename='model_best_conv.pth.tar'):
  """Saves `state` to `filename` and, when `is_best`, copies that file to `best_filename`.

  Args:
    state: object to serialize with torch.save (e.g. a dict of state_dicts).
    is_best: if true, the checkpoint just written is also copied to `best_filename`.
    filename: destination of the regular checkpoint.
    best_filename: destination of the best-so-far copy; the default is the
      path that used to be hard-coded.
  """
  torch.save(state, filename)
  if is_best:
    shutil.copyfile(filename, best_filename)
    
def imshow(img, mean=(0.4914, 0.4822, 0.4465), std=(0.247, 0.243, 0.261)):
  """Undoes per-channel normalization on one image tensor and displays it.

  Args:
    img: image tensor in channel-first layout (it is transposed with
      (1, 2, 0) before plotting).
    mean: per-channel means that were used to normalize `img`.
    std: per-channel standard deviations that were used to normalize `img`.

  NOTE(review): the defaults are the values that used to be hard-coded here
  (they look like CIFAR-10 statistics). The ImageFolder loader in this
  notebook normalizes with mean=[0.485, 0.456, 0.406] and
  std=[0.229, 0.224, 0.225]; pass those explicitly for its images.
  """
  # Normalize computes (x - m') / s'. With m' = -mean/std and s' = 1/std this
  # becomes x * std + mean, i.e. the inverse of the original normalization.
  inv_mean = [-m / s for m, s in zip(mean, std)]
  inv_std = [1 / s for s in std]
  unnormalize = transforms.Normalize(inv_mean, inv_std)
  # detach()/cpu() let this accept GPU tensors and tensors that track
  # gradients; .numpy() rejects both.
  img = unnormalize(img.detach().cpu())
  npimg = img.numpy()
  plt.imshow(np.transpose(npimg, (1, 2, 0)))
  plt.show()

In [4]:
def summary(model, input_size, batch_size=-1, device="cuda"):
    """Prints a per-layer table (output shape, parameter count) and size estimates for `model`.

    A forward hook is attached to every sub-module except containers
    (nn.Sequential, nn.ModuleList) and `model` itself, one forward pass is run
    on random input, and the hooks are removed again, even if that pass raises.

    Args:
        model: the nn.Module to inspect. It must already live on the device
            selected by `device`.
        input_size: shape of one input sample without the batch dimension, as
            a tuple; a list of such tuples for a model that takes several inputs.
        batch_size: value written into the batch position of single-tensor
            output shapes and used for the input-size estimate (-1 = unknown).
        device: "cuda" or "cpu" (case-insensitive). CUDA tensors are used only
            when CUDA is actually available.

    Raises:
        AssertionError: if `device` is neither "cuda" nor "cpu".

    NOTE(review): the probe pass runs in the model's current mode. In training
    mode, layers that keep running statistics (e.g. BatchNorm) will presumably
    be updated by the random probe batch; call model.eval() first if that matters.
    """
    device = device.lower()
    assert device in [
        "cuda",
        "cpu",
    ], "Input device is not valid, please specify 'cuda' or 'cpu'"

    if device == "cuda" and torch.cuda.is_available():
        dtype = torch.cuda.FloatTensor
    else:
        dtype = torch.FloatTensor

    # multiple inputs to the network
    if isinstance(input_size, tuple):
        input_size = [input_size]

    # batch_size of 2 for batchnorm
    x = [torch.rand(2, *in_size).type(dtype) for in_size in input_size]
    print(x[0].shape)

    # Per-layer records filled in by the hooks, in call order. Named
    # `layer_info` so it no longer shadows this function's own name.
    layer_info = OrderedDict()
    hooks = []

    def hook(module, input, output):
        class_name = type(module).__name__
        m_key = "%s-%i" % (class_name, len(layer_info) + 1)

        entry = OrderedDict()
        layer_info[m_key] = entry
        entry["input_shape"] = list(input[0].size())
        if isinstance(output, (list, tuple)):
            entry["output_shape"] = [
                [-1] + list(o.size())[1:] for o in output
            ]
        else:
            entry["output_shape"] = list(output.size())
            entry["output_shape"][0] = batch_size

        # Counts are plain Python ints (numel()), so the totals below stay
        # ints even for a model that has no parameters at all.
        params = 0
        if hasattr(module, "weight") and hasattr(module.weight, "size"):
            params += module.weight.numel()
            entry["trainable"] = module.weight.requires_grad
        if hasattr(module, "bias") and hasattr(module.bias, "size"):
            params += module.bias.numel()
        entry["nb_params"] = params

    def register_hook(module):
        is_container = isinstance(module, (nn.Sequential, nn.ModuleList))
        if not is_container and module is not model:
            hooks.append(module.register_forward_hook(hook))

    try:
        model.apply(register_hook)
        # Only shapes are needed, so skip building the autograd graph.
        with torch.no_grad():
            model(*x)
    finally:
        # Always detach the hooks; otherwise a failed probe pass would leave
        # them firing on every later forward call of the model.
        for h in hooks:
            h.remove()

    print("----------------------------------------------------------------")
    line_new = "{:>20}  {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #")
    print(line_new)
    print("================================================================")
    total_params = 0
    total_output = 0
    trainable_params = 0
    for layer, entry in layer_info.items():
        line_new = "{:>20}  {:>25} {:>15}".format(
            layer,
            str(entry["output_shape"]),
            "{0:,}".format(entry["nb_params"]),
        )
        total_params += entry["nb_params"]
        total_output += np.prod(entry["output_shape"])
        if entry.get("trainable"):
            trainable_params += entry["nb_params"]
        print(line_new)

    # assume 4 bytes/number (float on cuda). abs() cancels the sign introduced
    # by the -1 batch placeholders in the shapes.
    total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024 ** 2.))
    total_output_size = abs(2. * total_output * 4. / (1024 ** 2.))  # x2 for gradients
    total_params_size = abs(total_params * 4. / (1024 ** 2.))
    total_size = total_params_size + total_output_size + total_input_size

    print("================================================================")
    print("Total params: {0:,}".format(total_params))
    print("Trainable params: {0:,}".format(trainable_params))
    print("Non-trainable params: {0:,}".format(total_params - trainable_params))
    print("----------------------------------------------------------------")
    print("Input size (MB): %0.2f" % total_input_size)
    print("Forward/backward pass size (MB): %0.2f" % total_output_size)
    print("Params size (MB): %0.2f" % total_params_size)
    print("Estimated Total Size (MB): %0.2f" % total_size)
    print("----------------------------------------------------------------")

### FixRes

In [5]:
# !git clone https://github.com/facebookresearch/FixRes.git

In [5]:
# Work from the FixRes checkout: later cells import its packages (imnet_evaluate, imnet_finetune)
# and open the checkpoint and cuDNN log through paths relative to this directory.
%cd /workspace/FixRes
!ls

/workspace/FixRes
CODE_OF_CONDUCT.md	image			  main_extract.py
CONTRIBUTING.md		imnet_evaluate		  main_finetune.py
Contest			imnet_extract		  main_resnet50_scratch.py
LICENSE.md		imnet_finetune		  requirements.txt
README.md		imnet_resnet50_scratch	  setup.py
ResNeXt_101_32x48d.pth	logs			  transforms_v2.py
data			main_evaluate_imnet.py
hubconf.py		main_evaluate_softmax.py


In [7]:
# !apt  update
# !apt install wget
# !wget https://dl.fbaipublicfiles.com/FixRes_data/FixRes_Pretrained_Models/ResNeXt_101_32x48d.pth

In [6]:
import torch
from imnet_evaluate.resnext_wsl import resnext101_32x48d_wsl

model = resnext101_32x48d_wsl(progress=True)

# NOTE(review): torch.load unpickles the file, which can execute arbitrary code.
# Only load checkpoints obtained from a trusted source.
pretrained_dict = torch.load('ResNeXt_101_32x48d.pth', map_location='cpu')['model']

# The checkpoint entries are looked up under a 'module.' prefix that the
# freshly built model's keys do not have, so copy them over key by key.
model_dict = model.state_dict()
missing_keys = []
for k in model_dict:
  ckpt_key = 'module.' + k
  if ckpt_key in pretrained_dict:
    model_dict[k] = pretrained_dict[ckpt_key]
  else:
    missing_keys.append(k)

# load_state_dict() below always reports that all keys matched, because
# model_dict started out as the model's own state dict. Entries absent from
# the checkpoint therefore have to be reported here or they go unnoticed.
if missing_keys:
  print("WARNING: {} model entries were not found in the checkpoint and keep "
        "their initial values: {}".format(len(missing_keys), missing_keys))

model.load_state_dict(model_dict)

<All keys matched successfully>

In [7]:
###################################################
## Settings
# NOTE(review): val_ratio, batch_print_freq and start_epoch are not referenced
# by live code in the visible cells (only by commented-out code); confirm
# before relying on them.
batch_size = 4
val_ratio = 10000/50000
batch_print_freq = 500
start_epoch = 0
# epochs = 1

###################################################
## Load Data
# Imported here rather than in the import cell: the package is resolved
# against the FixRes checkout, so `%cd /workspace/FixRes` has to run first.
from imnet_finetune.transforms import get_transforms
transformation = get_transforms(input_size=320,test_size=320, kind='full', crop=True, need=('train', 'val'), backbone=None)
# NOTE(review): the 'val' transform is applied to the *training* images and the
# loader is created without shuffling. Presumably this is intentional, so that
# the profiled batch is the same on every run; confirm before reusing this
# loader for real training.
trainset = torchvision.datasets.ImageFolder('/workspace/data/train', transform=transformation['val'])
trainloader = DataLoader(trainset, batch_size=batch_size, num_workers=2)
print(trainset)

###################################################
## Load Model
# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# `model` comes from the checkpoint-loading cell above. Its classifier head is
# left unchanged; the replacement below is disabled.
# num_ftrs = model.fc.in_features
# model.fc = nn.Linear(num_ftrs, 10)
# Send model to GPU
model.to(device)

# Define loss function (criterion) and optimizer; the LR scheduler is disabled.
criterion = nn.CrossEntropyLoss()  
# NOTE: define optimizer after sending model to GPU. May lead to error otherwise.
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9) 
#   lrscheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

Dataset ImageFolder
    Number of datapoints: 1281167
    Root location: /workspace/data/train
    StandardTransform
Transform: Compose(
               Resize(size=365, interpolation=PIL.Image.BILINEAR, largest=False)
               CenterCrop(size=(320, 320))
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )


In [8]:
# Turn on cuDNN API logging and send it to a file; a later cell parses that
# file for the cudnnConvolutionForward() calls.
# NOTE(review): presumably these variables must be set before cuDNN is first used in this process; confirm.
%env CUDNN_LOGINFO_DBG=1
%env CUDNN_LOGDEST_DBG=/workspace/FixRes/logs/cudnn1.log

env: CUDNN_LOGINFO_DBG=1
env: CUDNN_LOGDEST_DBG=/workspace/FixRes/logs/cudnn1.log


In [9]:
## Profiling Training on GPU
# Runs `batches` optimisation steps so that cuDNN logs the convolution calls,
# and prints the running top-1 / top-5 accuracy after every step.
losses = AverageMeter('Loss', ':.4e')
top1 = AverageMeter('Acc@1', ':6.2f')
top5 = AverageMeter('Acc@5', ':6.2f')

# set to train mode
model.train()

trainiter = iter(trainloader)
# number of batches to run; the (disabled) profiler targets the last one
batches = 1
isProfile = False
for i in range(batches):
    # Use the built-in next(): it is the standard iterator protocol, whereas
    # the `.next()` method is not available on newer DataLoader iterators.
    images, target = next(trainiter)
    images = images.to(device)
    target = target.to(device)

    # To profile the last batch, enable the lines below and indent the
    # training step under the `with` block; then print(prof) after the loop.
    #     if i == (batches-1):
    #         isProfile = True
    #     with torch.autograd.profiler.profile(enabled=isProfile,use_cuda=True) as prof:
    output = model(images)
    loss = criterion(output, target)

    # compute gradients and do backprop
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # measure accuracy and record loss; .item() hands plain floats to the
    # meters so they do not accumulate tensors
    acc1, acc5 = accuracy(output, target, topk=(1, 5))
    losses.update(loss.item(), images.size(0))
    top1.update(acc1[0].item(), images.size(0))
    top5.update(acc5[0].item(), images.size(0))

    print(' * TRAIN: Acc@1 {top1.epoch_avg:.3f} Acc@5 {top5.epoch_avg:.3f}'.format(top1=top1, top5=top5))

# print(prof)

 * TRAIN: Acc@1 0.000 Acc@5 25.000


In [12]:
# GPU state after the training step above; compare the memory usage with the first cell.
!nvidia-smi

Wed Aug 28 06:22:18 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.67       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla V100-SXM2...  On   | 00000000:00:1E.0 Off |                    0 |
| N/A   46C    P0    56W / 300W |  15559MiB / 16130MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

In [13]:
# NOTE(review): this rebinds `trainset` to an ImageFolder without any transform.
# `trainloader` was built from the earlier dataset object, so after this cell
# the name `trainset` no longer refers to what the loader iterates over.
trainset = torchvision.datasets.ImageFolder('/workspace/data/train')
print(trainset)

Dataset ImageFolder
    Number of datapoints: 1281167
    Root location: /workspace/data/train


In [14]:
# Dump the module tree to cross-check the layer configuration against the parsed cuDNN log.
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(1536, 1536, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
      (bn2): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(1536, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_s

In [10]:
# Collect every cudnnConvolutionForward() record from the cuDNN log.
# A record starts at the line containing the marker and runs up to (not
# including) the next blank line, or to the end of the file.
blocks = []
with open('logs/cudnn1.log', 'r') as logfile:
    log_lines = iter(logfile)
    for entry in log_lines:
        if 'cudnnConvolutionForward()' not in entry:
            continue
        record = [entry]
        # Pull the rest of the record from the same iterator, so the outer
        # loop resumes right after the terminating blank line.
        for follow in log_lines:
            if not follow.strip():
                break
            record.append(follow)
        blocks.append(''.join(record))
print(len(blocks))

104


In [34]:
# with open("logs/convfwd.txt", 'w') as file:
#     for i,block in enumerate(blocks):
#         file.write(str(i+1) + '\n')
#         file.write(block)
#         if i < len(blocks)-1:
#             file.write('\n\n')

In [18]:
import re

def _same(*names):
    """Pairs each regex group name with a conv key of the same name."""
    return tuple((name, name) for name in names)


# Every value of interest sits at a fixed line offset below an anchor line of
# the logged record. Each spec is:
#   (anchor text, line offset from the anchor, compiled pattern,
#    ((conv key, regex group), ...), converter, fallback value, error message)
# The order of the specs fixes the key order of the resulting dicts. Patterns
# are compiled once here instead of once per block.
_CONV_FIELD_SPECS = (
    ## INPUT INFORMATION: 3rd line after the xDesc key
    ('xDesc', 3,
     re.compile(r'i!\s+dimA: type=int; val=\[(?P<n>\d+),(?P<c>\d+),(?P<h>\d+),(?P<w>\d+)\];'),
     _same('n', 'c', 'h', 'w'), int, -1,
     "ERROR: Input dimensions not found at expected spot for block:{}"),
    ## Filter information: 4th line after the wDesc key. i_prime is matched
    ## but not stored (per the original note it is inDim/groupCount).
    ('wDesc', 4,
     re.compile(r'i!\s+dimA: type=int; val=\[(?P<k>\d+),(?P<i_prime>\d+),(?P<f_h>\d+),(?P<f_w>\d+)\];'),
     _same('k', 'f_h', 'f_w'), int, -1,
     "ERROR: Filter dimensions not found at expected spot for block:{}."),
    # Padding: 6th line after the convDesc key
    ('convDesc', 6,
     re.compile(r'i!\s+padA: type=int; val=\[(?P<pad_h>\d+),(?P<pad_w>\d+)\];'),
     _same('pad_h', 'pad_w'), int, -1,
     "ERROR: Padding dimensions not found at expected spot for block:{}."),
    # Stride: 7th line after the convDesc key (the message used to say "Padding")
    ('convDesc', 7,
     re.compile(r'i!\s+strideA: type=int; val=\[(?P<stride_h>\d+),(?P<stride_w>\d+)\];'),
     _same('stride_h', 'stride_w'), int, -1,
     "ERROR: Stride dimensions not found at expected spot for block:{}."),
    # Dilation: 8th line after the convDesc key
    ('convDesc', 8,
     re.compile(r'i!\s+dilationA: type=int; val=\[(?P<dil_h>\d+),(?P<dil_w>\d+)\];'),
     _same('dil_h', 'dil_w'), int, -1,
     "ERROR: Dilation dimensions not found at expected spot for block:{}."),
    # Group count: 9th line after the convDesc key (the message used to say "Dilation")
    ('convDesc', 9,
     re.compile(r'i!\s+groupCount: type=int; val=(?P<groupCount>\d+);'),
     _same('groupCount'), int, -1,
     "ERROR: Group count not found at expected spot for block:{}."),
    ## ALGORITHM INFO: on the line containing 'algo' itself
    ('algo', 0,
     re.compile(r'i!\s+algo: type=cudnnConvolutionFwdAlgo_t; val=(?P<algo>[a-zA-Z0-9_ ()]+);'),
     (('fwd_algo', 'algo'),), str, "UNKNOWN",
     "ERROR: Algorithm information not found at expected spot for block:{}."),
)


def _find_anchor(lines, anchor, block_no):
    """Returns the index of the one line containing `anchor`; asserts there is exactly one."""
    hits = [idx for idx, text in enumerate(lines) if anchor in text]
    assert len(hits) == 1, "expected exactly one '{}' line in block:{}, found {}".format(
        anchor, block_no, len(hits))
    return hits[0]


def parse_conv_block(block, block_no):
    """Extracts the convolution parameters from one logged cudnnConvolutionForward() record.

    Args:
        block: text of one record, lines separated by '\\n'.
        block_no: 1-based record number, used only in messages.

    Returns:
        dict with the keys n, c, h, w, k, f_h, f_w, pad_h, pad_w, stride_h,
        stride_w, dil_h, dil_w, groupCount (ints) and fwd_algo (str). A value
        that is not found at its expected line is reported with a printed
        ERROR line and set to -1 ("UNKNOWN" for fwd_algo).

    Raises:
        AssertionError: if an anchor (xDesc, wDesc, convDesc, algo) does not
            occur on exactly one line of the record.
    """
    lines = block.split('\n')
    conv = {}
    anchor_rows = {}
    for anchor, offset, pattern, fields, convert, fallback, error in _CONV_FIELD_SPECS:
        if anchor not in anchor_rows:
            anchor_rows[anchor] = _find_anchor(lines, anchor, block_no)
        row = anchor_rows[anchor] + offset
        # A truncated record is reported like any other mismatch instead of
        # raising IndexError.
        found = pattern.match(lines[row]) if row < len(lines) else None
        if found:
            for key, group in fields:
                conv[key] = convert(found.group(group))
        else:
            print(error.format(block_no))
            for key, _ in fields:
                conv[key] = fallback
    return conv


convsInfo = [parse_conv_block(block, i + 1) for i, block in enumerate(blocks)]
print(len(convsInfo))

{'n': 4, 'c': 3, 'h': 320, 'w': 320, 'k': 64, 'f_h': 7, 'f_w': 7, 'pad_h': 3, 'pad_w': 3, 'stride_h': 2, 'stride_w': 2, 'dil_h': 1, 'dil_w': 1, 'groupCount': 1, 'fwd_algo': 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM (1)'}
{'n': 4, 'c': 64, 'h': 80, 'w': 80, 'k': 1536, 'f_h': 1, 'f_w': 1, 'pad_h': 0, 'pad_w': 0, 'stride_h': 1, 'stride_w': 1, 'dil_h': 1, 'dil_w': 1, 'groupCount': 1, 'fwd_algo': 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM (1)'}
{'n': 4, 'c': 1536, 'h': 80, 'w': 80, 'k': 1536, 'f_h': 3, 'f_w': 3, 'pad_h': 1, 'pad_w': 1, 'stride_h': 1, 'stride_w': 1, 'dil_h': 1, 'dil_w': 1, 'groupCount': 32, 'fwd_algo': 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM (0)'}
{'n': 4, 'c': 1536, 'h': 80, 'w': 80, 'k': 256, 'f_h': 1, 'f_w': 1, 'pad_h': 0, 'pad_w': 0, 'stride_h': 1, 'stride_w': 1, 'dil_h': 1, 'dil_w': 1, 'groupCount': 1, 'fwd_algo': 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM (1)'}
{'n': 4, 'c': 64, 'h': 80, 'w': 80, 'k': 256, 'f_h': 1, 'f_w': 1, 'pad_h': 0, 'pad_w'

In [40]:
import pandas as pd
# One row per logged convolution call; `df` keeps only the distinct calls.
# The row count is printed before and after de-duplication.
convs_all = pd.DataFrame(convsInfo)
print(convs_all.shape[0])
df = convs_all.drop_duplicates()
print(df.shape[0])

104
24


In [45]:
# Render every distinct convolution as a C++ `std::make_tuple(...)` literal.
# Argument order: w, h, c, n, k, f_w, f_h, pad_w, pad_h, stride_w, stride_h, groupCount.
_TUPLE_FIELDS = ('w', 'h', 'c', 'n', 'k', 'f_w', 'f_h', 'pad_w',
                 'pad_h', 'stride_w', 'stride_h', 'groupCount')


def _as_make_tuple(item):
    """Formats one row of `df` as a std::make_tuple(...) string."""
    values = [item[field] for field in _TUPLE_FIELDS]
    return "std::make_tuple({},{},{},{},{},{},{},{},{},{},{},{})".format(*values)


distinctConv = df.apply(_as_make_tuple, axis=1)

<class 'pandas.core.series.Series'>


In [46]:
# Join the per-convolution tuple strings into one listing, one tuple per line
# and comma-separated (presumably for pasting into C++ source).
# An earlier, now disabled, loop built a list called `arglist` here with
# `for item in df`, which iterates over column labels rather than rows.
allconvstr = ",\n".join(distinctConv)
print(allconvstr)

std::make_tuple(320,320,3,4,64,7,7,3,3,2,2,1),
std::make_tuple(80,80,64,4,1536,1,1,0,0,1,1,1),
std::make_tuple(80,80,1536,4,1536,3,3,1,1,1,1,32),
std::make_tuple(80,80,1536,4,256,1,1,0,0,1,1,1),
std::make_tuple(80,80,64,4,256,1,1,0,0,1,1,1),
std::make_tuple(80,80,256,4,1536,1,1,0,0,1,1,1),
std::make_tuple(80,80,256,4,3072,1,1,0,0,1,1,1),
std::make_tuple(80,80,3072,4,3072,3,3,1,1,2,2,32),
std::make_tuple(40,40,3072,4,512,1,1,0,0,1,1,1),
std::make_tuple(80,80,256,4,512,1,1,0,0,2,2,1),
std::make_tuple(40,40,512,4,3072,1,1,0,0,1,1,1),
std::make_tuple(40,40,3072,4,3072,3,3,1,1,1,1,32),
std::make_tuple(40,40,512,4,6144,1,1,0,0,1,1,1),
std::make_tuple(40,40,6144,4,6144,3,3,1,1,2,2,32),
std::make_tuple(20,20,6144,4,1024,1,1,0,0,1,1,1),
std::make_tuple(40,40,512,4,1024,1,1,0,0,2,2,1),
std::make_tuple(20,20,1024,4,6144,1,1,0,0,1,1,1),
std::make_tuple(20,20,6144,4,6144,3,3,1,1,1,1,32),
std::make_tuple(20,20,1024,4,12288,1,1,0,0,1,1,1),
std::make_tuple(20,20,12288,4,12288,3,3,1,1,2,2,32),
std::mak

In [31]:
# `arglist` is not defined by any live cell (its only construction is
# commented out), so this cell raised NameError on a fresh kernel. Build it
# here from the parsed convolutions before counting.
arglist = [
    "std::make_tuple({},{},{},{},{},{},{},{},{},{},{},{})".format(
        item['w'], item['h'], item['c'], item['n'],
        item['k'], item['f_w'], item['f_h'], item['pad_w'],
        item['pad_h'], item['stride_w'], item['stride_h'],
        item['groupCount'])
    for item in convsInfo
]
print(len(arglist))       # every logged forward convolution
print(len(set(arglist)))  # distinct configurations

104
24


In [20]:
# Per-layer output shapes and parameter counts for a 3x320x320 input (320 is the crop size requested from get_transforms).
summary(model,(3, 320, 320))

torch.Size([2, 3, 320, 320])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 160, 160]           9,408
       BatchNorm2d-2         [-1, 64, 160, 160]             128
              ReLU-3         [-1, 64, 160, 160]               0
         MaxPool2d-4           [-1, 64, 80, 80]               0
            Conv2d-5         [-1, 1536, 80, 80]          98,304
       BatchNorm2d-6         [-1, 1536, 80, 80]           3,072
              ReLU-7         [-1, 1536, 80, 80]               0
            Conv2d-8         [-1, 1536, 80, 80]         663,552
       BatchNorm2d-9         [-1, 1536, 80, 80]           3,072
             ReLU-10         [-1, 1536, 80, 80]               0
           Conv2d-11          [-1, 256, 80, 80]         393,216
      BatchNorm2d-12          [-1, 256, 80, 80]             512
           Conv2d-13          [-1, 256, 80, 80]          16,384
      Batc