### Validation by aspect ratio

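Instead of center cropping every validation image to a square, sort the validation images by aspect ratio. Each batch of neighbouring images is then cropped to the aspect ratio closest to its members (the mean aspect ratio of that batch).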

In [1]:
import argparse, os, shutil, time, warnings
from datetime import datetime
from pathlib import Path
import numpy as np
import sys

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import models
from fp16util import network_to_half, set_grad, copy_in_params

# Sorted names of the lowercase, non-dunder, callable attributes exposed by `models`.
model_names = sorted(
    attr_name
    for attr_name, attr in models.__dict__.items()
    if attr_name.islower()
    and not attr_name.startswith("__")
    and callable(attr)
)
#print(model_names)

In [2]:
import matplotlib.pyplot as plt

In [3]:
def get_parser():
    """Build the command-line parser for the ImageNet training/evaluation script.

    Returns:
        argparse.ArgumentParser: parser with one required positional argument
        (``data``, the dataset directory) plus the optional flags defined below.

    Notes:
        * ``--arch`` is not restricted to a fixed list of choices.
        * ``--save-dir`` defaults to a ``Path`` (the current working directory);
          argparse only applies ``type=str`` to values given on the command line.
    """
    parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
    parser.add_argument('data', metavar='DIR', help='path to dataset')
    parser.add_argument('--save-dir', type=str, default=Path.cwd(), help='Directory to save logs and models.')
    parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet50')
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    parser.add_argument('--epochs', default=45, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('--warmup', default=0, type=int, metavar='N',
                        help='number of additional epochs to warmup')
    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                        help='manual epoch number (useful on restarts)')
    parser.add_argument('-b', '--batch-size', default=256, type=int,
                        metavar='N', help='mini-batch size (default: 256)')
    parser.add_argument('--lr', '--learning-rate', default=0.4, type=float,
                        metavar='LR', help='initial learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M', help='momentum')
    parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)')
    parser.add_argument('--print-freq', '-p', default=10, type=int,
                        metavar='N', help='print frequency (default: 10)')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--small', action='store_true', help='start with smaller images')
    parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                        help='evaluate model on validation set')
    parser.add_argument('--pretrained', dest='pretrained', action='store_true', help='use pre-trained model')
    parser.add_argument('--fp16', action='store_true', help='Run model fp16 mode.')
    # The help text used to be a copy of the --fp16 one; this flag selects nn.DataParallel.
    parser.add_argument('--dp', action='store_true',
                        help='Wrap the model in nn.DataParallel (single-process multi-GPU).')
    parser.add_argument('--sz',       default=224, type=int, help='Size of transformed image.')
    parser.add_argument('--decay-int', default=30, type=int, help='Decay LR by 10 every decay-int epochs')
    parser.add_argument('--loss-scale', type=float, default=1,
                        help='Loss scaling, positive power of 2 values can improve fp16 convergence.')
    parser.add_argument('--prof', dest='prof', action='store_true', help='Only run a few iters for profiling.')

    parser.add_argument('--distributed', action='store_true', help='Run distributed training')
    parser.add_argument('--dist-url', default='file://sync.file', type=str,
                        help='url used to set up distributed training')
    parser.add_argument('--dist-backend', default='nccl', type=str, help='distributed backend')
    parser.add_argument('--local_rank', default=0, type=int,
                        help='Used for multi-process training. Can either be manually set ' +
                        'or automatically set by using \'python -m multiproc\'.')
    return parser

In [4]:
from pathlib import Path

In [5]:
import pickle

In [6]:
# Checkpoint file to evaluate, resolved relative to the current user's home directory.
resume_path = str(Path.home()/'7-8x_train_lr3d2_e68_b128_93_success/model_best.pth.tar')

In [7]:
# Turn on cudnn.benchmark before any model is created.
cudnn.benchmark = True
# Drive the script's argparse parser with an explicit argv list: dataset dir, evaluate mode, checkpoint path.
args = get_parser().parse_args(['/home/paperspace/data/imagenet', '--evaluate', '--resume', resume_path])
# Ranks other than 0 redirect stdout to a per-rank log file under save_dir (the handle is never closed here).
if args.local_rank > 0: sys.stdout = open(f'{args.save_dir}/GPU_{args.local_rank}.log', 'w')

## Validation dataset

In [8]:
from torch.utils.data.sampler import Sampler
import torchvision

In [9]:
import os.path

def sort_ar(valdir):
    """Return ``(index, aspect_ratio)`` pairs for an image folder, sorted by aspect ratio.

    The aspect ratio is ``size[0] / size[1]`` of each loaded image, rounded to
    5 decimals. The result is cached in ``sorted_idxar.p`` in the current
    working directory; when that file exists it is returned as-is.

    Args:
        valdir: root directory passed to ``datasets.ImageFolder``.

    Returns:
        list of ``(dataset_index, aspect_ratio)`` tuples in ascending
        aspect-ratio order.

    Caveats:
        * The cache file name does not depend on ``valdir``; delete
          ``sorted_idxar.p`` when switching datasets, otherwise stale indices
          are returned.
        * NOTE(security): ``pickle.load`` can execute arbitrary code; only use
          a cache file that this function wrote itself.
    """
    cache_path = 'sorted_idxar.p'
    if os.path.isfile(cache_path):
        # Context manager so the handle is closed deterministically.
        with open(cache_path, 'rb') as cache_file:
            return pickle.load(cache_file)

    val_dataset = datasets.ImageFolder(valdir)
    # Each dataset item is (image, target); only the image size is needed.
    sizes = [item[0].size for item in val_dataset]
    idx_ar = [(i, round(s[0] / s[1], 5)) for i, s in enumerate(sizes)]
    sorted_idxar = sorted(idx_ar, key=lambda pair: pair[1])

    with open(cache_path, 'wb') as cache_file:
        pickle.dump(sorted_idxar, cache_file)
    return sorted_idxar

def chunks(l, n):
    """Lazily yield consecutive slices of ``l`` holding at most ``n`` items each.

    A chunk size below 1 is treated as 1. The final slice may be shorter.
    """
    step = max(1, n)
    starts = range(0, len(l), step)
    return (l[start:start + step] for start in starts)

def map_idx2ar(idx_ar_sorted, batch_size):
    """Map every dataset index to the mean aspect ratio of its batch.

    ``idx_ar_sorted`` is split into consecutive groups of ``batch_size``
    ``(index, aspect_ratio)`` pairs; each index in a group is assigned the
    group's mean aspect ratio rounded to 5 decimals.
    """
    idx2ar = {}
    for batch in chunks(idx_ar_sorted, batch_size):
        batch_idxs = [idx for idx, _ in batch]
        batch_mean = round(np.mean([ar for _, ar in batch]), 5)
        idx2ar.update(dict.fromkeys(batch_idxs, batch_mean))
    return idx2ar

In [31]:
class ValDataset(datasets.ImageFolder):
    """ImageFolder variant whose crop step also receives the sample's dataset index.

    Args:
        root: dataset root directory (forwarded to ``ImageFolder``).
        transform: optional callable applied to the image after ``ar_transform``.
        target_transform: optional callable applied to the target.
        ar_transform: optional callable ``(image, index) -> image`` applied
            first, e.g. an index-dependent aspect-ratio crop.

    NOTE: a later cell of this notebook redefines ``ValDataset`` with an extra
    ``resize`` argument; that later definition shadows this one once executed.
    """
    def __init__(self, root, transform=None, target_transform=None, ar_transform=None):
        super().__init__(root, transform, target_transform)
        self.ar_transform = ar_transform

    def __getitem__(self, index):
        """Load sample ``index`` and return ``(sample, target)`` after the configured transforms."""
        path, target = self.imgs[index]
        sample = self.loader(path)
        # Each optional step is guarded on its own. Previously the AR crop was
        # gated on `transform`, so ar_transform=None raised TypeError and
        # transform=None silently skipped the crop.
        if self.ar_transform is not None:
            sample = self.ar_transform(sample, index)
        if self.transform is not None:
            sample = self.transform(sample)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return sample, target

class ARSampler(Sampler):
    """Sampler that yields a fixed, caller-supplied sequence of dataset indices."""

    def __init__(self, indices):
        self.indices = indices

    def __iter__(self):
        return iter(self.indices)

    def __len__(self):
        return len(self.indices)
    

class CropArTfm(object):
    """Center-crop an image to a size derived from the target aspect ratio of its index.

    ``idx2ar`` maps a dataset index to an aspect ratio; ``target_size`` is the
    length of the shorter side of the crop. The longer side is rounded down to
    a multiple of 8.
    """

    def __init__(self, idx2ar, target_size):
        self.idx2ar = idx2ar
        self.target_size = target_size

    def __call__(self, img, idx):
        target_ar = self.idx2ar[idx]
        short_side = self.target_size
        # `size` is handed to torchvision's center_crop, which reads it as (height, width).
        if target_ar < 1:
            # Ratio below 1: the first entry gets the long side.
            long_side = int(short_side / target_ar)
            size = (long_side // 8 * 8, short_side)
        else:
            # Ratio of 1 or more: the second entry gets the long side.
            long_side = int(short_side * target_ar)
            size = (short_side, long_side // 8 * 8)
        return torchvision.transforms.functional.center_crop(img, size)

In [28]:
def create_validation_set(valdir, batch_size, target_size, tensor_tfm, use_val_sampler, use_ar_sampler):
    """Build the validation dataset and its sampler.

    Args:
        valdir: validation image folder.
        batch_size: batch size used to group images by aspect ratio.
        target_size: image size for the crop (shorter side in AR mode).
        tensor_tfm: list of transforms applied after cropping.
        use_val_sampler: in the non-AR path, use a ``DistributedSampler`` when
            ``args.distributed`` is set.
        use_ar_sampler: when true, return an aspect-ratio-cropped ``ValDataset``
            together with an ``ARSampler`` that walks it in aspect-ratio order.

    Returns:
        ``(val_dataset, val_sampler)``; the sampler is ``None`` in the
        non-distributed, non-AR case.
    """
    if use_ar_sampler:
        # The aspect-ratio index is only needed on this path; building it can
        # mean opening every validation image, so skip it otherwise.
        idx_ar_sorted = sort_ar(valdir)
        idx_sorted, _ = zip(*idx_ar_sorted)
        idx2ar = map_idx2ar(idx_ar_sorted, batch_size)
        val_dataset = ValDataset(valdir, transforms.Compose(tensor_tfm), ar_transform=CropArTfm(idx2ar, target_size))
        val_sampler = ARSampler(idx_sorted)
        return val_dataset, val_sampler

    # Use the `target_size` argument rather than the global args.sz, so the
    # caller's requested size is honoured on this path as well.
    val_tfms = [transforms.Resize(int(target_size*1.14)), transforms.CenterCrop(target_size)] + tensor_tfm
    val_dataset = datasets.ImageFolder(valdir,  transforms.Compose(val_tfms))
    val_sampler = None
    if use_val_sampler and args.distributed:
        val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
    return val_dataset, val_sampler
    

In [29]:
def get_loaders(traindir, valdir, bs, sz, val_bs=None, use_val_sampler=True, use_ar_sampler=False, min_scale=0.08):
    """Create the training and validation data loaders.

    Returns:
        ``(train_loader, val_loader, train_sampler, val_sampler)``. The train
        sampler is a ``DistributedSampler`` when ``args.distributed`` is set,
        otherwise ``None`` (and the train loader shuffles instead).
    """
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    tensor_tfm = [transforms.ToTensor(), normalize]

    # --- training side ---
    train_tfms = [
        transforms.RandomResizedCrop(sz, scale=(min_scale, 1.0)),
        transforms.RandomHorizontalFlip(),
    ] + tensor_tfm
    train_dataset = datasets.ImageFolder(traindir, transforms.Compose(train_tfms))
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=bs,
        shuffle=(train_sampler is None),
        num_workers=args.workers,
        pin_memory=True,
        sampler=train_sampler,
    )

    # --- validation side (falls back to the training batch size) ---
    if not val_bs:
        val_bs = bs
    val_dataset, val_sampler = create_validation_set(
        valdir, val_bs, sz, tensor_tfm, use_val_sampler, use_ar_sampler)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=val_bs,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
        sampler=val_sampler,
    )

    return train_loader, val_loader, train_sampler, val_sampler

## Main method

In [13]:
# Prefer .item() where the object offers it; otherwise fall back to indexing (backward compatibility).
def to_python_float(t):
    """Return ``t.item()`` when ``t`` has an ``item`` attribute, else ``t[0]``."""
    return t.item() if hasattr(t, 'item') else t[0]

class data_prefetcher():
    """Iterate a loader while moving each ``(input, target)`` batch to the GPU.

    With ``prefetch=True`` the next batch is fetched and copied on a separate
    CUDA stream while the caller works on the current one; ``next()`` returns
    ``(None, None)`` once the loader is exhausted.

    With ``prefetch=False`` each ``next()`` call fetches and copies one batch
    directly, and exhaustion surfaces as ``StopIteration`` from the underlying
    iterator.

    Args:
        loader: iterable yielding ``(input, target)`` pairs whose elements
            provide ``.cuda(non_blocking=...)``.
        prefetch: whether to preload on a side stream.
    """
    def __init__(self, loader, prefetch=True):
        self.loader, self.prefetch = iter(loader), prefetch
        if prefetch:
            self.stream = torch.cuda.Stream()
            self.preload()

    def preload(self):
        """Fetch the next batch and start its copy to the GPU on the side stream."""
        try:
            self.next_input, self.next_target = next(self.loader)
        except StopIteration:
            # Signal exhaustion to next() via None placeholders.
            self.next_input = None
            self.next_target = None
            return
        with torch.cuda.stream(self.stream):
            # `async` is a reserved word since Python 3.7, so `.cuda(async=True)`
            # no longer parses; `non_blocking=True` is the supported keyword.
            self.next_input = self.next_input.cuda(non_blocking=True)
            self.next_target = self.next_target.cuda(non_blocking=True)

    def next(self):
        """Return the next ``(input, target)`` pair on the GPU."""
        if not self.prefetch:
            batch_input, batch_target = next(self.loader)
            return batch_input.cuda(non_blocking=True), batch_target.cuda(non_blocking=True)

        # Make the current stream wait for the side-stream copy before handing out the batch.
        torch.cuda.current_stream().wait_stream(self.stream)
        batch_input = self.next_input
        batch_target = self.next_target
        self.preload()
        return batch_input, batch_target

def validate(val_loader, model, criterion, epoch, start_time):
    """Evaluate ``model`` on ``val_loader`` and report loss and top-1/top-5 precision.

    Args:
        val_loader: loader yielding ``(input, target)`` batches.
        model: network to evaluate; switched to eval mode here.
        criterion: loss function called as ``criterion(output, target)``.
        epoch: epoch number, used only in the printed summary line.
        start_time: ``datetime`` the elapsed-hours figure is measured from.

    Returns:
        The batch-size-weighted average top-1 precision.

    Side effects:
        Prints progress every ``args.print_freq`` batches on local rank 0 and
        appends the same lines to ``{args.save_dir}/full.log``. The final
        ``~~`` summary line reports the average top-5 precision.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.eval()
    end = time.time()

    prefetcher = data_prefetcher(val_loader)
    input, target = prefetcher.next()
    i = -1
    while input is not None:
        i += 1

        # `async` is a reserved word since Python 3.7; `non_blocking` is the
        # supported keyword for an asynchronous host-to-device copy.
        target = target.cuda(non_blocking=True)

        # compute output (tensors are passed directly; no Variable wrapper)
        with torch.no_grad():
            output = model(input)
            loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))

        if args.distributed:
            reduced_loss = reduce_tensor(loss.data)
            prec1 = reduce_tensor(prec1)
            prec5 = reduce_tensor(prec5)
        else:
            reduced_loss = loss.data

        batch_n = input.size(0)
        losses.update(to_python_float(reduced_loss), batch_n)
        top1.update(to_python_float(prec1), batch_n)
        top5.update(to_python_float(prec5), batch_n)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if args.local_rank == 0 and i % args.print_freq == 0:
            # Separate name so the model output above is not shadowed by the log text.
            log_line = ('Test: [{0}/{1}]\t'
                        'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                        'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                        'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                        'Prec@5 {top5.val:.3f} ({top5.avg:.3f})').format(
                        i, len(val_loader), batch_time=batch_time, loss=losses,
                        top1=top1, top5=top5)
            print(log_line)
            with open(f'{args.save_dir}/full.log', 'a') as f:
                f.write(log_line + '\n')

        input, target = prefetcher.next()

    time_diff = datetime.now()-start_time
    print(f'~~{epoch}\t{float(time_diff.total_seconds() / 3600.0)}\t{top5.avg:.3f}\n')
    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(top1=top1, top5=top5))

    return top1.avg


def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Serialize ``state`` to ``filename``; when ``is_best``, also copy it to ``model_best.pth.tar`` under ``args.save_dir``."""
    torch.save(state, filename)
    if not is_best:
        return
    best_path = f'{args.save_dir}/model_best.pth.tar'
    shutil.copyfile(filename, best_path)


class AverageMeter(object):
    """Keeps the most recent value together with a count-weighted running average."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero the latest value, the average, the weighted sum and the count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.

    Args:
        output: score tensor indexed as ``[sample, class]``.
        target: tensor of class indices, one per sample.
        topk: iterable of ``k`` values to evaluate.

    Returns:
        A list with one single-element tensor per ``k``: the percentage of
        samples whose target is among the ``k`` highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk best classes per sample, transposed to [rank, sample].
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # reshape instead of view: the slice of the transposed comparison can be
        # non-contiguous, in which case view(-1) raises a RuntimeError for k > 1.
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res


def reduce_tensor(tensor):
    """Return a copy of ``tensor`` summed across all processes and divided by the world size."""
    reduced = tensor.clone()
    dist.all_reduce(reduced, op=dist.reduce_op.SUM)
    reduced /= dist.get_world_size()
    return reduced



In [14]:
# Force fp16 mode on, overriding whatever the parsed arguments contained.
args.fp16 = True

In [15]:
# Header for the "~~" summary lines printed by validate().
# NOTE(review): the last column is labelled top1Accuracy, but validate() prints top5.avg there — confirm which is intended.
print("~~epoch\thours\ttop1Accuracy\n")
# Reference point for the elapsed-hours figure reported by validate().
start_time = datetime.now()

# Distributed mode: bind this process to its GPU, then join the process group.
if args.distributed:
    torch.cuda.set_device(args.local_rank)
    dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url)
    print('Distributed: init_process_group success')

if args.fp16: assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

# create model
#     if args.pretrained: model = models.__dict__[args.arch](pretrained=True)
#     else: model = models.__dict__[args.arch]()
# AS: force use resnet50 for now, until we figure out whether to upload model directory
# (args.arch and args.pretrained are therefore not consulted here)
import resnet
model = resnet.resnet50()

print("Loaded model")

# Move to the GPU first, then optionally convert to half precision and wrap for multi-GPU execution.
model = model.cuda()
n_dev = torch.cuda.device_count()
if args.fp16: model = network_to_half(model)
if args.distributed: model = nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank], output_device=args.local_rank)
elif args.dp:
    model = nn.DataParallel(model)
    # DataParallel path: multiply the configured batch size by the number of visible devices.
    args.batch_size *= n_dev

~~epoch	hours	top1Accuracy

Loaded model


In [16]:
# A `global` statement at module level has no effect; left as in the original script.
global param_copy
if args.fp16:
    # Detached torch.cuda.FloatTensor copies of the model parameters, re-enabled for gradients;
    # these copies (not the model's own parameters) are what the optimizer is given below.
    param_copy = [param.clone().type(torch.cuda.FloatTensor).detach() for param in model.parameters()]
    for param in param_copy: param.requires_grad = True
else: param_copy = list(model.parameters())

# define loss function (criterion) and optimizer
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(param_copy, args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

print("Defined loss and optimizer")

# Overwritten from the checkpoint in the resume cell when a checkpoint is loaded.
best_prec1 = 0

Defined loss and optimizer


In [17]:
class DWrap(nn.Module):
    """Pass-through container that holds the wrapped network under the attribute ``module``."""

    def __init__(self, module):
        super(DWrap, self).__init__()
        self.module = module

    def forward(self, x):
        """Delegate the forward pass to the wrapped network unchanged."""
        wrapped = self.module
        return wrapped(x)

In [18]:
# Need to wrap the model inside a `module` attribute, since the checkpoint was saved
# with a Distributed wrapper around the model.
model = DWrap(model)

In [19]:
# optionally resume from a checkpoint
if args.resume:
    if os.path.isfile(args.resume):
        checkpoint = torch.load(args.resume, map_location = lambda storage, loc: storage.cuda(0))
#             checkpoint = torch.load(args.resume, map_location = lambda storage, loc: storage.cuda(args.gpu))
        args.start_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
    else: print("=> no checkpoint found at '{}'".format(args.resume))

### Custom Data loaders

In [30]:
def tfm_wrapper(fn):
    """Adapt a one-argument transform to the ``(x, idx)`` call signature by ignoring ``idx``."""
    def wrapped(x, idx):
        return fn(x)
    return wrapped

In [39]:
class ValDataset(datasets.ImageFolder):
    """ImageFolder variant with an optional resize and an index-aware crop before the main transform.

    Args:
        root: dataset root directory (forwarded to ``ImageFolder``).
        transform: optional callable applied to the image last.
        target_transform: optional callable applied to the target.
        ar_transform: optional callable ``(image, index) -> image`` applied
            after ``resize``, e.g. an index-dependent aspect-ratio crop.
        resize: optional callable ``image -> image`` applied first.
    """
    def __init__(self, root, transform=None, target_transform=None, ar_transform=None, resize=None):
        super().__init__(root, transform, target_transform)
        self.ar_transform = ar_transform
        self.resize = resize

    def __getitem__(self, index):
        """Load sample ``index`` and return ``(sample, target)`` after the configured steps."""
        path, target = self.imgs[index]
        sample = self.loader(path)
        # Each optional step is guarded on its own. Previously resize and the AR
        # crop were gated on `transform`, so ar_transform=None raised TypeError
        # and transform=None silently skipped both steps.
        if self.resize: sample = self.resize(sample)
        if self.ar_transform is not None:
            sample = self.ar_transform(sample, index)
        if self.transform is not None:
            sample = self.transform(sample)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return sample, target


In [40]:
# Validation images are read from the 'validation' subdirectory of the dataset root.
valdir = os.path.join(args.data, 'validation')
val_bs = 128
target_size = 288

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
tensor_tfm = [transforms.ToTensor(), normalize]

# Aspect-ratio bookkeeping: indices sorted by ratio, and each index mapped to
# the mean ratio of its group of val_bs consecutive images.
idx_ar_sorted = sort_ar(valdir)
idx_sorted, _ = zip(*idx_ar_sorted)
idx2ar = map_idx2ar(idx_ar_sorted, val_bs)

# Variant 1: aspect-ratio crop only (no resize beforehand).
val_dataset_ar = ValDataset(valdir, transforms.Compose(tensor_tfm), ar_transform=CropArTfm(idx2ar, target_size))
# Sampler that walks the dataset in aspect-ratio order.
val_sampler_ar = ARSampler(idx_sorted)

# Variant 2: resize by a factor of 1.14 first, then aspect-ratio crop.
val_dataset_ar_rs = ValDataset(valdir, transforms.Compose(tensor_tfm), ar_transform=CropArTfm(idx2ar, target_size), resize=transforms.Resize(int(target_size*1.14)))

# Baseline: resize by 1.14, then a square center crop of target_size.
val_tfms = [transforms.Resize(int(target_size*1.14)), transforms.CenterCrop(target_size)] + tensor_tfm
val_dataset = datasets.ImageFolder(valdir,  transforms.Compose(val_tfms))
# val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)

        

### Test aspect ratio

In [25]:
# Experiment: aspect-ratio crop without a prior resize, batches drawn in aspect-ratio order.
val_loader = torch.utils.data.DataLoader(
    val_dataset_ar, batch_size=val_bs, shuffle=False,
    num_workers=args.workers, pin_memory=True, sampler=val_sampler_ar)

# Epoch is passed as 0; it only labels the printed summary line.
validate(val_loader, model, criterion, 0, start_time)

Test: [0/391]	Time 5.829 (5.829)	Loss 1.5127 (1.5127)	Prec@1 66.406 (66.406)	Prec@5 87.500 (87.500)
Test: [10/391]	Time 0.128 (1.434)	Loss 1.0293 (1.0241)	Prec@1 79.688 (74.077)	Prec@5 92.969 (92.614)
Test: [20/391]	Time 2.486 (0.962)	Loss 1.0449 (1.0817)	Prec@1 71.094 (72.693)	Prec@5 92.188 (92.225)
Test: [30/391]	Time 0.120 (0.978)	Loss 0.8711 (1.0730)	Prec@1 80.469 (73.236)	Prec@5 92.969 (91.935)
Test: [40/391]	Time 0.148 (0.778)	Loss 1.3506 (1.0414)	Prec@1 64.062 (73.990)	Prec@5 89.062 (92.226)
Test: [50/391]	Time 0.263 (0.680)	Loss 1.2803 (1.0966)	Prec@1 72.656 (73.146)	Prec@5 88.281 (91.376)
Test: [60/391]	Time 0.434 (0.621)	Loss 1.6611 (1.1348)	Prec@1 57.031 (72.080)	Prec@5 85.156 (90.868)
Test: [70/391]	Time 0.113 (0.570)	Loss 1.1621 (1.1244)	Prec@1 72.656 (72.348)	Prec@5 91.406 (91.032)
Test: [80/391]	Time 2.372 (0.593)	Loss 1.0908 (1.1000)	Prec@1 72.656 (72.820)	Prec@5 92.188 (91.310)
Test: [90/391]	Time 0.101 (0.593)	Loss 1.3398 (1.0952)	Prec@1 67.188 (72.879)	Prec@5 90.625 

73.95

### Test original with AR sort

In [37]:
# Experiment: baseline resize + square center crop, but iterated in aspect-ratio order.
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=val_bs, shuffle=False,
    num_workers=args.workers, pin_memory=True, sampler=val_sampler_ar)

# Epoch is passed as 0; it only labels the printed summary line.
validate(val_loader, model, criterion, 0, start_time)

Test: [0/391]	Time 1.912 (1.912)	Loss 1.3594 (1.3594)	Prec@1 67.188 (67.188)	Prec@5 86.719 (86.719)
Test: [10/391]	Time 0.088 (0.400)	Loss 1.0459 (0.9636)	Prec@1 79.688 (75.000)	Prec@5 89.062 (92.259)
Test: [20/391]	Time 0.088 (0.392)	Loss 0.9121 (1.0274)	Prec@1 75.000 (73.772)	Prec@5 95.312 (91.592)
Test: [30/391]	Time 0.088 (0.350)	Loss 0.8262 (1.0025)	Prec@1 82.031 (74.320)	Prec@5 93.750 (92.087)
Test: [40/391]	Time 0.088 (0.357)	Loss 1.0703 (0.9653)	Prec@1 71.094 (75.305)	Prec@5 92.969 (92.530)
Test: [50/391]	Time 0.090 (0.337)	Loss 1.2402 (1.0169)	Prec@1 69.531 (74.357)	Prec@5 92.969 (91.881)
Test: [60/391]	Time 0.088 (0.340)	Loss 1.7568 (1.0623)	Prec@1 54.688 (73.335)	Prec@5 83.594 (91.304)
Test: [70/391]	Time 0.088 (0.331)	Loss 1.1191 (1.0536)	Prec@1 74.219 (73.537)	Prec@5 89.844 (91.384)
Test: [80/391]	Time 0.088 (0.331)	Loss 0.9688 (1.0258)	Prec@1 75.000 (74.199)	Prec@5 90.625 (91.763)
Test: [90/391]	Time 0.088 (0.324)	Loss 1.0059 (1.0122)	Prec@1 73.438 (74.511)	Prec@5 93.750 

76.144

### Test AR with resize 1.14

In [41]:
# Experiment: resize by 1.14 followed by the aspect-ratio crop, batches drawn in aspect-ratio order.
val_loader = torch.utils.data.DataLoader(
    val_dataset_ar_rs, batch_size=val_bs, shuffle=False,
    num_workers=args.workers, pin_memory=True, sampler=val_sampler_ar)

# Epoch is passed as 0; it only labels the printed summary line.
validate(val_loader, model, criterion, 0, start_time)

Test: [0/391]	Time 2.462 (2.462)	Loss 1.0381 (1.0381)	Prec@1 73.438 (73.438)	Prec@5 91.406 (91.406)
Test: [10/391]	Time 0.138 (0.494)	Loss 0.9663 (0.8747)	Prec@1 79.688 (77.202)	Prec@5 92.188 (94.247)
Test: [20/391]	Time 0.123 (0.463)	Loss 0.9185 (0.9419)	Prec@1 78.125 (75.930)	Prec@5 94.531 (93.824)
Test: [30/391]	Time 0.121 (0.408)	Loss 0.7988 (0.9348)	Prec@1 85.938 (76.260)	Prec@5 92.969 (93.800)
Test: [40/391]	Time 0.133 (0.411)	Loss 1.0264 (0.9115)	Prec@1 73.438 (77.115)	Prec@5 93.750 (94.074)
Test: [50/391]	Time 0.113 (0.387)	Loss 1.1367 (0.9567)	Prec@1 73.438 (76.149)	Prec@5 91.406 (93.367)
Test: [60/391]	Time 0.113 (0.386)	Loss 1.6260 (0.9970)	Prec@1 56.250 (75.128)	Prec@5 85.938 (92.841)
Test: [70/391]	Time 0.113 (0.373)	Loss 1.0781 (0.9921)	Prec@1 73.438 (75.253)	Prec@5 92.969 (92.848)
Test: [80/391]	Time 0.105 (0.371)	Loss 0.8721 (0.9677)	Prec@1 76.562 (75.791)	Prec@5 93.750 (93.142)
Test: [90/391]	Time 0.109 (0.361)	Loss 0.8960 (0.9565)	Prec@1 78.125 (75.953)	Prec@5 96.094 

76.894