<a href="https://colab.research.google.com/github/harvard-visionlab/sroh/blob/main/2022/sroh_2022_run_validation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Run ImageNet-1k Validation

For models trained on ImageNet classification, this notebook evaluates accuracy (top-1, top-5) and cross-entropy loss on the 50,000 ImageNet validation images.

## download the validation set 

Run only once at the start of each session. Make sure you are on a GPU runtime! But also note that the download takes time, and Colab might complain that you aren't using the GPU and ask you to switch runtimes. You should just dismiss this suggestion.

In [1]:
# Chain the steps with `&&` so extraction and cleanup only run after a successful
# download; a failed or interrupted `wget -c` then keeps its partial file to resume.
!wget -c https://www.dropbox.com/s/6vu07wtshpqpcr2/val.tar.gz && tar -xf val.tar.gz && rm val.tar.gz

--2022-07-21 13:52:23--  https://www.dropbox.com/s/6vu07wtshpqpcr2/val.tar.gz
Resolving www.dropbox.com (www.dropbox.com)... 162.125.81.18, 2620:100:6017:18::a27d:212
Connecting to www.dropbox.com (www.dropbox.com)|162.125.81.18|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /s/raw/6vu07wtshpqpcr2/val.tar.gz [following]
--2022-07-21 13:52:24--  https://www.dropbox.com/s/raw/6vu07wtshpqpcr2/val.tar.gz
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://uce29521225558f9250c069972b3.dl.dropboxusercontent.com/cd/0/inline/BpgcJwJR9AW-nq4in-D0jD3L_pmLGZJpeCKDhHuEbBrep3QrMOA-Z1KG2QITk7V0IroCv363oGrH0a5nxzHC87BHczBw0pKdXyxkXd0GC9uCo3WaoeLIDiM5E_in0zrDQZGHXHvKRlSU4P7B0yffULm_9ClDQLVloxWPCaG3PxOPfw/file# [following]
--2022-07-21 13:52:25--  https://uce29521225558f9250c069972b3.dl.dropboxusercontent.com/cd/0/inline/BpgcJwJR9AW-nq4in-D0jD3L_pmLGZJpeCKDhHuEbBrep3QrMOA-Z1KG2QITk7V0IroCv363

## validation code

In [17]:
import torch 
from enum import Enum
from fastprogress.fastprogress import progress_bar 

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

@torch.no_grad()
def validate(model, val_loader):
  """Evaluate `model` on the 50,000-image ImageNet validation set.

  Puts the model in eval mode, runs it over every batch of `val_loader`, and
  accumulates top-1 / top-5 accuracy and cross-entropy loss, each weighted by
  the number of images in the batch.

  Args:
    model: classifier called as `model(images)`; its output is fed to
      `torch.nn.CrossEntropyLoss` and `accuracy`. The model is not moved here,
      so it must already live on the module-level `device`.
    val_loader: iterable of `(images, target)` batches that exposes a
      `.dataset` attribute containing exactly 50,000 items.

  Returns:
    Tuple `(top1, top5, loss)` of Python floats: top-1 and top-5 accuracy in
    percent, and the cross-entropy loss averaged over all images.

  Raises:
    AssertionError: if `val_loader.dataset` does not contain 50,000 images.
  """
  num_images = len(val_loader.dataset)
  assert num_images==50000, f"Expected 50K images, got {num_images}"
  criterion = torch.nn.CrossEntropyLoss()
  model.eval()
  losses = AverageMeter('Loss', ':.4e', Summary.NONE)
  top1 = AverageMeter('Acc@1', ':6.2f', Summary.AVERAGE)
  top5 = AverageMeter('Acc@5', ':6.2f', Summary.AVERAGE)

  for images, target in progress_bar(val_loader):
    # Use .to() rather than .cuda(): `device` may be 'cpu', and
    # Tensor.cuda() rejects a non-CUDA device.
    images = images.to(device, non_blocking=True)
    target = target.to(device, non_blocking=True)

    # compute output
    output = model(images)
    loss = criterion(output, target)

    # measure accuracy and record loss, weighting each batch by its size
    # (the last batch may be smaller than the others)
    batch_size = images.size(0)
    acc1, acc5 = accuracy(output, target, topk=(1, 5))
    losses.update(loss.item(), batch_size)
    top1.update(acc1[0], batch_size)
    top5.update(acc5[0], batch_size)

  # top1/top5 averages are 0-dim tensors (accumulated from `accuracy` outputs);
  # the loss average is already a Python float.
  return top1.avg.item(), top5.avg.item(), losses.avg

class Summary(Enum):
    """Selects which statistic `AverageMeter.summary()` reports."""
    # Members take consecutive integer values starting at 0, in this order.
    NONE, AVERAGE, SUM, COUNT = range(4)

class AverageMeter(object):
    """Computes and stores the average and current value.

    Attributes:
        name: label used when formatting the meter.
        fmt: format spec (e.g. ':6.2f') applied to `val` and `avg` in `__str__`.
        summary_type: `Summary` member selecting what `summary()` reports.
        val: most recent value passed to `update`.
        sum: running total of `val * n`.
        count: running total of `n`.
        avg: `sum / count` as of the last `update` / `all_reduce`.
    """
    def __init__(self, name, fmt=':f', summary_type=Summary.AVERAGE):
        self.name = name
        self.fmt = fmt
        self.summary_type = summary_type
        self.reset()

    def reset(self):
        """Clear all accumulated statistics back to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record `val` as the mean of `n` new samples and refresh the average."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def all_reduce(self):
        """Sum `sum` and `count` across processes and recompute `avg`.

        Uses `torch.distributed.all_reduce`, so it is only meaningful inside a
        distributed run. Afterwards `sum` and `count` are Python floats.
        """
        # `dist` was never imported at notebook level, so the original raised
        # NameError here; import it locally to keep the class self-contained.
        import torch.distributed as dist

        total = torch.FloatTensor([self.sum, self.count])
        dist.all_reduce(total, dist.ReduceOp.SUM, async_op=False)
        self.sum, self.count = total.tolist()
        self.avg = self.sum / self.count

    def __str__(self):
        """Format as '<name> <val> (<avg>)' using the meter's `fmt` spec."""
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)

    def summary(self):
        """Return the one-line summary selected by `summary_type`.

        Returns an empty string for `Summary.NONE`.

        Raises:
            ValueError: if `summary_type` is not a `Summary` member.
        """
        if self.summary_type is Summary.NONE:
            fmtstr = ''
        elif self.summary_type is Summary.AVERAGE:
            fmtstr = '{name} {avg:.3f}'
        elif self.summary_type is Summary.SUM:
            fmtstr = '{name} {sum:.3f}'
        elif self.summary_type is Summary.COUNT:
            fmtstr = '{name} {count:.3f}'
        else:
            raise ValueError('invalid summary type %r' % self.summary_type)

        return fmtstr.format(**self.__dict__)

def accuracy(output, target, topk=(1,)):
    """Top-k accuracy, in percent, for each k in `topk`.

    Args:
        output: score tensor with samples along dim 0 and classes along dim 1.
        target: 1-D tensor holding the correct class index of each sample.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one single-element tensor per entry of `topk`, holding the
        percentage of samples whose target is among the k highest scores.
    """
    with torch.no_grad():
        largest_k = max(topk)
        num_samples = target.size(0)

        # Indices of the `largest_k` highest scores per sample, transposed so
        # that row r holds every sample's (r+1)-th best guess.
        ranked = output.topk(largest_k, 1, True, True)[1].t()
        hits = ranked.eq(target.view(1, -1).expand_as(ranked))

        # A sample counts for top-k when any of its first k guesses is a hit.
        return [
            hits[:k].reshape(-1).float().sum(0, keepdim=True).mul_(100.0 / num_samples)
            for k in topk
        ]

## run validation on Alexnet

The only important thing here is to make sure to use the "correct" validation transforms for your model. This usually means just making sure that you use the right image size and normalization statistics. For torchvision models, the right transform is:

```
transform = transforms.Compose([
  transforms.Resize(256),
  transforms.CenterCrop(224),
  transforms.ToTensor(),
  transforms.Normalize(mean=[0.485, 0.456, 0.406],
                       std=[0.229, 0.224, 0.225])
])

```

In [3]:
from torchvision import models, datasets, transforms 

# Validation preprocessing: Resize(256), center-crop to 224, convert to a tensor,
# then normalize each channel with the mean/std listed below.
transform = transforms.Compose([
  transforms.Resize(256),
  transforms.CenterCrop(224),
  transforms.ToTensor(),
  transforms.Normalize(mean=[0.485, 0.456, 0.406],
                       std=[0.229, 0.224, 0.225])
])
# Show the composed transform as the cell output.
transform

Compose(
    Resize(size=256, interpolation=bilinear, max_size=None, antialias=None)
    CenterCrop(size=(224, 224))
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)

In [11]:
# Build the validation dataset from ./val with the current `transform`, and a
# loader that yields batches of 256 images in fixed (unshuffled) order.
image_folder = './val'
dataset = datasets.ImageFolder(image_folder, transform=transform)
val_loader = torch.utils.data.DataLoader(dataset, 
                                         batch_size=256, 
                                         shuffle=False, 
                                         num_workers=2, 
                                         pin_memory=True)
# Show the dataset summary as the cell output.
dataset

Dataset ImageFolder
    Number of datapoints: 50000
    Root location: ./val
    StandardTransform
Transform: Compose(
               Resize(size=256, interpolation=bilinear, max_size=None, antialias=None)
               CenterCrop(size=(224, 224))
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [12]:
# `pretrained=True` is deprecated since torchvision 0.13 (and slated for removal
# in 0.15); request the same ImageNet-1k weights explicitly via `weights=`.
model = models.alexnet(weights=models.AlexNet_Weights.IMAGENET1K_V1)
model.to(device)

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [13]:
# validate() returns (top-1 accuracy, top-5 accuracy, cross-entropy loss).
top1, top5, loss = validate(model, val_loader)
top1, top5, loss

(tensor(56.5440, device='cuda:0'),
 tensor(79.1080, device='cuda:0'),
 1.9101363607025146)

## supervised models from ipcl project

In [18]:
import torch

# this model is supervised in the usual way,
# except each image was presented 5 times more often (to make a fair
# comparison with IPCL models)
# The hub entry point returns the model together with a transform; the
# transform is printed here and used to rebuild the dataset in the next cell.
model, transform = torch.hub.load("harvard-visionlab/open_ipcl", 
                                  "alexnetgn_supervised_ref12_augset1_5x")
print(transform)
model.to(device)


Using cache found in /root/.cache/torch/hub/harvard-visionlab_open_ipcl_main


Compose(
    Resize(size=256, interpolation=bilinear, max_size=None, antialias=None)
    CenterCrop(size=(224, 224))
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)


alexnet_gn(
  (conv_block_1): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2), bias=False)
    (1): GroupNorm(32, 96, eps=1e-05, affine=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block_2): Sequential(
    (0): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
    (1): GroupNorm(32, 256, eps=1e-05, affine=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block_3): Sequential(
    (0): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): GroupNorm(32, 384, eps=1e-05, affine=True)
    (2): ReLU(inplace=True)
  )
  (conv_block_4): Sequential(
    (0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): GroupNorm(32, 384, eps=1e-05, affine=True)
    (2): ReLU(inplace=True)
  )
  (conv_blo

In [19]:
# Rebuild the dataset and loader so they use the `transform` that was returned
# together with the model loaded above.
image_folder = './val'
dataset = datasets.ImageFolder(image_folder, transform=transform)
val_loader = torch.utils.data.DataLoader(dataset, 
                                         batch_size=256, 
                                         shuffle=False, 
                                         num_workers=2, 
                                         pin_memory=True)
# Show the dataset summary as the cell output.
dataset

Dataset ImageFolder
    Number of datapoints: 50000
    Root location: ./val
    StandardTransform
Transform: Compose(
               Resize(size=256, interpolation=bilinear, max_size=None, antialias=None)
               CenterCrop(size=(224, 224))
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [20]:
# validate() returns (top-1 accuracy, top-5 accuracy, cross-entropy loss).
top1, top5, loss = validate(model, val_loader)
top1, top5, loss

(60.947998046875, 82.76799774169922, 1.677472384262085)

In [21]:
# and another version, trained with the usual 1x number of images
# (the hub entry point returns the model together with a transform, which the
# next cell uses to rebuild the dataset)
model, transform = torch.hub.load("harvard-visionlab/open_ipcl", 
                                  "alexnetgn_supervised_ref13_augset1_1x")
model.to(device)

Using cache found in /root/.cache/torch/hub/harvard-visionlab_open_ipcl_main
Downloading: "https://visionlab-pretrainedmodels.s3.amazonaws.com/project_instancenet/wusnet/alexnet_gn_supervised_final.pth.tar" to /root/.cache/torch/hub/checkpoints/alexnetgn_supervised_ref13_augset1_1x-1ea33cba82.pth.tar


  0%|          | 0.00/476M [00:00<?, ?B/s]

alexnet_gn(
  (conv_block_1): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2), bias=False)
    (1): GroupNorm(32, 96, eps=1e-05, affine=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block_2): Sequential(
    (0): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
    (1): GroupNorm(32, 256, eps=1e-05, affine=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block_3): Sequential(
    (0): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): GroupNorm(32, 384, eps=1e-05, affine=True)
    (2): ReLU(inplace=True)
  )
  (conv_block_4): Sequential(
    (0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): GroupNorm(32, 384, eps=1e-05, affine=True)
    (2): ReLU(inplace=True)
  )
  (conv_blo

In [22]:
# Rebuild the dataset and loader so they use the `transform` that was returned
# together with the model loaded above.
image_folder = './val'
dataset = datasets.ImageFolder(image_folder, transform=transform)
val_loader = torch.utils.data.DataLoader(dataset, 
                                         batch_size=256, 
                                         shuffle=False, 
                                         num_workers=2, 
                                         pin_memory=True)
# Show the dataset summary as the cell output.
dataset

Dataset ImageFolder
    Number of datapoints: 50000
    Root location: ./val
    StandardTransform
Transform: Compose(
               Resize(size=256, interpolation=bilinear, max_size=None, antialias=None)
               CenterCrop(size=(224, 224))
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [23]:
# validate() returns (top-1 accuracy, top-5 accuracy, cross-entropy loss).
top1, top5, loss = validate(model, val_loader)
top1, top5, loss

(60.593997955322266, 82.19400024414062, 1.7449316347503663)