In [1]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models import vgg16, resnet152, alexnet
import torch

import os
import json
from PIL import Image
import numpy as np

In [2]:
# Run configuration shared by the cells below.
args = dict(batch_size=16)

# Use the first CUDA device when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Using device:', device)

Using device: cpu


In [3]:
class ImageNetDataset(Dataset):
    """Image-folder dataset whose labels come from a JSON annotations file.

    Every entry of ``image_dir`` is treated as one sample. The label of a
    sample is looked up in the annotations mapping under the file name up to
    its first ``.`` and converted with ``int()``.

    Args:
        image_dir: Directory containing the image files.
        annotations_file: Path to a JSON file mapping file-name stems to
            class labels (any value ``int()`` accepts).
        transformations: Optional callable applied to the RGB PIL image.
        device: Device that tensor samples are moved to before being returned.
    """

    def __init__(self, image_dir, annotations_file, transformations = None, device = 'cpu'):
        self.image_dir = image_dir
        # os.listdir returns entries in arbitrary order; sorting makes an index
        # refer to the same file on every run.
        self.images = sorted(os.listdir(self.image_dir))
        # Context manager closes the annotations file instead of leaking the handle.
        with open(annotations_file) as annotations_fp:
            self.annotations = json.load(annotations_fp)
        self.transformations = transformations
        self.device = device

    def __len__(self):
        """Return the number of files found in ``image_dir``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return it as an ``(image, label)`` pair.

        The image is converted to RGB, passed through ``transformations`` when
        one was given, and moved to ``device`` if the result is a tensor.
        ``label`` is a plain Python ``int``.

        Raises:
            KeyError: If the file-name stem has no entry in the annotations.
        """
        file_name = self.images[idx]
        img_path = os.path.join(self.image_dir, file_name)
        # convert() returns an independent image, so the file can be closed right away.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert('RGB')
        label = int(self.annotations[file_name.split('.')[0]])
        if self.transformations:
            img = self.transformations(img)
        # Without a tensor-producing transform the sample is still a PIL image,
        # which has no .to(); only tensors are moved to the target device.
        if isinstance(img, torch.Tensor):
            img = img.to(self.device)
        return img, label


# Preprocessing applied to every image: Resize(256), CenterCrop(224),
# conversion to a tensor, then per-channel normalisation with the mean / std below.
preprocessing = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Dataset over the val_10p image folder; samples are moved to `device` by the dataset itself.
imagenetValDataset = ImageNetDataset(
    './data/imagenet/ILSVRC/Data/CLS-LOC/val_10p/images/',
    './data/imagenet/ILSVRC/Data/CLS-LOC/val_10p/imagnet_classes.json',
    transformations=preprocessing,
    device=device,
)

# Shuffled batches of args['batch_size'] samples.
imagenetValDataloader = DataLoader(
    imagenetValDataset,
    shuffle=True,
    batch_size=args['batch_size'],
)


In [4]:
def compute_accuracy(output, target, topk=(1,)):
    """Count the top-k correct predictions in a batch for each k in ``topk``.

    The returned values are raw counts of correctly classified samples, not
    percentages; the caller is expected to divide by the number of samples.

    Args:
        output: Score tensor of shape (batch, num_classes).
        target: Tensor of shape (batch,) holding the true class indices.
        topk: Iterable of k values to evaluate.

    Returns:
        A list with one 0-dim float tensor per entry of ``topk``, each holding
        the number of samples whose target is among the k highest scores.
    """
    # topk() cannot return more entries than there are classes. For
    # k >= num_classes every sample is trivially a hit, which the row slice
    # below reproduces once maxk is clamped.
    maxk = min(max(topk), output.size(1))
    # eq() needs both operands on the same device.
    target = target.to(output.device)

    _, pred = output.topk(maxk, 1, True, True)
    # (batch, maxk) -> (maxk, batch): row r holds every sample's rank-r prediction.
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k)
    return res

In [5]:
# Load the pretrained VGG-16 and place it on the same device the dataset moves
# its images to; otherwise the forward pass fails with a device mismatch
# whenever CUDA is available.
# NOTE(review): `pretrained=True` is kept as written; newer torchvision
# releases prefer the `weights=` argument — confirm against the installed version.
model = vgg16(pretrained = True).to(device)
# eval() puts the model in evaluation mode and returns it, so the cell still
# displays the architecture.
model.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [6]:
from tqdm import tqdm

# Running totals of correct top-1 / top-5 predictions and of samples seen.
accuracy_top1 = 0
accuracy_top5 = 0
count = 0

# Single source for the progress message used inside and after the loop.
report_fmt = "Evaluated {}/{}: Top 1 Accuracy: {:.3f} Top 5 Accuracy: {:.3f}"

# Inference only: with autograd disabled no graph is built for the forward
# pass, which saves memory and time (detaching afterwards does not).
with torch.no_grad():
    for batch_idx, (img, label) in enumerate(tqdm(imagenetValDataloader)):
        out = model(img)
        # The dataset returns plain int labels, which are batched on the CPU;
        # move them next to the logits before comparing.
        label = label.to(out.device)
        count += out.shape[0]
        top1, top5 = compute_accuracy(out, label, topk = (1, 5))
        accuracy_top1 += top1
        accuracy_top5 += top5
        # Report the running accuracy every 100 batches.
        if(batch_idx % 100 == 0):
            print(report_fmt.format(str(count), str(len(imagenetValDataset)),
                                    accuracy_top1*100.0/count, accuracy_top5*100.0/count))

# Final accuracy over everything that was evaluated.
print(report_fmt.format(str(count), str(len(imagenetValDataset)),
                        accuracy_top1*100.0/count, accuracy_top5*100.0/count))
        
        

  0%|▍                                                                                                                        | 1/313 [00:03<17:43,  3.41s/it]

Evaluated 16/5000: Top 1 Accuracy: 68.750 Top 5 Accuracy: 75.000


 16%|██████████████████▊                                                                                                     | 49/313 [02:44<14:46,  3.36s/it]


KeyboardInterrupt: 