In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import torch
import numpy as np
from PIL import Image
import tarfile
from torch.utils.data import Dataset,DataLoader
import os
from torchvision import transforms, models
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import xml.etree.ElementTree as ET
from tqdm import tqdm_notebook as tqdm


# Part 1

In [0]:
root_dir = "/content/drive/My Drive/deep_learning" #change to present working directory
labels = os.path.join(root_dir,"ILSVRC2012_bbox_val_v3.tgz") 
images = os.path.join(root_dir,"imagenet2500.tar")
synset = os.path.join(root_dir, "synset_words.txt")
image_folder = "imagespart"
label_folder = "val"


def open_tar(fname):
    try:
        if fname.endswith("tgz"):
            tar = tarfile.open(fname,"r:gz")
        else:
            tar = tarfile.open(fname,"r:tar")
        for item in tar:
            tar.extract(item)
    except FileNotFoundError:
        print("File not found",fname)


class imagenet_dataset(Dataset):
    def __init__(self, image_folder, label_folder, synset, transform=None):
        self.labels = [name for name in os.listdir(label_folder)]
        self.images = [name for name in os.listdir(image_folder)]
        self.i_dir = image_folder
        self.l_dir = label_folder
        self.transform = transform
        self.synset = synset
    
    def get_mapping(self,xml_string):
        tree = ET.parse(xml_string)
        all_elements = tree.findall("*name")

        return all_elements[0].text

    def __len__(self):
        
        return len(self.images)
    
    def get_labels(self):
        to_dict = open(self.synset,"r").read().split("\n")
        label_dict = {}
        for i in to_dict:
            tup = i.split(" ",1)
            if len(tup) > 1:
                label_dict.update({tup[0]:tup[1]})

        return label_dict

    def __getitem__(self, idx):
        label = self.labels[idx]
        
        image_name = self.images[idx]
        image_path = os.path.join(self.i_dir,image_name)
        image = Image.open(image_path).convert("RGB")
        label_path = os.path.join(self.l_dir,image_name.replace("JPEG","xml"))
        label = self.get_labels()[self.get_mapping(label_path)]
        label_idx = list(self.get_labels().values()).index(label)
        if self.transform is not None:
            image = self.transform(image)
        return (image,label_idx)

class rescale(object):
    def __init__(self, size):
        self.size = size

    def __call__(self, sample):
        image = sample
        w = image.size[0]
        h = image.size[1]
        if h > w:
            new_h, new_w = self.size * h / w, self.size
        else:
            new_h, new_w = self.size, self.size * w / h
        new_h, new_w = int(new_h), int(new_w)
        rsz = transforms.Resize((new_h,new_w))

        return rsz(image)


def eval(model,device,dataloader):
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for data in tqdm(dataloader):
            img, target = data[0].to(device), data[1].to(device)
            output = model(img)
            _, pred = torch.max(output, 1)  # get the index of the max log-probability
            total += target.size(0)
            correct += ((target==pred).sum())
    print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(correct, len(dataloader.dataset), 
                                                       100. * correct / len(dataloader.dataset)))



In [0]:
def main():
    if not os.path.isdir(image_folder):
        open_tar(images)
    if not os.path.isdir(label_folder):
        open_tar(labels)
    trans = transforms.Compose([rescale(224),
                                transforms.CenterCrop(224),
                                transforms.ToTensor()])
    transformed_dataset = imagenet_dataset(image_folder,label_folder,synset,transform=trans)

    device=torch.device('cuda')
    model = torch.hub.load('pytorch/vision:v0.5.0', 'shufflenet_v2_x1_0', pretrained=True)
    model.to(device)
    dataloader = DataLoader(transformed_dataset, batch_size=32, shuffle=True)

    print("Without Normalization")
    eval(model,device,dataloader)
    
    print("===============================")
    print("With Normalization")
    
    trans_norm = transforms.Compose([rescale(224),
                                     transforms.CenterCrop(224),
                                     transforms.ToTensor(),
                                     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
    norm_dataset = imagenet_dataset(image_folder,label_folder,synset,transform=trans_norm)

    norm_dataloader = DataLoader(norm_dataset, batch_size=32, shuffle=True)
    eval(model,device,norm_dataloader)

if __name__ == "__main__":
    main()

Without Normalization


Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.5.0


HBox(children=(IntProgress(value=0, max=79), HTML(value='')))



Test set: Accuracy: 907/2500 (36%)

With Normalization


HBox(children=(IntProgress(value=0, max=79), HTML(value='')))



Test set: Accuracy: 1733/2500 (69%)



# Part 2

In [68]:

class ToTensor(object):
    def __call__(self,pic):
        arr_img = np.array(pic)
        new_img = np.divide(arr_img,255.0).transpose((2,0,1))
        ret_val = torch.from_numpy(new_img)
        return ret_val.type(torch.FloatTensor)


class FiveCrop(object):
    def __init__(self,size):
        if type(size) is not tuple:
            self.size = (int(size), int(size))
        else:
            assert len(size) == 2, "Size must be a tuple of length 2"

    def __call__(self,image):
        img_h, img_w = image.size
        crop_h, crop_w = self.size
        crop1 = image.crop((0, 0, crop_w, crop_h))
        crop2 = image.crop((img_w - crop_w, 0, img_w, crop_h))
        crop3 = image.crop((0, img_h - crop_h, crop_w, img_h))
        crop4 = image.crop((img_w - crop_w, img_h - crop_h,
                   img_w, img_h))
        center_crop = transforms.CenterCrop(crop_h)(image)
        return (crop1,crop2,crop3,crop4,center_crop)


class Normalize(object):
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std
    
    def __call__(self, tensor):
        tensor = tensor.clone()
        std = torch.as_tensor(self.std,  dtype=tensor.dtype, device=tensor.device)
        mean = torch.as_tensor(self.mean,  dtype=tensor.dtype, device=tensor.device)
        return tensor.sub(mean.view(-1,1,1)).div(std.view(-1,1,1))

def five_eval(model,device,dataloader):
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for data in tqdm(dataloader):
            img, target = data[0].to(device), data[1].to(device)
            bs, ncrops, c, h, w = img.size() #32,5,3,244,244
            output = model(img.view(-1, c, h, w)) #sums over the batches, 160,3,244,244
            avg = output.view(bs, ncrops, -1).mean(1) #average over index 1 (crops)
            _, pred = torch.max(avg, 1)  # get the index of the max log-probability
            total += target.size(0)
            correct += ((target==pred).sum())
            # break
    print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(correct, len(dataloader.dataset), 
                                                       100. * correct / len(dataloader.dataset)))


def main():
    if not os.path.isdir(image_folder):
        open_tar(images)
    if not os.path.isdir(label_folder):
        open_tar(labels)
    normalizer=Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    
    #returns a 4D tensor of [batch, image]
    five_trans = transforms.Compose([rescale(280),
                                FiveCrop(224),
                                transforms.Lambda(lambda crops: torch.stack([normalizer(ToTensor()(crop)) for crop in crops])),
                                ])
    five_trans_no_norm = transforms.Compose([rescale(280),
                                FiveCrop(224),
                                transforms.Lambda(lambda crops: torch.stack([ToTensor()(crop) for crop in crops])),
                                ])
    five_dataset = imagenet_dataset(image_folder,label_folder,synset,transform=five_trans)
    five_dataset_no = imagenet_dataset(image_folder, label_folder, synset, transform=five_trans_no_norm)
    device=torch.device('cuda')
    dataloader = DataLoader(five_dataset, batch_size=32, shuffle=True)
    dataloader_no = DataLoader(five_dataset_no, batch_size=32, shuffle=True)
    model = torch.hub.load('pytorch/vision:v0.5.0', 'shufflenet_v2_x1_0', pretrained=True)
    model.cuda()
    print("With normalization")
    five_eval(model,device,dataloader)

    print("========================")

    print("Without normalization")
    five_eval(model,device,dataloader_no)

if __name__ == "__main__":
    main()


With normalization


Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.5.0


HBox(children=(IntProgress(value=0, max=79), HTML(value='')))


Test set: Accuracy: 1742/2500 (70%)

Without normalization


HBox(children=(IntProgress(value=0, max=79), HTML(value='')))


Test set: Accuracy: 764/2500 (31%)



# Part 3

In [69]:
def eval2(model,device,dataloader):
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for data in tqdm(dataloader):
            img, target = data[0].to(device), data[1].to(device)
            output = model(img)
            _, pred = torch.max(output, 1)  # get the index of the max log-probability
            total += target.size(0)
            correct += ((target==pred).sum())
            # break
    print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(correct, len(dataloader.dataset), 
                                                       100. * correct / len(dataloader.dataset)))
    
def main():
    trans = transforms.Compose([rescale(330),
                                transforms.CenterCrop(330),
                                ToTensor(),
                                Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
    transformed_dataset = imagenet_dataset(image_folder,label_folder,synset,transform=trans)
    print("With shufflenet")
    device=torch.device('cuda')
    model = torch.hub.load('pytorch/vision:v0.5.0', 'shufflenet_v2_x1_0', pretrained=True)
    model.to(device)
    dataloader = DataLoader(transformed_dataset, batch_size=32, shuffle=True)

    eval(model,device,dataloader)
    print("With mobilenet")
    model2 = torch.hub.load('pytorch/vision:v0.5.0', 'mobilenet_v2', pretrained=True)
    model2.to(device)
    eval(model2,device,dataloader)

if __name__=="__main__":
    main()

With shufflenet


Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.5.0


HBox(children=(IntProgress(value=0, max=79), HTML(value='')))


Test set: Accuracy: 1715/2500 (69%)

With mobilenet


Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.5.0
Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/checkpoints/mobilenet_v2-b0353104.pth


HBox(children=(IntProgress(value=0, max=14212972), HTML(value='')))

HBox(children=(IntProgress(value=0, max=79), HTML(value='')))


Test set: Accuracy: 1813/2500 (73%)

