<a href="https://colab.research.google.com/github/charleswongzx/pytorch-data-augmentation/blob/master/pytorch_data_augmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Augmentation Exercise

## Important Imports


In [0]:
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision import datasets, transforms
from tqdm import tqdm
import torch.nn.functional as F 
import torch.optim as optim
import numpy as np
from torch.utils.data import SubsetRandomSampler, DataLoader
from torch.utils.data.dataset import Dataset

from PIL import Image
import matplotlib.pyplot as plt

from glob import glob
import xml.etree.ElementTree as ET
import tarfile

%matplotlib inline

## Mounting Gdrive


In [2]:
from google.colab import drive

drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


## Loading Data

In [0]:
data_folder = 'gdrive/My Drive/Deep Learning/ILSVRC2012/'
img_folder = data_folder+'images/'
label_folder = data_folder+'labels/'

# tar = tarfile.open(data_folder+'imagenet2500.tar')
# tar.extractall(path=img_folder)
# tar.close()

In [0]:
# tar2 = tarfile.open(data_folder+'ILSVRC2012_bbox_val_v3.tgz')
# tar2.extractall(path=label_folder)
# tar2.close()

## Parsing Labels

In [0]:
def parsesynsetwords(filen):
    synsetstoclassdescriptions = {}
    indicestosynsets = {}
    synsetstoindices = {}
    ct = -1
    with open(filen) as f:
        for line in f:
            if (len(line) > 5):
                z = line.strip().split()
                descr = ''
                for i in range(1, len(z)):
                    descr = descr + ' ' + z[i]

                ct += 1
                indicestosynsets[ct] = z[0]
                synsetstoindices[z[0]] = ct
                synsetstoclassdescriptions[z[0]] = descr[1:]
    return indicestosynsets, synsetstoindices, synsetstoclassdescriptions
  
indicestosynsets, synsetstoindices, synsetstoclassdescriptions = parsesynsetwords(data_folder + 'synset_words.txt')
  
def parse_labels(label_files):
  images = []
  labels = {}
  for nm in label_files:
    tree = ET.parse(nm)
    root = tree.getroot()
    
    filename = root.findall('filename')[0]
    for obj in root.findall('object'):
        for name in obj.findall('name'):
          images.append(filename.text)
          labels[filename.text] = name.text
          
  return images, labels

In [0]:
class ILSVRCDataset(Dataset):
  'Characterizes a dataset for PyTorch'
  def __init__(self, image_dir, list_IDs, labels, transform=None):
        'Initialization'
        self.image_dir = image_dir
        self.labels = labels
        self.list_IDs = list_IDs
        self.transform = transform

  def __len__(self):
        'Denotes the total number of samples'
        return len(self.list_IDs)

  def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        ID = self.list_IDs[index]

        # Load data and get label
        path = self.image_dir + 'imagespart/'+ ID + '.JPEG'
        X = Image.open(path)
        X = X.convert('RGB')
        y = self.labels[ID]
        y = synsetstoindices[y]
        
        if self.transform:
          X = self.transform(X)

        return X, y

In [0]:
def load_data(img_folder, image_list, labels, transforms, batch_size):
  dataset = ILSVRCDataset(img_folder, image_list, labels, transforms)
  loader = DataLoader(dataset, batch_size=batch_size)
  return loader

In [0]:
# dataset_size = len(norm_train_dataset)
# indices = [i for i in range(dataset_size)]

# np.random.shuffle(indices)

# split = int(np.floor(0.2 * dataset_size))
# train_indices, val_indices = indices[split:], indices[:split]

# train_sampler = SubsetRandomSampler(train_indices)
# valid_sampler = SubsetRandomSampler(val_indices)

# train_loader = torch.utils.data.DataLoader(fashion_mnist_data, batch_size=batch_train, sampler=train_sampler)
# valid_loader = torch.utils.data.DataLoader(fashion_mnist_data, batch_size=batch_val, sampler=valid_sampler)

## Test Function

In [0]:
def test(model, device, test_loader, fivecrop=False):
    model.eval()
    
    running_loss = 0
    running_correct = 0
    
    clf_matrix = torch.zeros(1000, 1000)
    
    criterion = torch.nn.CrossEntropyLoss(reduction="sum")

    with torch.no_grad():
        for data, target in tqdm(test_loader):
            target = target.long()
            data, target = data.to(device), target.to(device)
            if fivecrop:
              bs, ncrops, c, h, w = data.size()
              output = model(data.view(-1, c, h, w))
              output = output.view(bs, ncrops, -1).mean(1)
            else:
              output = model(data)

            running_loss += criterion(output, target)
            pred = output.argmax(dim=1, keepdim=True) 
            running_correct += pred.eq(target.view_as(pred)).sum().item()
            
            for t, p in zip(target.view(-1), pred.view(-1)):
                clf_matrix[t.long(), p.long()] += 1
            
            del data, target, output
            torch.cuda.empty_cache()
                
    num_samples = float(len(test_loader.dataset))
    avg_test_loss = running_loss.item()/num_samples

    print('test_loss: {:.4f}, test_accuracy: {}/{} ({:.3f})\n'.format(
        avg_test_loss, running_correct, num_samples,
        running_correct / num_samples))
    
    clf_report = clf_matrix.diag()/clf_matrix.sum(1)
    
    return avg_test_loss, running_correct/num_samples, clf_report

## Task 1

In [0]:
def task1():
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  
  label_files = sorted(glob(label_folder+'val/'+'*.xml'))[0:250]
  print('{} files loaded.'.format(len(label_files)))
        
  image_list, labels = parse_labels(label_files)
  print('Labels parsed.')
  
  transformations = transforms.Compose([
      transforms.Resize(224), 
      transforms.CenterCrop(224),
      transforms.ToTensor()])

  norm_transformations = transforms.Compose([
      transforms.Resize(224), 
      transforms.CenterCrop(224),
      transforms.ToTensor(),
      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

  densenet = models.densenet161(pretrained=True).to(device)
  print('Model loaded.')

  print("Testing without normalisation")
  loader = load_data(img_folder, image_list, labels, transformations, batch_size=64)
  test(densenet, device, loader)
  torch.cuda.empty_cache()

  print("Testing with normalisation")
  loader = load_data(img_folder, image_list, labels, norm_transformations, batch_size=64)
  test(densenet, device, loader)
  torch.cuda.empty_cache()

In [11]:
task1()

250 files loaded.
Labels parsed.


  nn.init.kaiming_normal(m.weight.data)
  0%|          | 0/6 [00:00<?, ?it/s]

Model loaded.
Testing without normalisation


100%|██████████| 6/6 [00:11<00:00,  1.94s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

test_loss: 1.5121, test_accuracy: 252/376.0 (0.670)

Testing with normalisation


100%|██████████| 6/6 [00:11<00:00,  1.94s/it]

test_loss: 1.2101, test_accuracy: 259/376.0 (0.689)






## Task 2

In [0]:
def task2():
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  
  label_files = sorted(glob(label_folder+'val/'+'*.xml'))[0:250]
  print('{} files loaded.'.format(len(label_files)))
        
  image_list, labels = parse_labels(label_files)
  print('Labels parsed.')
  
  transformations = transforms.Compose([
      transforms.Resize(280), 
      transforms.FiveCrop(224),
      transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops]))])
  norm_transformations = transforms.Compose([
      transforms.Resize(280), 
      transforms.FiveCrop(224),
      transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
      transforms.Lambda(lambda crops: torch.stack([transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(crop) for crop in crops]))])

  densenet = models.densenet161(pretrained=True).to(device)
  print('Model loaded.')
  
  print("Testing performance on fivecrop without normalisation")
  loader = load_data(img_folder, image_list, labels, transformations, batch_size=64)
  test(densenet, device, loader, fivecrop=True)
  torch.cuda.empty_cache()

  print("Testing performance on fivecrop with normalisation")
  loader = load_data(img_folder, image_list, labels, norm_transformations, batch_size=64)
  test(densenet, device, loader, fivecrop=True)
  torch.cuda.empty_cache()

In [13]:
task2()

250 files loaded.
Labels parsed.


  nn.init.kaiming_normal(m.weight.data)
  0%|          | 0/6 [00:00<?, ?it/s]

Model loaded.
Testing performance on fivecrop without normalisation


100%|██████████| 6/6 [00:39<00:00,  6.49s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

test_loss: 1.2182, test_accuracy: 258/376.0 (0.686)

Testing performance on fivecrop with normalisation


100%|██████████| 6/6 [00:39<00:00,  6.55s/it]

test_loss: 1.0085, test_accuracy: 267/376.0 (0.710)






## Task 3

In [0]:
def task3(model):
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  
  label_files = sorted(glob(label_folder+'val/'+'*.xml'))[0:250]
  print('{} files loaded.'.format(len(label_files)))
        
  image_list, labels = parse_labels(label_files)
  print('Labels parsed.')
  
  transformations = transforms.Compose([
      transforms.Resize(330), 
      transforms.FiveCrop(330),
      transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops]))])

  norm_transformations = transforms.Compose([
      transforms.Resize(330), 
      transforms.FiveCrop(330),
      transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
      transforms.Lambda(lambda crops: torch.stack([transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(crop) for crop in crops]))])
  
  model = None
  if model == 'densenet':
    model = models.densenet161(pretrained=True).to(device)
  else:
    model = models.resnet50(pretrained=True).to(device)
    
  print('{} loaded.'.format(model))

  print("Testing performance on fivecrop without normalisation")
  loader = load_data(img_folder, image_list, labels, transformations, batch_size=64)
  test(model, device, loader, fivecrop=True)
  torch.cuda.empty_cache()

  print("Testing performance on fivecrop with normalisation")
  loader = load_data(img_folder, image_list, labels, norm_transformations, batch_size=64)
  test(model, device, loader, fivecrop=True)
  torch.cuda.empty_cache()

In [15]:
task3('densenet')
task3('resnet50')

250 files loaded.
Labels parsed.


  0%|          | 0/6 [00:00<?, ?it/s]

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=F




RuntimeError: ignored