In [1]:
%pylab inline
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data.dataloader as dataloader
import torch.optim as optim

from torch.utils.data import TensorDataset
from torch.autograd import Variable
from torchvision import transforms
from torchvision.datasets import MNIST, CIFAR10
from tqdm import tqdm
from time import sleep
import sys, os
import pickle
import cv2

SEED = 1

# CUDA?
cuda = torch.cuda.is_available()

# For reproducibility
torch.manual_seed(SEED)

if cuda:
    torch.cuda.manual_seed(SEED)

Populating the interactive namespace from numpy and matplotlib


In [2]:
# Create DataLoader
# transform = transforms.Compose(
#     [transforms.ToTensor(),
#      transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))])
transform = transforms.ToTensor()

train = CIFAR10('./data', train=True, download=True, transform=transform)
test = CIFAR10('./data', train=False, download=True, transform=transform)

dataloader_args = dict(batch_size=256,num_workers=4, 
                       pin_memory=True) if cuda else dict(batch_size=64)
train_loader = dataloader.DataLoader(train, shuffle=True, **dataloader_args)
test_loader = dataloader.DataLoader(test, shuffle=False, **dataloader_args)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
num_cls = len(classes)
p = next(iter(train_loader))[0].shape[-1]
print('class number: {}'.format(num_cls))
print('image size: {}'.format(p))

Files already downloaded and verified
Files already downloaded and verified
class number: 10
image size: 32


In [3]:
class Model(nn.Module):
    def __init__(self, p):
        super(Model, self).__init__()

        self.conv1 = nn.Sequential(
            # Conv Layer block 1
            nn.Conv2d(in_channels=3, out_channels=p, kernel_size=3, padding=1),
            nn.BatchNorm2d(p),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=p, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True)
        )
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Sequential(
            # Conv Layer block 2
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True)
        )
        self.pool2 = nn.Sequential(
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(p=0.05)
        )

        self.conv3 = nn.Sequential(
            # Conv Layer block 3
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True)
        )
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc_layer1 = nn.Sequential(
            nn.Dropout(p=0.1),
            nn.Linear(4096, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True)
        )
        self.fc_layer2 = nn.Sequential(
            nn.Dropout(p=0.1),
            nn.Linear(512, 10)
        )


    def forward(self, x):
        """Perform forward."""
        
        # conv layers
        x_conv1 = self.conv1(x)
        x_conv2 = self.conv2(self.pool1(x_conv1))
        x_conv3 = self.conv3(self.pool2(x_conv2))
        x1 = self.pool3(x_conv3)
        
        # flatten
        x2 = x1.view(x1.size(0), -1)
        
        # fc layer
        x3 = self.fc_layer1(x2)
        x4 = self.fc_layer2(x3)

        return x4, x3, x_conv3, x_conv2, x_conv1
      
model = Model(p)
if cuda:
    model.cuda()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [4]:
save_path = 'cache/models'

In [13]:
# Load pre-trained model
bst_mdl = save_path+'/epoch_7.pth'
model.load_state_dict(torch.load(bst_mdl)['model'])

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

## Translation invariance test

In [6]:
# move input data to upper left, pad with 0
# move input to center, pad with 0 (as the control group, since resolution drops) ==> also testing scale
_loader = test_loader # train_loader
hit = 0
hit_padded = 0
hit_padded_ctrl = 0
total = 0
with torch.no_grad():
    with tqdm(len(_loader), file=sys.stdout) as pbar:
        for batch_idx, (data, target) in enumerate(_loader):
            padded_data = []
            padded_data_ctrl = []
            for i in range(data.shape[0]):
                cur_data = data[i].permute(1,2,0)
                data_img = cv2.resize(cur_data.numpy(), (p//2, p//2))
                padded_img = np.zeros_like(cur_data)
                padded_img[:p//2, :p//2] = data_img
                padded_data.append(torch.from_numpy(padded_img.transpose(2, 0, 1)[None, ...]))
                padded_img_ctrl = np.zeros_like(cur_data)
                padded_img_ctrl[p//4:3*p//4, p//4:3*p//4] = data_img
                padded_data_ctrl.append(torch.from_numpy(padded_img_ctrl.transpose(2, 0, 1)[None, ...]))
                
#                 plt.imshow(cur_data)
#                 plt.show()
#                 plt.imshow(padded_img)
#                 plt.show()
#                 plt.imshow(padded_img_ctrl)
#                 plt.show()
            
            padded_data = torch.cat(padded_data)
            padded_data_ctrl = torch.cat(padded_data_ctrl)
            if cuda:
                data, padded_data, padded_data_ctrl, target = data.cuda(), \
                padded_data.cuda(), padded_data_ctrl.cuda(), target.cuda()
            pred = model(data)[0].max(1)[1]
            pred_padded = model(padded_data)[0].max(1)[1]
            pred_padded_ctrl = model(padded_data_ctrl)[0].max(1)[1]
            
            hit += pred.eq(target).cpu().sum()
            hit_padded += pred_padded.eq(target).cpu().sum()
            hit_padded_ctrl += pred_padded_ctrl.eq(target).cpu().sum()
            total += len(target)

acc = hit.type(dtype=torch.float64)/total
acc_padded = hit_padded.type(dtype=torch.float64)/total
acc_padded_ctrl = hit_padded_ctrl.type(dtype=torch.float64)/total

print('Test accuracy:{}, padded accuracy:{}, padded control accuracy:{}'.format(acc, acc_padded, acc_padded_ctrl))

0it [00:00, ?it/s]
Test accuracy:0.8204, padded accuracy:0.2043, padded control accuracy:0.2025


In [11]:
# training the model using images being put to upper left
# test using upper-left, center, and lower-right

##### 
# re-run model definition block if want to train from scratch
#####

EPOCHS = 15
losses = []

best_acc = 0
for epoch in range(EPOCHS):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        padded_data = []
        for i in range(data.shape[0]):
            cur_data = data[i].permute(1,2,0)
            data_img = cv2.resize(cur_data.numpy(), (p//2, p//2))
            padded_img = np.zeros_like(cur_data)
            padded_img[:p//2, :p//2] = data_img
            padded_data.append(torch.from_numpy(padded_img.transpose(2, 0, 1)[None, ...]))
        padded_data = torch.cat(padded_data)
        if cuda:
            padded_data, target = padded_data.cuda(), target.cuda()
        
        optimizer.zero_grad()
        # Predict
        y_pred = model(padded_data)[0]

        # Calculate loss
        loss = F.cross_entropy(y_pred, target)
        losses.append(loss.cpu().data)      
        # Backpropagation
        loss.backward()
        optimizer.step()
        
        
        # Display
        if batch_idx % 100 == 1:
            print('\r Train Epoch: {}/{} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                  epoch+1,
                  EPOCHS,
                  batch_idx * len(data),
                  len(train_loader.dataset),
                  100. * batch_idx / len(train_loader), 
                  loss.cpu().data), 
                  end='')
    # Eval
    model.eval()
    hit_ul = 0
    hit_ctr = 0
    hit_lr = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(test_loader):
            padded_data_ul = []
            padded_data_ctr = []
            padded_data_lr = []
            for i in range(data.shape[0]):
                cur_data = data[i].permute(1,2,0)
                data_img = cv2.resize(cur_data.numpy(), (p//2, p//2))
                padded_img_ul = np.zeros_like(cur_data)
                padded_img_ul[:p//2, :p//2] = data_img
                padded_data_ul.append(torch.from_numpy(padded_img_ul.transpose(2, 0, 1)[None, ...]))
                padded_img_ctr = np.zeros_like(cur_data)
                padded_img_ctr[p//4:3*p//4, p//4:3*p//4] = data_img
                padded_data_ctr.append(torch.from_numpy(padded_img_ctr.transpose(2, 0, 1)[None, ...]))
                padded_img_lr = np.zeros_like(cur_data)
                padded_img_lr[p//2:, p//2:] = data_img
                padded_data_lr.append(torch.from_numpy(padded_img_lr.transpose(2, 0, 1)[None, ...]))
                
#                 plt.imshow(padded_img_ul)
#                 plt.show()
#                 plt.imshow(padded_img_ctr)
#                 plt.show()
#                 plt.imshow(padded_img_lr)
#                 plt.show()
#                 import pdb; pdb.set_trace()

            padded_data_ul = torch.cat(padded_data_ul)
            padded_data_ctr = torch.cat(padded_data_ctr)
            padded_data_lr = torch.cat(padded_data_lr)
            if cuda:
                padded_data_ul, padded_data_ctr, padded_data_lr, target = \
                padded_data_ul.cuda(), padded_data_ctr.cuda(), padded_data_lr.cuda(), target.cuda()
                
            pred_ul = model(padded_data_ul)[0].max(1)[1]
            pred_ctr = model(padded_data_ctr)[0].max(1)[1]
            pred_lr = model(padded_data_lr)[0].max(1)[1]
            
            hit_ul += pred_ul.eq(target).cpu().sum()
            hit_ctr += pred_ctr.eq(target).cpu().sum()
            hit_lr += pred_lr.eq(target).cpu().sum()
            total += len(target)
            
    accuracy_ul = hit_ul.type(dtype=torch.float64)/total
    accuracy_ctr = hit_ctr.type(dtype=torch.float64)/total
    accuracy_lr = hit_lr.type(dtype=torch.float64)/total

            
    # save best
    if accuracy_ul > best_acc:
        best_acc = accuracy_ul
        torch.save({'epoch': epoch,
                  'model': model.state_dict(),
                  'optimizer': optimizer.state_dict()
                 }, '{}/padded_epoch_{}.pth'.format(save_path, epoch))
        print('\r Best model saved.\r')
      
    print('\r Train Epoch: {}/{} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t Upper-Left Accuracy: {:.4f}% \t \
            Center Accuracy: {:.4f}% \t Lower-right Accuracy: {:.4f}%'.format(
        epoch+1,
        EPOCHS,
        len(train_loader.dataset), 
        len(train_loader.dataset),
        100. * batch_idx / len(train_loader), 
        loss.cpu().data,
        accuracy_ul*100,
        accuracy_ctr*100,
        accuracy_lr*100,
        end=''))



## Rotation invariance test

In [20]:
# test using roatated images (90/180/270 degrees)

_loader = test_loader # train_loader
hit = 0
hit_90 = 0
hit_180 = 0
hit_270 = 0
total = 0
with torch.no_grad():
    with tqdm(len(_loader), file=sys.stdout) as pbar:
        for batch_idx, (data, target) in enumerate(_loader):
            data_90 = data.transpose(2, 3).flip(2)
            data_180 = data.flip(2).flip(3)
            data_270 = data.transpose(2, 3).flip(3)
            
#             plt.imshow(data[0].numpy().transpose(1,2,0))
#             plt.show()
#             plt.imshow(data_90[0].numpy().transpose(1,2,0))
#             plt.show()
#             plt.imshow(data_180[0].numpy().transpose(1,2,0))
#             plt.show()
#             plt.imshow(data_270[0].numpy().transpose(1,2,0))
#             plt.show()
#             import pdb; pdb.set_trace()
            
            if cuda:
                data, data_90, data_180, data_270, target = data.cuda(), \
                data_90.cuda(), data_180.cuda(),data_270.cuda(), target.cuda()
            pred = model(data)[0].max(1)[1]
            pred_90 = model(data_90)[0].max(1)[1]
            pred_180 = model(data_180)[0].max(1)[1]
            pred_270 = model(data_270)[0].max(1)[1]
            
            hit += pred.eq(target).cpu().sum()
            hit_90 += pred_90.eq(target).cpu().sum()
            hit_180 += pred_180.eq(target).cpu().sum()
            hit_270 += pred_270.eq(target).cpu().sum()
            total += len(target)

acc = hit.type(dtype=torch.float64)/total
acc_90 = hit_90.type(dtype=torch.float64)/total
acc_180 = hit_180.type(dtype=torch.float64)/total
acc_270 = hit_270.type(dtype=torch.float64)/total

print('Test accuracy:{}, rotate 90 accuracy:{}, rotate 180 accuracy:{}, rotate 270 accuracy:{}'.format(
    acc, acc_90, acc_180, acc_270))

0it [00:00, ?it/s]
Test accuracy:0.8194, rotate 90 accuracy:0.3076, rotate 180 accuracy:0.3185, rotate 270 accuracy:0.2925


## Scale invariance test

In [22]:
# zoom original images to 64*64, and use the center part

_loader = test_loader # train_loader
hit = 0
hit_crop = 0
total = 0
with torch.no_grad():
    with tqdm(len(_loader), file=sys.stdout) as pbar:
        for batch_idx, (data, target) in enumerate(_loader):
            crop_data = []
            for i in range(data.shape[0]):
                cur_data = data[i].permute(1,2,0)
                data_img = cv2.resize(cur_data.numpy(), (int(p*2), int(p*2)))
                crop_img = data_img[p//2:3*p//2, p//2:3*p//2]
                crop_data.append(torch.from_numpy(crop_img.transpose(2, 0, 1)[None, ...]))
                
#                 plt.imshow(cur_data)
#                 plt.show()
#                 plt.imshow(crop_img)
#                 plt.show()
#                 import pdb; pdb.set_trace()
            
            crop_data = torch.cat(crop_data)
            if cuda:
                data, crop_data, target = data.cuda(), crop_data.cuda(), target.cuda()
            pred = model(data)[0].max(1)[1]
            pred_crop = model(crop_data)[0].max(1)[1]
            
            hit += pred.eq(target).cpu().sum()
            hit_crop += pred_crop.eq(target).cpu().sum()
            total += len(target)

acc = hit.type(dtype=torch.float64)/total
acc_crop = hit_crop.type(dtype=torch.float64)/total

print('Test accuracy:{}, zoomed out accuracy:{}'.format(acc, acc_crop))

0it [00:00, ?it/s]
Test accuracy:0.8134, zoomed out accuracy:0.3774


In [6]:
# train on center 16*16, test on 32*32 original

##### 
# re-run model definition block if want to train from scratch
#####

EPOCHS = 15
losses = []

best_acc = 0
for epoch in range(EPOCHS):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        padded_data = []
        for i in range(data.shape[0]):
            cur_data = data[i].permute(1,2,0)
            data_img = cv2.resize(cur_data.numpy(), (p//2, p//2))
            padded_img = np.zeros_like(cur_data)
            padded_img[p//4:3*p//4, p//4:3*p//4] = data_img
            padded_data.append(torch.from_numpy(padded_img.transpose(2, 0, 1)[None, ...]))
            
#             plt.imshow(cur_data)
#             plt.show()
#             plt.imshow(padded_img)
#             plt.show()
#             import pdb; pdb.set_trace()
            
        padded_data = torch.cat(padded_data)
        if cuda:
            padded_data, target = padded_data.cuda(), target.cuda()
        
        optimizer.zero_grad()
        # Predict
        y_pred = model(padded_data)[0]

        # Calculate loss
        loss = F.cross_entropy(y_pred, target)
        losses.append(loss.cpu().data)      
        # Backpropagation
        loss.backward()
        optimizer.step()
        
        
        # Display
        if batch_idx % 100 == 1:
            print('\r Train Epoch: {}/{} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                  epoch+1,
                  EPOCHS,
                  batch_idx * len(data),
                  len(train_loader.dataset),
                  100. * batch_idx / len(train_loader), 
                  loss.cpu().data), 
                  end='')
    # Eval
    model.eval()
    hit = 0
    hit_padded = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(test_loader):
            padded_data = []
            for i in range(data.shape[0]):
                cur_data = data[i].permute(1,2,0)
                data_img = cv2.resize(cur_data.numpy(), (p//2, p//2))
                padded_img = np.zeros_like(cur_data)
                padded_img[p//4:3*p//4, p//4:3*p//4] = data_img
                padded_data.append(torch.from_numpy(padded_img.transpose(2, 0, 1)[None, ...]))

            padded_data = torch.cat(padded_data)
            if cuda:
                data, padded_data, target = data.cuda(), padded_data.cuda(), target.cuda()
                
            pred = model(data)[0].max(1)[1]
            pred_padded = model(padded_data)[0].max(1)[1]
            
            hit += pred.eq(target).cpu().sum()
            hit_padded += pred_padded.eq(target).cpu().sum()
            total += len(target)
            
    accuracy = hit.type(dtype=torch.float64)/total
    accuracy_padded = hit_padded.type(dtype=torch.float64)/total

            
    # save best
    if accuracy_padded > best_acc:
        best_acc = accuracy_padded
        torch.save({'epoch': epoch,
                  'model': model.state_dict(),
                  'optimizer': optimizer.state_dict()
                 }, '{}/scale_epoch_{}.pth'.format(save_path, epoch))
        print('\r Best model saved.\r')
      
    print('\r Train Epoch: {}/{} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t Accuracy: {:.4f}% \t \
            Scaled-up Accuracy: {:.4f}% \t'.format(
        epoch+1,
        EPOCHS,
        len(train_loader.dataset), 
        len(train_loader.dataset),
        100. * batch_idx / len(train_loader), 
        loss.cpu().data,
        accuracy_padded*100,
        accuracy*100,
        end=''))

