# Lecture 3 - Advanced Data Loaders

In [None]:
import time
import torch
import torchvision

import numpy as np
import matplotlib.pyplot as plt

from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader

from torchvision import datasets, transforms

### Save DataLoader to Numpy

In [None]:
# Convert every image to a tensor, then standardize it with mean 0.1307 and std 0.3081.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

train_dataset = datasets.MNIST(root='../Datasets/', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='../Datasets/', train=False, download=True, transform=transform)

# A batch size equal to the dataset length makes each loader yield everything in one batch.
train_loader = DataLoader(dataset=train_dataset, batch_size=len(train_dataset))
test_loader = DataLoader(dataset=test_dataset, batch_size=len(test_dataset))

In [None]:
# Each loader yields the whole split as a single (images, labels) batch, so one
# next() call per loader is enough. Fetching the batch once and unpacking it
# avoids loading and transforming the entire dataset a second time just to
# read the labels.
# .numpy() converts each tensor to a numpy array.
train_dataset_array, train_dataset_array_labels = (
    tensor.numpy() for tensor in next(iter(train_loader)))

test_dataset_array, test_dataset_array_labels = (
    tensor.numpy() for tensor in next(iter(test_loader)))

In [None]:
# Inspect the shape of the numpy array holding the training images.
train_dataset_array.shape

In [None]:
# Inspect the shape of the numpy array holding the training labels.
train_dataset_array_labels.shape

In [None]:
# Inspect the shape of the numpy array holding the test images.
test_dataset_array.shape

In [None]:
# Inspect the shape of the numpy array holding the test labels.
test_dataset_array_labels.shape

### Save Numpy array to file

In [None]:
# Write images first, then labels, back to back into the same file handle;
# they must later be read in the same order.
with open('MNIST_train.npy', 'wb') as f:
    for _array in (train_dataset_array, train_dataset_array_labels):
        np.save(f, _array)

In [None]:
# Write images first, then labels, back to back into the same file handle;
# they must later be read in the same order.
with open('MNIST_test.npy', 'wb') as f:
    for _array in (test_dataset_array, test_dataset_array_labels):
        np.save(f, _array)

### Load the saved Numpy arrays to memory

In [None]:
# Two consecutive loads from the same handle return the arrays in the order
# they were saved: images, then labels.
with open('MNIST_train.npy', 'rb') as f:
    train_X, train_Y = np.load(f), np.load(f)

In [None]:
# Two consecutive loads from the same handle return the arrays in the order
# they were saved: images, then labels.
with open('MNIST_test.npy', 'rb') as f:
    test_X, test_Y = np.load(f), np.load(f)

In [None]:
# Inspect the shape of the training images reloaded from MNIST_train.npy.
train_X.shape

In [None]:
# Inspect the shape of the training labels reloaded from MNIST_train.npy.
train_Y.shape

In [None]:
# Mini-batch sizes for training and for evaluation.
N_train = 64
N_test = 256

# Build tensors straight from the numpy arrays with an explicit dtype:
# float32 for the images and int64 (long) for the class labels. This avoids
# routing the integer labels through a float tensor first.
t_mnist_assn_1a_train_X = torch.tensor(train_X, dtype=torch.float32)
t_mnist_assn_1a_train_Y = torch.tensor(train_Y, dtype=torch.long)
t_mnist_assn_1a_test_X = torch.tensor(test_X, dtype=torch.float32)
t_mnist_assn_1a_test_Y = torch.tensor(test_Y, dtype=torch.long)

train_data = TensorDataset(t_mnist_assn_1a_train_X, t_mnist_assn_1a_train_Y)
train_loader = DataLoader(train_data, batch_size=N_train, shuffle=True)

test_data = TensorDataset(t_mnist_assn_1a_test_X, t_mnist_assn_1a_test_Y)
# The test loader uses N_test; passing N_train here would leave N_test unused.
test_loader = DataLoader(test_data, batch_size=N_test, shuffle=True)

## Rotate Tensors

In [None]:
def rotate_tensor(_in_tensor, plot=True):
    """Return an image tensor together with its three right-angle rotations.

    From: https://github.com/arundasan91/IS7033/tree/master/CNN_invariance

    The input is copied, so the caller's tensor is never modified. A leading
    dimension is added to the copy (e.g. 1x28x28 becomes 1x1x28x28) and the
    rotations are built by transposing/flipping dims 2 and 3 of the result.

    Args:
        _in_tensor: Floating-point tensor with three dimensions; the last two
            are treated as the image plane.
        plot: When True, draw element [0][0] of each of the four tensors in a
            2x2 matplotlib grid and show the figure.

    Returns:
        Tuple ``(in_tensor, in_tensor_90, in_tensor_180, in_tensor_270)``.
        ``in_tensor`` is a leaf tensor with ``requires_grad=True``; the three
        rotated tensors are computed from it.
    """
    # detach().clone() gives an independent leaf copy; requires_grad_ replaces
    # the deprecated torch.autograd.Variable wrapper.
    in_tensor = _in_tensor.detach().clone().unsqueeze(0).requires_grad_(True)

    in_tensor_90 = in_tensor.transpose(2, 3).flip(3)
    in_tensor_180 = in_tensor.flip(2).flip(3)
    in_tensor_270 = in_tensor.transpose(2, 3).flip(2)

    if plot:
        # (subplot position, title, tensor) for each cell of the 2x2 grid.
        panels = (
            (221, '0 degree', in_tensor),
            (222, '+90 degree', in_tensor_90),
            (223, '+270 degree', in_tensor_270),
            (224, '+180 degree', in_tensor_180),
        )
        plt.figure(1)
        for position, title, tensor in panels:
            plt.subplot(position)
            plt.gca().set_title(title)
            # Detach from the autograd graph before handing the data to matplotlib.
            plt.imshow(tensor[0][0].detach().cpu(), cmap='gray')
        plt.tight_layout()
        plt.show()
    return (in_tensor, in_tensor_90, in_tensor_180, in_tensor_270)

In [None]:
# Wrap the test loader in enumerate() and pull its first (index, batch) pair;
# the batch unpacks into the images and their labels.
test_subset = enumerate(test_loader)
batch_idx, (one_batch_of_test_subset_x, one_batch_of_test_subset_y) = next(test_subset)

In [None]:
# Rotate the first image of the sampled batch; with the default plot=True the
# four orientations are also displayed.
number, number_90, number_180, number_270 = rotate_tensor(one_batch_of_test_subset_x[0])

## Custom Rotation Transform

In [None]:
class CustomRotation(object):
    """Rotate an image by a fixed angle; usable inside transforms.Compose().

    From: https://github.com/arundasan91/IS7033/tree/master/CNN_invariance

    Calling an instance converts the input to a PIL image, rotates it with
    ``transforms.functional.rotate`` and converts the result back to a tensor.

    Args:
        degrees: Rotation angle, passed as the second argument of ``rotate``.
        resample: Passed positionally as the third argument of ``rotate``.
            NOTE(review): the name/meaning of that argument depends on the
            installed torchvision version -- confirm before changing it.
        expand: Passed positionally as the fourth argument of ``rotate``.
        center: Passed positionally as the fifth argument of ``rotate``.
    """

    def __init__(self, degrees, resample=False, expand=False, center=None):
        self.degrees = degrees
        self.resample = resample
        self.expand = expand
        self.center = center

    def __call__(self, img):
        """Return ``img`` rotated by ``self.degrees`` as a tensor."""
        pil_image = transforms.ToPILImage()(img)
        rotated = transforms.functional.rotate(
            pil_image, self.degrees, self.resample, self.expand, self.center)
        return transforms.ToTensor()(rotated)

    def __repr__(self):
        # Lets a printed transform pipeline show this step's configuration.
        return "{}(degrees={!r}, resample={!r}, expand={!r}, center={!r})".format(
            type(self).__name__, self.degrees, self.resample, self.expand, self.center)

In [None]:
torch.manual_seed(13)

# Mini-batch sizes for training and for evaluation.
N_train = 64
N_test = 256

rotation = 45 # Specifies the rotation of images.

# Both splits share one pipeline: tensor conversion, our CustomRotation, then
# normalization. Building it once keeps the train and test transforms in sync.
rotated_transform = transforms.Compose([
    transforms.ToTensor(),
    CustomRotation(rotation),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Define the train and test loader
train_loader_rot = torch.utils.data.DataLoader(
    datasets.MNIST('../Datasets/', train=True, download=True,
                   transform=rotated_transform),
    batch_size=N_train, shuffle=True)

# The test loader uses N_test; passing N_train here would leave N_test unused.
test_loader_rot = torch.utils.data.DataLoader(
    datasets.MNIST('../Datasets/', train=False,
                   transform=rotated_transform),
    batch_size=N_test, shuffle=True)

In [None]:
# Wrap the rotated test loader in enumerate() and pull its first
# (index, batch) pair; the batch unpacks into the images and their labels.
test_subset_rot = enumerate(test_loader_rot)
batch_idx, (one_batch_of_test_subset_x_rot, one_batch_of_test_subset_y_rot) = next(test_subset_rot)

In [None]:
# Show the first six rotated test images in a 2x3 grid, titled with their labels.
fig = plt.figure()
for i in range(6):
    ax = fig.add_subplot(2, 3, i + 1)
    # Index [i][0] selects channel 0 of the i-th image in the batch.
    ax.imshow(one_batch_of_test_subset_x_rot[i][0], cmap='gray', interpolation='none')
    ax.set_title("Ground Truth: {}".format(one_batch_of_test_subset_y_rot[i]))
# Lay the figure out once, after every subplot exists, instead of on each iteration.
fig.tight_layout()

### Train on rotated images

In [None]:
# Layer widths of the fully connected classifier.
input_size = 784
hidden_sizes = [128, 64]
output_size = 10

# Chain Linear+ReLU pairs through the hidden widths, then finish with the
# output projection followed by a log-softmax over the class dimension.
_widths = [input_size] + hidden_sizes
_layers = []
for _fan_in, _fan_out in zip(_widths, _widths[1:]):
    _layers.append(nn.Linear(_fan_in, _fan_out))
    _layers.append(nn.ReLU())
_layers.append(nn.Linear(_widths[-1], output_size))
_layers.append(nn.LogSoftmax(dim=1))

model = nn.Sequential(*_layers)

print(model)

In [None]:
loss_fn = nn.NLLLoss() # also called criterion sometimes.
optimizer = optim.SGD(model.parameters(), lr=0.003, momentum=0.9)
start = time.time()

NUM_EPOCHS = 5
for EPOCH in range(NUM_EPOCHS):
    # Sum of the per-batch losses seen during this epoch.
    running_loss = 0
    for images, labels in train_loader_rot:
        # Flatten each image in the batch into one long vector
        images = images.view(images.shape[0], -1)

        # Training pass: clear gradients left over from the previous step
        optimizer.zero_grad()

        output = model(images)
        loss = loss_fn(output, labels)

        # This is where the model learns by backpropagating
        loss.backward()

        # And optimizes its weights here
        optimizer.step()

        running_loss += loss.item()

    # Average over the batches of the loader that was actually iterated
    # (train_loader_rot), not over an unrelated loader's length.
    print("Epoch {} - Training loss: {}".format(EPOCH, running_loss/len(train_loader_rot)))

print("\nTraining Time (in minutes) =",(time.time()-start)/60)

### Test on normal images

In [None]:
correct_count, all_count = 0, 0
# Turn off gradients for the whole evaluation pass to speed it up
with torch.no_grad():
    for images, labels in test_loader:
        # Score the whole batch in one forward pass instead of image by image.
        logps = model(images.view(images.shape[0], -1))

        # The network outputs log-probabilities; exp() is monotonic, so the
        # index of the largest log-probability is also the most probable class.
        pred_labels = logps.argmax(dim=1)

        correct_count += (pred_labels == labels).sum().item()
        all_count += labels.shape[0]

print("Number Of Images Tested =", all_count)
# Guard against an empty loader so the report never divides by zero.
print("\nModel Accuracy =", (correct_count/all_count) if all_count else float('nan'))