CIFAR 10 classification:

The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images
per class. There are 50000 training images and 10000 test images. The dataset is divided into
five training batches and one test batch, each with 10000 images. Use four of the training
batches as the training set (40000 images) and the remaining training batch (10000 images) as the validation set.

(a) Download the dataset (https://www.cs.toronto.edu/~kriz/cifar.html).

In [7]:
import json
import torch
import torchvision
import seaborn as sn
import torchextractor as tx
import torch.optim as optim
import torch.nn as nn
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import torchvision.transforms as transforms
from PIL import Image
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LinearRegression
from alexnet_pytorch import AlexNet
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import cross_val_score

ModuleNotFoundError: No module named 'torch.nn'

In [None]:
# Device used for every forward pass; the cells below move each batch to it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ToTensor() yields tensors in the range [0, 1]. The 32x32 CIFAR images are
# upscaled and cropped to 224x224 and then standardized with the ImageNet
# channel mean/std that the pretrained torchvision models were trained with.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# CIFAR10: 60000 32x32 color images in 10 classes, with 6000 images per class
# Downloading training data
train_data = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

# shuffle=False keeps the batch order identical on every pass. The cells below
# split train/validation by batch index (i < 400 vs. i >= 400) on two separate
# passes over this loader; with shuffling the two passes would be permuted
# differently and validation images would leak into the training set.
trainloader = torch.utils.data.DataLoader(train_data, batch_size=100, shuffle=False, num_workers=2)

# Downloading test data
test_data = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

testloader = torch.utils.data.DataLoader(test_data, batch_size=100, shuffle=False, num_workers=2)


# Class labels, indexed by the integer target returned by the dataset
classes = ('Airplane', 'Car', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog', 'Horse', 'Ship', 'Truck')


# print the labels of the first training batch
_, labels = next(iter(trainloader))
print(' '.join('%5s' % classes[label] for label in labels.tolist()))


AlexNet_model = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=True)
# The model must live on the same device as the batches, and eval() disables
# dropout so that predictions are deterministic.
AlexNet_model.to(device)
AlexNet_model.eval()


# Read the ImageNet category names (one per line, indexed by class id)
with open("imagenet_classes.txt", "r") as f:
    categories = [line.strip() for line in f]

In [None]:

# Top-1 ImageNet category name predicted for every test image, and the
# matching CIFAR10 class name, in the same order.
preds = []
actual = []

# Inference needs the model on the same device as the batches and in eval
# mode (dropout disabled); both calls are no-ops if already done.
AlexNet_model.to(device)
AlexNet_model.eval()

with torch.no_grad():
    for images, labels in testloader:
        outputs = AlexNet_model(images.to(device))

        # softmax is monotonic, so the most probable class is simply the
        # arg-max of the raw scores; one batched call replaces the
        # per-sample softmax + top-5.
        top1_catid = outputs.argmax(dim=1).cpu().tolist()

        preds.extend(categories[catid] for catid in top1_catid)
        actual.extend(classes[label] for label in labels.tolist())


In [None]:
from collections import Counter

# The 10 ImageNet categories that the model predicted most often, most
# frequent first. They become the columns of the matrix.
top_10 = [name for name, _ in Counter(preds).most_common(10)]

row_of = {name: row for row, name in enumerate(classes)}
col_of = {name: col for col, name in enumerate(top_10)}

# cm[r, c] = number of images of CIFAR10 class `classes[r]` that were
# predicted as ImageNet category `top_10[c]`.
cm = np.zeros((len(classes), len(top_10)), dtype=int)
for true_name, pred_name in zip(actual, preds):
    col = col_of.get(pred_name)
    if col is not None:  # predictions outside the top 10 are not tabulated
        cm[row_of[true_name], col] += 1
print(cm)

In [None]:
# Tick labels: rows are annotated with the CIFAR10 class names.
aylabels = classes
axlabels = classes

# Draw `cm` as an annotated heat map on a square 10x10 inch figure.
plt.figure(figsize=(10, 10))
sn.set(font_scale=1.4)  # scales the label font
heatmap_options = dict(
    annot=True,
    annot_kws={"size": 12},  # font size of the numbers inside the cells
    xticklabels=top_10,
    yticklabels=aylabels,
    fmt='g',
)
sn.heatmap(cm, **heatmap_options)
plt.show()

In [None]:

AlexNet_model = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=True)

# torchvision's AlexNet classifier is
#   Dropout, Linear (fc6), ReLU, Dropout, Linear (fc7), ReLU, Linear (fc8).
# Dropping the last five modules leaves Dropout + fc6, so the network now
# outputs the 4096-dimensional fc6 activations (before the ReLU).
fc6_classifier = nn.Sequential(*list(AlexNet_model.classifier.children())[:-5])
AlexNet_model.classifier = fc6_classifier

# The batches are moved to `device` before the forward pass, so the freshly
# loaded model has to be moved there as well.
AlexNet_model.to(device)
# eval() disables dropout so the extracted features are deterministic.
AlexNet_model.eval()

In [None]:
# `normalize` was removed from LinearRegression in scikit-learn 1.2 and made
# this line raise TypeError; feature standardization is done by the
# StandardScaler of the classifier pipeline instead.
# NOTE(review): `model` is not used by any of the visible cells.
model = LinearRegression()
print(model)

# Feature vectors and integer class labels of the training partition.
xTrain = []
yTrain = []

# no_grad(): only forward passes are needed, so no autograd graph is kept.
with torch.no_grad():
    for i, data in enumerate(trainloader, 0):
        # The first 400 batches of 100 images (40000 images) form the
        # training partition; stop instead of loading the rest for nothing.
        if i >= 400:
            break
        inputs, labels = data[0].to(device), data[1]
        outputs = AlexNet_model(inputs)
        # .cpu() is required before .numpy() when the model runs on a GPU.
        xTrain.extend(outputs.cpu().numpy())
        yTrain.extend(labels.tolist())

In [None]:
# Linear classifier trained with SGD on standardized features.
scaler = StandardScaler()
sgd_classifier = SGDClassifier(max_iter=1000, tol=1e-3)
clf = make_pipeline(scaler, sgd_classifier)
clf.fit(xTrain, yTrain)

(b) Use one of the deep learning frameworks (PyTorch, TensorFlow, Keras) and the pretrained AlexNet model to classify the images in the CIFAR-10 dataset. Construct a confusion
matrix that relates the CIFAR-10 classes to the 10 ImageNet classes most frequently
predicted by the model (30 points). 

In [None]:
# Validation accuracy: batches 400 and above of `trainloader` are the
# held-out part of the training data.
# NOTE(review): this is only disjoint from the batches used for fitting if
# `trainloader` yields the batches in the same order on every pass.
correctV = 0
totalV = 0
with torch.no_grad():  # forward passes only, no autograd graph needed
    for i, data in enumerate(trainloader, 0):
        if i < 400:
            continue
        inputs, labels = data[0].to(device), data[1]
        outputs = AlexNet_model(inputs)
        # .cpu() is required before .numpy() when the model runs on a GPU.
        out = outputs.cpu().numpy()
        pred = clf.predict(out)
        correctV += int((pred == labels.numpy()).sum())
        totalV += len(pred)
# Divide by the number of images actually evaluated rather than a constant.
print(correctV / max(totalV, 1))

(c) Use one of the deep learning frameworks (PyTorch, TensorFlow, Keras) and the pretrained AlexNet model to extract features for all the images in the CIFAR-10 dataset. Use the
output of the 'fc6' layer. Train a linear classifier (logistic regression or linear SVM) and
evaluate it using the train, validation and test partitions suggested for the dataset (30
points).

In [None]:
# Count the correctly classified test images; `correct` is reported by the
# next cell.
correct = 0
with torch.no_grad():  # forward passes only, no autograd graph needed
    for data in testloader:
        inputs, labels = data[0].to(device), data[1]
        outputs = AlexNet_model(inputs)
        # .cpu() is required before .numpy() when the model runs on a GPU.
        out = outputs.cpu().numpy()
        pred = clf.predict(out)
        correct += int((pred == labels.numpy()).sum())


In [None]:
# Test accuracy: every batch of `testloader` was evaluated, so the
# denominator is the size of the underlying dataset.
print(correct / len(testloader.dataset))

(d) Repeat the previous step, but this time using as features the output of the ’fc7’ layer.
Compare and discuss (40 points).

In [None]:

AlexNet_model = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=True)

# torchvision's AlexNet classifier is
#   Dropout, Linear (fc6), ReLU, Dropout, Linear (fc7), ReLU, Linear (fc8).
# Dropping the last two modules leaves everything up to and including fc7, so
# the network now outputs the 4096-dimensional fc7 activations (before the
# ReLU).
fc7_classifier = nn.Sequential(*list(AlexNet_model.classifier.children())[:-2])
AlexNet_model.classifier = fc7_classifier

# The batches are moved to `device` before the forward pass, so the freshly
# loaded model has to be moved there as well.
AlexNet_model.to(device)
# eval() disables dropout so the extracted features are deterministic.
AlexNet_model.eval()

In [None]:
# `normalize` was removed from LinearRegression in scikit-learn 1.2 and made
# this line raise TypeError; feature standardization is done by the
# StandardScaler of the classifier pipeline instead.
# NOTE(review): `model` is not used by any of the visible cells.
model = LinearRegression()
print(model)

# Feature vectors and integer class labels of the training partition.
xTrain = []
yTrain = []

# no_grad(): only forward passes are needed, so no autograd graph is kept.
with torch.no_grad():
    for i, data in enumerate(trainloader, 0):
        # The first 400 batches of 100 images (40000 images) form the
        # training partition; stop instead of loading the rest for nothing.
        if i >= 400:
            break
        inputs, labels = data[0].to(device), data[1]
        outputs = AlexNet_model(inputs)
        # .cpu() is required before .numpy() when the model runs on a GPU.
        xTrain.extend(outputs.cpu().numpy())
        yTrain.extend(labels.tolist())


In [None]:
# Linear classifier trained with SGD on standardized features.
scaler = StandardScaler()
sgd_classifier = SGDClassifier(max_iter=1000, tol=1e-3)
clf = make_pipeline(scaler, sgd_classifier)
clf.fit(xTrain, yTrain)


In [None]:
# Count the correctly classified test images; `correct` is reported by the
# next cell.
correct = 0
with torch.no_grad():  # forward passes only, no autograd graph needed
    for data in testloader:
        inputs, labels = data[0].to(device), data[1]
        outputs = AlexNet_model(inputs)
        # .cpu() is required before .numpy() when the model runs on a GPU.
        out = outputs.cpu().numpy()
        pred = clf.predict(out)
        correct += int((pred == labels.numpy()).sum())

In [None]:
# Test accuracy: every batch of `testloader` was evaluated, so the
# denominator is the size of the underlying dataset.
print(correct / len(testloader.dataset))