# Image Classification Problem [50 marks]

In [None]:
import os
import torch
import torchvision
import tarfile
import pandas as pd
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from torchvision.datasets.utils import download_url
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torchvision.transforms as tt
from torch.utils.data import random_split
from torchvision.utils import make_grid
import matplotlib.pyplot as plt
import torch.optim as optim
from torchvision import datasets, transforms, models
%matplotlib inline

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
path = '/content/drive/My Drive/cifar10_archive/'

In [None]:
# Preparing datasets for further using
# Loading all batches and concatenating them all together
# Plotting first 100 examples of images from 10 different classes
# Preprocessing loaded CIFAR-10 dataset
# Saving datasets into file


"""Importing library for object serialization
which we'll use for saving and loading serialized models"""
import pickle

# Importing other standard libraries
import numpy as np
import os
import matplotlib.pyplot as plt


# Defining function for loading single batch of CIFAR-10 dataset
def single_batch_cifar10(file):
    # Opening file for reading in binary mode
    with open(file, 'rb') as f_single_batch:
        d_single_batch = pickle.load(f_single_batch, encoding='latin1')  # dictionary type, we use 'latin1' for python3
        x = d_single_batch['data']  # numpy.ndarray type, (10000, 3072)
        y = d_single_batch['labels']  # list type
        """Initially every batch's dictionary with key 'data' has shape (10000, 3072)
        Where, 10000 - number of image samples
        3072 - three channels of image (red + green + blue)
        Every row contains an image 32x32 pixels with its three channels"""
        # Here we reshape and transpose ndarray for further use
        # At the same time method 'astype()' used for converting ndarray from int to float
        # It is used further in function 'pre_process_cifar10' as it is needed to subtract float from float
        # And for standard deviation as it is needed to divide float by float
        x = x.reshape(10000, 3, 32, 32).transpose(0, 2, 3, 1).astype('float')  # (10000, 32, 32, 3)
        # Making numpy array from list of labels
        y = np.array(y)

        # Returning ready data
        return x, y


# Defining function for loading whole CIFAR-10 dataset
def whole_cifar10():
    # Defining lists for adding all batch's data all together
    x_collect = []
    y_collect = []

    # Defining lists for loading current batch
    x, y = [], []

    # Loading all 5 batches for training and appending them together
    for k in range(1, 6):
        # Preparing current filename
        filename = os.path.join(path, 'datasets/cifar-10-batches-py', 'data_batch_' + str(k))
        # Loading current batch
        x, y = single_batch_cifar10(filename)
        # Appending data from current batch to lists
        x_collect.append(x)
        y_collect.append(y)

    # Concatenating collected data as list of lists as one list
    x_train = np.concatenate(x_collect)  # (50000, 32, 32, 3)
    y_train = np.concatenate(y_collect)  # (50000,)

    # Releasing memory from non-needed anymore arrays
    del x, y

    # Loading data for testing
    filename = os.path.join(path, 'datasets/cifar-10-batches-py', 'test_batch')
    x_test, y_test = single_batch_cifar10(filename)

    # Returning whole CIFAR-10 data for training and testing
    return x_train, y_train, x_test, y_test


# Defining function for preprocessing CIFAR-10 dataset
def pre_process_cifar10():
    # Loading whole CIFAR-10 dataset
    x_train, y_train, x_test, y_test = whole_cifar10()

    # Normalizing whole data by dividing /255.0
    x_train /= 255.0
    x_test /= 255.0

    # # Preparing data for training, validation and testing
    # # Data for validation is taken with 1000 examples from training dataset in range from 49000 to 50000
    # batch_mask = list(range(40000, 50000))
    # x_validation = x_train[batch_mask]  # (1000, 32, 32, 3)
    # y_validation = y_train[batch_mask]  # (1000,)
    # # Data for training is taken with first 49000 examples from training dataset
    # batch_mask = list(range(40000))
    # x_train = x_train[batch_mask]  # (49000, 32, 32, 3)
    # y_train = y_train[batch_mask]  # (49000,)
    # # Data for testing is taken with first 10000 examples from testing dataset
    # batch_mask = list(range(10000))
    # x_test = x_test[batch_mask]  # (1000, 32, 32, 3)
    # y_test = y_test[batch_mask]  # (1000,)



    # Normalizing data by subtracting mean image and dividing by standard deviation
    # Subtracting the dataset by mean image serves to center the data
    # It helps for each feature to have a similar range and gradients don't go out of control
    # Calculating mean image from training dataset along the rows by specifying 'axis=0'
    mean_image = np.mean(x_train, axis=0)  # numpy.ndarray (32, 32, 3)

    # Calculating standard deviation from training dataset along the rows by specifying 'axis=0'
    std = np.std(x_train, axis=0)  # numpy.ndarray (32, 32, 3)
    # Saving calculated 'mean_image' and 'std' into 'pickle' file
    # We will use them when preprocess input data for classifying
    # We will need to subtract and divide input image for classifying
    # As we're doing now for training, validation and testing data
    dictionary = {'mean_image': mean_image, 'std': std}
    with open(path+'datasets/'+'mean_and_std.pickle', 'wb') as f_mean_std:
        pickle.dump(dictionary, f_mean_std)

    # Subtracting calculated mean image from datasets
    x_train -= mean_image
    # x_validation -= mean_image
    x_test -= mean_image

    # Dividing then every dataset by standard deviation
    x_train /= std
    # x_validation /= std
    x_test /= std

    # Transposing every dataset to make channels come first
    x_train = x_train.transpose(0, 3, 1, 2)  # (49000, 3, 32, 32)
    # x_validation = x_validation.transpose(0, 3, 1, 2)  # (1000, 3, 32, 32)
    x_test = x_test.transpose(0, 3, 1, 2)  # (1000, 3, 32, 32)

    # Returning result as dictionary
    d_processed = {'x_train': x_train, 'y_train': y_train, 'x_test': x_test, 'y_test': y_test}

    # Returning dictionary
    return d_processed


# Preprocessing data
data = pre_process_cifar10()
for i, j in data.items():
    print(i + ':', j.shape)

# # Saving loaded and preprocessed data into 'pickle' file
# with open(path+'datasets/'+'data.pickle', 'wb') as f:
#     pickle.dump(data, f)


x_train: (50000, 3, 32, 32)
y_train: (50000,)
x_test: (10000, 3, 32, 32)
y_test: (10000,)


In [None]:
x_train, x_valid, y_train, y_valid = train_test_split(data['x_train'], data['y_train'], test_size=0.2, stratify=data['y_train'], random_state=0)

In [None]:
print(x_train.shape)
print(x_valid.shape)
print(y_train.shape)
print(y_valid.shape)

(40000, 3, 32, 32)
(10000, 3, 32, 32)
(40000,)
(10000,)


In [None]:
class_counts_train = pd.Series(y_train).value_counts()
class_counts_valid = pd.Series(y_valid).value_counts()

print("Train set class distribution:")
print(class_counts_train)
print("\nValidation set class distribution:")
print(class_counts_valid)

Train set class distribution:
7    4000
4    4000
1    4000
5    4000
6    4000
8    4000
9    4000
0    4000
3    4000
2    4000
dtype: int64

Validation set class distribution:
4    1000
3    1000
2    1000
8    1000
5    1000
7    1000
6    1000
0    1000
9    1000
1    1000
dtype: int64


In [None]:
# Data Augmentation Techniques

In [None]:
# Convert the numpy arrays to PyTorch tensors
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
x_valid_tensor = torch.tensor(x_valid, dtype=torch.float32)
y_valid_tensor = torch.tensor(y_valid, dtype=torch.long)

In [None]:
# Create datasets and data loaders
batch_size = 400
train_dataset = torch.utils.data.TensorDataset(x_train_tensor, y_train_tensor)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_dataset = torch.utils.data.TensorDataset(x_valid_tensor, y_valid_tensor)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

In [11]:
# Define data transforms
transform = transforms.Compose([
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load pretrained Resnet-50 model
model = models.resnet50(pretrained=True)

# Freeze the pre-trained layers
for param in model.parameters():
    param.requires_grad = False

# Modify the last layer for CIFAR-10
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)  # 10 classes in CIFAR-10

# Replace the last layer with a new one for our 10 classes
num_classes = 10
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Define loss function and optimizer
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.1)

# Train the model
model.train()
for epoch in range(5):  # Train for 5 epochs
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

# Validate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in valid_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print('Accuracy on the validation set: {:.2f}%'.format(100 * accuracy))

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 146MB/s]


Accuracy on the validation set: 10.07%
