# CIFAR-10
### Kaggle link: [https://www.kaggle.com/c/cifar-10](https://www.kaggle.com/c/cifar-10)
### WandB link: [https://wandb.ai/fischly/cifar10-with-resnet18](https://wandb.ai/fischly/cifar10-with-resnet18)

## Import everything needed

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import collections
import math
import os
import shutil
import glob

from PIL import Image
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split

import wandb

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

import torchvision
from torchvision import datasets, transforms
import torchvision.transforms as T

np.random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed(0)

## Unzip datasets

# WARNING: It can take a lot of time to uncompress!

In [None]:
# !python -m py7zr x /kaggle/input/cifar-10/train.7z

In [None]:
# !python -m py7zr x /kaggle/input/cifar-10/test.7z

In [2]:
data_dir = 'I:\\AI\\cifar'

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [72]:
def read_csv_labels(fname):
    """Read `fname` to return a filename to label dictionary."""
    with open(fname, 'r') as f:
        # Skip the file header line (column name)
        lines = f.readlines()[1:]
    tokens = [l.rstrip().split(',') for l in lines]
    return dict(((name, label) for name, label in tokens))

labels = read_csv_labels(os.path.join(data_dir, 'trainLabels.csv'))
print(f'Number training examples: {len(labels)}')
print(f'Number classes: {len(set(labels.values()))}')

Number training examples: 50000
Number classes: 10


In [85]:
# generate a lookup-table with one-hot encoded values for our 10 classes
classes = set(labels.values())
ohe_table = { cl: np.array(list(bin(1 << i)[2:].zfill(len(classes))), dtype=np.int8) for i, cl in enumerate(list(classes)) }

ohe_table_reverse = list(ohe_table.keys())
def get_class_name_by_index(index):
    classes = []
    for idx in index:
        classes.append(ohe_table_reverse[len(ohe_table) - 1 - idx.item()])
    return classes

ohe_table

{'bird': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1], dtype=int8),
 'dog': array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0], dtype=int8),
 'horse': array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0], dtype=int8),
 'truck': array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0], dtype=int8),
 'ship': array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0], dtype=int8),
 'deer': array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0], dtype=int8),
 'cat': array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0], dtype=int8),
 'frog': array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
 'automobile': array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
 'airplane': array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8)}

In [89]:
def split_train_valid(data_dir, labels, valid_ratio):
    """Split the validation set out of the original training set and returns file paths and labels for both the new sets."""
    # the number of examples of the class that has the fewest examples in the training dataset
    n = collections.Counter(labels.values()).most_common()[-1][1]
    
    # the number of examples per class for the validation set
    n_valid_per_label = max(1, math.floor(n * valid_ratio))
    
    # for storing the resulting file paths and labels
    valid_file_paths = []
    valid_labels = []
    train_file_paths = []
    train_labels = []
    
    label_count = {}
    for train_file in os.listdir(os.path.join(data_dir, 'train')):
        label = labels[train_file.split('.')[0]]
        fname = os.path.join(data_dir, 'train', train_file)

        if label not in label_count or label_count[label] < n_valid_per_label:
            # mark as validation file
            valid_file_paths.append(fname)
            valid_labels.append(ohe_table[label])
            
            label_count[label] = label_count.get(label, 0) + 1
        else:
            # mark as train file
            train_file_paths.append(fname)
            train_labels.append(ohe_table[label])

    return ((valid_file_paths, valid_labels), (train_file_paths, train_labels))

In [75]:
# dataset for CIFAR images, where you can specify if you want to preload images into (V)RAM
class CIFARDataset(Dataset):
    def __init__(self, file_paths, labels, transform=None, train=False, test=False, preload=False):
        self.transform = transform if train else T.Compose([T.ToTensor(), *transform.transforms])
        
        self.train = train
        self.test = test
        self.preload = preload
        
        self.file_paths = file_paths
        self.labels = labels
        self.number_of_files = len(file_paths)
        
        self.to_tensor = T.ToTensor()
        
        # to speed things up, we can preload the images into RAM/VRAM
        if self.preload:
            self.preloaded_images = []
            
            for file_path in tqdm(self.file_paths):
                img = Image.open(file_path)

                # if train, we only apply ToTensor transform. every other tranformation is done
                # when __getitem__() is called. this is needed since we usually want to get a different transform
                # for each item that is fetched from the dataset (due to RandomResizedCrop and RandomHorizontalFlip).

                # if validation or test, we apply all the transformations right away.
                # this is possible, since the transform is fixed for those cases, usually (e.g. Resize or CenterCrop).
                transform_to_apply = self.to_tensor if self.train else self.transform

                img_transformed = transform_to_apply(img).to(device)
                self.preloaded_images.append(img_transformed)
        
        # create label tensor and move it to GPU
        if not test:
            self.labels = torch.tensor(np.array(self.labels), dtype=torch.float).to(device)
                
    def __len__(self):
        return self.number_of_files

    def __getitem__(self, idx):
        if self.preload:
            if self.train:
                return self.transform(self.preloaded_images[idx]), self.labels[idx]
            else:
                return self.preloaded_images[idx], self.labels[idx] if not self.test else os.path.basename(self.file_paths[idx]).split('.')[0]
        else:
            fp = self.file_paths[idx]
            return self.transform(Image.open(fp)).to(device), self.labels[idx] if not self.test else os.path.basename(fp).split('.')[0]

In [76]:
batch_size = 64
valid_ratio = 0.1

# get the file paths for train and validation
((valid_file_paths, valid_labels), (train_file_paths, train_labels)) = split_train_valid(data_dir, labels, valid_ratio)
# get all files in the test directory as test files
test_file_paths = glob.glob(os.path.join(data_dir, 'test', '*'))

In [77]:
transform_train = torchvision.transforms.Compose([
    T.Normalize([0.4914, 0.4822, 0.4465],
                [0.2023, 0.1994, 0.2010]),
    T.RandomHorizontalFlip(p=0.5),
    T.RandomResizedCrop((32, 32), scale=(0.5, 1), ratio=(0.5, 2)),
    T.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.08)
])

transform_test = torchvision.transforms.Compose([
    T.Normalize([0.4914, 0.4822, 0.4465],
                [0.2023, 0.1994, 0.2010])])

#### (!) The next cell takes some time, because I load the train/validation images all to VRAM. This considerably speeds up learning later on.

In [86]:
train_dataset = CIFARDataset(train_file_paths, train_labels, transform_train, train=True, test=False, preload=True)
valid_dataset = CIFARDataset(valid_file_paths, valid_labels, transform_test, train=False, test=False, preload=True)
test_dataset  = CIFARDataset(test_file_paths, None, transform_test, train=False, test=True, preload=False)

In [79]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, drop_last=True)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size, shuffle=True, drop_last=True)
test_loader  = torch.utils.data.DataLoader(test_dataset, 2048, shuffle=False, drop_last=False)

### Model

In [88]:
def init_linear_layer(m, method):
    torch.nn.init.xavier_normal_(m.weight, nn.init.calculate_gain(method))
    torch.nn.init.constant_(m.bias, 0)
    return m

# load pretrained resnet18 model
model = torchvision.models.resnet18(weights=torchvision.models.ResNet18_Weights.DEFAULT)
# replace the last, dense layer with our own, that outputs only the 10 classes we have in the CIFAR10 dataset
model.fc = init_linear_layer(nn.Linear(model.fc.in_features, 10), 'linear')

model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

### Training loop

In [23]:
epochs = 20
lr = 5e-4
weight_decay = 5e-5

In [17]:
wandb.init(project="cifar10-with-resnet18")

[34m[1mwandb[0m: Currently logged in as: [33mfischly[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [20]:
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, verbose=True, min_lr=1e-5, factor=0.5)

In [None]:
best_val_loss = 100000

for epoch in tqdm(range(epochs)):
    accurate = 0
    total = 0
    losses = 0
    
    # --- TRAINING LOOP ---
    model.train()
    for X, y in tqdm(train_loader):
        y_pred = model(X)
        loss = criterion(y_pred, y)
        
        accurate += (torch.argmax(y, 1) == torch.argmax(y_pred, 1)).sum().float()
        losses += loss.item()
        total += len(y)

        # zero the gradients before running the backward pass
        optimizer.zero_grad()

        # backward pass to compute the gradient of loss w.r.t our learnable params
        loss.backward()

        # update params
        optimizer.step()
    
    wandb.log({
        'train/loss': losses / len(train_loader),
        'train/accuracy': accurate / total
    })
    
    # --- VALIDATION LOOP ---
    model.eval()
    val_loss = 0
    with torch.inference_mode():
        correct = 0
        for X, y in tqdm(valid_loader, leave=False):
            y_pred = model(X)
            val_loss += criterion(y_pred, y) * X.size(0)
            
            correct += (torch.argmax(y, 1) == torch.argmax(y_pred, 1)).sum().item()
    
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model, f'models/model_{epoch}.pt')
        print(f'Saved model as models/model_{epoch}.pt')
            
    wandb.log({
        'val/loss': val_loss / len(valid_dataset),
        'val/accuracy': correct / len(valid_dataset)
    })
    

In [24]:
torch.save(model, f'models/model_done_4.pt')

### Create test submission

In [80]:
result = []
with torch.inference_mode():
    for X, lab in tqdm(test_loader):
        y_pred = model(X)
        #val_loss += criterion(y_pred, y) * X.size(0)
        
        predicted = torch.argmax(y_pred, 1).cpu()
        predicted_class = get_class_name_by_index(predicted)
        
        for i in range(len(X)):
            result.append({'id': lab[i], 'label': predicted_class[i]})
            
out_df = pd.DataFrame(result)
out_df.to_csv('submission.csv', index=False)

# out_df

  0%|          | 0/147 [00:00<?, ?it/s]