# Import necessary libraries

In [1]:
import torch
import os
import pickle
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.utils.data import random_split, Dataset
from torchsummary import summary
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

# Define training arguments

In [2]:
# Dataset directories
labeled_set_dir = 'data/cifar-10-batches-py'
unlabeled_set_dir = 'data'

# Number of epochs
num_epochs = 200

# Output csv file
output_csv_path = 'out_5.csv'

# Checkpoint path
checkpoint_path = 'model_5.pth'

# Augmentation strategy
augment = 'set2' #Choices: 'set2', None, 'set1'

#learning rate
lr = 0.1

# Model hyperparameters
blocks_in_layers = [2, 1, 1, 1]
num_layers = 4
dr= 0.0 
num_channels= [64, 128, 256, 512]
avg_pool_kernel_s=4

# Define CIFAR10 Dataset class

In [3]:
class CIFAR10Dataset(Dataset):
    def __init__(self, data_dir, train=True, unlabeled=False, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.train = train
        self.unlabeled = unlabeled
        # Load all data batches
        if unlabeled:
          self.data, self.id = self.load_unlabeled_data()
          self.labels = None
        else:
          self.data, self.labels = self.load_labeled_data()

    def load_cifar_batch(self, file):
        with open(file, 'rb') as fo:
            batch = pickle.load(fo, encoding='bytes')
        return batch

    def load_labeled_data(self):
        data_batches = []
        label_batches = []
        if self.train:
          for i in range(1, 6):
            batch_file = os.path.join(self.data_dir, f'data_batch_{i}')
            batch = self.load_cifar_batch(batch_file)
            data_batches.append(batch[b'data'])
            label_batches += batch[b'labels']
        else:
          batch_file = os.path.join(self.data_dir, f'test_batch')
          batch = self.load_cifar_batch(batch_file)
          data_batches.append(batch[b'data'])
          label_batches += batch[b'labels']

        data = np.vstack(data_batches).reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
        labels = np.array(label_batches)
        return data, labels

    def load_unlabeled_data(self):
        # Load the unlabeled batch
        batch_file = os.path.join(self.data_dir, 'cifar_test_nolabels.pkl')
        batch = self.load_cifar_batch(batch_file)
        data = batch[b'data'].reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
        id = batch[b'ids'].tolist()
        return data, id

    def __len__(self):
        if not self.unlabeled:
          return len(self.labels)
        else:
          return len(self.data)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img = self.data[idx]
        img = Image.fromarray(img)
        if self.transform:
            img = self.transform(img)
        if not self.unlabeled:
          label = self.labels[idx]
          return img, label
        else:
          return img, self.id[idx]




# Define Resnet class. Changed version of the Resnet class in this repo: https://github.com/kuangliu/pytorch-cifar

In [4]:
class BasicBlock(nn.Module):
  expansion = 1

  def __init__(self, in_planes, planes, stride=1):
    super(BasicBlock, self).__init__()
    self.conv1 = nn.Conv2d(
        in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                            stride=1, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)

    self.shortcut = nn.Sequential()
    if stride != 1 or in_planes != self.expansion*planes:
        self.shortcut = nn.Sequential(
            nn.Conv2d(in_planes, self.expansion*planes,
                      kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(self.expansion*planes)
        )

  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.bn2(self.conv2(out))
    out += self.shortcut(x)
    out = F.relu(out)
    return out



class ResNet(nn.Module):
  def __init__(self, block, num_blocks, num_layers = 4, dropout= 0.3 ,num_channels=[64, 128, 256, 512] , avg_pool_kernel_s=4, num_classes=10):
    super(ResNet, self).__init__()
    assert len(num_channels) == num_layers
    assert len(num_blocks) == num_layers
    self.in_planes = 64
    self.avg_pool_kernel_s = avg_pool_kernel_s
    self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                            stride=1, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    layers = []
    for i in range(num_layers):
      stride = 1 if i == 0 else 2
      layers.append(nn.Dropout2d(p=dropout))
      layers.append( self._make_layer(block, num_channels[i], num_blocks[i], stride=stride))
    self.layers = nn.Sequential(*layers)
    self.linear = nn.Linear(num_channels[-1]*block.expansion, num_classes)

  def _make_layer(self, block, planes, num_blocks, stride):
    strides = [stride] + [1]*(num_blocks-1)
    layers = []
    for stride in strides:
        layers.append(block(self.in_planes, planes, stride))
        self.in_planes = planes * block.expansion
    return nn.Sequential(*layers)

  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.layers(out)
    out = F.avg_pool2d(out, self.avg_pool_kernel_s)
    out = out.view(out.size(0), -1)
    out = self.linear(out)
    return out

In [5]:
# If GPU available, the code uses it. Otherwise cpu is used for the training (not recommended).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


# Load dataset




In [6]:
torch.manual_seed(42)
BATCH_SIZE_TRAIN = 128
BATCH_SIZE_TEST = 1000
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = CIFAR10Dataset(labeled_set_dir, train=True, unlabeled=False, transform=transform)
testset = CIFAR10Dataset(labeled_set_dir, train=False, unlabeled=False, transform=transform)

## Create validation set from the training set

In [7]:
total_size = len(trainset)
train_size = int(0.9 * total_size)
validation_size = total_size - train_size

# Split the dataset
generator = torch.Generator().manual_seed(42)
trainset, validationset = torch.utils.data.random_split(trainset, [train_size, validation_size], generator)


## Augment the training dataset

In [8]:
#Augmentation
if augment is None:
    train_transform = transform
elif augment=='set1':
    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
elif augment=='set2':
    train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4), 
    transforms.RandomHorizontalFlip(),
    transforms.AutoAugment(policy= transforms.AutoAugmentPolicy.CIFAR10),
    transforms.ToTensor(),    
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])
trainset.dataset.transform = train_transform

## Create dataloader instances

In [9]:
train_loader = torch.utils.data.DataLoader(trainset,batch_size=BATCH_SIZE_TRAIN,shuffle=True, num_workers=2)
test_loader = torch.utils.data.DataLoader(testset,batch_size=BATCH_SIZE_TEST,shuffle=False,  num_workers=1)
validation_loader = torch.utils.data.DataLoader(validationset,batch_size=BATCH_SIZE_TRAIN,shuffle=False,  num_workers=1)



# Define training and evaluating functions

In [10]:
def train_epoch(model, optimizer, scheduler, data_loader, loss_history, acc_history, criterion):
  total_samples = len(data_loader.dataset)
  model.train()
  correct_samples = 0
  losses = []
  for i, (data, target) in enumerate(data_loader):
    data = data.to(device)
    target = target.to(device)
    optimizer.zero_grad()
    output = model(data)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    _, pred = torch.max(output, dim=1)

    losses.append(loss.item())
    correct_samples += pred.eq(target).sum()  

  avg_loss = np.mean(losses)  
  loss_history.append(avg_loss)
  acc = correct_samples / total_samples
  acc_history.append(acc.item())  
  print('\nAverage training loss: ' + '{:.4f}'.format(avg_loss) +
  ' Accuracy:' + '{:5}'.format(correct_samples) + '/' + '{:5}'.format(total_samples) + ' (' +
  '{:4.2f}'.format(100.0 * correct_samples / total_samples) + '%)\n')
  scheduler.step()

In [11]:
def evaluate(model, data_loader, loss_history, acc_history, criterion, set_name='test'):
  model.eval()
  total_samples = len(data_loader.dataset)
  correct_samples = 0
  losses = []
  acc = []
  with torch.no_grad():
    for data, target in data_loader:
      data = data.to(device)
      target = target.to(device)
      output = model(data)
      loss = criterion(output, target)
      _, pred = torch.max(output, dim=1)

      losses.append(loss.item())
      correct_samples += pred.eq(target).sum()

  avg_loss = np.mean(losses)
  loss_history.append(avg_loss)
  acc = correct_samples / total_samples
  acc_history.append(acc.item())
  print('\nAverage '+ set_name + ' loss: ' + '{:.4f}'.format(avg_loss) +
  ' Accuracy:' + '{:5}'.format(correct_samples) + '/' + '{:5}'.format(total_samples) + ' (' +
  '{:4.2f}'.format(100.0 * correct_samples / total_samples) + '%)\n')

# Create model, optimizer, schedular, criterion

In [12]:
model = ResNet(BasicBlock,
               blocks_in_layers, 
               num_layers = num_layers, 
               dropout=dr,
               num_channels=num_channels, 
               avg_pool_kernel_s=avg_pool_kernel_s, 
               num_classes=10)
model = model.to(device)
optimizer = optim.SGD(model.parameters(), lr=lr,
                     momentum=0.9, weight_decay=5e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                 T_max=num_epochs)  # eta_min is the minimum lr
criterion = nn.CrossEntropyLoss()
summary(model, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,728
       BatchNorm2d-2           [-1, 64, 32, 32]             128
         Dropout2d-3           [-1, 64, 32, 32]               0
            Conv2d-4           [-1, 64, 32, 32]          36,864
       BatchNorm2d-5           [-1, 64, 32, 32]             128
            Conv2d-6           [-1, 64, 32, 32]          36,864
       BatchNorm2d-7           [-1, 64, 32, 32]             128
        BasicBlock-8           [-1, 64, 32, 32]               0
            Conv2d-9           [-1, 64, 32, 32]          36,864
      BatchNorm2d-10           [-1, 64, 32, 32]             128
           Conv2d-11           [-1, 64, 32, 32]          36,864
      BatchNorm2d-12           [-1, 64, 32, 32]             128
       BasicBlock-13           [-1, 64, 32, 32]               0
        Dropout2d-14           [-1, 64,

# Train

In [None]:
best_val_loss = np.inf  # Track the highest validation accuracy

start_time = time.time()
train_loss_history, valid_loss_history = [], []
train_acc_history, valid_acc_history = [], []
termination_count = 0 #If valid loss does not decrease in 5 consecutive epochs terminate the training
for epoch in range(1, num_epochs + 1):
  print('Epoch:', epoch)
  train_epoch(model, optimizer, scheduler, train_loader, train_loss_history, train_acc_history, criterion)
  evaluate(model, validation_loader, valid_loss_history, valid_acc_history, criterion, 'validation')
  if valid_loss_history[-1] < best_val_loss:
  #Save the best model in terms of validation loss.
    torch.save(model.state_dict(), checkpoint_path)
    best_val_loss = valid_loss_history[-1]
print('Execution time:', '{:5.2f}'.format(time.time() - start_time), 'seconds')

Epoch: 1

Average training loss: 2.2430 Accuracy: 7094/45000 (15.76%)


Average validation loss: 2.1077 Accuracy: 1018/ 5000 (20.36%)

Epoch: 2

Average training loss: 2.0824 Accuracy: 9988/45000 (22.20%)


Average validation loss: 1.9748 Accuracy: 1309/ 5000 (26.18%)

Epoch: 3

Average training loss: 1.9831 Accuracy:11999/45000 (26.66%)


Average validation loss: 1.8922 Accuracy: 1546/ 5000 (30.92%)

Epoch: 4

Average training loss: 1.8977 Accuracy:13451/45000 (29.89%)


Average validation loss: 1.7755 Accuracy: 1802/ 5000 (36.04%)

Epoch: 5

Average training loss: 1.8144 Accuracy:14998/45000 (33.33%)


Average validation loss: 1.6990 Accuracy: 1966/ 5000 (39.32%)

Epoch: 6

Average training loss: 1.7350 Accuracy:16523/45000 (36.72%)


Average validation loss: 1.5799 Accuracy: 2093/ 5000 (41.86%)

Epoch: 7

Average training loss: 1.6270 Accuracy:18593/45000 (41.32%)


Average validation loss: 1.5398 Accuracy: 2194/ 5000 (43.88%)

Epoch: 8

Average training loss: 1.5435 Accuracy:20246/

In [None]:
checkpoint = torch.load(checkpoint_path)
model.load_state_dict(checkpoint)
model.eval()

# Results

In [None]:
plt.plot(train_loss_history,'-',linewidth=3,label='Train error')
plt.plot(valid_loss_history,'-',linewidth=3,label='Validation error')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.grid(True)
plt.legend()
%matplotlib inline
plt.show()

In [None]:
plt.plot(train_acc_history,'-',linewidth=3,label='Train accuracy')
plt.plot(valid_acc_history,'-',linewidth=3,label='Validation accuracy')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.grid(True)
plt.legend()
%matplotlib inline
plt.show()

In [None]:
evaluate(model, test_loader, [], criterion)

# Predict labels for nonlabeled dataset and save the results

In [None]:
def predict_labels(model, data_loader):
  model.eval()
  predicted_labels = []
  ids = []
  with torch.no_grad():
    for data, id in data_loader:
      data = data.to(device)
      output = model(data)
      _, pred = torch.max(output, dim=1)
      predicted_labels = predicted_labels + pred.tolist()
      ids = ids + id.tolist()
  df = pd.DataFrame({
    'ID': ids,
    'Labels': predicted_labels
})
  return df

In [None]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

unlabeled_testset = CIFAR10Dataset(unlabeled_set_dir, train=False, unlabeled=True, transform=transform)
unlabeled_test_loader = torch.utils.data.DataLoader(unlabeled_testset,batch_size=BATCH_SIZE_TEST,shuffle=False)

In [None]:
df = predict_labels(model, unlabeled_test_loader)
df.to_csv(output_csv_path, index=False)