**Name: Faisal Bin Ashraf(fashr003)**

**ID: 862334529** 

**HW-3**

In [None]:
import numpy as np
np.random.seed(123) 
import pickle 
import matplotlib.pyplot as plt  

import torch
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Dataset 
torch.manual_seed(42) 
from torchsummary import summary 



In [None]:
def unpickle(file):
    """Load and return the object stored in the pickle file at ``file``.

    The file is read with ``encoding='bytes'``, so string keys written by
    Python 2 come back as ``bytes`` (callers index with e.g. ``b'data'``).

    NOTE: ``pickle.load`` can execute arbitrary code while loading; only call
    this on files from a trusted source.
    """
    with open(file, 'rb') as fo:
        # Named ``batch`` instead of ``dict`` so the builtin is not shadowed.
        batch = pickle.load(fo, encoding='bytes')
    return batch

def preprocess_data(data):
    """Scale raw image rows to [0, 1] and standardise each colour channel.

    Args:
        data: array whose rows can be reshaped to ``(3, 32, 32)`` images
            (e.g. ``(N, 3072)`` uint8 rows).

    Returns:
        float32 array of shape ``(N, 3, 32, 32)`` in which every channel has
        zero mean and unit variance over the whole input.
    """
    data = data.astype('float32') / 255.0
    data = data.reshape((-1, 3, 32, 32)) # pytorch dimension = (B, C, H, W)
    # Reduce over batch, height and width so there is one statistic per
    # channel. Reducing over axes (0, 1, 2) would instead yield one value per
    # width column and broadcast along the last axis.
    mean = data.mean(axis=(0, 2, 3), keepdims=True)
    std = data.std(axis=(0, 2, 3), keepdims=True)
    # A constant channel has zero spread; leave it centred instead of
    # dividing by zero.
    std[std == 0] = 1.0
    return (data - mean) / std

In [None]:
def _augment_batch(images, labels, data_aug, alpha):
    """Return one mini-batch after applying the augmentation named by ``data_aug``."""
    if data_aug not in ('cutout', 'mixup', 'standard', 'all'):
        # Any other value (including the default 'None') means no augmentation.
        return images, labels

    # The augmentation helpers edit tensors in place, so hand them a copy.
    images = images.detach().clone()
    if data_aug in ('standard', 'all'):
        images = apply_standard_minibatch(images, 4)
    if data_aug in ('cutout', 'all'):
        images = apply_cutout_minibatch(images, 16)
    if data_aug in ('mixup', 'all'):
        # 'all' honours ``alpha`` as well instead of a hard-coded 0.2.
        images, labels = apply_mixup_minibatch(images, labels.detach().clone(), alpha)
    return images, labels


def _evaluate(model, loader, criterion):
    """Return (mean batch loss, accuracy) of ``model`` over ``loader`` without gradients."""
    model.eval()
    loss_sum, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for images, labels in loader:
            outputs = model(images)
            loss_sum += criterion(outputs, labels).item()
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)
    return loss_sum / len(loader), correct / total


def run_model(model, train_loader, test_loader, ITR=100, data_aug = 'None', alpha=0.2):
    """Train ``model`` with Adam (lr=0.001) and cross-entropy, evaluating every epoch.

    Args:
        model: network to train; it is updated in place.
        train_loader: iterable of ``(images, labels)`` training batches.
        test_loader: iterable of ``(images, labels)`` evaluation batches.
        ITR: number of epochs.
        data_aug: ``'cutout'``, ``'mixup'``, ``'standard'`` or ``'all'``
            (standard, then cutout, then mixup); any other value disables
            augmentation.
        alpha: Beta-distribution parameter forwarded to mixup, used by both
            ``'mixup'`` and ``'all'``.

    Returns:
        dict with per-epoch lists under ``'train_acc'``, ``'train_loss'``,
        ``'test_acc'`` and ``'test_loss'``. Training accuracy is measured
        against the (possibly augmented) labels the model was trained on.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    num_epochs = ITR

    history = {'train_acc': [], 'train_loss': [], 'test_acc': [], 'test_loss': []}

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        correct = 0
        total = 0

        for images, labels in train_loader:
            images, labels = _augment_batch(images, labels, data_aug, alpha)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            # argmax on the outputs replaces the deprecated ``outputs.data`` access.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_loss /= len(train_loader)
        train_accuracy = correct / total

        test_loss, test_accuracy = _evaluate(model, test_loader, criterion)

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_accuracy)
        history['test_loss'].append(test_loss)
        history['test_acc'].append(test_accuracy)

        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.3f}, Test Loss: {test_loss:.4f}, Test Acc: {test_accuracy:.3f}")

    return history
    


In [None]:
def plot_history(history, figsize=(12,5), title = 'Training History'):
    """Draw loss and accuracy curves side by side and print the last test accuracy.

    Args:
        history: dict with per-epoch lists under 'train_loss', 'test_loss',
            'train_acc' and 'test_acc'.
        figsize: size of the matplotlib figure.
        title: accepted for API compatibility; not used by the plot.
    """
    epochs = range(1, len(history['train_loss']) + 1)

    # One entry per panel: (y-label, panel title, ((history key, legend label), ...))
    panels = (
        ('Loss', 'Training and Test Loss',
         (('train_loss', 'Train Loss'), ('test_loss', 'Test Loss'))),
        ('Accuracy', 'Training and Test Accuracy',
         (('train_acc', 'Train Acc'), ('test_acc', 'Test Acc'))),
    )

    plt.figure(figsize=figsize)
    for position, (ylabel, heading, curves) in enumerate(panels, start=1):
        axis = plt.subplot(1, 2, position)
        for key, legend_label in curves:
            axis.plot(epochs, history[key], label=legend_label)
        axis.set_xlabel('Epoch')
        axis.set_ylabel(ylabel)
        axis.set_title(heading)
        axis.legend()

    plt.tight_layout()
    plt.show()

    final_test_acc = history['test_acc'][-1]
    print(f"Final Test Accuracy is {final_test_acc}")
    

In [None]:
# Alternative kept for reference: an untrained torchvision ResNet-18 loaded through torch.hub.
#model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=False)  
# Build the network from the local ``resnet20`` module with 10 output classes.
# NOTE(review): [3, 3, 3] is presumably the number of BasicBlocks per stage - confirm in resnet20.py.
from resnet20 import ResNet, BasicBlock 
model = ResNet(BasicBlock, [3, 3, 3], num_classes=10)

In [None]:
# Show the torchsummary report of the model for a single 3x32x32 input.
summary(model, (3, 32, 32))

In [None]:
def get_data(n=1000, data_path='cifar-10-batches-py/'):
    """Load the pickled batches and draw a class-balanced training subset.

    Args:
        n: number of training examples to keep per class (fewer are returned
            for a class that has fewer than ``n`` examples).
        data_path: directory holding ``data_batch_1`` .. ``data_batch_5`` and
            ``test_batch``.

    Returns:
        ``(sampled_train_data, test_data, sampled_train_labels, test_labels)``.
        The image arrays have been passed through ``preprocess_data``; the
        training examples are in random order.

    NOTE(review): the test set is standardised by its own call to
    ``preprocess_data``, i.e. with its own statistics rather than the
    training statistics.
    """
    import os  # local import: the notebook's import cell does not provide ``os``

    rows_per_batch = 10000
    num_batches = 5

    train_data = np.empty((num_batches * rows_per_batch, 3072), dtype=np.uint8)
    train_labels = np.empty((num_batches * rows_per_batch,), dtype=np.int64)
    for batch_idx in range(num_batches):
        batch = unpickle(os.path.join(data_path, 'data_batch_' + str(batch_idx + 1)))
        rows = slice(batch_idx * rows_per_batch, (batch_idx + 1) * rows_per_batch)
        train_data[rows] = batch[b'data']
        train_labels[rows] = batch[b'labels']

    test_batch = unpickle(os.path.join(data_path, 'test_batch'))
    test_data = test_batch[b'data']
    test_labels = np.array(test_batch[b'labels'])

    # Sample n examples uniformly at random for each class from the training set.
    # Only indices are collected; the rows are gathered once at the end.
    per_class = []
    for class_label in np.unique(train_labels):
        indices = np.where(train_labels == class_label)[0]
        np.random.shuffle(indices)
        per_class.append(indices[:n])
    chosen = np.concatenate(per_class)

    # Shuffle so that classes are interleaved rather than grouped.
    order = np.arange(len(chosen))
    np.random.shuffle(order)
    chosen = chosen[order]

    # normalize features (zero mean and unit variance)
    sampled_train_data = preprocess_data(train_data[chosen])
    sampled_train_labels = train_labels[chosen]
    test_data = preprocess_data(test_data)

    return sampled_train_data, test_data, sampled_train_labels, test_labels


In [None]:
# Load the sampled training set and the test set, report their shapes, and
# wrap both in DataLoaders (training batches shuffled, test batches in order).
sampled_train_data, test_data, sampled_train_labels, test_labels = get_data()

for caption, arr in (
    ("Sampled Train Data Shape:", sampled_train_data),
    ("Sampled Train Labels Shape:", sampled_train_labels),
    ("Test Data Shape:", test_data),
    ("Test Labels Shape:", test_labels),
):
    print(caption, arr.shape)

batch_size = 64

train_dataset = TensorDataset(
    torch.from_numpy(sampled_train_data),
    torch.from_numpy(sampled_train_labels),
)
test_dataset = TensorDataset(
    torch.from_numpy(test_data),
    torch.from_numpy(test_labels),
)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [None]:
# Preview one training image; imshow wants channels last, so (C, H, W) -> (H, W, C).
# NOTE(review): the array was standardised by preprocess_data, so values are not
# confined to [0, 1] and imshow presumably clips them - confirm the colours are as intended.
plt.imshow(sampled_train_data[3].transpose( 1, 2, 0))

### 1. (3 pts) Train your Resnet model without augmentation and report the results.

In [None]:
# Baseline: train the model built above with no augmentation (run_model defaults).
history1 = run_model(model, train_loader, test_loader)

In [None]:
# Loss/accuracy curves for the no-augmentation baseline.
plot_history(history1) 

### 2. (4 pts) Implement mixup and report the results for α = 0.2 and α = 0.4 


In [None]:
def mixup(img1, img2, lb1, lb2, alpha_value):
    """Blend two examples with a weight drawn from Beta(alpha_value, alpha_value).

    The same weight ``w`` is applied to images and labels:
    ``w * first + (1 - w) * second``. Labels are interpolated numerically,
    exactly like the images.

    Returns:
        ``(mixed_image, mixed_label)`` as NumPy arrays.
    """
    weight = np.random.beta(alpha_value, alpha_value)
    rest = 1 - weight

    blended_image = np.array(weight * img1 + rest * img2)
    blended_label = np.array(weight * lb1 + rest * lb2)
    return blended_image, blended_label

In [None]:
def apply_mixup_minibatch(minibatch_images, minibatch_labels, mask_size):
    """Mix every example of a mini-batch with a randomly chosen partner, in place.

    Args:
        minibatch_images: tensor of images, first dimension is the batch.
        minibatch_labels: tensor of labels, first dimension is the batch.
            Either integer class indices or floating-point (one-hot / soft)
            label vectors.
        mask_size: the Beta-distribution parameter (alpha) of mixup; the name
            is kept for compatibility with existing callers.

    Returns:
        The same two tensors, modified in place, with unchanged shape and dtype.

    Label handling:
        * Floating-point labels are interpolated with the same weight as the
          images (standard mixup).
        * Integer class indices cannot be interpolated: averaging two indices
          and truncating yields an unrelated class. For those, the label of
          the dominant image (weight >= 0.5) is kept. Pass one-hot float
          labels to train on genuinely soft targets.
    """
    alpha = mask_size
    batch_size = minibatch_images.shape[0]

    # Snapshot the inputs so partners are always drawn from the original
    # examples, never from rows that were already mixed earlier in the loop.
    source_images = minibatch_images.clone()
    source_labels = minibatch_labels.clone()
    soft_labels = torch.is_floating_point(minibatch_labels)

    for i in range(batch_size):
        partner = np.random.randint(batch_size)
        lam = float(np.random.beta(alpha, alpha))

        minibatch_images[i] = lam * source_images[i] + (1 - lam) * source_images[partner]
        if soft_labels:
            minibatch_labels[i] = lam * source_labels[i] + (1 - lam) * source_labels[partner]
        else:
            minibatch_labels[i] = source_labels[i] if lam >= 0.5 else source_labels[partner]

    return minibatch_images, minibatch_labels

In [None]:
# Draw a fresh sample for the preview below. The DataLoaders built earlier are
# not rebuilt here, so this does not change the data used for training.
sampled_train_data, test_data, sampled_train_labels, test_labels = get_data()  

In [None]:
# Preview: original image (left) next to its mixup with image 2*i (right).
i = 3
im, lb = mixup(
    sampled_train_data[i].copy(),
    sampled_train_data[i*2].copy(),
    sampled_train_labels[i].copy(),
    sampled_train_labels[i*2].copy(),
    0.2,
)

plt.figure(figsize=(12,5))

plt.subplot(1, 2, 1)
plt.imshow(np.transpose(sampled_train_data[i], (1, 2, 0)))

plt.subplot(1, 2, 2)
plt.imshow(np.transpose(im, (1, 2, 0)))

plt.show()

In [None]:

# Re-create the network so this experiment starts from freshly initialised
# weights. Reusing the existing ``model`` would continue training from the
# weights left by the previous run_model call and make the curves incomparable.
model = ResNet(BasicBlock, [3, 3, 3], num_classes=10)
history2 = run_model(model, train_loader, test_loader, data_aug='mixup', alpha=0.2)

In [None]:
# Loss/accuracy curves for mixup with alpha = 0.2.
plot_history(history2) 

In [None]:
# Re-create the network so this experiment starts from freshly initialised
# weights. Reusing the existing ``model`` would continue training from the
# weights left by the previous run_model call and make the curves incomparable.
model = ResNet(BasicBlock, [3, 3, 3], num_classes=10)
history22 = run_model(model, train_loader, test_loader, data_aug='mixup', alpha=0.4)

In [None]:
# Loss/accuracy curves for mixup with alpha = 0.4.
plot_history(history22) 

### 3. (4 pts) Cutout augmentation (K = 16)

In [None]:
def cutout(image, mask_size):
    """Zero a square patch of a (C, H, W) image, in place, half of the time.

    With probability 0.5 the image is returned untouched. Otherwise a centre
    pixel is drawn uniformly and the region reaching ``mask_size // 2`` pixels
    from it in each direction (clipped at the borders) is set to 0 in every
    channel.

    Returns:
        The same ``image`` object that was passed in.
    """
    masked = not (np.random.rand() < 0.5)
    if masked:
        _, n_rows, n_cols = image.shape

        row_centre = np.random.randint(0, n_rows)
        col_centre = np.random.randint(0, n_cols)

        reach = mask_size // 2
        row_span = slice(max(0, row_centre - reach), min(n_rows, row_centre + reach))
        col_span = slice(max(0, col_centre - reach), min(n_cols, col_centre + reach))

        image[:, row_span, col_span] = 0

    return image

In [None]:
def apply_cutout_minibatch(minibatch_images, mask_size):
    """Run ``cutout`` on every image of the batch, in place, and return the batch."""
    for position, single_image in enumerate(minibatch_images):
        minibatch_images[position] = cutout(single_image, mask_size)
    return minibatch_images

In [None]:
# Draw a fresh sample for the preview below. The DataLoaders built earlier are
# not rebuilt here, so this does not change the data used for training.
sampled_train_data, test_data, sampled_train_labels, test_labels = get_data() 

In [None]:

# Preview: original image (left) next to a cutout attempt on a copy (right).
# cutout leaves the image unchanged half of the time, so the panels may match.
i = 3

plt.figure(figsize=(12,5))

plt.subplot(1, 2, 1)
plt.imshow(np.transpose(sampled_train_data[i], (1, 2, 0)))

plt.subplot(1, 2, 2)
plt.imshow(np.transpose(cutout(sampled_train_data[i].copy(), 16), (1, 2, 0)))

plt.show()


In [None]:

# Re-create the network so this experiment starts from freshly initialised
# weights. Reusing the existing ``model`` would continue training from the
# weights left by the previous run_model call and make the curves incomparable.
model = ResNet(BasicBlock, [3, 3, 3], num_classes=10)
history3 = run_model(model, train_loader, test_loader, data_aug='cutout')


In [None]:
# Loss/accuracy curves for cutout augmentation.
plot_history(history3) 

### 4. (4 pts)  Standard augmentation 

In [None]:
def standard(image, K):
    """Randomly translate a (C, H, W) image and flip it horizontally half of the time.

    Two integer offsets are drawn uniformly from ``[-K, K]``, one per spatial
    axis. The image content is moved by those offsets onto a zero-filled
    canvas (pixels moved past the border are dropped). With probability 0.5
    the result is then mirrored along the last axis.

    Returns:
        A NumPy array; when flipped it is a view produced by ``np.flip``.
    """
    row_shift = np.random.randint(-K, K+1)
    col_shift = np.random.randint(-K, K+1)

    def _windows(shift, size):
        # (destination slice, source slice) along one axis for the given offset.
        if shift >= 0:
            return slice(None, size - shift), slice(shift, None)
        return slice(-shift, None), slice(None, size + shift)

    dst_rows, src_rows = _windows(row_shift, image.shape[1])
    dst_cols, src_cols = _windows(col_shift, image.shape[2])

    canvas = np.zeros_like(image)
    canvas[:, dst_rows, dst_cols] = image[:, src_rows, src_cols]

    mirror = np.random.rand() < 0.5
    return np.flip(canvas, axis=2) if mirror else canvas


In [None]:
# Draw a fresh sample for the preview below. The DataLoaders built earlier are
# not rebuilt here, so this does not change the data used for training.
sampled_train_data, test_data, sampled_train_labels, test_labels = get_data()  

In [None]:
# Preview: original image (left) next to a shifted / possibly flipped copy (right).
# K=10 is used here, larger than the K=4 used during training.
i = 0

plt.figure(figsize=(12,5))

plt.subplot(1, 2, 1)
plt.imshow(np.transpose(sampled_train_data[i], (1, 2, 0)))

plt.subplot(1, 2, 2)
plt.imshow(np.transpose(standard(sampled_train_data[i].copy(), 10), (1, 2, 0)))

plt.show()

In [None]:
def apply_standard_minibatch(minibatch_images, K):
    """Run ``standard`` (random shift + flip) on every image of the batch, in place."""
    for position in range(len(minibatch_images)):
        augmented = standard(minibatch_images[position], K)
        # ``standard`` may return an np.flip view; copy it to a regular array
        # before handing it to torch.from_numpy.
        as_tensor = torch.from_numpy(augmented.copy()).float()
        minibatch_images[position] = as_tensor
    return minibatch_images

In [None]:
# Re-create the network so this experiment starts from freshly initialised
# weights. Reusing the existing ``model`` would continue training from the
# weights left by the previous run_model call and make the curves incomparable.
model = ResNet(BasicBlock, [3, 3, 3], num_classes=10)
history4 = run_model(model, train_loader, test_loader, data_aug='standard')


In [None]:
# Loss/accuracy curves for standard (shift + flip) augmentation.
plot_history(history4) 

### 5. (3 pts) Combine all augmentations together. 

In [None]:
# Re-create the network so this experiment starts from freshly initialised
# weights. Reusing the existing ``model`` would continue training from the
# weights left by the previous run_model call and make the curves incomparable.
model = ResNet(BasicBlock, [3, 3, 3], num_classes=10)
history5 = run_model(model, train_loader, test_loader, data_aug='all')

In [None]:
# Loss/accuracy curves for all augmentations combined.
plot_history(history5) 

##### Does combining improve things further? 

-> No. Combining all three augmentations does not improve the results further, because together they introduce too much randomness into the dataset. For that reason, combining the augmentations is not the best way to train the model in this case. 

In [None]:
# Compare all experiments: one panel per metric, one curve per experiment.
# (legend label, history) pairs in plotting order.
runs = [
    ('No Augmentation', history1),
    ('Mixup (alpha=0.2)', history2),
    ('Mixup (alpha=0.4)', history22),
    ('Cutout', history3),
    ('Standard (Shift+Flip)', history4),
    ('All Augmentation', history5),
]

# (history key, y-axis label, panel title) in subplot order.
panels = [
    ('train_acc', 'Train Accuracy', 'Train Accuracy for different augmentation'),
    ('test_acc', 'Test Accuracy', 'Test Accuracy for different augmentation'),
    ('train_loss', 'Train Loss', 'Training Loss for different augmentation'),
]

plt.figure(figsize=(15,12))

for position, (metric, ylabel, heading) in enumerate(panels, start=1):
    axis = plt.subplot(2, 2, position)
    for legend_label, run_history in runs:
        values = run_history[metric]
        # Take the x-range from the data so the plot still works when a run
        # used a number of epochs other than 100.
        axis.plot(range(1, len(values) + 1), values, label=legend_label)
    axis.set_xlabel('Epoch')
    axis.set_ylabel(ylabel)
    axis.set_title(heading)
    axis.legend()

plt.tight_layout()
plt.show()

### 6. (2 pts) Comment on the role of data augmentation. 

##### How does it affect test accuracy, train accuracy and the convergence of optimization? Is test accuracy higher? Does training loss converge faster? 

Based on the observed plots, it can be concluded that:

- Without augmentation, the model tends to overfit the training data.

- Mixup augmentation leads to lower train and test accuracies. This augmentation introduces excessive randomness, making it difficult for the model to learn meaningful features. However, when comparing different mixup values, alpha = 0.2 performs relatively better than alpha = 0.4. Both values reach a plateau in terms of optimization, indicating that this augmentation technique is not suitable for this dataset.

- Cutout augmentation shows improvements compared to the base case. The difference between train and test accuracies is smaller, indicating reduced overfitting. The training loss initially decreases quickly, and then the performance plateaus.

- Standard augmentation performs the best among the techniques evaluated. It exhibits the least overfitting, with the highest test accuracy. Both train and test accuracies improve over time, and the training loss continuously decreases.

- Augmenting with a combination of techniques, including mixup, does not yield good performance. Mixup introduces excessive randomness, hindering the model's ability to find patterns. Although the accuracies and losses are better than with mixup alone, this technique does not compare favorably to the others.

Overall, the standard augmentation technique performs the best. However, it is worth noting that if the model were trained for additional epochs, it could potentially achieve even better performance. 