# Evaluating Vit Model on a Independent and Identically Distributed Dataset

Making all required imports

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from tqdm import tqdm
import torchvision.models as models
import os
from git import Repo
from PIL import Image
from torch.utils.data import Dataset, DataLoader

Here we define the model and make the necessary modifications before we start fine tuning it on the cifar10 dataset.

*   Freeze the backbone
*   Load in the cifar dataset
*   Start fine tuning on test set of CIFAR 10






In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = timm.create_model('vit_base_patch16_224', pretrained=True)

for param in model.parameters():
    param.requires_grad = False

num_ftrs = model.head.in_features
model.head = nn.Linear(num_ftrs, 10)

model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.head.parameters(), lr=0.001)

transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=32, shuffle=True, num_workers=4)

model.train()
num_epochs = 3

for epoch in range(num_epochs):
    running_loss = 0.0
    with tqdm(total=len(trainloader), desc=f'Epoch {epoch + 1}/{num_epochs}', unit='batch') as pbar:
        for i, (images, labels) in enumerate(trainloader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            pbar.set_postfix(loss=running_loss / (i + 1))
            pbar.update(1)

            if (i + 1) % 100 == 0:
                print(f'Epoch [{epoch + 1}], Step [{i + 1}], Loss: {running_loss / (i + 1):.4f}')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:04<00:00, 34362797.44it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data


Epoch 1/3:   6%|▋         | 100/1563 [00:35<08:16,  2.95batch/s, loss=0.341]

Epoch [1], Step [100], Loss: 0.3411


Epoch 1/3:  13%|█▎        | 200/1563 [01:11<08:29,  2.67batch/s, loss=0.228]

Epoch [1], Step [200], Loss: 0.2285


Epoch 1/3:  19%|█▉        | 300/1563 [01:48<07:33,  2.78batch/s, loss=0.187]

Epoch [1], Step [300], Loss: 0.1868


Epoch 1/3:  26%|██▌       | 400/1563 [02:24<06:58,  2.78batch/s, loss=0.164]

Epoch [1], Step [400], Loss: 0.1639


Epoch 1/3:  32%|███▏      | 500/1563 [03:00<06:25,  2.76batch/s, loss=0.149]

Epoch [1], Step [500], Loss: 0.1485


Epoch 1/3:  38%|███▊      | 600/1563 [03:36<05:45,  2.78batch/s, loss=0.138]

Epoch [1], Step [600], Loss: 0.1379


Epoch 1/3:  45%|████▍     | 700/1563 [04:12<05:12,  2.76batch/s, loss=0.13]

Epoch [1], Step [700], Loss: 0.1300


Epoch 1/3:  51%|█████     | 800/1563 [04:49<04:41,  2.71batch/s, loss=0.123]

Epoch [1], Step [800], Loss: 0.1233


Epoch 1/3:  58%|█████▊    | 900/1563 [05:25<03:59,  2.77batch/s, loss=0.118]

Epoch [1], Step [900], Loss: 0.1181


Epoch 1/3:  64%|██████▍   | 1000/1563 [06:01<03:21,  2.79batch/s, loss=0.114]

Epoch [1], Step [1000], Loss: 0.1135


Epoch 1/3:  70%|███████   | 1100/1563 [06:37<02:45,  2.80batch/s, loss=0.11]

Epoch [1], Step [1100], Loss: 0.1105


Epoch 1/3:  77%|███████▋  | 1200/1563 [07:13<02:12,  2.74batch/s, loss=0.11]

Epoch [1], Step [1200], Loss: 0.1097


Epoch 1/3:  83%|████████▎ | 1300/1563 [07:49<01:35,  2.75batch/s, loss=0.107]

Epoch [1], Step [1300], Loss: 0.1070


Epoch 1/3:  90%|████████▉ | 1400/1563 [08:26<00:58,  2.79batch/s, loss=0.105]

Epoch [1], Step [1400], Loss: 0.1045


Epoch 1/3:  96%|█████████▌| 1500/1563 [09:02<00:22,  2.78batch/s, loss=0.103]

Epoch [1], Step [1500], Loss: 0.1033


Epoch 1/3: 100%|██████████| 1563/1563 [09:25<00:00,  2.77batch/s, loss=0.102]
Epoch 2/3:   6%|▋         | 100/1563 [00:36<08:48,  2.77batch/s, loss=0.0483]

Epoch [2], Step [100], Loss: 0.0483


Epoch 2/3:  13%|█▎        | 200/1563 [01:12<08:18,  2.73batch/s, loss=0.0596]

Epoch [2], Step [200], Loss: 0.0596


Epoch 2/3:  19%|█▉        | 300/1563 [01:48<07:33,  2.78batch/s, loss=0.0574]

Epoch [2], Step [300], Loss: 0.0574


Epoch 2/3:  26%|██▌       | 400/1563 [02:24<06:58,  2.78batch/s, loss=0.0578]

Epoch [2], Step [400], Loss: 0.0578


Epoch 2/3:  32%|███▏      | 500/1563 [03:00<06:22,  2.78batch/s, loss=0.0572]

Epoch [2], Step [500], Loss: 0.0572


Epoch 2/3:  38%|███▊      | 600/1563 [03:36<05:56,  2.70batch/s, loss=0.0581]

Epoch [2], Step [600], Loss: 0.0581


Epoch 2/3:  45%|████▍     | 700/1563 [04:12<05:14,  2.74batch/s, loss=0.0578]

Epoch [2], Step [700], Loss: 0.0578


Epoch 2/3:  51%|█████     | 800/1563 [04:48<04:33,  2.79batch/s, loss=0.0575]

Epoch [2], Step [800], Loss: 0.0575


Epoch 2/3:  58%|█████▊    | 900/1563 [05:24<03:57,  2.79batch/s, loss=0.0578]

Epoch [2], Step [900], Loss: 0.0578


Epoch 2/3:  64%|██████▍   | 1000/1563 [06:01<03:22,  2.78batch/s, loss=0.0598]

Epoch [2], Step [1000], Loss: 0.0598


Epoch 2/3:  70%|███████   | 1100/1563 [06:37<02:51,  2.70batch/s, loss=0.0598]

Epoch [2], Step [1100], Loss: 0.0598


Epoch 2/3:  77%|███████▋  | 1200/1563 [07:13<02:10,  2.78batch/s, loss=0.06]

Epoch [2], Step [1200], Loss: 0.0600


Epoch 2/3:  83%|████████▎ | 1300/1563 [07:49<01:34,  2.78batch/s, loss=0.0603]

Epoch [2], Step [1300], Loss: 0.0603


Epoch 2/3:  90%|████████▉ | 1400/1563 [08:25<00:58,  2.79batch/s, loss=0.0607]

Epoch [2], Step [1400], Loss: 0.0607


Epoch 2/3:  96%|█████████▌| 1500/1563 [09:01<00:22,  2.79batch/s, loss=0.0615]

Epoch [2], Step [1500], Loss: 0.0615


Epoch 2/3: 100%|██████████| 1563/1563 [09:24<00:00,  2.77batch/s, loss=0.0618]
Epoch 3/3:   6%|▋         | 100/1563 [00:36<08:43,  2.80batch/s, loss=0.0437]

Epoch [3], Step [100], Loss: 0.0437


Epoch 3/3:  13%|█▎        | 200/1563 [01:12<08:09,  2.78batch/s, loss=0.0469]

Epoch [3], Step [200], Loss: 0.0469


Epoch 3/3:  19%|█▉        | 300/1563 [01:48<07:32,  2.79batch/s, loss=0.0443]

Epoch [3], Step [300], Loss: 0.0443


Epoch 3/3:  26%|██▌       | 400/1563 [02:24<06:58,  2.78batch/s, loss=0.046]

Epoch [3], Step [400], Loss: 0.0460


Epoch 3/3:  32%|███▏      | 500/1563 [03:00<06:30,  2.72batch/s, loss=0.0468]

Epoch [3], Step [500], Loss: 0.0468


Epoch 3/3:  38%|███▊      | 600/1563 [03:37<05:45,  2.79batch/s, loss=0.046]

Epoch [3], Step [600], Loss: 0.0460


Epoch 3/3:  45%|████▍     | 700/1563 [04:13<05:09,  2.79batch/s, loss=0.0464]

Epoch [3], Step [700], Loss: 0.0464


Epoch 3/3:  51%|█████     | 800/1563 [04:49<04:34,  2.78batch/s, loss=0.0465]

Epoch [3], Step [800], Loss: 0.0465


Epoch 3/3:  58%|█████▊    | 900/1563 [05:25<03:59,  2.77batch/s, loss=0.0466]

Epoch [3], Step [900], Loss: 0.0466


Epoch 3/3:  64%|██████▍   | 1000/1563 [06:01<03:26,  2.72batch/s, loss=0.0489]

Epoch [3], Step [1000], Loss: 0.0489


Epoch 3/3:  70%|███████   | 1100/1563 [06:37<02:46,  2.78batch/s, loss=0.0493]

Epoch [3], Step [1100], Loss: 0.0493


Epoch 3/3:  77%|███████▋  | 1200/1563 [07:14<02:20,  2.59batch/s, loss=0.0495]

Epoch [3], Step [1200], Loss: 0.0495


Epoch 3/3:  83%|████████▎ | 1300/1563 [07:50<01:34,  2.78batch/s, loss=0.051]

Epoch [3], Step [1300], Loss: 0.0510


Epoch 3/3:  90%|████████▉ | 1400/1563 [08:26<00:58,  2.78batch/s, loss=0.0508]

Epoch [3], Step [1400], Loss: 0.0508


Epoch 3/3:  96%|█████████▌| 1500/1563 [09:03<00:23,  2.70batch/s, loss=0.0507]

Epoch [3], Step [1500], Loss: 0.0507


Epoch 3/3: 100%|██████████| 1563/1563 [09:25<00:00,  2.76batch/s, loss=0.0511]


Now we evaluate the model on the test set to see if the training was successful

In [5]:
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False)

model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

cifar10acc = 100 * correct / total

print(f'Accuracy on CIFAR-10 test set: {100 * correct / total:.2f}%')


Files already downloaded and verified
Accuracy on CIFAR-10 test set: 97.45%




# TASK 4 Inductive Biases of Models: Semantic Biases

We will do the following:

*   Use the vit model (finetuned on CIFAR10) and validate on a variation of CIFAR10 that exhibits shape bias
*   Use the vit model (finetuned on CIFAR10) and validate on a variation of CIFAR10 that exhibits texture bias
*   Finetune the vit model on MNIST Dataset and the evaluate it on a colourized MNIST dataset to find the colour bias






In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

for param in model.parameters():
    param.requires_grad = False

num_ftrs = model.head.in_features
model.head = nn.Linear(num_ftrs, 10)

model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.head.parameters(), lr=0.001)

repo_url = 'https://github.com/bdevans/CIFAR-10G.git'
clone_dir = 'CIFAR-10G'


if not os.path.exists(clone_dir):
    print("Cloning the CIFAR-10G repository...")
    Repo.clone_from(repo_url, clone_dir)
    print("Repository cloned successfully.")
else:
    print("Repository already exists. Skipping clone.")


data_dir = os.path.join(clone_dir, '224x224')
data_dir = 'CIFAR-10G/224x224'

transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

subdirs = ['contours', 'contours_inverted', 'line_drawings', 'line_drawings_inverted', 'silhouettes', 'silhouettes_inverted']
datasets_dict = {}
validation_loaders = {}
accuracy_dict = {}

for subdir in subdirs:
    dataset_path = os.path.join(data_dir, subdir)

    dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

    dataset_size = len(dataset)
    val_size = int(0.2 * dataset_size)
    train_size = dataset_size - val_size
    train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)

    datasets_dict[subdir] = train_loader
    validation_loaders[subdir] = val_loader
    accuracy_dict[subdir] = []


Cloning the CIFAR-10G repository...
Repository cloned successfully.




**SHAPE BIAS**

In [None]:
num_epochs = 3

for subdir, validation_loader in validation_loaders.items():
    print(f"Validating on {subdir} dataset")

    for epoch in range(num_epochs):
        model.eval()
        correct = 0
        total = 0

        with tqdm(total=len(validation_loader), desc=f'Epoch {epoch + 1}/{num_epochs}', unit='batch') as pbar:
            with torch.no_grad():
                running_loss = 0.0
                for i, (images, labels) in enumerate(validation_loader):

                    images, labels = images.to(device), labels.to(device)

                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    running_loss += loss.item()

                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

                    pbar.set_postfix(loss=running_loss / (i + 1))
                    pbar.update(1)

        accuracy = 100 * correct / total
        accuracy_dict[subdir].append(accuracy)

        print(f"Validation Accuracy on {subdir} dataset after epoch {epoch + 1}: {accuracy:.2f}%")

    print(f"Finished validating on {subdir} dataset\n")

for subdir, accuracies in accuracy_dict.items():
    print(f"Validation accuracies for {subdir} dataset over {num_epochs} epochs: {accuracies}")


Validating on contours dataset


Epoch 1/3: 100%|██████████| 1/1 [00:00<00:00,  2.27batch/s, loss=2.65]


Validation Accuracy on contours dataset after epoch 1: 20.00%


Epoch 2/3: 100%|██████████| 1/1 [00:00<00:00,  2.59batch/s, loss=2.65]


Validation Accuracy on contours dataset after epoch 2: 20.00%


Epoch 3/3: 100%|██████████| 1/1 [00:00<00:00,  2.72batch/s, loss=2.65]


Validation Accuracy on contours dataset after epoch 3: 20.00%
Finished validating on contours dataset

Validating on contours_inverted dataset


Epoch 1/3: 100%|██████████| 1/1 [00:00<00:00,  2.64batch/s, loss=2.25]


Validation Accuracy on contours_inverted dataset after epoch 1: 20.00%


Epoch 2/3: 100%|██████████| 1/1 [00:00<00:00,  2.56batch/s, loss=2.25]


Validation Accuracy on contours_inverted dataset after epoch 2: 20.00%


Epoch 3/3: 100%|██████████| 1/1 [00:00<00:00,  2.52batch/s, loss=2.25]


Validation Accuracy on contours_inverted dataset after epoch 3: 20.00%
Finished validating on contours_inverted dataset

Validating on line_drawings dataset


Epoch 1/3: 100%|██████████| 1/1 [00:00<00:00,  2.65batch/s, loss=2.46]


Validation Accuracy on line_drawings dataset after epoch 1: 15.00%


Epoch 2/3: 100%|██████████| 1/1 [00:00<00:00,  2.67batch/s, loss=2.46]


Validation Accuracy on line_drawings dataset after epoch 2: 15.00%


Epoch 3/3: 100%|██████████| 1/1 [00:00<00:00,  2.64batch/s, loss=2.46]


Validation Accuracy on line_drawings dataset after epoch 3: 15.00%
Finished validating on line_drawings dataset

Validating on line_drawings_inverted dataset


Epoch 1/3: 100%|██████████| 1/1 [00:00<00:00,  2.48batch/s, loss=2.63]


Validation Accuracy on line_drawings_inverted dataset after epoch 1: 30.00%


Epoch 2/3: 100%|██████████| 1/1 [00:00<00:00,  2.66batch/s, loss=2.63]


Validation Accuracy on line_drawings_inverted dataset after epoch 2: 30.00%


Epoch 3/3: 100%|██████████| 1/1 [00:00<00:00,  2.60batch/s, loss=2.63]


Validation Accuracy on line_drawings_inverted dataset after epoch 3: 30.00%
Finished validating on line_drawings_inverted dataset

Validating on silhouettes dataset


Epoch 1/3: 100%|██████████| 1/1 [00:00<00:00,  2.66batch/s, loss=2.83]


Validation Accuracy on silhouettes dataset after epoch 1: 15.00%


Epoch 2/3: 100%|██████████| 1/1 [00:00<00:00,  2.54batch/s, loss=2.83]


Validation Accuracy on silhouettes dataset after epoch 2: 15.00%


Epoch 3/3: 100%|██████████| 1/1 [00:00<00:00,  2.59batch/s, loss=2.83]


Validation Accuracy on silhouettes dataset after epoch 3: 15.00%
Finished validating on silhouettes dataset

Validating on silhouettes_inverted dataset


Epoch 1/3: 100%|██████████| 1/1 [00:00<00:00,  2.64batch/s, loss=2.75]


Validation Accuracy on silhouettes_inverted dataset after epoch 1: 10.00%


Epoch 2/3: 100%|██████████| 1/1 [00:00<00:00,  2.61batch/s, loss=2.75]


Validation Accuracy on silhouettes_inverted dataset after epoch 2: 10.00%


Epoch 3/3: 100%|██████████| 1/1 [00:00<00:00,  2.63batch/s, loss=2.75]

Validation Accuracy on silhouettes_inverted dataset after epoch 3: 10.00%
Finished validating on silhouettes_inverted dataset

Validation accuracies for contours dataset over 3 epochs: [20.0, 20.0, 20.0]
Validation accuracies for contours_inverted dataset over 3 epochs: [20.0, 20.0, 20.0]
Validation accuracies for line_drawings dataset over 3 epochs: [15.0, 15.0, 15.0]
Validation accuracies for line_drawings_inverted dataset over 3 epochs: [30.0, 30.0, 30.0]
Validation accuracies for silhouettes dataset over 3 epochs: [15.0, 15.0, 15.0]
Validation accuracies for silhouettes_inverted dataset over 3 epochs: [10.0, 10.0, 10.0, 10.0]





Here we calculate the average accuracy of all the sub directories, then we use that average and the model's original accuracy on the CIFAR 10 dataset to calculate the shape bias of the vit model.
This is obtained via the formula: avg_accuracy/cifar10accuracy

In [None]:
total_accuracy_sum = 0
accuracy_count = 0

for subdir, accuracies in accuracy_dict.items():
    total_accuracy_sum += sum(accuracies)
    accuracy_count += len(accuracies)

average_accuracy = total_accuracy_sum / accuracy_count

final_value = average_accuracy / cifar10acc

print(f"Shape Bias using the CIFAR10G Dataset turn out to be: {final_value:.4f}")

Shape Bias using the CIFAR10G Dataset turn out to be: 0.1845


**TEXTURE BIAS:** This dataset is a variation of CIFAR10 with texture differences.

In [None]:
!unzip texture_bias_dataset.zip

Our code uses a CIFAR10GCustomDataset class that reads the repo from github and creates a dataset that the vit model is used to seeing and was trained on. furthermore, it divides the texture bias dataset into train and test splits, however we will only be using the test data.

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class CIFAR10GCustomDataset(Dataset):
    def __init__(self, root_dir, class_map, transform=None):
        self.root_dir = root_dir
        self.class_map = class_map
        self.transform = transform
        self.image_paths = []
        self.labels = []

        for class_name, class_idx in class_map.items():
            class_dir = os.path.join(root_dir, class_name)
            for img_name in os.listdir(class_dir):
                img_path = os.path.join(class_dir, img_name)
                self.image_paths.append(img_path)
                self.labels.append(class_idx)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = self.labels[idx]
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label

data_dir = './'

class_map = {
    'airplane': 0,
    'automobile': 1,
    'bird': 2,
    'cat': 3,
    'deer': 4,
    'dog': 5,
    'frog': 6,
    'horse': 7,
    'ship': 8,
    'truck': 9
}

transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

dataset = CIFAR10GCustomDataset(root_dir=data_dir, class_map=class_map, transform=transform)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)





Here we evaluate the vit model trained on CIFAR10 on the data which has texture changes.

In [None]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in tqdm(val_loader, desc="Validating", unit="batch"):
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Validation Accuracy: {accuracy:.2f}%')
print("Texture Bias is: ", accuracy/cifar10acc)

Validating: 100%|██████████| 7/7 [00:02<00:00,  2.77batch/s]

Validation Accuracy: 16.50%
Texture Bias is:  0.17010309278350516





**COLOUR BIAS**. in the following cell we do:

*   Load the MNIST dataset
*   Freeze the model's backbone and get it ready for finetuning
*   Divide the dataset into training and test sets






In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

transform = transforms.Compose([
    transforms.Resize(224),
    transforms.Grayscale(3),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=32, shuffle=True, num_workers=4)

valset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
valloader = DataLoader(valset, batch_size=32, shuffle=False, num_workers=4)

model2 = timm.create_model('vit_base_patch16_224', pretrained=True)

for param in model2.parameters():
    param.requires_grad = False

num_ftrs = model2.head.in_features
model2.head = nn.Linear(num_ftrs, 10)

model2.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model2.head.parameters(), lr=0.001)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 13353157.84it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 405816.08it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 3752318.87it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 4377419.29it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Here we start finetuning the vgg model for 3 epochs

In [None]:
num_epochs = 3
model2.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    with tqdm(total=len(trainloader), desc=f'Epoch {epoch + 1}/{num_epochs}', unit='batch') as pbar:
        for i, (images, labels) in enumerate(trainloader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model2(images)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            pbar.set_postfix(loss=running_loss / (i + 1))
            pbar.update(1)

Epoch 1/3: 100%|██████████| 1875/1875 [10:38<00:00,  2.93batch/s, loss=0.208]
Epoch 2/3: 100%|██████████| 1875/1875 [10:49<00:00,  2.89batch/s, loss=0.101]
Epoch 3/3: 100%|██████████| 1875/1875 [10:48<00:00,  2.89batch/s, loss=0.0838]


Here we validate on the MNIST test set to check whether our model was properly trained or not

In [None]:
model2.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in tqdm(valloader, desc="Evaluating", unit="batch"):
        images, labels = images.to(device), labels.to(device)
        outputs = model2(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Validation Accuracy on MNIST: {accuracy:.2f}%')

Evaluating: 100%|██████████| 313/313 [01:48<00:00,  2.90batch/s]

Validation Accuracy on MNIST: 97.34%





Now we repeat the process but this time we use the colourized MNIST dataset and we avoid finetuning on this new dataset

In [None]:
repo_url = 'https://github.com/jayaneetha/colorized-MNIST.git'
clone_dir = 'colorized-MNIST'

if not os.path.exists(clone_dir):
    print("Cloning the colorized MNIST repository...")
    Repo.clone_from(repo_url, clone_dir)
    print("Repository cloned successfully.")
else:
    print("Repository already exists. Skipping clone.")

test_data_dir = os.path.join(clone_dir, 'testing')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

transform_color = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

colorized_mnist_test = datasets.ImageFolder(root=test_data_dir, transform=transform_color)
testloader = DataLoader(colorized_mnist_test, batch_size=32, shuffle=False, num_workers=4)

model.eval()

correct = 0
total = 0
with torch.no_grad():
    for images, labels in tqdm(testloader, desc="Evaluating on Colorized MNIST", unit="batch"):
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy2 = 100 * correct / total
print(f'Accuracy on Colorized MNIST test set: {accuracy2:.2f}%')


Cloning the colorized MNIST repository...




Repository cloned successfully.


Evaluating on Colorized MNIST: 100%|██████████| 313/313 [01:48<00:00,  2.88batch/s]

Accuracy on Colorized MNIST test set: 58.16%





We evaluate our model (trained on MNIST) on the colourized MNIST dataset to observe colour biases.


*   colour bias = colorizedMNISTaccuracy/MNISTaccuracy




In [None]:
color_bias = accuracy2/accuracy
print("The colour bias is: ",color_bias)

The colour bias is: 0.597


#  TASK 5 Inductive Biases of Models: Locality Biases


In [None]:
!unzip noised_cifar10_test.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: ship/4361.png           
  inflating: ship/8785.png           
  inflating: ship/2874.png           
  inflating: ship/4846.png           
  inflating: ship/4351.png           
  inflating: ship/3848.png           
  inflating: ship/4769.png           
  inflating: ship/202.png            
  inflating: ship/7144.png           
  inflating: ship/8704.png           
  inflating: ship/4497.png           
  inflating: ship/1366.png           
  inflating: ship/6691.png           
  inflating: ship/9720.png           
  inflating: ship/5879.png           
  inflating: ship/6961.png           
  inflating: ship/6747.png           
  inflating: ship/8608.png           
  inflating: ship/8303.png           
  inflating: ship/6474.png           
  inflating: ship/3243.png           
  inflating: ship/4389.png           
  inflating: ship/1897.png           
  inflating: ship/8208.png           
  inflating: ship/1358.

Now we will use more variations of the CIFAR10 dataset to notice the effects of different biases on the accuracy of a model

*   We will once again use a CIFAR10GCustomDataset class that converts our modified cifar10 dataset into a form that our model understands
*   We also use this entire dataset as testing data as we do not have to finetune our model on these sets again



In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class CIFAR10GCustomDataset(Dataset):
    def __init__(self, root_dir, class_map, transform=None):
        self.root_dir = root_dir
        self.class_map = class_map
        self.transform = transform
        self.image_paths = []
        self.labels = []

        for class_name, class_idx in class_map.items():
            class_dir = os.path.join(root_dir, class_name)
            for img_name in os.listdir(class_dir):
                img_path = os.path.join(class_dir, img_name)
                self.image_paths.append(img_path)
                self.labels.append(class_idx)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = self.labels[idx]
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label

data_dir = './'

# Mapping folder names to class indices (CIFAR-10 class names)
class_map = {
    'airplane': 0,
    'automobile': 1,
    'bird': 2,
    'cat': 3,
    'deer': 4,
    'dog': 5,
    'frog': 6,
    'horse': 7,
    'ship': 8,
    'truck': 9
}

transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
dataset = CIFAR10GCustomDataset(root_dir=data_dir, class_map=class_map, transform=transform)

test_loader = DataLoader(dataset, batch_size=32, shuffle=False, num_workers=4)



Here we validate the model on this dataset with added noise. we will then compare the accuracies with and without noise in the cifar10 dataset and evaluate how much the accuracy drops of our model

In [None]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in tqdm(test_loader, desc="Validating", unit="batch"):

        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Validation Accuracy: {accuracy:.2f}%')

print("Change in accuracy: ", cifar10acc - accuracy)


Validating: 100%|██████████| 313/313 [01:49<00:00,  2.85batch/s]

Validation Accuracy: 17.63%
Change in accuracy:  79.37





**Now we look at the effect of scrambled images on the accuracy**:
Here we load in the cifar10 dataset with scrambled images

In [None]:
!unzip scrambled_cifar10_test.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: ship/4361.png           
  inflating: ship/8785.png           
  inflating: ship/2874.png           
  inflating: ship/4846.png           
  inflating: ship/4351.png           
  inflating: ship/3848.png           
  inflating: ship/4769.png           
  inflating: ship/202.png            
  inflating: ship/7144.png           
  inflating: ship/8704.png           
  inflating: ship/4497.png           
  inflating: ship/1366.png           
  inflating: ship/6691.png           
  inflating: ship/9720.png           
  inflating: ship/5879.png           
  inflating: ship/6961.png           
  inflating: ship/6747.png           
  inflating: ship/8608.png           
  inflating: ship/8303.png           
  inflating: ship/6474.png           
  inflating: ship/3243.png           
  inflating: ship/4389.png           
  inflating: ship/1897.png           
  inflating: ship/8208.png           
  inflating: ship/1358.

In [None]:
dataset = CIFAR10GCustomDataset(root_dir=data_dir, class_map=class_map, transform=transform)

test_loader = DataLoader(dataset, batch_size=32, shuffle=False, num_workers=4)



Here we validate the model on this dataset with scrambled. we will then compare the accuracies with and without scrambled images in the cifar10 dataset and evaluate how much the accuracy drops of our model

In [None]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in tqdm(test_loader, desc="Validating", unit="batch"):

        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Validation Accuracy: {accuracy:.2f}%')

print("Change in accuracy: ", cifar10acc - accuracy)


Validating: 100%|██████████| 313/313 [01:51<00:00,  2.81batch/s]

Validation Accuracy: 13.04%
Change in accuracy:  83.96000000000001





In [None]:
save_path = "./vitcifar10.pth"
torch.save(model.state_dict(), save_path)
print(f'Model saved to {save_path}')

Model saved to ./vitcifar10.pth


In [7]:
!unzip cifar10_styled_100.zip

Archive:  cifar10_styled_100.zip
  inflating: styled_image_0_20.png   
  inflating: styled_image_5_45.png   
  inflating: styled_image_2_59.png   
  inflating: styled_image_9_71.png   
  inflating: styled_image_0_72.png   
  inflating: styled_image_9_29.png   
  inflating: styled_image_6_69.png   
  inflating: styled_image_7_28.png   
  inflating: styled_image_1_78.png   
  inflating: styled_image_6_84.png   
  inflating: styled_image_6_80.png   
  inflating: styled_image_1_75.png   
  inflating: styled_image_2_47.png   
  inflating: styled_image_7_12.png   
  inflating: styled_image_5_93.png   
  inflating: styled_image_7_55.png   
  inflating: styled_image_6_13.png   
  inflating: styled_image_2_86.png   
  inflating: styled_image_8_44.png   
  inflating: styled_image_6_54.png   
  inflating: styled_image_3_65.png   
  inflating: styled_image_0_27.png   
  inflating: styled_image_9_48.png   
  inflating: styled_image_6_42.png   
  inflating: styled_image_2_38.png   
  inflating: styl

Global Style changes: First we created a dataset that had a style change modelled on cifar 10. the photo we used to introduce a style change was starry night by van gogh. We evaluate our model (trained on CIFAR10) on these new images. Moreover, we then calculate the change in accuracy after introducing a style change

In [14]:
class StyledImageDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform

        self.image_filenames = [f for f in os.listdir(root_dir) if f.startswith('styled_image_')]

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.image_filenames[idx])
        image = Image.open(img_name).convert('RGB')
        label = int(self.image_filenames[idx].split('_')[2].split('.')[0])

        if self.transform:
            image = self.transform(image)

        return image, label

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

styled_images_path = './'
dataset = StyledImageDataset(root_dir=styled_images_path, transform=transform)
dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

model.to(device)
model.eval()

correct = 0
total = 0

with torch.no_grad():
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

acc = 100 * correct / total
print(f'Accuracy: {100 * correct / total:.2f}%')


Accuracy: 75.00%


In [15]:
print("Drop in accuracy is: ", cifar10acc - acc)


Drop in accuracy is:  22.450000000000003
