# Task 5: Inductive Biases of Models: Locality Biases

In [2]:
%%writefile VIT_Model.py
import timm
import torch
import torch.nn as nn


class ViTClassifier(nn.Module):
    """timm ``vit_base_patch16_224`` backbone with a freshly initialised linear head.

    Args:
        num_classes: Number of outputs of the replacement classification head.
        pretrained: Forwarded to ``timm.create_model``.
        finetune: With ``"classifier"`` every backbone parameter is frozen and
            only the new head stays trainable. Any other value leaves all
            parameters trainable.
    """

    def __init__(self, num_classes, pretrained=True, finetune="classifier"):
        super().__init__()
        backbone = timm.create_model('vit_base_patch16_224', pretrained=pretrained)
        # Swap the original head for one sized to this task.
        backbone.head = nn.Linear(backbone.head.in_features, num_classes)
        self.vit = backbone

        if finetune == "classifier":
            # Freeze everything first, then re-enable gradients for the head only.
            self.vit.requires_grad_(False)
            self.vit.head.requires_grad_(True)

    def forward(self, x):
        """Return the logits the wrapped ViT produces for batch ``x``."""
        logits = self.vit(x)
        return logits

def load_vit_model(num_classes, device, pretrained=True, finetune="classifier"):
    """Build a ``ViTClassifier`` and move it to ``device``.

    Args:
        num_classes: Number of outputs of the classification head.
        device: Target device passed to ``Module.to``.
        pretrained: Forwarded to ``ViTClassifier``. Pass ``False`` when a saved
            state dict is loaded right afterwards, so the pretrained weights
            are not fetched only to be overwritten.
        finetune: Forwarded to ``ViTClassifier``; ``"classifier"`` freezes the
            backbone and trains only the head.

    Returns:
        The model, already placed on ``device``.
    """
    model = ViTClassifier(num_classes, pretrained=pretrained, finetune=finetune)
    return model.to(device)



Writing VIT_Model.py


In [3]:
%%writefile data.py

import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
from PIL import Image

class CustomCIFAR10Dataset(Dataset):
    """Image-folder dataset: each sub-directory of ``root_dir`` is one class.

    Class names are the sorted sub-directory names, and a sample's label is
    the index of its class in that sorted list.

    Args:
        root_dir: Directory containing one sub-directory per class.
        transform: Optional callable applied to the RGB ``PIL.Image`` before
            it is returned.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # Only directories are classes. A stray file (README, .DS_Store, ...)
        # must not occupy a slot in the label index space.
        self.classes = sorted(
            entry for entry in os.listdir(self.root_dir)
            if os.path.isdir(os.path.join(self.root_dir, entry))
        )
        self.image_paths = []
        self.labels = []

        for idx, class_name in enumerate(self.classes):
            class_dir = os.path.join(self.root_dir, class_name)
            # os.listdir order is arbitrary; sort so sample indices are
            # reproducible across runs and machines.
            for img_file in sorted(os.listdir(class_dir)):
                img_path = os.path.join(class_dir, img_file)
                if not os.path.isfile(img_path):
                    continue  # nested directories are not samples
                self.image_paths.append(img_path)
                self.labels.append(idx)

    def __len__(self):
        """Return the number of samples found under ``root_dir``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``; image is RGB, transformed if set."""
        img_path = self.image_paths[idx]
        label = self.labels[idx]
        # The context manager closes the file handle as soon as the pixels
        # have been copied into the converted image.
        with Image.open(img_path) as raw:
            image = raw.convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, label

def get_data_loaders(batch_size, root_dir, img_size=224, num_workers=2):
    """Create the train and test loaders for a ``root_dir/{train,test}`` layout.

    Both splits share one preprocessing pipeline: resize to
    ``img_size x img_size``, convert to tensor, then per-channel normalisation.

    Args:
        batch_size: Batch size used by both loaders.
        root_dir: Directory holding the ``train`` and ``test`` sub-directories.
        img_size: Side length images are resized to.
        num_workers: Worker process count for each ``DataLoader``.

    Returns:
        ``(train_loader, test_loader)``; only the train loader shuffles.
    """
    # NOTE(review): these are the widely used ImageNet channel statistics;
    # confirm they match what the pretrained backbone expects.
    preprocessing = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    def _make_loader(split, shuffle):
        # One dataset + loader per split directory.
        split_dir = os.path.join(root_dir, split)
        split_dataset = CustomCIFAR10Dataset(root_dir=split_dir, transform=preprocessing)
        return DataLoader(
            split_dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=num_workers,
        )

    return _make_loader('train', True), _make_loader('test', False)

def load_data(batch_size, root_dir, img_size=224, num_workers=2):
    """Return ``(train_loader, test_loader)`` by delegating to ``get_data_loaders``."""
    return get_data_loaders(
        batch_size=batch_size,
        root_dir=root_dir,
        img_size=img_size,
        num_workers=num_workers,
    )


Writing data.py


In [4]:
%%writefile Fine_Tune.py

import torch
import torch.optim as optim
import torch.nn as nn
from VIT_Model import load_vit_model
from data import load_data


# Run configuration for the fine-tuning script.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_classes = 10
batch_size = 64
learning_rate = 1e-4
num_epochs = 3

root_dir = "/kaggle/input/cifar10/cifar10"
train_loader, test_loader = load_data(batch_size=batch_size, root_dir=root_dir)

model = load_vit_model(num_classes, device)

criterion = nn.CrossEntropyLoss()
# Pass the optimizer only the parameters that still require gradients, so it
# does not carry parameter groups for weights that are frozen and never updated.
optimizer = optim.Adam(
    [param for param in model.parameters() if param.requires_grad],
    lr=learning_rate,
)

def train_model(model, train_loader, criterion, optimizer, device, num_epochs,
                save_path='fine_tuned_vit_cifar10.pth'):
    """Train ``model`` for ``num_epochs`` and save its state dict.

    Args:
        model: Module to optimise; it is put in training mode.
        train_loader: Sized iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepping the model's parameters.
        device: Device each batch is moved to before the forward pass.
        num_epochs: Number of full passes over ``train_loader``.
        save_path: Destination of the saved ``state_dict``. Defaults to the
            file name the script has always written.
    """
    model.train()
    num_batches = len(train_loader)
    for epoch in range(num_epochs):
        running_loss = 0.0
        for step, (images, labels) in enumerate(train_loader, start=1):
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Read the scalar once and reuse it for the running sum and the log.
            batch_loss = loss.item()
            running_loss += batch_loss
            if step % 100 == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}], Step [{step}/{num_batches}], Loss: {batch_loss:.4f}')

        # An empty loader has no average; report NaN instead of dividing by zero.
        average_loss = running_loss / num_batches if num_batches else float('nan')
        print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {average_loss:.4f}')

    torch.save(model.state_dict(), save_path)
    print("Model fine-tuned and saved successfully.")

def evaluate_model(model, test_loader, device):
    """Compute, print and return top-1 accuracy of ``model`` on ``test_loader``.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        test_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Accuracy as a percentage in ``[0, 100]``; ``0.0`` when the loader
        yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            # The predicted class is the index of the largest logit per row.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Guard the empty-loader case instead of raising ZeroDivisionError.
    accuracy = 100 * correct / total if total else 0.0
    print(f'Test Accuracy: {accuracy:.2f}%')
    return accuracy

# Fine-tune first, then report accuracy on the held-out test split.
train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=num_epochs,
)
evaluate_model(model=model, test_loader=test_loader, device=device)


Writing Fine_Tune.py


In [5]:
!python Fine_Tune.py

model.safetensors: 100%|██████████████████████| 346M/346M [00:00<00:00, 383MB/s]
Epoch [1/3], Step [100/782], Loss: 0.8708
Epoch [1/3], Step [200/782], Loss: 0.5909
Epoch [1/3], Step [300/782], Loss: 0.5100
Epoch [1/3], Step [400/782], Loss: 0.4517
Epoch [1/3], Step [500/782], Loss: 0.2669
Epoch [1/3], Step [600/782], Loss: 0.1764
Epoch [1/3], Step [700/782], Loss: 0.3157
Epoch [1/3], Average Loss: 0.5273
Epoch [2/3], Step [100/782], Loss: 0.2288
Epoch [2/3], Step [200/782], Loss: 0.1261
Epoch [2/3], Step [300/782], Loss: 0.1179
Epoch [2/3], Step [400/782], Loss: 0.1835
Epoch [2/3], Step [500/782], Loss: 0.1851
Epoch [2/3], Step [600/782], Loss: 0.2977
Epoch [2/3], Step [700/782], Loss: 0.3492
Epoch [2/3], Average Loss: 0.2039
Epoch [3/3], Step [100/782], Loss: 0.1132
Epoch [3/3], Step [200/782], Loss: 0.2189
Epoch [3/3], Step [300/782], Loss: 0.1418
Epoch [3/3], Step [400/782], Loss: 0.1791
Epoch [3/3], Step [500/782], Loss: 0.1451
Epoch [3/3], Step [600/782], Loss: 0.2028
Epoch [3/3]

In [43]:
import shutil
# Zip the contents of the given directory into finetune-cifar10.zip; the call
# returns the path of the archive it wrote.
shutil.make_archive(
    base_name="finetune-cifar10",
    format='zip',
    root_dir="/kaggle/working/fine_tuned_vit_cifar10",
)


'/kaggle/working/finetune-cifar10.zip'

In [7]:
%%writefile data.py
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

class BaseDataset(Dataset):
    """Image-folder dataset: each sub-directory of ``root_dir`` is one class.

    Class names are the sorted sub-directory names, and a sample's label is
    the index of its class in that sorted list.

    Args:
        root_dir: Directory containing one sub-directory per class.
        transform: Optional callable applied to the RGB ``PIL.Image`` before
            it is returned.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        # Only directories are classes. A stray file (README, .DS_Store, ...)
        # must not occupy a slot in the label index space.
        self.classes = sorted(
            entry for entry in os.listdir(self.root_dir)
            if os.path.isdir(os.path.join(self.root_dir, entry))
        )

        for idx, class_name in enumerate(self.classes):
            class_dir = os.path.join(self.root_dir, class_name)
            # os.listdir order is arbitrary; sort so sample indices are
            # reproducible across runs and machines.
            for img_file in sorted(os.listdir(class_dir)):
                img_path = os.path.join(class_dir, img_file)
                if not os.path.isfile(img_path):
                    continue  # nested directories are not samples
                self.image_paths.append(img_path)
                self.labels.append(idx)

    def __len__(self):
        """Return the number of samples found under ``root_dir``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``; image is RGB, transformed if set."""
        img_path = self.image_paths[idx]
        label = self.labels[idx]
        # The context manager closes the file handle as soon as the pixels
        # have been copied into the converted image.
        with Image.open(img_path) as raw:
            image = raw.convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, label

def get_data_loader(dataset_class, root_dir, batch_size=64, img_size=224, num_workers=2):
    """Build a non-shuffling ``DataLoader`` over ``dataset_class(root_dir)``.

    Args:
        dataset_class: Dataset type, constructed with ``root_dir`` and
            ``transform`` keyword arguments.
        root_dir: Directory handed to the dataset.
        batch_size: Number of samples per batch.
        img_size: Side length images are resized to.
        num_workers: Worker process count for the ``DataLoader``.

    Returns:
        A ``DataLoader`` that iterates the dataset in its stored order.
    """
    # Resize, convert to tensor, then normalise each channel.
    preprocessing_steps = [
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    eval_dataset = dataset_class(
        root_dir=root_dir,
        transform=transforms.Compose(preprocessing_steps),
    )
    return DataLoader(
        eval_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
    )

class NoisyCIFAR10Dataset(BaseDataset):
    """Named subclass of ``BaseDataset`` that adds no behaviour of its own."""


Overwriting data.py


In [10]:
%%writefile inference.py
import torch
from VIT_Model import load_vit_model
from data import get_data_loader, NoisyCIFAR10Dataset

def run_inference_on_dataset(model_path, dataset_class, root_dir, num_classes=10, batch_size=64, img_size=224, device=None):
    """Load a fine-tuned checkpoint and report top-1 accuracy on a dataset.

    Args:
        model_path: Path to a ``state_dict`` file for ``ViTClassifier``.
        dataset_class: Dataset type handed to ``get_data_loader``.
        root_dir: Dataset root directory.
        num_classes: Size of the classification head in the checkpoint.
        batch_size: Evaluation batch size.
        img_size: Side length images are resized to.
        device: Device to run on; defaults to CUDA when available, else CPU.

    Returns:
        Accuracy as a percentage in ``[0, 100]``; ``0.0`` when the dataset
        yields no samples.
    """
    # Imported here so the model can be built without pretrained weights.
    from VIT_Model import ViTClassifier

    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # load_state_dict replaces every weight, so skip the pretrained download.
    model = ViTClassifier(num_classes, pretrained=False).to(device)
    # weights_only=True limits unpickling to tensors and plain containers,
    # which is all a state dict holds.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    print("Fine-tuned model loaded successfully.")
    loader = get_data_loader(dataset_class, root_dir=root_dir, batch_size=batch_size, img_size=img_size)

    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)

            # The predicted class is the index of the largest logit per row.
            predicted = model(images).argmax(dim=1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Guard the empty-dataset case instead of raising ZeroDivisionError.
    accuracy = 100 * correct / total if total else 0.0
    print(f'Accuracy on dataset "{root_dir}": {accuracy:.2f}%')
    return accuracy

def main(model_path, root_dir, dataset_class=NoisyCIFAR10Dataset):
    """Evaluate the checkpoint at ``model_path`` on the dataset under ``root_dir``.

    All remaining settings use the defaults of ``run_inference_on_dataset``.
    """
    run_inference_on_dataset(
        model_path=model_path,
        dataset_class=dataset_class,
        root_dir=root_dir,
    )

if __name__ == "__main__":
    # Script entry point: evaluate the saved checkpoint on the default dataset path.
    model_path = '/kaggle/working/fine_tuned_vit_cifar10.pth'
    root_dir = '/kaggle/input/cifar-noise-images'
    main(model_path=model_path, root_dir=root_dir)


Overwriting inference.py


In [11]:
# !python inference.py
from inference import main, NoisyCIFAR10Dataset

# Checkpoint written by the fine-tuning run, and the dataset to score it on.
model_path = '/kaggle/working/fine_tuned_vit_cifar10.pth'
root_dir = '/kaggle/input/cifarbasesubset/Cifar_Basedataset'

# Run inference and print the accuracy for the dataset under root_dir.
main(model_path=model_path, root_dir=root_dir, dataset_class=NoisyCIFAR10Dataset)

  model.load_state_dict(torch.load(model_path, map_location=device))


Fine-tuned model loaded successfully.
Accuracy on dataset "/kaggle/input/cifarbasesubset/Cifar_Basedataset": 95.40%


In [15]:
# !python inference.py
from inference import main, NoisyCIFAR10Dataset

# Checkpoint written by the fine-tuning run, and the dataset to score it on.
model_path = '/kaggle/working/fine_tuned_vit_cifar10.pth'
root_dir = '/kaggle/input/cifarnoisyy/CifarNoisy'

# Run inference and print the accuracy for the dataset under root_dir.
main(model_path=model_path, root_dir=root_dir, dataset_class=NoisyCIFAR10Dataset)

Fine-tuned model loaded successfully.
Accuracy on dataset "/kaggle/input/cifarnoisyy/CifarNoisy": 85.55%


In [13]:
# !python inference.py
from inference import main, NoisyCIFAR10Dataset

# Checkpoint written by the fine-tuning run, and the dataset to score it on.
model_path = '/kaggle/working/fine_tuned_vit_cifar10.pth'
root_dir = '/kaggle/input/cifarscrambled/Cifar_Scrambled'

# Run inference and print the accuracy for the dataset under root_dir.
main(model_path=model_path, root_dir=root_dir, dataset_class=NoisyCIFAR10Dataset)

Fine-tuned model loaded successfully.
Accuracy on dataset "/kaggle/input/cifarscrambled/Cifar_Scrambled": 50.60%


In [14]:
# !python inference.py
from inference import main, NoisyCIFAR10Dataset

# Checkpoint written by the fine-tuning run, and the dataset to score it on.
model_path = '/kaggle/working/fine_tuned_vit_cifar10.pth'
root_dir = '/kaggle/input/cifarstylized/Cifar_StylizedImages'

# Run inference and print the accuracy for the dataset under root_dir.
main(model_path=model_path, root_dir=root_dir, dataset_class=NoisyCIFAR10Dataset)

Fine-tuned model loaded successfully.
Accuracy on dataset "/kaggle/input/cifarstylized/Cifar_StylizedImages": 43.16%
