# Task 0: Model Selection
* Model Selected: ViT (timm's vit_base_patch16_224)
* Pretrained on: ImageNet-21k
* Parameters: 86.6M

In [9]:
# The access token is taken from GITHUB_TOKEN (set it as an environment variable, e.g. from
# Kaggle Secrets, before running this cell) instead of being stored in the notebook.
# The token that was previously hardcoded in this URL is exposed and must be revoked.
!git clone https://$GITHUB_TOKEN@github.com/iamzammad/ATML-PA1.git

fatal: destination path 'ATML-PA1' already exists and is not an empty directory.


In [10]:
%cd /kaggle/working/ATML-PA1/VIT
!git pull origin main

/kaggle/working/ATML-PA1/VIT
From https://github.com/iamzammad/ATML-PA1
 * branch            main       -> FETCH_HEAD
Already up to date.


# Task 2: Evaluation on an IID Dataset

**Accuracy of Pre-trained ViT on CIFAR-10**

In [12]:
from inference import load_and_evaluate
load_and_evaluate(model_type='pretrained')

Files already downloaded and verified
Files already downloaded and verified
Evaluating pretrained model...
Accuracy on CIFAR-10 test set: 7.50%


**Fine-Tuning ViT on CIFAR-10**

In [13]:
!python Fine_Tune.py

Files already downloaded and verified
Files already downloaded and verified
vit.cls_token False
vit.pos_embed False
vit.patch_embed.proj.weight False
vit.patch_embed.proj.bias False
vit.blocks.0.norm1.weight False
vit.blocks.0.norm1.bias False
vit.blocks.0.attn.qkv.weight False
vit.blocks.0.attn.qkv.bias False
vit.blocks.0.attn.proj.weight False
vit.blocks.0.attn.proj.bias False
vit.blocks.0.norm2.weight False
vit.blocks.0.norm2.bias False
vit.blocks.0.mlp.fc1.weight False
vit.blocks.0.mlp.fc1.bias False
vit.blocks.0.mlp.fc2.weight False
vit.blocks.0.mlp.fc2.bias False
vit.blocks.1.norm1.weight False
vit.blocks.1.norm1.bias False
vit.blocks.1.attn.qkv.weight False
vit.blocks.1.attn.qkv.bias False
vit.blocks.1.attn.proj.weight False
vit.blocks.1.attn.proj.bias False
vit.blocks.1.norm2.weight False
vit.blocks.1.norm2.bias False
vit.blocks.1.mlp.fc1.weight False
vit.blocks.1.mlp.fc1.bias False
vit.blocks.1.mlp.fc2.weight False
vit.blocks.1.mlp.fc2.bias False
vit.blocks.2.norm1.weight Fals

**Fine-tuned Accuracy (Backbone Frozen)**

In [14]:
from inference import load_and_evaluate
load_and_evaluate(model_type='fine-tuned')

Files already downloaded and verified
Files already downloaded and verified
Evaluating fine-tuned model...
Accuracy on CIFAR-10 test set: 94.45%


**Fine-tuned Accuracy (Backbone Unfrozen)**

In [30]:
!python inference.py

Files already downloaded and verified
Files already downloaded and verified
Accuracy on CIFAR-10 test set: 96.27%


# Task 3: Evaluation for Domain Generalization

In [2]:
import os
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

def get_pacs_loaders(batch_size=32, root_dir='/kaggle/input/pacs-dataset/pacs_data/pacs_data'):
    """Build one shuffled DataLoader per PACS domain.

    Every image is resized to 224x224, converted to a tensor and normalised with
    the mean/std values given below. Dataset sizes and the shape of one sample
    batch per domain are printed as a sanity check.

    Args:
        batch_size: batch size used for every loader.
        root_dir: directory containing the ``photo``, ``art_painting``,
            ``cartoon`` and ``sketch`` sub-directories.

    Returns:
        dict with keys 'Photos', 'Art_paintings', 'Cartoons', 'Sketches'
        mapping to the corresponding DataLoader.

    Raises:
        FileNotFoundError: if a domain directory cannot be loaded. The error is
            reported and re-raised; returning None here would only make callers
            fail later with an unrelated error when they index the result.
    """
    print("Entered get_pacs_loaders function")

    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # (loader key, sub-directory name, label used in the size report)
    domains = [
        ('Photos', 'photo', 'photos'),
        ('Art_paintings', 'art_painting', 'art paintings'),
        ('Cartoons', 'cartoon', 'cartoons'),
        ('Sketches', 'sketch', 'sketches'),
    ]

    try:
        domain_datasets = {
            key: ImageFolder(root=os.path.join(root_dir, folder), transform=transform)
            for key, folder, _ in domains
        }
    except FileNotFoundError as e:
        print(f"Error loading dataset: {e}")
        raise

    loaders = {
        key: DataLoader(dataset, batch_size=batch_size, shuffle=True)
        for key, dataset in domain_datasets.items()
    }

    for key, _, label in domains:
        print(f"Number of {label}: {len(domain_datasets[key])}")

    # Pull a single batch from each loader to confirm the tensors have the expected shape.
    for category, loader in loaders.items():
        images, labels = next(iter(loader))
        print(f"Sample from {category}:")
        print(f"Image batch shape: {images.shape}")
        print(f"Label batch shape: {labels.shape}")

    return loaders

if __name__ == "__main__":
    print("Loading PACS dataset...")
    loaders = get_pacs_loaders()



Loading PACS dataset...
Entered get_pacs_loaders function
Number of photos: 1670
Number of art paintings: 2048
Number of cartoons: 2344
Number of sketches: 3929
Sample from Photos:
Image batch shape: torch.Size([32, 3, 224, 224])
Label batch shape: torch.Size([32])
Sample from Art_paintings:
Image batch shape: torch.Size([32, 3, 224, 224])
Label batch shape: torch.Size([32])
Sample from Cartoons:
Image batch shape: torch.Size([32, 3, 224, 224])
Label batch shape: torch.Size([32])
Sample from Sketches:
Image batch shape: torch.Size([32, 3, 224, 224])
Label batch shape: torch.Size([32])


In [5]:
import timm
import torch
import torch.nn as nn


class ViTClassifier(nn.Module):
    """timm ``vit_base_patch16_224`` whose head is replaced by a ``num_classes``-way linear layer.

    When ``finetune == "classifier"`` (the default) every parameter except the
    new head is frozen; any other value leaves all parameters trainable.
    """

    def __init__(self, num_classes, pretrained=True, finetune="classifier"):
        super().__init__()
        backbone = timm.create_model('vit_base_patch16_224', pretrained=pretrained)
        backbone.head = nn.Linear(backbone.head.in_features, num_classes)
        self.vit = backbone

        if finetune == "classifier":
            # Freeze the whole network, then re-enable gradients for the new head only.
            self.vit.requires_grad_(False)
            self.vit.head.requires_grad_(True)

    def forward(self, x):
        """Return the wrapped model's output for the batch ``x``."""
        logits = self.vit(x)
        return logits

def load_vit_model(num_classes, device):
    """Create a ViTClassifier with its default settings and move it to ``device``."""
    return ViTClassifier(num_classes).to(device)

In [9]:
import torch

def evaluate_model(model, data_loaders, device):
    """Compute the top-1 accuracy of ``model`` on every loader in ``data_loaders``.

    Args:
        model: module mapping a batch of inputs to per-class scores laid out
            as (batch, classes); the prediction is the arg-max over dim 1.
        data_loaders: mapping of domain name -> iterable of (images, labels) batches.
        device: device every batch is moved to before the forward pass.

    Returns:
        dict mapping each domain name to its accuracy in percent. A domain whose
        loader yields no samples maps to ``nan`` instead of raising
        ZeroDivisionError.
    """
    model.eval()
    total_accuracy = {}

    with torch.no_grad():
        for domain, loader in data_loaders.items():
            correct = 0
            total = 0

            for images, labels in loader:
                images, labels = images.to(device), labels.to(device)
                # Under no_grad the output carries no graph, so `.data` is not needed.
                predicted = model(images).argmax(dim=1)

                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            if total == 0:
                # Nothing was evaluated: report that explicitly rather than dividing by zero.
                total_accuracy[domain] = float("nan")
                print(f"Accuracy on {domain}: n/a (no samples)")
                continue

            accuracy = 100 * correct / total
            total_accuracy[domain] = accuracy
            print(f"Accuracy on {domain}: {accuracy:.2f}%")

    return total_accuracy

def main():
    """Evaluate the ViT with a newly created 7-class head on every PACS domain."""
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    loaders_by_domain = get_pacs_loaders(batch_size=64)
    vit = load_vit_model(7, device)  # 7 output classes
    domain_accuracy = evaluate_model(vit, loaders_by_domain, device)
    print("Final Accuracies across domains:", domain_accuracy)

# Baseline run: no training on PACS has happened at this point.
main()


Entered get_pacs_loaders function
Number of photos: 1670
Number of art paintings: 2048
Number of cartoons: 2344
Number of sketches: 3929
Sample from Photos:
Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64])
Sample from Art_paintings:
Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64])
Sample from Cartoons:
Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64])
Sample from Sketches:
Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64])
Accuracy on Photos: 2.93%
Accuracy on Art_paintings: 11.04%
Accuracy on Cartoons: 3.75%
Accuracy on Sketches: 9.88%
Final Accuracies across domains: {'Photos': 2.934131736526946, 'Art_paintings': 11.03515625, 'Cartoons': 3.7542662116040955, 'Sketches': 9.875286332400101}


In [36]:
!python inference.py

Entered get_pacs_loaders function
Number of photos: 1670
Number of art paintings: 2048
Number of cartoons: 2344
Number of sketches: 3929
Sample from Photos:
Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64])
Sample from Art_paintings:
Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64])
Sample from Cartoons:
Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64])
Sample from Sketches:
Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64])
model.safetensors: 100%|██████████████████████| 346M/346M [00:01<00:00, 181MB/s]
Accuracy on Photos: 15.45%
Accuracy on Art_paintings: 17.82%
Accuracy on Cartoons: 17.19%
Accuracy on Sketches: 25.83%
Final Accuracies: {'Photos': 15.449101796407186, 'Art_paintings': 17.822265625, 'Cartoons': 17.19283276450512, 'Sketches': 25.833545431407483}


In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
import time

# Assume `load_vit_model` and `get_pacs_loaders` are already defined in cells

def train_model(model, train_loader, criterion, optimizer, device, num_epochs=3):
    model.train()
    scaler = GradScaler()  
    
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        start_time = time.time() 
        
        for batch_idx, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            with autocast(): 
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()
            
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            if (batch_idx + 1) % 10 == 0:
                print(f"Epoch [{epoch + 1}/{num_epochs}], Batch [{batch_idx + 1}/{len(train_loader)}], Loss: {loss.item():.4f}")

        epoch_loss = running_loss / len(train_loader)
        epoch_accuracy = 100 * correct / total
        print(f"Epoch [{epoch + 1}/{num_epochs}] completed in {time.time() - start_time:.2f} seconds. "
              f"Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")

def main():
    """Fine-tune the trainable parameters of the ViT on the PACS 'Photos' loader and save the weights."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Only the 'Photos' domain is used for training.
    photo_loader = get_pacs_loaders(batch_size=64)['Photos']
    vit = load_vit_model(7, device)  # 7 output classes

    # List every parameter together with whether it will receive gradients.
    for param_name, tensor in vit.named_parameters():
        print(param_name, tensor.requires_grad)

    loss_fn = nn.CrossEntropyLoss()
    trainable_params = [p for p in vit.parameters() if p.requires_grad]
    adam = optim.Adam(trainable_params, lr=1e-4)
    train_model(vit, photo_loader, loss_fn, adam, device, num_epochs=3)

    torch.save(vit.state_dict(), 'fine_tuned_vit_pacs.pth')
    print("Model fine-tuned and saved successfully.")

main()


Entered get_pacs_loaders function
Number of photos: 1670
Number of art paintings: 2048
Number of cartoons: 2344
Number of sketches: 3929
Sample from Photos:
Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64])
Sample from Art_paintings:
Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64])
Sample from Cartoons:
Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64])
Sample from Sketches:
Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64])
vit.cls_token False
vit.pos_embed False
vit.patch_embed.proj.weight False
vit.patch_embed.proj.bias False
vit.blocks.0.norm1.weight False
vit.blocks.0.norm1.bias False
vit.blocks.0.attn.qkv.weight False
vit.blocks.0.attn.qkv.bias False
vit.blocks.0.attn.proj.weight False
vit.blocks.0.attn.proj.bias False
vit.blocks.0.norm2.weight False
vit.blocks.0.norm2.bias False
vit.blocks.0.mlp.fc1.weight False
vit.blocks.0.mlp.fc1.bias Fals

  scaler = GradScaler()
  with autocast():


Epoch [1/3], Batch [10/27], Loss: 2.0335
Epoch [1/3], Batch [20/27], Loss: 1.7466
Epoch [1/3] completed in 14.23 seconds. Loss: 2.0524, Accuracy: 28.98%
Epoch [2/3], Batch [10/27], Loss: 1.2980
Epoch [2/3], Batch [20/27], Loss: 0.9211
Epoch [2/3] completed in 14.42 seconds. Loss: 1.1251, Accuracy: 71.44%
Epoch [3/3], Batch [10/27], Loss: 0.6090
Epoch [3/3], Batch [20/27], Loss: 0.4847
Epoch [3/3] completed in 14.30 seconds. Loss: 0.6199, Accuracy: 88.14%
Model fine-tuned and saved successfully.


In [13]:
# Zip the contents of /kaggle/working/fine_tuned_vit_pacs into fine_tuned_vit_pacs.zip
# (created relative to the current working directory).
# NOTE(review): the training cell saves the weights to the file 'fine_tuned_vit_pacs.pth',
# whereas this call archives a directory named 'fine_tuned_vit_pacs' - confirm that this
# directory exists and actually contains the checkpoint.
import shutil
shutil.make_archive("fine_tuned_vit_pacs", 'zip', "/kaggle/working/fine_tuned_vit_pacs")

'/kaggle/working/fine_tuned_vit_pacs.zip'

In [14]:
import torch


def evaluate_model(model, data_loaders, device):
    """Return the per-domain top-1 accuracy (in percent) of ``model``.

    Args:
        model: module producing per-class scores laid out as (batch, classes);
            the predicted class is the arg-max over dim 1.
        data_loaders: mapping of domain name -> iterable of (images, labels) batches.
        device: device the batches are moved to before the forward pass.

    Returns:
        dict of domain name -> accuracy in percent. A domain with no samples is
        reported as ``nan`` rather than raising ZeroDivisionError.
    """
    model.eval()
    total_accuracy = {}

    with torch.no_grad():
        for domain, loader in data_loaders.items():
            correct, total = 0, 0

            for images, labels in loader:
                images = images.to(device)
                labels = labels.to(device)
                # No `.data` needed: gradients are already disabled in this context.
                predicted = model(images).argmax(dim=1)

                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            if total:
                accuracy = 100 * correct / total
                print(f"Accuracy on {domain}: {accuracy:.2f}%")
            else:
                # Empty loader: there is no meaningful accuracy to report.
                accuracy = float("nan")
                print(f"Accuracy on {domain}: n/a (no samples)")
            total_accuracy[domain] = accuracy

    return total_accuracy

def main():
    """Evaluate the weights stored in 'fine_tuned_vit_pacs.pth' on every PACS domain.

    The checkpoint is loaded with ``map_location=device`` so that weights saved
    on a GPU can also be restored on a CPU-only machine, and with
    ``weights_only=True`` so that only tensors are unpickled (this also removes
    the FutureWarning that ``torch.load`` emits for the implicit default).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    pacs_loaders = get_pacs_loaders(batch_size=64)
    num_classes = 7
    # load_vit_model already places the model on `device`, so no extra .to() is needed.
    model = load_vit_model(num_classes, device)
    state_dict = torch.load('fine_tuned_vit_pacs.pth', map_location=device, weights_only=True)
    model.load_state_dict(state_dict)

    # NOTE: the training cell fits the model on the 'Photos' loader, so the 'Photos'
    # figure reported here is measured on the training data.
    accuracy = evaluate_model(model, pacs_loaders, device)
    print("Final Accuracies on PACS Domains:", accuracy)

main()


Entered get_pacs_loaders function
Number of photos: 1670
Number of art paintings: 2048
Number of cartoons: 2344
Number of sketches: 3929
Sample from Photos:
Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64])
Sample from Art_paintings:
Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64])
Sample from Cartoons:
Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64])
Sample from Sketches:
Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64])


  model.load_state_dict(torch.load('fine_tuned_vit_pacs.pth'))  # Make sure the path is correct


Accuracy on Photos: 92.34%
Accuracy on Art_paintings: 49.90%
Accuracy on Cartoons: 29.31%
Accuracy on Sketches: 20.64%
Final Accuracies on PACS Domains: {'Photos': 92.33532934131736, 'Art_paintings': 49.90234375, 'Cartoons': 29.308873720136518, 'Sketches': 20.64138457622805}


SVHN

In [20]:
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

def get_svhn_data(batch_size=128):
    """Return ``(train_loader, test_loader)`` for SVHN.

    Images are resized to 224x224 and converted to tensors; no normalisation is
    applied. The data is stored under ./datamodule (``download=True``).
    Only the training loader shuffles.
    """
    # Normalisation is left out here (a Normalize((0.5,), (0.5,)) step was disabled).
    preprocessing = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

    splits = {
        split: datasets.SVHN(root='./datamodule', download=True, split=split, transform=preprocessing)
        for split in ('train', 'test')
    }

    train_loader = DataLoader(splits['train'], batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(splits['test'], batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

# train_loader, test_loader = get_svhn_data(batch_size=128)


In [21]:
import timm
import torch
import torch.nn as nn

class ViTClassifier(nn.Module):
    """Wrapper around timm's ``vit_base_patch16_224`` with a new ``num_classes``-way linear head.

    ``finetune="classifier"`` (default) freezes everything except that head;
    any other value keeps all parameters trainable.
    """

    def __init__(self, num_classes, pretrained=True, finetune="classifier"):
        super().__init__()
        self.vit = timm.create_model('vit_base_patch16_224', pretrained=pretrained)
        in_features = self.vit.head.in_features
        self.vit.head = nn.Linear(in_features, num_classes)

        if finetune == "classifier":
            # A parameter stays trainable only if it belongs to the new head.
            head_param_ids = {id(p) for p in self.vit.head.parameters()}
            for p in self.vit.parameters():
                p.requires_grad = id(p) in head_param_ids

    def forward(self, x):
        """Run ``x`` through the wrapped model and return its output."""
        return self.vit(x)

def load_vit_model(num_classes, device):
    """Instantiate ViTClassifier(num_classes) and return it after moving it to ``device``."""
    classifier = ViTClassifier(num_classes)
    return classifier.to(device)

# Example usage (to be run in another cell if needed):
# num_classes = 7  # Adjust based on your dataset
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = load_vit_model(num_classes, device)


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from timm import create_model
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.cuda.amp import autocast, GradScaler 

# Function to get SVHN data
def get_svhn_data(batch_size=128):
    """Build the SVHN train and test DataLoaders.

    Each image is resized to 224x224 and turned into a tensor (no normalisation).
    Data lives under ./datamodule and is requested with ``download=True``.

    Returns:
        (train_loader, test_loader); only the training loader shuffles.
    """
    resize_to_tensor = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])
    svhn_kwargs = dict(root='./datamodule', download=True, transform=resize_to_tensor)

    train_set = datasets.SVHN(split='train', **svhn_kwargs)
    test_set = datasets.SVHN(split='test', **svhn_kwargs)

    return (
        DataLoader(train_set, batch_size=batch_size, shuffle=True),
        DataLoader(test_set, batch_size=batch_size, shuffle=False),
    )

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def fine_tune_vit(num_epochs=3, learning_rate=1e-4, batch_size=32):
    """Fine-tune a pre-trained ViT model on the SVHN dataset.

    All model parameters are optimised with Adam. Mixed precision (autocast plus
    gradient scaling) is enabled only when the module-level ``device`` is CUDA.
    The running loss is printed every 100 steps and the final weights are saved
    to 'fine_tuned_vit_svhn.pth'.

    Args:
        num_epochs: number of passes over the SVHN training split.
        learning_rate: Adam learning rate.
        batch_size: batch size passed to ``get_svhn_data``.
    """
    train_loader, _ = get_svhn_data(batch_size=batch_size)  # the test loader is not used during training
    model = create_model('vit_base_patch16_224', pretrained=True, num_classes=10)  # SVHN has 10 classes
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # torch.cuda.amp.autocast / GradScaler are deprecated; use the device-agnostic
    # torch.autocast / torch.amp.GradScaler (PyTorch >= 2.3) and switch AMP off
    # when not running on CUDA.
    use_amp = device.type == 'cuda'
    scaler = torch.amp.GradScaler('cuda', enabled=use_amp)

    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            with torch.autocast(device_type='cuda', enabled=use_amp):
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            # With AMP disabled these calls reduce to backward() + optimizer.step().
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()
            if i % 100 == 99:
                # Report the mean loss over the last 100 steps, then restart the window.
                print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {running_loss / 100:.4f}')
                running_loss = 0.0

    torch.save(model.state_dict(), 'fine_tuned_vit_svhn.pth')
    print("Model fine-tuned and saved successfully.")

fine_tune_vit(batch_size=64)


Downloading http://ufldl.stanford.edu/housenumbers/train_32x32.mat to ./datamodule/train_32x32.mat
100%|███████████████████████| 182040794/182040794 [00:05<00:00, 30664021.04it/s]
Downloading http://ufldl.stanford.edu/housenumbers/test_32x32.mat to ./datamodule/test_32x32.mat
100%|█████████████████████████| 64275384/64275384 [00:04<00:00, 15330836.16it/s]
model.safetensors: 100%|██████████████████████| 346M/346M [00:01<00:00, 243MB/s]
  scaler = GradScaler()
  with autocast():
Epoch [1/3], Step [100/1145], Loss: 2.2939
Epoch [1/3], Step [200/1145], Loss: 2.2622
Epoch [1/3], Step [300/1145], Loss: 2.2500
Epoch [1/3], Step [400/1145], Loss: 2.2515
Epoch [1/3], Step [500/1145], Loss: 2.2586
Epoch [1/3], Step [600/1145], Loss: 2.2602
Epoch [1/3], Step [700/1145], Loss: 2.2540
Epoch [1/3], Step [800/1145], Loss: 2.2489
Epoch [1/3], Step [900/1145], Loss: 2.2911
Epoch [1/3], Step [1000/1145], Loss: 2.2499
Epoch [1/3], Step [1100/1145], Loss: 2.2495
Epoch [2/3], Step [100/1145], Loss: 2.2409


In [7]:
import torch
import torch.nn as nn
from timm import create_model

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def inference_vit(batch_size=32):
    """Run inference on the SVHN test dataset using the fine-tuned ViT model.

    Loads 'fine_tuned_vit_svhn.pth' into a ``vit_base_patch16_224`` with a
    10-class head, then prints the test accuracy (percent) and the mean
    per-batch cross-entropy loss.

    Args:
        batch_size: batch size passed to ``get_svhn_data``.
    """
    _, test_loader = get_svhn_data(batch_size=batch_size)

    model = create_model('vit_base_patch16_224', pretrained=False, num_classes=10)  # 10 classes
    # map_location lets a checkpoint saved on GPU load on a CPU-only machine (the model is
    # still on the CPU at this point); weights_only=True restricts unpickling to tensors and
    # silences the FutureWarning torch.load emits for the implicit default.
    state_dict = torch.load('fine_tuned_vit_svhn.pth', map_location='cpu', weights_only=True)
    model.load_state_dict(state_dict)
    model = model.to(device)
    model.eval()

    criterion = nn.CrossEntropyLoss()

    correct = 0
    total = 0
    test_loss = 0.0
    num_batches = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            predicted = outputs.argmax(dim=1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            test_loss += criterion(outputs, labels).item()
            num_batches += 1

    # Guard the averages so an empty test loader does not raise ZeroDivisionError.
    accuracy = 100 * correct / total if total else 0.0
    avg_test_loss = test_loss / num_batches if num_batches else 0.0
    print(f'Test Accuracy: {accuracy:.2f}%')
    print(f'Average Test Loss: {avg_test_loss:.4f}')

inference_vit(batch_size=32)

Using downloaded and verified file: ./datamodule/train_32x32.mat
Using downloaded and verified file: ./datamodule/test_32x32.mat
  model.load_state_dict(torch.load('fine_tuned_vit_svhn.pth'))  # Load fine-tuned weights
Test Accuracy: 93.44%
Average Test Loss: 0.2232
