# Task 4: Inductive Biases of Models: Semantic Biases
# 

In [1]:
import timm
import torch
import torch.nn as nn

class ViTClassifier(nn.Module):
    def __init__(self, num_classes, pretrained=True, finetune="classifier"):
        super(ViTClassifier, self).__init__()
        self.vit = timm.create_model('vit_base_patch16_224', pretrained=pretrained)
        self.vit.head = nn.Linear(self.vit.head.in_features, num_classes)

        if finetune == "classifier":
            # Freezing the backbone
            for param in self.vit.parameters():
                param.requires_grad = False
            # Unfreezing the classifier
            for param in self.vit.head.parameters():
                param.requires_grad = True

    def forward(self, x):
        return self.vit(x)

def load_vit_model(num_classes, device):
    model = ViTClassifier(num_classes)
    model = model.to(device)
    return model

In [9]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

IMAGE_SIZE = 224
TRAIN_TFMS = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

TEST_TFMS = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
])

class Animal10Dataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []
        self.class_to_idx = {label: idx for idx, label in enumerate(os.listdir(root_dir))}
        
        for label in os.listdir(root_dir):
            label_dir = os.path.join(root_dir, label)
            if os.path.isdir(label_dir):
                for img_file in os.listdir(label_dir):
                    img_path = os.path.join(label_dir, img_file)
                    self.image_paths.append(img_path)
                    self.labels.append(self.class_to_idx[label])  

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        
        try:
            image = Image.open(img_path).convert('RGB')
        except (OSError, FileNotFoundError):
            print(f"Skipping invalid image: {img_path}")
            return self.__getitem__((idx + 1) % len(self.image_paths))  

        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(label) 

# Function to get DataLoader
def get_dataloader(root_dir, batch_size=32, num_workers=1):
    """Load the Animal10 dataset and return train and test DataLoader."""
    train_dataset = Animal10Dataset(os.path.join(root_dir, 'raw-img'), transform=TRAIN_TFMS)
    test_dataset = Animal10Dataset(os.path.join(root_dir, 'raw-img'), transform=TEST_TFMS)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_loader, test_loader


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from timm import create_model
from torch.utils.data import DataLoader
from torch.cuda.amp import autocast, GradScaler 

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def fine_tune_vit(num_epochs=3, learning_rate=1e-4, batch_size=32, root_dir='/kaggle/input/animals10'):
    """Fine-tune a pre-trained ViT model on the Animal10 dataset."""
    
    train_loader, test_loader = get_dataloader(root_dir=root_dir, batch_size=batch_size)
    model = ViTClassifier(num_classes=10) 
    model = model.to(device)
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scaler = GradScaler()

    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()

            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, labels)
            
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()
            if i % 100 == 99:  
                print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {running_loss / 100:.4f}')
                running_loss = 0.0

    torch.save(model.state_dict(), 'fine_tuned_vit_animal10.pth')
    print("Model fine-tuned and saved successfully.")

fine_tune_vit(num_epochs=3, batch_size=64, root_dir='/kaggle/input/animals10')  # Adjust batch size as needed


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

  scaler = GradScaler()
  with autocast():


Epoch [1/3], Step [100/410], Loss: 1.3425
Epoch [1/3], Step [200/410], Loss: 0.4062
Epoch [1/3], Step [300/410], Loss: 0.2227
Epoch [1/3], Step [400/410], Loss: 0.1561
Epoch [2/3], Step [100/410], Loss: 0.1151
Epoch [2/3], Step [200/410], Loss: 0.0909
Epoch [2/3], Step [300/410], Loss: 0.0867
Epoch [2/3], Step [400/410], Loss: 0.0732
Epoch [3/3], Step [100/410], Loss: 0.0662
Epoch [3/3], Step [200/410], Loss: 0.0655
Epoch [3/3], Step [300/410], Loss: 0.0591
Epoch [3/3], Step [400/410], Loss: 0.0601
Model fine-tuned and saved successfully.


In [None]:
import shutil
shutil.make_archive("finetune-animals10", 'zip', "/kaggle/working/fine_tuned_vit_animal10")


In [13]:
import torch
from tqdm import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def inference_vit(batch_size=32, dataset_dir='/kaggle/input/animals10'):
    """Run inference on the provided dataset using the fine-tuned ViT model."""

    _, test_loader = get_dataloader(root_dir=dataset_dir, batch_size=batch_size)

    model = ViTClassifier(num_classes=10) 
    model.load_state_dict(torch.load('fine_tuned_vit_animal10.pth'))  
    model = model.to(device)
    model.eval()

    correct = 0
    total = 0
    predictions = []

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc="Running Inference", unit="batch"):
            inputs = inputs.to(device)
            labels = labels.to(device)  
            
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            predictions.extend(zip(labels.cpu().numpy(), predicted.cpu().numpy()))

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total if total > 0 else 0
    print(f'Test Accuracy: {accuracy:.2f}%')

print("\nInference on Full Animals dataset:")
inference_vit(batch_size=32, dataset_dir='/kaggle/input/animals10')  



Inference on Full Animals dataset:


  model.load_state_dict(torch.load('fine_tuned_vit_animal10.pth'))  # Load fine-tuned weights
Running Inference: 100%|██████████| 819/819 [02:44<00:00,  4.98batch/s]

Test Accuracy: 98.87%





INFERENCE ON SUBSET MAIN DATA

In [18]:
import os
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from tqdm import tqdm 
from PIL import Image

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Custom Dataset for loading images
class CustomDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.images = []
        self.labels = []
        self.class_to_idx = {name: idx for idx, name in enumerate(os.listdir(root_dir))}
        
        for label in os.listdir(root_dir):
            label_dir = os.path.join(root_dir, label)
            if os.path.isdir(label_dir):
                for img_file in os.listdir(label_dir):
                    img_path = os.path.join(label_dir, img_file)
                    self.images.append(img_path)
                    self.labels.append(self.class_to_idx[label]) 

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = self.images[idx]
        image = Image.open(img_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        label = self.labels[idx]
        return image, label

def get_transform():
    return transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

def get_dataloader(root_dir, batch_size):
    dataset = CustomDataset(root_dir=root_dir, transform=get_transform())
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    return dataloader

def inference_vit(model, dataset_dir, batch_size=32):
    """Run inference on the provided dataset using the fine-tuned ViT model."""
    
    test_loader = get_dataloader(root_dir=dataset_dir, batch_size=batch_size)
    model.eval()

    correct = 0
    total = 0
    predictions = []

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc="Running Inference", unit="batch"):
            inputs = inputs.to(device)
            labels = labels.to(device)  
            
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            predictions.extend(zip(labels.cpu().numpy(), predicted.cpu().numpy()))

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total if total > 0 else 0
    print(f'Test Accuracy: {accuracy:.2f}%')

def run_inference_on_dataset(model_path, dataset_dir, batch_size=32):
    model = ViTClassifier(num_classes=10)
    model.load_state_dict(torch.load(model_path))  
    model = model.to(device)
    
    print("\nRunning inference...")
    inference_vit(model, dataset_dir=dataset_dir, batch_size=batch_size)


model_path = 'fine_tuned_vit_animal10.pth'
dataset_dir = '/kaggle/input/animals-base-shape/val 2'
run_inference_on_dataset(model_path, dataset_dir, batch_size=32)


  model.load_state_dict(torch.load(model_path))  # Load fine-tuned weights



Running inference...


Running Inference: 100%|██████████| 32/32 [00:09<00:00,  3.23batch/s]

Test Accuracy: 99.10%





In [19]:
new_dataset_dir = '/kaggle/input/canny-animals/Canny_Animals'  # Change this path to your new dataset
run_inference_on_dataset(model_path='fine_tuned_vit_animal10.pth', dataset_dir=new_dataset_dir, batch_size=32)


  model.load_state_dict(torch.load(model_path))  # Load fine-tuned weights



Running inference...


Running Inference: 100%|██████████| 32/32 [00:12<00:00,  2.59batch/s]

Test Accuracy: 57.00%





In [20]:
new_dataset_dir = '/kaggle/input/base-animal-style-and-color/small_animal_dataset_updated'  # Change this path to your new dataset
run_inference_on_dataset(model_path='fine_tuned_vit_animal10.pth', dataset_dir=new_dataset_dir, batch_size=32)


  model.load_state_dict(torch.load(model_path))  # Load fine-tuned weights



Running inference...


Running Inference: 100%|██████████| 31/31 [00:11<00:00,  2.62batch/s]

Test Accuracy: 99.90%





In [23]:
new_dataset_dir = '/kaggle/input/stylized-animals/Stylized_images'  # Change this path to your new dataset
run_inference_on_dataset(model_path='fine_tuned_vit_animal10.pth', dataset_dir=new_dataset_dir, batch_size=32)

  model.load_state_dict(torch.load(model_path))  # Load fine-tuned weights



Running inference...


Running Inference: 100%|██████████| 11/11 [00:03<00:00,  2.78batch/s]

Test Accuracy: 16.29%





In [25]:
new_dataset_dir = '/kaggle/input/grayscale-animals'  # Change this path to your new dataset
run_inference_on_dataset(model_path='fine_tuned_vit_animal10.pth', dataset_dir=new_dataset_dir, batch_size=32)

  model.load_state_dict(torch.load(model_path))  # Load fine-tuned weights



Running inference...


Running Inference: 100%|██████████| 16/16 [00:04<00:00,  3.63batch/s]

Test Accuracy: 99.60%



