In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm
import torchvision.models as models

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [3]:
# Best hyperparameters
patch_size = 8
embedding_dim = 256
num_heads = 4
num_layers = 6
learning_rate = 5e-5
batch_size = 16
num_epochs = 10  


In [4]:
# CIFAR-10 dataset
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_dataset = datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 84830153.77it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [5]:
class VisionTransformer(nn.Module):
    def __init__(self, num_classes, patch_size, embedding_dim, num_heads, num_layers):
        super(VisionTransformer, self).__init__()
        self.patch_embedding = nn.Conv2d(3, embedding_dim, kernel_size=patch_size, stride=patch_size)
        self.positional_encoding = nn.Parameter(torch.randn(1, (128 // patch_size) ** 2 + 1, embedding_dim))
        self.transformer_layers = nn.ModuleList([
            nn.TransformerEncoderLayer(d_model=embedding_dim, nhead=num_heads) for _ in range(num_layers)
        ])
        self.fc = nn.Linear(embedding_dim, num_classes)

    def forward(self, x):
        batch_size = x.size(0)
        x = self.patch_embedding(x)
        x = x.flatten(2).transpose(1, 2)
        x = torch.cat((x, self.positional_encoding[:, :x.size(1), :].repeat(batch_size, 1, 1)), dim=1)
        for layer in self.transformer_layers:
            x = layer(x)
        x = x.mean(dim=1)
        x = self.fc(x)
        return x

# Initialize the model
model = VisionTransformer(num_classes=10, patch_size=patch_size, embedding_dim=embedding_dim, 
                          num_heads=num_heads, num_layers=num_layers).to(device)


In [6]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


In [7]:
# Training loop
best_accuracy = 0.0
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    correct = 0
    total_samples = 0
    
    # Use tqdm for progress bar
    progress_bar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f'Epoch {epoch + 1}/{num_epochs}')
    
    for i, (images, labels) in progress_bar:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Track training accuracy
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total_samples += labels.size(0)

        # Track total loss
        total_loss += loss.item()

        # Update progress bar
        progress_bar.set_postfix({'Loss': loss.item(), 'Accuracy': (correct / total_samples) * 100})

    # Evaluate on the validation set
    model.eval()
    correct = 0
    total_samples = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

    # Calculate accuracy
    accuracy = correct / total_samples * 100

    # Print validation accuracy for each epoch
    print(f'Epoch [{epoch + 1}/{num_epochs}], Validation Accuracy: {accuracy:.2f}%')

    # Save the model if it has the best accuracy
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        # Save model to /kaggle/working/
        torch.save(model.state_dict(), '/kaggle/working/best_model.pth')
        print(f'Saving model with validation accuracy: {best_accuracy:.2f}%')


Epoch 1/10: 100%|██████████| 3125/3125 [08:26<00:00,  6.17it/s, Loss=1.63, Accuracy=30.3]


Epoch [1/10], Validation Accuracy: 37.45%
Saving model with validation accuracy: 37.45%


Epoch 2/10: 100%|██████████| 3125/3125 [08:27<00:00,  6.15it/s, Loss=1.57, Accuracy=41.3]


Epoch [2/10], Validation Accuracy: 44.08%
Saving model with validation accuracy: 44.08%


Epoch 3/10: 100%|██████████| 3125/3125 [08:27<00:00,  6.16it/s, Loss=1.42, Accuracy=47.8] 


Epoch [3/10], Validation Accuracy: 50.34%
Saving model with validation accuracy: 50.34%


Epoch 4/10: 100%|██████████| 3125/3125 [08:26<00:00,  6.16it/s, Loss=1.93, Accuracy=51.8] 


Epoch [4/10], Validation Accuracy: 53.31%
Saving model with validation accuracy: 53.31%


Epoch 5/10: 100%|██████████| 3125/3125 [08:27<00:00,  6.16it/s, Loss=1.22, Accuracy=54.9] 


Epoch [5/10], Validation Accuracy: 55.96%
Saving model with validation accuracy: 55.96%


Epoch 6/10: 100%|██████████| 3125/3125 [08:27<00:00,  6.16it/s, Loss=1.55, Accuracy=57.2] 


Epoch [6/10], Validation Accuracy: 58.42%
Saving model with validation accuracy: 58.42%


Epoch 7/10: 100%|██████████| 3125/3125 [08:27<00:00,  6.16it/s, Loss=1.44, Accuracy=59.2] 


Epoch [7/10], Validation Accuracy: 60.60%
Saving model with validation accuracy: 60.60%


Epoch 8/10: 100%|██████████| 3125/3125 [08:27<00:00,  6.16it/s, Loss=0.762, Accuracy=60.9]


Epoch [8/10], Validation Accuracy: 62.63%
Saving model with validation accuracy: 62.63%


Epoch 9/10: 100%|██████████| 3125/3125 [08:26<00:00,  6.17it/s, Loss=1.25, Accuracy=62.6] 


Epoch [9/10], Validation Accuracy: 62.99%
Saving model with validation accuracy: 62.99%


Epoch 10/10: 100%|██████████| 3125/3125 [08:26<00:00,  6.16it/s, Loss=0.902, Accuracy=63.6]


Epoch [10/10], Validation Accuracy: 63.86%
Saving model with validation accuracy: 63.86%


In [8]:
# Load the best model
model.load_state_dict(torch.load('/kaggle/working/best_model.pth'))

# Evaluate on the test set
model.eval()
correct = 0
total_samples = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total_samples += labels.size(0)

# Calculate accuracy
vit_accuracy = correct / total_samples * 100
print(f'Vision Transformer Test Accuracy: {vit_accuracy:.2f}%')


Vision Transformer Test Accuracy: 63.86%


In [11]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

# ImageNet normalization parameters
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# CIFAR-10 dataset
train_dataset = datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)

train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False)


Files already downloaded and verified
Files already downloaded and verified


In [12]:
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from tqdm import tqdm


def fine_tune_and_evaluate(model, train_loader, test_loader, num_epochs=5):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    for epoch in range(num_epochs):
        model.train()
        for images, labels in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}'):
            images = images.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
    
    # Evaluation
    model.eval()
    correct = 0
    total_samples = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)
    accuracy = correct / total_samples * 100
    return accuracy

# Load pre-trained ResNet-18 and VGG-16, replace final layer, and fine-tune
resnet18 = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
resnet18.fc = nn.Linear(resnet18.fc.in_features, 10)
resnet18_accuracy = fine_tune_and_evaluate(resnet18, train_loader, test_loader)
print(f'Fine-Tuned ResNet-18 Test Accuracy: {resnet18_accuracy:.2f}%')

vgg16 = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
vgg16.classifier[6] = nn.Linear(vgg16.classifier[6].in_features, 10)
vgg16_accuracy = fine_tune_and_evaluate(vgg16, train_loader, test_loader)
print(f'Fine-Tuned VGG-16 Test Accuracy: {vgg16_accuracy:.2f}%')


Epoch 1/5: 100%|██████████| 3125/3125 [02:41<00:00, 19.31it/s]
Epoch 2/5: 100%|██████████| 3125/3125 [02:41<00:00, 19.40it/s]
Epoch 3/5: 100%|██████████| 3125/3125 [02:41<00:00, 19.40it/s]
Epoch 4/5: 100%|██████████| 3125/3125 [02:40<00:00, 19.41it/s]
Epoch 5/5: 100%|██████████| 3125/3125 [02:40<00:00, 19.42it/s]


Fine-Tuned ResNet-18 Test Accuracy: 94.15%


Epoch 1/5: 100%|██████████| 3125/3125 [13:35<00:00,  3.83it/s]
Epoch 2/5: 100%|██████████| 3125/3125 [13:33<00:00,  3.84it/s]
Epoch 3/5: 100%|██████████| 3125/3125 [13:32<00:00,  3.85it/s]
Epoch 4/5: 100%|██████████| 3125/3125 [13:31<00:00,  3.85it/s]
Epoch 5/5: 100%|██████████| 3125/3125 [13:30<00:00,  3.85it/s]


Fine-Tuned VGG-16 Test Accuracy: 88.98%


In [13]:
print(f'Vision Transformer Test Accuracy: {vit_accuracy:.2f}%')
print(f'ResNet-18 Test Accuracy: {resnet18_accuracy:.2f}%')
print(f'VGG-16 Test Accuracy: {vgg16_accuracy:.2f}%')


Vision Transformer Test Accuracy: 63.86%
ResNet-18 Test Accuracy: 94.15%
VGG-16 Test Accuracy: 88.98%
