In [None]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms, models
from google.colab import drive


In [None]:
# Make the Drive folder that holds the image dataset visible to this runtime.
drive.mount('/content/drive')

# Preprocessing applied to every image, in order:
#   1. resize to 224x224,
#   2. convert to a tensor,
#   3. normalise each channel with the mean/std below.
_resize = transforms.Resize((224, 224))
_to_tensor = transforms.ToTensor()
_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
transform = transforms.Compose([_resize, _to_tensor, _normalize])


Mounted at /content/drive


In [None]:
# Load the dataset from Drive, applying `transform` to every image.
dataset = datasets.ImageFolder(root='/content/drive/MyDrive/test2', transform=transform)

# Split the dataset into training (80%) and validation (remainder) sets.
# The generator is seeded so the partition is identical on every run; without
# it each restart draws a different validation set and the accuracies reported
# by the models below are not comparable or reproducible.
SPLIT_SEED = 42
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Create data loaders; only the training loader reshuffles between epochs.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


In [None]:
def train_model(model, criterion, optimizer, train_loader, val_loader, epochs=5):
    """Train ``model`` for ``epochs`` epochs, then report validation accuracy once.

    Per-epoch training loss/accuracy and the final validation accuracy are
    printed; nothing but the model is returned.

    Args:
        model: ``nn.Module`` whose forward pass returns a logits tensor.
        criterion: Loss callable invoked as ``criterion(logits, labels)``.
        optimizer: Optimizer already bound to the parameters to update.
        train_loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        val_loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        epochs: Number of passes over ``train_loader``.

    Returns:
        The model, moved to the selected device and left in eval mode.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Weight the batch loss by batch size so a short final batch does
            # not skew the epoch average (assumes a mean-reduced criterion).
            running_loss += loss.item() * inputs.size(0)

            # Calculate training accuracy
            predicted = outputs.argmax(dim=1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        # Average over the samples actually processed rather than
        # len(train_loader.dataset): the two differ with drop_last/samplers,
        # and plain iterables of batches have no `.dataset` at all.
        # An empty loader yields NaN instead of a ZeroDivisionError.
        if total_train:
            epoch_loss = running_loss / total_train
            epoch_train_accuracy = 100 * correct_train / total_train
        else:
            epoch_loss = float("nan")
            epoch_train_accuracy = float("nan")

        print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}, Training Accuracy: {epoch_train_accuracy:.2f}%")

    # Validation (runs once, after the final epoch)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_accuracy = 100 * correct / total if total else float("nan")
    print(f"Validation Accuracy: {val_accuracy:.2f}%")
    return model

In [None]:
# Step 1: Transfer Learning with ResNet-50
# Load the ImageNet checkpoint through the `weights=` API: the boolean
# `pretrained=` flag is deprecated in torchvision. IMAGENET1K_V1 is the
# checkpoint `pretrained=True` resolved to, so the starting weights are the same.
resnet_model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Freeze every pretrained parameter so only the new head is trained.
for param in resnet_model.parameters():
    param.requires_grad = False

# Replace the final layer with one sized for this dataset's classes.
# The new nn.Linear is created after the freeze, so it stays trainable.
num_features = resnet_model.fc.in_features
resnet_model.fc = nn.Linear(num_features, len(dataset.classes))

criterion = nn.CrossEntropyLoss()
# Only the new head's parameters are handed to the optimizer.
optimizer = optim.Adam(resnet_model.fc.parameters(), lr=0.001)



In [None]:
# Stage 1 run: backbone frozen, only the replacement `fc` head is optimised.
print("Training ResNet-50...")
resnet_model = train_model(
    model=resnet_model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=val_loader,
)

Training ResNet-50...
Epoch 1/5, Loss: 0.4120, Training Accuracy: 82.95%
Epoch 2/5, Loss: 0.4280, Training Accuracy: 82.39%
Epoch 3/5, Loss: 0.4190, Training Accuracy: 85.80%
Epoch 4/5, Loss: 0.4143, Training Accuracy: 86.36%
Epoch 5/5, Loss: 0.4190, Training Accuracy: 82.95%
Validation Accuracy: 65.91%


In [None]:
# Step 2: Fine-Tuning the last two layers
# The last two entries of `parameters()` are just `fc.weight` and `fc.bias`,
# which Step 1 already trains, so slicing `[-2:]` unfreezes nothing new.
# Unfreeze by module instead: the final residual stage (`layer4`) and the
# classifier head (`fc`).
for layer in (resnet_model.layer4, resnet_model.fc):
    for param in layer.parameters():
        param.requires_grad = True

# Hand the optimizer only the parameters that are actually trainable;
# everything else in the backbone stays frozen.
trainable_params = [p for p in resnet_model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=0.0001)


In [None]:
# Stage 2 run: continue training the same model with the fine-tuning optimizer.
print("Fine-tuning ResNet-50...")
resnet_model = train_model(
    model=resnet_model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=val_loader,
)


Fine-tuning ResNet-50...
Epoch 1/5, Loss: 0.4421, Training Accuracy: 81.82%
Epoch 2/5, Loss: 0.4399, Training Accuracy: 81.82%
Epoch 3/5, Loss: 0.4308, Training Accuracy: 84.09%
Epoch 4/5, Loss: 0.4352, Training Accuracy: 84.09%
Epoch 5/5, Loss: 0.4228, Training Accuracy: 83.52%
Validation Accuracy: 65.91%


In [None]:
# Step 3: Adding an extra convolutional layer
class CustomResNet(nn.Module):
    """Backbone of an existing model followed by one extra conv layer and a new head.

    The backbone is every child module of ``original_model`` except the last
    two. The child modules are reused, not copied, so the backbone shares its
    weights (and their ``requires_grad`` flags) with ``original_model``.

    Args:
        original_model: Model whose children, minus the final two, form the
            feature extractor.
        num_classes: Number of output logits.
        in_channels: Channel count of the feature map produced by the
            backbone. Defaults to 2048, the value previously hard-coded.
        hidden_channels: Output channels of the added 3x3 convolution.
            Defaults to 64, the value previously hard-coded.
    """

    def __init__(self, original_model, num_classes, in_channels=2048, hidden_channels=64):
        super().__init__()
        # Drop the last two children of the original model and keep the rest.
        self.features = nn.Sequential(*list(original_model.children())[:-2])
        # 3x3 conv with stride 1 / padding 1 keeps the spatial size unchanged.
        self.new_layer = nn.Conv2d(in_channels, hidden_channels, kernel_size=3, stride=1, padding=1)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(hidden_channels, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for input ``x``."""
        x = self.features(x)
        x = self.new_layer(x)
        x = self.pool(x)
        # Collapse the pooled (C, 1, 1) map into a flat feature vector.
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x


In [None]:
# Build the extended network on top of the current ResNet and give it a fresh
# optimizer over all of its parameters.
num_classes = len(dataset.classes)
custom_resnet = CustomResNet(resnet_model, num_classes)
optimizer = optim.Adam(custom_resnet.parameters(), lr=0.0001)

print("Training ResNet-50 with extra layer...")
custom_resnet = train_model(
    model=custom_resnet,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=val_loader,
)

Training ResNet-50 with extra layer...
Epoch 1/5, Loss: 0.6969, Training Accuracy: 57.95%
Epoch 2/5, Loss: 0.5481, Training Accuracy: 76.70%
Epoch 3/5, Loss: 0.4767, Training Accuracy: 76.14%
Epoch 4/5, Loss: 0.4172, Training Accuracy: 80.68%
Epoch 5/5, Loss: 0.4162, Training Accuracy: 82.95%
Validation Accuracy: 68.18%


In [None]:
# Step 4: Apply Vision Transformer (ViT) and record performance
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top so dependencies are visible in one place.
from transformers import AutoImageProcessor, AutoModelForImageClassification

# Load ViT: the processor and the classifier come from the same checkpoint,
# with the classification head sized for this dataset's classes.
# NOTE(review): `processor` is not applied in any visible cell -- the data
# loaders still use `transform`. Confirm that its normalisation matches
# `processor.image_mean` / `processor.image_std`.
vit_checkpoint = "google/vit-base-patch16-224-in21k"
vit_num_labels = len(dataset.classes)
processor = AutoImageProcessor.from_pretrained(vit_checkpoint)
vit_model = AutoModelForImageClassification.from_pretrained(
    vit_checkpoint,
    num_labels=vit_num_labels,
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:

# Training and evaluation loop for the Vision Transformer
def vit_train_model(model, criterion, optimizer, train_loader, val_loader, epochs=5):
    """Train ``model`` for ``epochs`` epochs and validate once afterwards.

    The forward pass may return either a logits tensor or an object exposing
    a ``.logits`` attribute (as the ViT classifier used here does).

    Args:
        model: ``nn.Module`` to train.
        criterion: Loss callable invoked as ``criterion(logits, labels)``.
        optimizer: Optimizer already bound to the parameters to update.
        train_loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        val_loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        epochs: Number of passes over ``train_loader``.

    Returns:
        ``(model, train_loss, train_accuracy, val_accuracy)`` where
        ``train_loss`` and ``train_accuracy`` hold one value per epoch and
        ``val_accuracy`` holds a single value, because validation runs only
        once after the last epoch. Accuracies are percentages. An empty
        loader produces NaN entries rather than raising.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    train_loss = []
    val_accuracy = []
    train_accuracy = []

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            raw_outputs = model(inputs)
            # Accept both plain tensors and output objects carrying `.logits`.
            outputs = getattr(raw_outputs, "logits", raw_outputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Weight by batch size so a short final batch does not skew the
            # epoch average (assumes a mean-reduced criterion).
            running_loss += loss.item() * inputs.size(0)

            # Calculate training accuracy
            predicted = outputs.argmax(dim=1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        # Average over the samples actually processed rather than
        # len(train_loader.dataset): the two differ with drop_last/samplers,
        # and plain iterables of batches have no `.dataset` at all.
        if total_train:
            epoch_loss = running_loss / total_train
            epoch_train_accuracy = 100 * correct_train / total_train
        else:
            epoch_loss = float("nan")
            epoch_train_accuracy = float("nan")

        train_loss.append(epoch_loss)
        train_accuracy.append(epoch_train_accuracy)

        print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}, Training Accuracy: {epoch_train_accuracy:.2f}%")

    # Validation (runs once, after the final epoch)
    model.eval()
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            raw_outputs = model(inputs)
            outputs = getattr(raw_outputs, "logits", raw_outputs)
            predicted = outputs.argmax(dim=1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    accuracy = 100 * correct_val / total_val if total_val else float("nan")
    val_accuracy.append(accuracy)
    print(f"Validation Accuracy: {accuracy:.2f}%")

    return model, train_loss, train_accuracy, val_accuracy

In [None]:

# Loss and optimizer for the ViT run. Every ViT parameter is handed to Adam,
# so the full network is updated, not just the new classifier head.
criterion_vit = nn.CrossEntropyLoss()
optimizer_vit = optim.Adam(
    vit_model.parameters(),
    lr=0.0001,
)


In [None]:
# Run ViT training; keep the per-epoch training history and the final
# validation accuracy alongside the trained model.
vit_results = vit_train_model(
    model=vit_model,
    criterion=criterion_vit,
    optimizer=optimizer_vit,
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=5,
)
vit_model, vit_train_loss, vit_train_accuracy, vit_val_accuracy = vit_results


Epoch 1/5, Loss: 0.6602, Training Accuracy: 61.36%
Epoch 2/5, Loss: 0.4614, Training Accuracy: 88.07%
Epoch 3/5, Loss: 0.2958, Training Accuracy: 94.89%
Epoch 4/5, Loss: 0.1667, Training Accuracy: 97.16%
Epoch 5/5, Loss: 0.0984, Training Accuracy: 98.86%
Validation Accuracy: 65.91%
