In [None]:
!pip install -U efficientnet
!pip install tensorflow
# !pip install efficientnet_pytorch

In [None]:
from google.colab import userdata, files
import efficientnet.keras as efn
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

import torch
from torchvision import datasets, transforms
from torch.utils.data import WeightedRandomSampler
import torch.nn as nn
import torch.optim as optim
import torchvision

# Load the Kaggle Data

In [None]:
# Copy the Kaggle credentials from google.colab's userdata store into environment
# variables, which is where the kaggle command-line client looks for them.
for secret_name in ("KAGGLE_KEY", "KAGGLE_USERNAME"):
    os.environ[secret_name] = userdata.get(secret_name)

In [None]:
! pip install -q kaggle

In [None]:
# List Kaggle datasets through the CLI.
# NOTE(review): presumably a quick check that the credentials set above are accepted;
# nothing later in the notebook uses this listing.
! kaggle datasets list

In [None]:
# Download the data archive of the `bttai-ajl-2025` competition.
# The extraction step later in the notebook expects `bttai-ajl-2025.zip` in the working directory.
! kaggle competitions download -c bttai-ajl-2025

In [None]:
! mkdir kaggle_data

In [None]:
! unzip bttai-ajl-2025.zip -d kaggle_data

# Data Preparation

In [None]:
# Notebook-wide settings

# Where the extracted competition images live.
train_dir = '/content/kaggle_data/train/train'
test_dir = '/content/kaggle_data/test/test'

# Image edge length and mini-batch size.
# Batch-size candidates: 16, 32, 64, 128, 256, 512, 1024+.
# Begin with a small value, raise it step by step and watch training stability.
IMG_SIZE, BATCH_SIZE = 224, 256

# Number of training epochs. Author's note: a 20-epoch run with EfficientNet7 scored
# 62%, slightly below previous models.
# Rule of thumb for small datasets: 10–50 epochs; pick a generous number and rely on
# early stopping to avoid overfitting.
epochs = 25

# Fraction of the data meant for validation.
# NOTE(review): not referenced by any other cell visible in this notebook.
VALIDATION_SPLIT = 0.2

In [None]:
# Training-time preprocessing:
#   1. resize to the square input size configured in IMG_SIZE,
#   2. light augmentation (random horizontal flip, random rotation of up to 10 degrees),
#   3. conversion to a tensor,
#   4. per-channel normalization (these mean/std values are the usual ImageNet
#      statistics, matching the ImageNet-pretrained backbone used for training).
train_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load training data: one sub-directory of train_dir per class.
train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transform)

# Plain shuffled loader. The class-balancing cell further down rebinds `train_loader`
# to a sampler-based loader, so this one only matters if that cell is skipped.
# NOTE(review): the batch size is the literal 32, not BATCH_SIZE (256) — confirm which
# value is intended before wiring the constant in.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)

In [None]:
# Compute class weights.
# The per-class counts come from the dataset's own label list, so they always match the
# samples ImageFolder actually loaded (a raw directory listing would also count any
# non-image files sitting in a class folder).
class_counts = np.bincount(train_dataset.targets, minlength=len(train_dataset.classes))
class_weights = 1. / class_counts                      # inverse class frequency
sample_weights = class_weights[train_dataset.targets]  # one weight per training sample

# Create a weighted sampler: each epoch draws len(train_dataset) indices with
# replacement, with probability proportional to the sample weights, so rare classes
# are seen about as often as frequent ones.
sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(train_dataset), replacement=True)
# Rebinds `train_loader`; `sampler` and `shuffle=True` are mutually exclusive in DataLoader.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, sampler=sampler)

import gc

import torch

# Free memory before training.
# Run the garbage collector first so tensors that are only kept alive by unreachable
# Python objects are released, then hand the now-unused cached blocks back to the GPU.
gc.collect()
torch.cuda.empty_cache()



5897

# Model Training

In [None]:
import gc

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision

# Restart runtime first to free all GPU memory!

SEED = 42  # fixed seed so repeated runs start from the same random state
CHECKPOINT_PATH = 'efficientnet_v2_m_finetuned.pth'


def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimization pass over ``loader``.

    Args:
        model: network to train; switched to training mode here.
        loader: iterable yielding ``(images, labels)`` batches.
        criterion: loss function called as ``criterion(outputs, labels)``.
        optimizer: optimizer that updates ``model``'s parameters.
        device: device the batches are moved to before the forward pass.

    Returns:
        The mean of the per-batch loss values over the pass.
    """
    model.train()
    running_loss = 0.0
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    return running_loss / len(loader)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed torch's global RNG before anything random happens (new head initialization,
# sampler draws, augmentations). GPU kernels may still introduce small run-to-run
# differences.
torch.manual_seed(SEED)

# Clear any cached memory (just in case): collect garbage first so released tensors
# can actually be returned by empty_cache().
gc.collect()
torch.cuda.empty_cache()

# Load pretrained EfficientNet_V2_M (author's note: ≈10 GB VRAM)
weights = torchvision.models.EfficientNet_V2_M_Weights.IMAGENET1K_V1
model = torchvision.models.efficientnet_v2_m(weights=weights)

# Replace classification head so it outputs one logit per dataset class
in_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(in_features, len(train_dataset.classes))

model = model.to(device)

# Loss + optimizer
# NOTE(review): train_loader already draws class-balanced batches through the weighted
# sampler, and the loss is weighted by the same inverse class frequencies, so class
# imbalance is compensated twice — confirm this is intended.
criterion = nn.CrossEntropyLoss(weight=torch.tensor(class_weights, dtype=torch.float32, device=device))
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Training loop
for epoch in range(epochs):
    epoch_loss = train_one_epoch(model, train_loader, criterion, optimizer, device)

    torch.cuda.empty_cache()
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}')

# Save checkpoint: weights plus the class-name -> index mapping needed to decode predictions
torch.save({
    'model_state_dict': model.state_dict(),
    'class_to_idx': train_dataset.class_to_idx
}, CHECKPOINT_PATH)


Epoch [1/25], Loss: 2.2617
Epoch [2/25], Loss: 1.1013
Epoch [3/25], Loss: 0.5926
Epoch [4/25], Loss: 0.4054
Epoch [5/25], Loss: 0.2495
Epoch [6/25], Loss: 0.1835
Epoch [7/25], Loss: 0.1376
Epoch [8/25], Loss: 0.1117
Epoch [9/25], Loss: 0.0931
Epoch [10/25], Loss: 0.0729
Epoch [11/25], Loss: 0.0729
Epoch [12/25], Loss: 0.0744
Epoch [13/25], Loss: 0.0543
Epoch [14/25], Loss: 0.0519
Epoch [15/25], Loss: 0.0462
Epoch [16/25], Loss: 0.0406
Epoch [17/25], Loss: 0.0348
Epoch [18/25], Loss: 0.0453
Epoch [19/25], Loss: 0.0327
Epoch [20/25], Loss: 0.0260
Epoch [21/25], Loss: 0.0332
Epoch [22/25], Loss: 0.0281
Epoch [23/25], Loss: 0.0303
Epoch [24/25], Loss: 0.0532
Epoch [25/25], Loss: 0.0447


In [None]:
import torch

# Accuracy over one pass of train_loader, with the model in eval mode and gradients off.
# NOTE(review): train_loader samples through the weighted sampler and applies the random
# training transform, so this measures a resampled, augmented view of the training set.
model.eval()
correct, total = 0, 0

with torch.no_grad():
    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Predicted class = index of the largest logit in each row.
        predicted = torch.argmax(model(batch_images), dim=1)

        correct += predicted.eq(batch_labels).sum().item()
        total += batch_labels.shape[0]

train_accuracy = correct / total
print(f"Training Accuracy: {train_accuracy * 100:.2f}%")

Training Accuracy: 99.27%


# Evaluation

In [None]:
# Restore the weights written by the training cell. The file name must match the one
# used in torch.save there; a checkpoint of a different architecture cannot be loaded
# into this EfficientNet model.
checkpoint = torch.load('efficientnet_v2_m_finetuned.pth', map_location='cpu')
model.load_state_dict(checkpoint['model_state_dict'])
model = model.to(device)
model.eval()

# Decode predictions with the class mapping stored alongside the weights, so the labels
# always correspond to the model that produced them.
idx_to_class = {idx: name for name, idx in checkpoint['class_to_idx'].items()}

# Deterministic inference preprocessing: same resize and normalization as training,
# but without the random flip/rotation, so repeated runs give identical predictions.
test_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Prepare test images list: regular files only, in a stable (sorted) order
test_images = sorted(
    path
    for path in (os.path.join(test_dir, name) for name in os.listdir(test_dir))
    if os.path.isfile(path)
)

predictions = []
hashes = []

with torch.no_grad():
    for img_path in test_images:
        # Filename (without extension) as hash
        hashes.append(os.path.splitext(os.path.basename(img_path))[0])

        # Load + transform; the context manager closes the file handle promptly
        with Image.open(img_path) as raw_image:
            img = raw_image.convert('RGB')
        img = test_transform(img).unsqueeze(0).to(device)  # add the batch dimension

        output = model(img)
        pred_idx = output.argmax(dim=1).item()
        predictions.append(idx_to_class[pred_idx])

# Write to CSV
# NOTE(review): the output name still says "vit" although the predictions come from the
# EfficientNet model — rename if nothing depends on this file name.
df = pd.DataFrame({'md5hash': hashes, 'label': predictions})
df.to_csv('vit_predictions.csv', index=False)

In [None]:
# Download the fine-tuned checkpoint to the local machine through google.colab's
# `files` helper.
from google.colab import files
# To fetch a different checkpoint, change the file name passed below, e.g.:
# files.download('vit_l_16_finetuned.pth') # change name of model
files.download('efficientnet_v2_m_finetuned.pth')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>