In [None]:
from PIL import Image
from torchvision import transforms

import pandas as pd

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import pandas as pd
import os

class CustomDataset(Dataset):
    """Image dataset backed by a CSV that lists file names and region IDs.

    Only the ``filename`` and ``Region_ID`` columns of the CSV are kept.
    Indexing returns ``(image, label)`` where ``image`` is the transformed
    picture and ``label`` is ``Region_ID - 1``.

    Args:
        csv_path: Path of the CSV file holding the labels.
        img_dir: Directory that the ``filename`` entries are joined onto.
        transform: Callable applied to the loaded PIL image. When omitted
            (or falsy) ``transforms.ToTensor()`` is used.
    """

    def __init__(self, csv_path, img_dir, transform=None):
        self.df = pd.read_csv(csv_path)[["filename", "Region_ID"]]
        self.img_dir = img_dir
        self.transform = transform or transforms.ToTensor()

    def __len__(self):
        """Return the number of labelled rows in the CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, convert and transform the image at position ``idx``."""
        # Fetch the row once instead of indexing the frame separately for
        # the file name and the label.
        row = self.df.iloc[idx]
        img_path = os.path.join(self.img_dir, row["filename"])
        with Image.open(img_path) as img:
            # Force three channels: grayscale, palette or RGBA files would
            # otherwise yield tensors that do not match a 3-channel
            # Normalize / an RGB-pretrained model. convert() also loads the
            # pixel data before the file handle is closed.
            img = img.convert("RGB")
            if self.transform:
                img = self.transform(img)
        # presumably Region_ID is 1-based in the CSV and the shift makes it a
        # 0-based class index -- TODO confirm against the label file
        return img, row["Region_ID"] - 1

# ---- Input locations (Kaggle competition data) ----
train_csv = "/kaggle/input/smai-25-sec-a-project-phase-2-region-id-prediction/labels_train.csv"
train_img_dir = "/kaggle/input/smai-25-sec-a-project-phase-2-region-id-prediction/images_train/images_train/"

val_csv = "/kaggle/input/smai-25-sec-a-project-phase-2-region-id-prediction/labels_val.csv"
val_img_dir = "/kaggle/input/smai-25-sec-a-project-phase-2-region-id-prediction/images_val/images_val/"

# ---- Preprocessing pipelines ----
# Per-channel statistics handed to Normalize; the same values are used for
# training and validation (these are the commonly used ImageNet statistics).
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Training pipeline: resize, random horizontal flip, tensor conversion,
# normalization. The commented entries are augmentations that are currently
# switched off; they are kept at the position they would occupy.
train_steps = [
    transforms.Resize(224),
    transforms.RandomHorizontalFlip(),
    # transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    # transforms.RandomAffine(degrees=15, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    # transforms.RandomGrayscale(p=0.05),
    # transforms.GaussianBlur(kernel_size=5, sigma=(0.1, 2.0)),
    # transforms.RandomPerspective(distortion_scale=0.2, p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
    # transforms.RandomErasing(p=0.3, scale=(0.02, 0.2))
]
train_transform = transforms.Compose(train_steps)

# Validation pipeline: deterministic, no augmentation.
val_steps = [
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
]
val_transform = transforms.Compose(val_steps)

# ---- Datasets ----
train_dataset = CustomDataset(train_csv, train_img_dir, transform=train_transform)
val_dataset = CustomDataset(val_csv, val_img_dir, transform=val_transform)

# ---- DataLoaders ----
batch_size = 32
# Settings common to both loaders; only the shuffle flag differs.
shared_loader_args = dict(batch_size=batch_size, num_workers=4)
train_loader = DataLoader(train_dataset, shuffle=True, **shared_loader_args)
val_loader = DataLoader(val_dataset, shuffle=False, **shared_loader_args)

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision.models import convnext_base, ConvNeXt_Base_Weights

import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import time

# Select the compute device: CUDA when a GPU is visible, CPU otherwise
has_cuda = torch.cuda.is_available()
device = torch.device("cuda" if has_cuda else "cpu")
print(f"Using device: {device}")

# Build the data loaders.
# train_dataset and val_dataset are expected to exist already.
batch_size = 32
num_workers = 4

# Arguments common to both loaders. Pinned host memory is requested only
# when CUDA is available.
loader_kwargs = dict(
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=has_cuda,
)

train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

Using device: cuda


In [5]:
from torchvision.models import convnext_base, ConvNeXt_Base_Weights

def build_model(num_regions):
    """Create a ConvNeXt-Base network whose head predicts ``num_regions`` classes.

    The backbone is instantiated with the ``IMAGENET1K_V1`` weights and its
    ``classifier`` attribute is replaced by a new pooling / flatten /
    layer-norm / linear stack.

    Args:
        num_regions: Number of output units of the final linear layer.

    Returns:
        The model with the replaced classifier.
    """
    # Width that the new head is built for (the Base variant's feature size).
    feature_dim = 1024

    backbone = convnext_base(weights=ConvNeXt_Base_Weights.IMAGENET1K_V1)

    # New classification head; LayerNorm and Linear must both use feature_dim.
    head_layers = [
        nn.AdaptiveAvgPool2d(1),
        nn.Flatten(),
        nn.LayerNorm(feature_dim),
        nn.Linear(feature_dim, num_regions),
    ]
    backbone.classifier = nn.Sequential(*head_layers)
    return backbone

In [None]:
# Training function
def _run_epoch(model, loader, criterion, optimizer=None, desc="Training"):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    With an ``optimizer`` the model is put in train mode and updated after
    every batch; without one it is put in eval mode and gradients are disabled.
    """
    is_train = optimizer is not None
    model.train(is_train)

    loss_sum = 0.0
    correct = 0
    total = 0

    progress = tqdm(loader, desc=desc)
    with torch.set_grad_enabled(is_train):
        for inputs, labels in progress:
            inputs, labels = inputs.to(device), labels.to(device)

            if is_train:
                optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if is_train:
                loss.backward()
                optimizer.step()

            # The criterion returns a batch mean, so weight it by the batch
            # size to get a correct per-sample average at the end.
            batch_n = labels.size(0)
            loss_sum += loss.item() * batch_n
            _, predicted = torch.max(outputs, 1)
            total += batch_n
            correct += (predicted == labels).sum().item()

            progress.set_postfix(loss=loss.item(), acc=correct / total)

    if total == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError.
        raise ValueError(f"{desc} loader produced no samples")

    # Average over the samples actually seen, which stays correct even when
    # the loader drops or sub-samples items.
    return loss_sum / total, 100. * correct / total


def _plot_history(train_losses, val_losses, train_accs, val_accs):
    """Plot loss and accuracy curves, save them to 'training_history.png' and show them."""
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

    loss_ax.plot(train_losses, label='Train Loss')
    loss_ax.plot(val_losses, label='Val Loss')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend()
    loss_ax.set_title('Loss Curves')

    acc_ax.plot(train_accs, label='Train Accuracy')
    acc_ax.plot(val_accs, label='Val Accuracy')
    acc_ax.set_xlabel('Epoch')
    acc_ax.set_ylabel('Accuracy (%)')
    acc_ax.legend()
    acc_ax.set_title('Accuracy Curves')

    fig.tight_layout()
    fig.savefig('training_history.png')
    plt.show()


def train_model(model, train_loader, val_loader, num_regions, num_epochs=10,
                checkpoint_path="/kaggle/working/best_convnext_region_classifier.pth"):
    """Train ``model`` with AdamW and keep the checkpoint with the best validation accuracy.

    Each epoch runs one training pass and one validation pass. The learning
    rate is multiplied by 0.1 when the validation loss has not improved for
    more than two epochs (``ReduceLROnPlateau``). Whenever validation accuracy
    improves, the model's ``state_dict`` is written to ``checkpoint_path``.
    After the last epoch the loss/accuracy curves are plotted and saved.

    Args:
        model: Network to train; moved to the module-level ``device``.
        train_loader: Loader yielding ``(inputs, labels)`` training batches.
        val_loader: Loader yielding ``(inputs, labels)`` validation batches.
        num_regions: Currently unused; kept so existing callers keep working.
        num_epochs: Number of epochs to run.
        checkpoint_path: File that receives the best ``state_dict``.

    Returns:
        ``(model, train_losses, val_losses, train_accs, val_accs)``. ``model``
        holds the weights of the LAST epoch, not necessarily the best one;
        the lists contain one value per epoch (accuracies in percent).

    Raises:
        ValueError: If a loader yields no samples.
    """
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    # Use AdamW which typically works better with ConvNeXt models
    optimizer = optim.AdamW(model.parameters(), lr=0.0001, weight_decay=0.05)

    # `verbose=True` is deprecated on LR schedulers in recent PyTorch
    # releases, so learning-rate changes are reported manually below.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.1, patience=2
    )

    best_val_acc = 0.0
    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []

    for epoch in range(num_epochs):
        start_time = time.time()
        print(f"Epoch {epoch+1}/{num_epochs}")

        epoch_train_loss, epoch_train_acc = _run_epoch(
            model, train_loader, criterion, optimizer, desc="Training"
        )
        train_losses.append(epoch_train_loss)
        train_accs.append(epoch_train_acc)

        epoch_val_loss, epoch_val_acc = _run_epoch(
            model, val_loader, criterion, desc="Validation"
        )
        val_losses.append(epoch_val_loss)
        val_accs.append(epoch_val_acc)

        # Update learning rate based on validation loss and report changes
        lr_before = optimizer.param_groups[0]["lr"]
        scheduler.step(epoch_val_loss)
        lr_after = optimizer.param_groups[0]["lr"]
        if lr_after != lr_before:
            print(f"Learning rate reduced from {lr_before:.2e} to {lr_after:.2e}")

        # Save best model
        if epoch_val_acc > best_val_acc:
            best_val_acc = epoch_val_acc
            torch.save(model.state_dict(), checkpoint_path)
            print(f"New best model saved with validation accuracy: {best_val_acc:.2f}%")

        # Print epoch summary
        time_taken = time.time() - start_time
        print(f"Epoch {epoch+1}/{num_epochs} completed in {time_taken:.2f}s")
        print(f"Train Loss: {epoch_train_loss:.4f}, Train Acc: {epoch_train_acc:.2f}%")
        print(f"Val Loss: {epoch_val_loss:.4f}, Val Acc: {epoch_val_acc:.2f}%")
        print("-" * 50)

    _plot_history(train_losses, val_losses, train_accs, val_accs)

    return model, train_losses, val_losses, train_accs, val_accs

# Evaluation helper for a held-out loader
def evaluate_model(model, val_loader):
    """Compute the accuracy of ``model`` over every batch of ``val_loader``.

    The model is moved to the module-level ``device`` and switched to eval
    mode; no gradients are tracked.

    Args:
        model: Network to evaluate.
        val_loader: Loader yielding ``(inputs, labels)`` batches.

    Returns:
        ``(accuracy, all_preds, all_labels)``: accuracy in percent, followed
        by the flat lists of predicted and true class indices.
    """
    model = model.to(device)
    model.eval()

    all_preds, all_labels = [], []

    with torch.no_grad():
        for batch_inputs, batch_labels in tqdm(val_loader, desc="Evaluating"):
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            # Index of the largest logit per row is the predicted class.
            batch_preds = torch.max(model(batch_inputs), 1)[1]

            all_preds.extend(batch_preds.cpu().numpy())
            all_labels.extend(batch_labels.cpu().numpy())

    # Fraction of positions where prediction and label agree, as a percentage.
    matches = np.array(all_preds) == np.array(all_labels)
    accuracy = 100. * np.mean(matches)
    print(f"Validation Accuracy: {accuracy:.2f}%")

    # Further metrics (confusion matrix, per-class accuracy, ...) could be added here.
    return accuracy, all_preds, all_labels


# Configure parameters
batch_size = 32
num_epochs = 15

# Number of output classes of the classifier head.
# NOTE(review): hard-coded placeholder; confirm it matches the number of
# distinct Region_ID values in the training labels.
# num_regions = len(set(train_dataset.targets))
num_regions = 15

# File that train_model writes its best checkpoint to. Using the same absolute
# path for loading avoids depending on the notebook's working directory.
best_checkpoint_path = "/kaggle/working/best_convnext_region_classifier.pth"

# Build model
model = build_model(num_regions)

# Train model
model, train_losses, val_losses, train_accs, val_accs = train_model(
    model, train_loader, val_loader, num_regions, num_epochs
)

# Load best model.
# `weights_only` is an argument of torch.load, not of load_state_dict;
# passing it to load_state_dict raises a TypeError.
best_state = torch.load(best_checkpoint_path, map_location=device, weights_only=True)
model.load_state_dict(best_state)

# Evaluate on validation set
accuracy, all_preds, all_labels = evaluate_model(model, val_loader)

print(f"Final validation accuracy: {accuracy:.2f}%")

# Save the final model together with the metadata needed to rebuild it
torch.save({
    'model_state_dict': model.state_dict(),
    'num_regions': num_regions,
    'accuracy': accuracy
}, "/kaggle/working/final_convnext_region_classifier.pth")



Epoch 1/15


Training: 100%|██████████| 205/205 [05:03<00:00,  1.48s/it, acc=0.616, loss=0.599]
Validation: 100%|██████████| 12/12 [00:03<00:00,  3.82it/s, acc=0.84, loss=0.333] 


New best model saved with validation accuracy: 84.01%
Epoch 1/15 completed in 307.08s
Train Loss: 1.2431, Train Acc: 61.56%
Val Loss: 0.5372, Val Acc: 84.01%
--------------------------------------------------
Epoch 2/15


Training: 100%|██████████| 205/205 [05:01<00:00,  1.47s/it, acc=0.903, loss=0.2]  
Validation: 100%|██████████| 12/12 [00:02<00:00,  4.01it/s, acc=0.892, loss=0.208]


New best model saved with validation accuracy: 89.16%
Epoch 2/15 completed in 305.76s
Train Loss: 0.3516, Train Acc: 90.29%
Val Loss: 0.3010, Val Acc: 89.16%
--------------------------------------------------
Epoch 3/15


Training: 100%|██████████| 205/205 [05:02<00:00,  1.47s/it, acc=0.968, loss=0.0588]
Validation: 100%|██████████| 12/12 [00:02<00:00,  4.06it/s, acc=0.908, loss=0.0813]


New best model saved with validation accuracy: 90.79%
Epoch 3/15 completed in 305.80s
Train Loss: 0.1310, Train Acc: 96.76%
Val Loss: 0.2950, Val Acc: 90.79%
--------------------------------------------------
Epoch 4/15


Training: 100%|██████████| 205/205 [05:02<00:00,  1.47s/it, acc=0.988, loss=0.0138] 
Validation:   0%|          | 0/12 [00:00<?, ?it/s]