In [None]:
import pandas as pd
import os
# Read the label tables for the training and validation splits.
labels_train = pd.read_csv("labels_train.csv")
labels_val = pd.read_csv("labels_val.csv")

# Preview the first rows of both tables (training first, then validation).
print(labels_train.head(), labels_val.head(), sep="\n")

# Report the (rows, columns) shape of both tables in the same order.
print(labels_train.shape, labels_val.shape, sep="\n")

In [None]:

import seaborn as sns
import matplotlib.pyplot as plt
# Box plot of the Region_ID column of the training labels.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(x=labels_train['Region_ID'], ax=ax)
ax.set_title('RegionID Distribution')
plt.show()

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.models import convnext_tiny, ConvNeXt_Tiny_Weights
from torchvision import models
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import os

# Column order of the label tables: filename, timestamp, latitude, longitude, angle, Region_ID
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
class SMAIDataset(Dataset):
    """Image dataset backed by a label table and a directory of image files.

    Each row of ``preprocess_df`` describes one sample. Columns are read by
    position: column 0 is the image file name (relative to ``root_dir``) and
    column 5 is the integer region label, so the table must keep that layout.

    Args:
        preprocess_df: pandas DataFrame with one row per image.
        root_dir: Directory that contains the image files.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, preprocess_df, root_dir, transform=None):
        self.preprocess_df = preprocess_df
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of samples (rows in the label table)."""
        return len(self.preprocess_df)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, region_id)``.

        The image is always decoded to 3-channel RGB so that grayscale,
        palette or RGBA files cannot reach a 3-channel transform or model with
        the wrong number of channels. ``region_id`` is a scalar ``torch.long``
        tensor holding the raw label value from the table; no offset is
        applied here.
        """
        img_name = os.path.join(self.root_dir, self.preprocess_df.iloc[idx, 0])

        # ``Image.open`` is lazy; ``convert`` forces the pixel data to be read
        # and returns an independent image, so the file can be closed right away.
        with Image.open(img_name) as img:
            image = img.convert("RGB")

        region_id = torch.tensor(self.preprocess_df.iloc[idx, 5], dtype=torch.long)

        # Compare against None explicitly: some valid callables are falsy
        # (for example an empty ``nn.Sequential``).
        if self.transform is not None:
            image = self.transform(image)
        return image, region_id


def _run_epoch(model, loader, criterion, optimizer=None, label_offset=1):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    When ``optimizer`` is given, the model is put in training mode and updated
    after every batch. Otherwise the model is put in eval mode and gradients
    are disabled. Batches are moved to the module-level ``device``.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    correct = 0
    seen = 0
    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images = images.to(device)
            # Shift the stored labels to the 0-based class indices the loss expects.
            labels = (labels - label_offset).to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            batch_size = labels.size(0)
            # ``loss`` is a batch mean, so weight it by the batch size.
            loss_sum += loss.item() * batch_size
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            seen += batch_size

    if seen == 0:
        # Empty loader: report an undefined loss instead of dividing by zero.
        return float("nan"), 0.0
    # Average over the samples actually processed, which stays correct when
    # the loader drops the last incomplete batch.
    return loss_sum / seen, 100 * correct / seen


def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=10,
                checkpoint_path="best_region_model.pth", min_lr=1e-6, label_offset=1):
    """Train ``model`` and checkpoint the weights with the lowest validation loss.

    Each epoch runs one training pass and one validation pass, steps
    ``scheduler`` with the validation loss, prints a summary line and saves
    ``model.state_dict()`` to ``checkpoint_path`` whenever the validation loss
    improves. Training stops early once the learning rate of the first
    parameter group falls below ``min_lr``.

    Args:
        model: Network to train; batches are moved to the module-level
            ``device``, so the model is expected to live there already.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer updating ``model``'s parameters.
        scheduler: Scheduler whose ``step`` accepts the validation loss
            (e.g. ``ReduceLROnPlateau``).
        epochs: Maximum number of epochs.
        checkpoint_path: Where the best ``state_dict`` is written.
        min_lr: Learning-rate threshold below which training stops.
        label_offset: Value subtracted from the labels to obtain 0-based
            class indices.

    Returns:
        The same ``model`` object with the weights of the last epoch run; the
        best weights are the ones stored at ``checkpoint_path``.
    """
    best_val_loss = float('inf')
    for epoch in range(epochs):
        train_loss, train_acc = _run_epoch(
            model, train_loader, criterion, optimizer, label_offset=label_offset
        )
        val_loss, val_acc = _run_epoch(
            model, val_loader, criterion, label_offset=label_offset
        )
        scheduler.step(val_loss)

        current_lr = optimizer.param_groups[0]['lr']
        print(f"Epoch [{epoch+1}/{epochs}] - Train Loss: {train_loss:.4f}, Acc: {train_acc:.2f}% | Val Loss: {val_loss:.4f}, Acc: {val_acc:.2f}%, LR: {current_lr:.2e}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), checkpoint_path)
            print(f"Best model saved at epoch {epoch+1}")

        # The scheduler has shrunk the learning rate so far that further
        # epochs would barely change the weights.
        if current_lr < min_lr:
            break

    return model

# Region Prediction

In [None]:
# Free cached GPU memory held by the CUDA allocator before building the model.
torch.cuda.empty_cache()

# Pretrained weights for the ConvNeXt-Tiny backbone.
# (weights.transforms() would give the preset preprocessing; custom pipelines
# are used below instead.)
weights = ConvNeXt_Tiny_Weights.DEFAULT

# Channel statistics shared by the training and validation pipelines.
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Training pipeline: resize plus light random augmentation.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

# Validation pipeline: same resize and normalisation but no random
# augmentation, so the validation loss is deterministic. That loss drives both
# the LR scheduler and the best-checkpoint selection.
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

# Load model
model = convnext_tiny(weights=weights, stochastic_depth_prob=0.5)
num_features = model.classifier[2].in_features
# Replace classifier[2] with a regularised head that outputs 15 region classes.
# train_model subtracts 1 from the labels by default, so this assumes
# Region_ID takes the values 1..15 -- TODO confirm against the label files.
model.classifier[2] = nn.Sequential(
    nn.Flatten(),
    nn.LayerNorm(num_features, eps=1e-6),
    nn.Dropout(p=0.5),
    nn.Linear(num_features, 15))
model = model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=2e-4)

# Datasets: augmentation is applied to the training split only.
train_dataset = SMAIDataset(labels_train, 'images_train', transform=transform)
val_dataset = SMAIDataset(labels_val, 'images_val', transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Scheduler: halve the learning rate after more than one epoch without
# validation-loss improvement.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=1, factor=0.5)

# Training
print("Training for regionID")
model = train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=40)
