In [1]:
import os
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from torch.cuda.amp import GradScaler, autocast

# ⚙️ Setup
# Prefer the GPU when one is visible to PyTorch, otherwise run on the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print("Device:", device)

# Dataset locations (image folders and CSV index files).
TRAIN_DIR = 'D:/Projects/iit_ropar_project/train'
TEST_DIR = 'D:/Projects/iit_ropar_project/test'
TRAIN_CSV = 'D:/Projects/iit_ropar_project/train_labels.csv'
TEST_CSV = 'D:/Projects/iit_ropar_project/test_ids.csv'
# Predictions produced by predict_test() are written to this file.
SUBMISSION_CSV = 'D:/Projects/iit_ropar_project/sample_submission.csv'

# Class names; a label's position in this list is its integer class index.
LABELS = ['Alluvial soil', 'Black Soil', 'Clay soil', 'Red soil']
label2idx = dict(zip(LABELS, range(len(LABELS))))
idx2label = dict(enumerate(LABELS))

# Hyper-parameters shared by every model in the ensemble.
IMG_SIZE = 224      # side length (pixels) of the square network input
BATCH_SIZE = 32
NUM_CLASSES = 4
EPOCHS = 30         # upper bound on epochs; early stopping may end a run sooner

# 🧱 Dataset Class
class SoilDataset(Dataset):
    """Image dataset indexed by a DataFrame with an 'image_id' column.

    Each item is loaded from ``os.path.join(img_dir, image_id)`` and converted
    to RGB. When ``labels`` is true the DataFrame must also carry a 'label'
    column whose values are keys of the module-level ``label2idx`` mapping.

    Args:
        df: DataFrame describing the samples (one row per image).
        img_dir: Directory that contains the image files.
        labels: If true, items are ``(image, class_index)``; otherwise
            ``(image, image_id)`` so predictions can be matched back to files.
        transform: Optional callable applied to the PIL image.
    """

    def __init__(self, df, img_dir, labels=True, transform=None):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform
        self.has_labels = labels

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # Fetch the row once instead of re-indexing the frame per column.
        row = self.df.iloc[idx]
        img_id = row['image_id']
        img_path = os.path.join(self.img_dir, img_id)

        # Close the underlying file deterministically; convert() returns an
        # independent, fully loaded image that outlives the context manager.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        # Compare against None so a falsy-but-valid callable is still applied.
        if self.transform is not None:
            image = self.transform(image)

        if self.has_labels:
            return image, label2idx[row['label']]
        return image, img_id

# 🔧 Transforms
# Training pipeline: random geometric/colour augmentation on the PIL image,
# then tensor conversion, normalisation and random erasing on the tensor.
# NOTE(review): mean/std of 0.5 rescales ToTensor output to roughly [-1, 1];
# the pretrained backbones may expect different statistics — confirm.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(size=IMG_SIZE, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(degrees=30),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.GaussianBlur(kernel_size=3),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    transforms.RandomErasing(),
])

# Validation / test pipeline: resize to the network input size and apply the
# same normalisation as training, with no random augmentation.
val_transform = transforms.Compose([
    transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# 📊 Load Data
# Read the labelled index and hold out 20% of it for validation. The split is
# stratified on 'label' and seeded so it is reproducible across runs.
df = pd.read_csv(TRAIN_CSV)
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)

# Training samples are augmented and reshuffled every epoch.
train_dataset = SoilDataset(df=train_df, img_dir=TRAIN_DIR, labels=True, transform=train_transform)
# Validation samples come from the same image folder but use the deterministic transform.
val_dataset = SoilDataset(df=val_df, img_dir=TRAIN_DIR, labels=True, transform=val_transform)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE)

# 🧠 Models
class CustomCNN(nn.Module):
    """Small from-scratch CNN: three 3x3 conv stages followed by an MLP head.

    The convolutional stack ends in global average pooling, so the head
    always receives a 128-dimensional vector per image and emits
    ``NUM_CLASSES`` logits.
    """

    def __init__(self):
        super().__init__()
        # Attribute names and layer positions determine the state_dict keys,
        # so they are kept stable for checkpoint compatibility.
        feature_layers = [
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),
        ]
        self.conv = nn.Sequential(*feature_layers)

        head_layers = [
            nn.Flatten(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, NUM_CLASSES),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return class logits for a batch of images ``x``."""
        return self.fc(self.conv(x))

def get_model(base='mobilenet'):
    """Build a pretrained torchvision backbone with a NUM_CLASSES-way head.

    Args:
        base: 'mobilenet' for MobileNetV2 or 'resnet' for ResNet-50.

    Returns:
        The model with its final linear layer replaced by a freshly
        initialised ``nn.Linear`` producing ``NUM_CLASSES`` logits.

    Raises:
        ValueError: If ``base`` is not one of the supported names.
    """
    # NOTE(review): `pretrained=True` is deprecated in newer torchvision
    # releases in favour of the `weights=` argument; it is kept here for
    # compatibility — confirm the installed version before switching.
    if base == 'mobilenet':
        model = models.mobilenet_v2(pretrained=True)
        model.classifier[1] = nn.Linear(model.last_channel, NUM_CLASSES)
    elif base == 'resnet':
        model = models.resnet50(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
    else:
        # Previously an unknown name fell through to `return model` and
        # failed with an opaque UnboundLocalError.
        raise ValueError(
            f"Unknown base model {base!r}; expected 'mobilenet' or 'resnet'"
        )
    return model

# 🏋️ Training Function
def train_model(model, name):
    """Train ``model`` on ``train_loader`` with early stopping on validation accuracy.

    Uses Adam (lr 1e-4), cross-entropy with label smoothing 0.1, a cosine
    learning-rate schedule spanning ``EPOCHS`` epochs, and mixed precision
    via ``autocast`` / ``GradScaler``. Whenever the validation accuracy
    strictly improves, the weights are saved to ``"{name}_best.pth"``.
    Training stops after 5 consecutive epochs without improvement.

    Args:
        model: The network to train; it is moved to ``device`` in place.
        name: Label used in progress output and in the checkpoint file name.

    Returns:
        The same ``model`` object, holding the weights from the last epoch
        that ran (the best checkpoint is not reloaded here).
    """
    model.to(device)

    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)
    scaler = GradScaler()

    patience = 5
    best_acc = 0
    epochs_without_gain = 0

    for epoch in range(EPOCHS):
        model.train()
        n_correct = 0
        n_seen = 0
        progress = tqdm(train_loader, desc=f"[{name}] Epoch {epoch+1}/{EPOCHS}")

        for batch_images, batch_labels in progress:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            optimizer.zero_grad()
            # Forward pass and loss run under autocast; the scaler handles
            # the backward pass and the optimizer step.
            with autocast():
                logits = model(batch_images)
                loss = criterion(logits, batch_labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            # Running training accuracy for the progress bar.
            n_correct += (logits.argmax(1) == batch_labels).sum().item()
            n_seen += batch_labels.size(0)
            progress.set_postfix(loss=loss.item(), acc=100 * n_correct / n_seen)

        val_acc = evaluate(model)
        print(f"Epoch {epoch+1}: Val Acc: {val_acc:.2f}%")
        scheduler.step()

        if val_acc > best_acc:
            # New best: checkpoint the weights and reset the patience counter.
            best_acc = val_acc
            torch.save(model.state_dict(), f"{name}_best.pth")
            epochs_without_gain = 0
            continue

        epochs_without_gain += 1
        if epochs_without_gain >= patience:
            print(f"Early stopping for {name}")
            break

    print(f"✅ Best Validation Accuracy for {name}: {best_acc:.2f}%")
    return model

# 📈 Evaluation
def evaluate(model):
    """Return the accuracy of ``model`` on ``val_loader`` as a percentage.

    The model is switched to eval mode and left in that mode; gradients are
    disabled for the duration of the pass.
    """
    model.eval()
    hits = 0
    seen = 0
    with torch.no_grad():
        for batch_images, batch_labels in val_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            predicted = model(batch_images).argmax(1)
            hits += (predicted == batch_labels).sum().item()
            seen += batch_labels.size(0)
    return 100 * hits / seen

# 🔮 Predict Test Set
def predict_test(models, weights=None):
    """Predict test-set labels with a soft-voting ensemble and write a CSV.

    For every batch, each model's softmax probabilities are (optionally)
    scaled by its weight, summed, and normalised; the arg-max class is mapped
    back to its name via ``idx2label``. The result is written to
    ``SUBMISSION_CSV`` with columns 'image_id' and 'label'.

    Args:
        models: Sequence of trained models that emit class logits.
        weights: Optional per-model weights, one per entry of ``models``.
            ``None`` or an empty sequence means a plain average.

    Raises:
        ValueError: If ``models`` is empty, if ``weights`` does not have one
            entry per model, or if the weights sum to zero.
    """
    # Validate before touching disk or the GPU so bad arguments fail fast
    # instead of surfacing as IndexError / ZeroDivisionError / NaN mid-run.
    ensemble = list(models)
    if not ensemble:
        raise ValueError("predict_test requires at least one model")

    if weights is not None and len(weights) > 0:
        weights = [float(w) for w in weights]
        if len(weights) != len(ensemble):
            raise ValueError(
                f"Expected {len(ensemble)} weights (one per model), got {len(weights)}"
            )
        normaliser = sum(weights)
        if normaliser == 0:
            raise ValueError("Ensemble weights must not sum to zero")
    else:
        weights = None
        normaliser = len(ensemble)

    for model in ensemble:
        model.eval()

    test_df = pd.read_csv(TEST_CSV)
    test_dataset = SoilDataset(test_df, TEST_DIR, labels=False, transform=val_transform)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

    all_preds, all_ids = [], []

    with torch.no_grad():
        for images, img_ids in tqdm(test_loader, desc="Predicting Test Set"):
            images = images.to(device)

            combined = None
            for i, model in enumerate(ensemble):
                # Each model is moved to the device only for its own forward
                # pass and then back to the CPU, so the models end up on the
                # CPU when this function returns.
                # NOTE(review): presumably done to limit GPU memory; it costs a
                # transfer per model per batch — confirm this trade-off is wanted.
                model.to(device)
                probs = torch.softmax(model(images), dim=1)
                if weights is not None:
                    probs = probs * weights[i]
                combined = probs if combined is None else combined + probs
                model.to('cpu')
                torch.cuda.empty_cache()

            avg_output = combined / normaliser
            class_indices = torch.argmax(avg_output, dim=1).cpu().tolist()
            all_preds.extend(idx2label[class_idx] for class_idx in class_indices)
            all_ids.extend(img_ids)

    submission = pd.DataFrame({'image_id': all_ids, 'label': all_preds})
    submission.to_csv(SUBMISSION_CSV, index=False)
    print(f"📁 Saved predictions to {SUBMISSION_CSV}")

# 🚀 Main
if __name__ == '__main__':
    # Train the three ensemble members one after another.
    model1 = train_model(CustomCNN(), 'CustomCNN')
    model2 = train_model(get_model('mobilenet'), 'MobileNetV2')
    model3 = train_model(get_model('resnet'), 'ResNet50')

    # train_model() returns the weights of the last epoch it ran, so restore
    # the best-validation checkpoint it saved for each model before predicting.
    # map_location keeps the load working regardless of where the checkpoint
    # was written; weights_only=True restricts unpickling to tensor data.
    for trained_model, checkpoint_path in (
        (model1, "CustomCNN_best.pth"),
        (model2, "MobileNetV2_best.pth"),
        (model3, "ResNet50_best.pth"),
    ):
        state_dict = torch.load(checkpoint_path, map_location=device, weights_only=True)
        trained_model.load_state_dict(state_dict)

    # Soft-voting ensemble: the two pretrained backbones get twice the weight
    # of the small custom CNN.
    predict_test([model1.to(device), model2.to(device), model3.to(device)], weights=[0.2, 0.4, 0.4])


Device: cuda


  scaler = GradScaler()
  with autocast():
[CustomCNN] Epoch 1/30: 100%|██████████| 31/31 [00:16<00:00,  1.90it/s, acc=24.3, loss=1.37]


Epoch 1: Val Acc: 61.63%


[CustomCNN] Epoch 2/30: 100%|██████████| 31/31 [00:14<00:00,  2.20it/s, acc=55.1, loss=1.35]


Epoch 2: Val Acc: 64.90%


[CustomCNN] Epoch 3/30: 100%|██████████| 31/31 [00:13<00:00,  2.26it/s, acc=60.8, loss=1.16]


Epoch 3: Val Acc: 68.98%


[CustomCNN] Epoch 4/30: 100%|██████████| 31/31 [00:14<00:00,  2.12it/s, acc=65.2, loss=1.25] 


Epoch 4: Val Acc: 70.20%


[CustomCNN] Epoch 5/30: 100%|██████████| 31/31 [00:28<00:00,  1.07it/s, acc=66.7, loss=0.983]


Epoch 5: Val Acc: 71.43%


[CustomCNN] Epoch 6/30: 100%|██████████| 31/31 [00:35<00:00,  1.14s/it, acc=66.8, loss=0.806]


Epoch 6: Val Acc: 72.24%


[CustomCNN] Epoch 7/30: 100%|██████████| 31/31 [00:13<00:00,  2.28it/s, acc=67.5, loss=0.927]


Epoch 7: Val Acc: 73.47%


[CustomCNN] Epoch 8/30: 100%|██████████| 31/31 [00:13<00:00,  2.25it/s, acc=66.2, loss=1.12] 


Epoch 8: Val Acc: 73.06%


[CustomCNN] Epoch 9/30: 100%|██████████| 31/31 [00:14<00:00,  2.10it/s, acc=67.8, loss=0.821]


Epoch 9: Val Acc: 73.06%


[CustomCNN] Epoch 10/30: 100%|██████████| 31/31 [00:13<00:00,  2.21it/s, acc=67.8, loss=1.03] 


Epoch 10: Val Acc: 73.88%


[CustomCNN] Epoch 11/30: 100%|██████████| 31/31 [00:15<00:00,  1.95it/s, acc=68.6, loss=0.882]


Epoch 11: Val Acc: 73.47%


[CustomCNN] Epoch 12/30: 100%|██████████| 31/31 [00:18<00:00,  1.65it/s, acc=67.7, loss=0.849]


Epoch 12: Val Acc: 73.47%


[CustomCNN] Epoch 13/30: 100%|██████████| 31/31 [00:24<00:00,  1.27it/s, acc=68, loss=1.08]   


Epoch 13: Val Acc: 74.69%


[CustomCNN] Epoch 14/30: 100%|██████████| 31/31 [00:37<00:00,  1.22s/it, acc=69.1, loss=0.857]


Epoch 14: Val Acc: 74.29%


[CustomCNN] Epoch 15/30: 100%|██████████| 31/31 [00:38<00:00,  1.23s/it, acc=67.2, loss=1.02] 


Epoch 15: Val Acc: 74.69%


[CustomCNN] Epoch 16/30: 100%|██████████| 31/31 [00:37<00:00,  1.20s/it, acc=69.1, loss=0.965]


Epoch 16: Val Acc: 74.29%


[CustomCNN] Epoch 17/30: 100%|██████████| 31/31 [00:38<00:00,  1.23s/it, acc=69.1, loss=0.766]


Epoch 17: Val Acc: 74.69%


[CustomCNN] Epoch 18/30: 100%|██████████| 31/31 [00:36<00:00,  1.18s/it, acc=69, loss=0.778]  


Epoch 18: Val Acc: 75.92%


[CustomCNN] Epoch 19/30: 100%|██████████| 31/31 [00:15<00:00,  2.05it/s, acc=69.8, loss=1.08] 


Epoch 19: Val Acc: 75.92%


[CustomCNN] Epoch 20/30: 100%|██████████| 31/31 [00:13<00:00,  2.28it/s, acc=69.2, loss=0.83] 


Epoch 20: Val Acc: 75.92%


[CustomCNN] Epoch 21/30: 100%|██████████| 31/31 [00:13<00:00,  2.28it/s, acc=70.3, loss=0.875]


Epoch 21: Val Acc: 75.92%


[CustomCNN] Epoch 22/30: 100%|██████████| 31/31 [00:35<00:00,  1.14s/it, acc=70.6, loss=0.728]


Epoch 22: Val Acc: 75.92%


[CustomCNN] Epoch 23/30: 100%|██████████| 31/31 [00:28<00:00,  1.08it/s, acc=70.4, loss=0.652]


Epoch 23: Val Acc: 75.92%
Early stopping for CustomCNN
✅ Best Validation Accuracy for CustomCNN: 75.92%


[MobileNetV2] Epoch 1/30: 100%|██████████| 31/31 [00:14<00:00,  2.14it/s, acc=72.9, loss=0.849]


Epoch 1: Val Acc: 90.20%


[MobileNetV2] Epoch 2/30: 100%|██████████| 31/31 [00:13<00:00,  2.24it/s, acc=85.4, loss=0.538]


Epoch 2: Val Acc: 93.06%


[MobileNetV2] Epoch 3/30: 100%|██████████| 31/31 [00:13<00:00,  2.30it/s, acc=89.6, loss=0.496]


Epoch 3: Val Acc: 94.29%


[MobileNetV2] Epoch 4/30: 100%|██████████| 31/31 [00:13<00:00,  2.28it/s, acc=90.6, loss=0.614]


Epoch 4: Val Acc: 92.24%


[MobileNetV2] Epoch 5/30: 100%|██████████| 31/31 [00:13<00:00,  2.27it/s, acc=91, loss=0.576]  


Epoch 5: Val Acc: 94.29%


[MobileNetV2] Epoch 6/30: 100%|██████████| 31/31 [00:13<00:00,  2.26it/s, acc=92.8, loss=0.426]


Epoch 6: Val Acc: 95.10%


[MobileNetV2] Epoch 7/30: 100%|██████████| 31/31 [00:23<00:00,  1.32it/s, acc=94.2, loss=0.561]


Epoch 7: Val Acc: 95.10%


[MobileNetV2] Epoch 8/30: 100%|██████████| 31/31 [00:41<00:00,  1.34s/it, acc=93.7, loss=0.496]


Epoch 8: Val Acc: 93.47%


[MobileNetV2] Epoch 9/30: 100%|██████████| 31/31 [00:42<00:00,  1.37s/it, acc=94.2, loss=0.399]


Epoch 9: Val Acc: 95.10%


[MobileNetV2] Epoch 10/30: 100%|██████████| 31/31 [00:28<00:00,  1.10it/s, acc=94.7, loss=0.447]


Epoch 10: Val Acc: 92.24%


[MobileNetV2] Epoch 11/30: 100%|██████████| 31/31 [00:13<00:00,  2.23it/s, acc=96.6, loss=0.481]


Epoch 11: Val Acc: 96.33%


[MobileNetV2] Epoch 12/30: 100%|██████████| 31/31 [00:13<00:00,  2.25it/s, acc=96.6, loss=0.394]


Epoch 12: Val Acc: 95.92%


[MobileNetV2] Epoch 13/30: 100%|██████████| 31/31 [00:42<00:00,  1.36s/it, acc=97.1, loss=0.445]


Epoch 13: Val Acc: 96.33%


[MobileNetV2] Epoch 14/30: 100%|██████████| 31/31 [00:41<00:00,  1.33s/it, acc=95, loss=0.425]  


Epoch 14: Val Acc: 96.33%


[MobileNetV2] Epoch 15/30: 100%|██████████| 31/31 [00:40<00:00,  1.32s/it, acc=96.4, loss=0.391]


Epoch 15: Val Acc: 96.33%


[MobileNetV2] Epoch 16/30: 100%|██████████| 31/31 [00:40<00:00,  1.30s/it, acc=97.7, loss=0.385]


Epoch 16: Val Acc: 96.33%
Early stopping for MobileNetV2
✅ Best Validation Accuracy for MobileNetV2: 96.33%


[ResNet50] Epoch 1/30: 100%|██████████| 31/31 [00:30<00:00,  1.02it/s, acc=77, loss=0.463]  


Epoch 1: Val Acc: 90.61%


[ResNet50] Epoch 2/30: 100%|██████████| 31/31 [00:15<00:00,  1.94it/s, acc=89.3, loss=0.509]


Epoch 2: Val Acc: 91.84%


[ResNet50] Epoch 3/30: 100%|██████████| 31/31 [00:16<00:00,  1.91it/s, acc=91.3, loss=0.44] 


Epoch 3: Val Acc: 92.24%


[ResNet50] Epoch 4/30: 100%|██████████| 31/31 [00:15<00:00,  1.94it/s, acc=92.8, loss=0.925]


Epoch 4: Val Acc: 95.10%


[ResNet50] Epoch 5/30: 100%|██████████| 31/31 [00:15<00:00,  1.95it/s, acc=93.1, loss=0.71] 


Epoch 5: Val Acc: 92.65%


[ResNet50] Epoch 6/30: 100%|██████████| 31/31 [00:16<00:00,  1.94it/s, acc=93.7, loss=0.507]


Epoch 6: Val Acc: 94.29%


[ResNet50] Epoch 7/30: 100%|██████████| 31/31 [00:16<00:00,  1.92it/s, acc=95.4, loss=0.408]


Epoch 7: Val Acc: 94.29%


[ResNet50] Epoch 8/30: 100%|██████████| 31/31 [00:15<00:00,  1.94it/s, acc=96.2, loss=0.613]


Epoch 8: Val Acc: 94.69%


[ResNet50] Epoch 9/30: 100%|██████████| 31/31 [00:15<00:00,  1.95it/s, acc=96.8, loss=0.534]


Epoch 9: Val Acc: 95.92%


[ResNet50] Epoch 10/30: 100%|██████████| 31/31 [00:15<00:00,  1.94it/s, acc=97, loss=0.542]  


Epoch 10: Val Acc: 94.69%


[ResNet50] Epoch 11/30: 100%|██████████| 31/31 [00:15<00:00,  1.94it/s, acc=97.6, loss=0.385]


Epoch 11: Val Acc: 95.92%


[ResNet50] Epoch 12/30: 100%|██████████| 31/31 [00:15<00:00,  1.94it/s, acc=97.2, loss=0.365]


Epoch 12: Val Acc: 95.51%


[ResNet50] Epoch 13/30: 100%|██████████| 31/31 [00:16<00:00,  1.94it/s, acc=97.6, loss=0.552]


Epoch 13: Val Acc: 97.14%


[ResNet50] Epoch 14/30: 100%|██████████| 31/31 [00:16<00:00,  1.90it/s, acc=97.6, loss=0.502]


Epoch 14: Val Acc: 95.51%


[ResNet50] Epoch 15/30: 100%|██████████| 31/31 [00:16<00:00,  1.93it/s, acc=98.2, loss=0.382]


Epoch 15: Val Acc: 95.92%


[ResNet50] Epoch 16/30: 100%|██████████| 31/31 [00:15<00:00,  1.94it/s, acc=98.7, loss=0.545]


Epoch 16: Val Acc: 96.73%


[ResNet50] Epoch 17/30: 100%|██████████| 31/31 [00:15<00:00,  1.94it/s, acc=98.4, loss=0.458]


Epoch 17: Val Acc: 97.14%


[ResNet50] Epoch 18/30: 100%|██████████| 31/31 [00:15<00:00,  1.94it/s, acc=99.1, loss=0.369]


Epoch 18: Val Acc: 96.73%
Early stopping for ResNet50
✅ Best Validation Accuracy for ResNet50: 97.14%


  model1.load_state_dict(torch.load("CustomCNN_best.pth"))
  model2.load_state_dict(torch.load("MobileNetV2_best.pth"))
  model3.load_state_dict(torch.load("ResNet50_best.pth"))
Predicting Test Set: 100%|██████████| 11/11 [00:04<00:00,  2.38it/s]

📁 Saved predictions to D:/Projects/iit_ropar_project/sample_submission.csv



