In [1]:
pip install torch torchvision matplotlib scikit-learn tqdm

Note: you may need to restart the kernel to use updated packages.




In [2]:
import os
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

# CUDA setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# Paths
TRAIN_DIR = 'D:\Projects\iit_ropar_project/train'
TEST_DIR = 'D:\Projects\iit_ropar_project/test'
TRAIN_CSV = 'D:\Projects\iit_ropar_project/train_labels.csv'
TEST_CSV = 'D:\Projects\iit_ropar_project/test_ids.csv'
SUBMISSION_CSV = 'D:\Projects\iit_ropar_project\sample_submission.csv'

IMG_SIZE = 224
BATCH_SIZE = 32
NUM_CLASSES = 4
EPOCHS = 30
LABELS = ['Alluvial soil', 'Black Soil', 'Clay soil', 'Red soil']

# 🔁 Label Encoding
label2idx = {label: i for i, label in enumerate(LABELS)}
idx2label = {i: label for label, i in label2idx.items()}

# 🧱 Dataset class
class SoilDataset(Dataset):
    def __init__(self, df, img_dir, labels=True, transform=None):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform
        self.has_labels = labels

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_id = self.df.iloc[idx]['image_id']
        img_path = os.path.join(self.img_dir, img_id)
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        if self.has_labels:
            label = label2idx[self.df.iloc[idx]['label']]
            return image, label
        else:
            return image, img_id

# 🔧 Transform
transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

val_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# 📊 Load train data
train_df = pd.read_csv(TRAIN_CSV)
train_data, val_data = train_test_split(train_df, test_size=0.2, stratify=train_df['label'], random_state=42)

train_dataset = SoilDataset(train_data, TRAIN_DIR, labels=True, transform=transform)
val_dataset = SoilDataset(val_data, TRAIN_DIR, labels=True, transform=val_transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

# 🧠 Model
class CustomCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(), nn.AdaptiveAvgPool2d((1, 1))
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128, 64), nn.ReLU(),
            nn.Linear(64, NUM_CLASSES)
        )

    def forward(self, x):
        x = self.conv(x)
        return self.fc(x)

def get_model(base='mobilenet'):
    if base == 'mobilenet':
        model = models.mobilenet_v2(pretrained=True)
        model.classifier[1] = nn.Linear(model.last_channel, NUM_CLASSES)
    elif base == 'resnet':
        model = models.resnet50(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
    return model

# 🏋️‍♂️ Train Function
def train_model(model, name):
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    criterion = nn.CrossEntropyLoss()

    best_acc = 0
    patience = 5
    stop_counter = 0

    for epoch in range(EPOCHS):
        model.train()
        correct, total = 0, 0
        for images, labels in tqdm(train_loader, desc=f"{name} Epoch {epoch+1}"):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            preds = outputs.argmax(1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        train_acc = 100 * correct / total
        val_acc = evaluate(model)
        print(f"[{name}] Epoch {epoch+1}: Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%")

        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), f'{name}_best.pth')
            stop_counter = 0
        else:
            stop_counter += 1
            if stop_counter >= patience:
                print(f"Early stopping {name}")
                break

    print(f"Best Val Acc for {name}: {best_acc:.2f}%")
    return model

# 📈 Evaluate
def evaluate(model):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            preds = outputs.argmax(1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return 100 * correct / total

# 🔮 Predict Test Set
def predict_test(models):
    for model in models:
        model.eval()

    test_df = pd.read_csv(TEST_CSV)
    test_dataset = SoilDataset(test_df, TEST_DIR, labels=False, transform=val_transform)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

    all_preds, all_ids = [], []

    with torch.no_grad():  # <--- Add this
        for images, img_ids in tqdm(test_loader, desc="Predicting Test Set"):
            images = images.to(device)

            # Run each model sequentially to avoid memory overflow
            outputs = []
            for m in models:
                m.to(device)
                out = m(images)
                outputs.append(torch.softmax(out, dim=1))
                torch.cuda.empty_cache()  # Clear unused memory

            avg_output = sum(outputs) / len(outputs)
            preds = torch.argmax(avg_output, dim=1).cpu().numpy()
            all_preds.extend([idx2label[i] for i in preds])
            all_ids.extend(img_ids)

    submission = pd.DataFrame({'image_id': all_ids, 'label': all_preds})
    submission.to_csv(SUBMISSION_CSV, index=False)
    print(f"Saved predictions to {SUBMISSION_CSV}")

# 🚀 Main
if __name__ == '__main__':
    model1 = train_model(CustomCNN(), 'CustomCNN')
    model2 = train_model(get_model('mobilenet'), 'MobileNetV2')
    model3 = train_model(get_model('resnet'), 'ResNet50')

    # Load best models
    model1.load_state_dict(torch.load("CustomCNN_best.pth"))
    model2.load_state_dict(torch.load("MobileNetV2_best.pth"))
    model3.load_state_dict(torch.load("ResNet50_best.pth"))

    # Predict and savefepoch
    predict_test([model1.to(device), model2.to(device), model3.to(device)])


Device: cuda


CustomCNN Epoch 1: 100%|██████████| 62/62 [00:44<00:00,  1.39it/s]


[CustomCNN] Epoch 1: Train Acc: 33.09%, Val Acc: 57.87%


CustomCNN Epoch 2: 100%|██████████| 62/62 [00:18<00:00,  3.30it/s]


[CustomCNN] Epoch 2: Train Acc: 64.76%, Val Acc: 71.78%


CustomCNN Epoch 3: 100%|██████████| 62/62 [00:18<00:00,  3.30it/s]


[CustomCNN] Epoch 3: Train Acc: 72.28%, Val Acc: 73.42%


CustomCNN Epoch 4: 100%|██████████| 62/62 [00:18<00:00,  3.28it/s]


[CustomCNN] Epoch 4: Train Acc: 73.45%, Val Acc: 75.66%


CustomCNN Epoch 5: 100%|██████████| 62/62 [00:18<00:00,  3.29it/s]


[CustomCNN] Epoch 5: Train Acc: 76.16%, Val Acc: 77.91%


CustomCNN Epoch 6: 100%|██████████| 62/62 [00:18<00:00,  3.30it/s]


[CustomCNN] Epoch 6: Train Acc: 75.86%, Val Acc: 77.71%


CustomCNN Epoch 7: 100%|██████████| 62/62 [00:18<00:00,  3.28it/s]


[CustomCNN] Epoch 7: Train Acc: 76.98%, Val Acc: 76.69%


CustomCNN Epoch 8: 100%|██████████| 62/62 [00:19<00:00,  3.26it/s]


[CustomCNN] Epoch 8: Train Acc: 76.73%, Val Acc: 77.71%


CustomCNN Epoch 9: 100%|██████████| 62/62 [00:18<00:00,  3.27it/s]


[CustomCNN] Epoch 9: Train Acc: 77.80%, Val Acc: 78.94%


CustomCNN Epoch 10: 100%|██████████| 62/62 [00:18<00:00,  3.26it/s]


[CustomCNN] Epoch 10: Train Acc: 77.90%, Val Acc: 79.96%


CustomCNN Epoch 11: 100%|██████████| 62/62 [00:19<00:00,  3.22it/s]


[CustomCNN] Epoch 11: Train Acc: 78.31%, Val Acc: 79.14%


CustomCNN Epoch 12: 100%|██████████| 62/62 [00:18<00:00,  3.30it/s]


[CustomCNN] Epoch 12: Train Acc: 79.85%, Val Acc: 79.35%


CustomCNN Epoch 13: 100%|██████████| 62/62 [00:19<00:00,  3.25it/s]


[CustomCNN] Epoch 13: Train Acc: 79.69%, Val Acc: 82.21%


CustomCNN Epoch 14: 100%|██████████| 62/62 [00:19<00:00,  3.25it/s]


[CustomCNN] Epoch 14: Train Acc: 79.95%, Val Acc: 82.21%


CustomCNN Epoch 15: 100%|██████████| 62/62 [00:19<00:00,  3.26it/s]


[CustomCNN] Epoch 15: Train Acc: 81.18%, Val Acc: 81.19%


CustomCNN Epoch 16: 100%|██████████| 62/62 [00:18<00:00,  3.27it/s]


[CustomCNN] Epoch 16: Train Acc: 80.72%, Val Acc: 82.62%


CustomCNN Epoch 17: 100%|██████████| 62/62 [00:18<00:00,  3.27it/s]


[CustomCNN] Epoch 17: Train Acc: 80.51%, Val Acc: 82.62%


CustomCNN Epoch 18: 100%|██████████| 62/62 [00:19<00:00,  3.17it/s]


[CustomCNN] Epoch 18: Train Acc: 82.46%, Val Acc: 82.82%


CustomCNN Epoch 19: 100%|██████████| 62/62 [00:18<00:00,  3.29it/s]


[CustomCNN] Epoch 19: Train Acc: 82.56%, Val Acc: 83.84%


CustomCNN Epoch 20: 100%|██████████| 62/62 [00:19<00:00,  3.26it/s]


[CustomCNN] Epoch 20: Train Acc: 82.61%, Val Acc: 81.39%


CustomCNN Epoch 21: 100%|██████████| 62/62 [00:19<00:00,  3.25it/s]


[CustomCNN] Epoch 21: Train Acc: 82.20%, Val Acc: 84.05%


CustomCNN Epoch 22: 100%|██████████| 62/62 [00:19<00:00,  3.19it/s]


[CustomCNN] Epoch 22: Train Acc: 83.12%, Val Acc: 82.82%


CustomCNN Epoch 23: 100%|██████████| 62/62 [00:19<00:00,  3.18it/s]


[CustomCNN] Epoch 23: Train Acc: 83.12%, Val Acc: 84.66%


CustomCNN Epoch 24: 100%|██████████| 62/62 [00:19<00:00,  3.20it/s]


[CustomCNN] Epoch 24: Train Acc: 83.07%, Val Acc: 82.62%


CustomCNN Epoch 25: 100%|██████████| 62/62 [00:19<00:00,  3.24it/s]


[CustomCNN] Epoch 25: Train Acc: 84.09%, Val Acc: 83.84%


CustomCNN Epoch 26: 100%|██████████| 62/62 [00:19<00:00,  3.26it/s]


[CustomCNN] Epoch 26: Train Acc: 84.91%, Val Acc: 84.25%


CustomCNN Epoch 27: 100%|██████████| 62/62 [00:19<00:00,  3.25it/s]


[CustomCNN] Epoch 27: Train Acc: 83.68%, Val Acc: 84.66%


CustomCNN Epoch 28: 100%|██████████| 62/62 [00:19<00:00,  3.24it/s]


[CustomCNN] Epoch 28: Train Acc: 85.37%, Val Acc: 84.25%
Early stopping CustomCNN
Best Val Acc for CustomCNN: 84.66%


MobileNetV2 Epoch 1: 100%|██████████| 62/62 [00:22<00:00,  2.79it/s]


[MobileNetV2] Epoch 1: Train Acc: 86.70%, Val Acc: 97.96%


MobileNetV2 Epoch 2: 100%|██████████| 62/62 [00:22<00:00,  2.79it/s]


[MobileNetV2] Epoch 2: Train Acc: 95.81%, Val Acc: 97.75%


MobileNetV2 Epoch 3: 100%|██████████| 62/62 [00:21<00:00,  2.83it/s]


[MobileNetV2] Epoch 3: Train Acc: 96.11%, Val Acc: 97.34%


MobileNetV2 Epoch 4: 100%|██████████| 62/62 [00:22<00:00,  2.80it/s]


[MobileNetV2] Epoch 4: Train Acc: 97.54%, Val Acc: 99.18%


MobileNetV2 Epoch 5: 100%|██████████| 62/62 [00:22<00:00,  2.76it/s]


[MobileNetV2] Epoch 5: Train Acc: 97.75%, Val Acc: 98.57%


MobileNetV2 Epoch 6: 100%|██████████| 62/62 [00:22<00:00,  2.79it/s]


[MobileNetV2] Epoch 6: Train Acc: 97.39%, Val Acc: 99.80%


MobileNetV2 Epoch 7: 100%|██████████| 62/62 [00:21<00:00,  2.83it/s]


[MobileNetV2] Epoch 7: Train Acc: 98.26%, Val Acc: 99.39%


MobileNetV2 Epoch 8: 100%|██████████| 62/62 [00:21<00:00,  2.84it/s]


[MobileNetV2] Epoch 8: Train Acc: 98.82%, Val Acc: 98.57%


MobileNetV2 Epoch 9: 100%|██████████| 62/62 [00:21<00:00,  2.84it/s]


[MobileNetV2] Epoch 9: Train Acc: 98.77%, Val Acc: 99.18%


MobileNetV2 Epoch 10: 100%|██████████| 62/62 [00:21<00:00,  2.83it/s]


[MobileNetV2] Epoch 10: Train Acc: 99.03%, Val Acc: 98.98%


MobileNetV2 Epoch 11: 100%|██████████| 62/62 [00:22<00:00,  2.80it/s]


[MobileNetV2] Epoch 11: Train Acc: 98.41%, Val Acc: 99.80%
Early stopping MobileNetV2
Best Val Acc for MobileNetV2: 99.80%


ResNet50 Epoch 1: 100%|██████████| 62/62 [00:32<00:00,  1.92it/s]


[ResNet50] Epoch 1: Train Acc: 90.33%, Val Acc: 98.16%


ResNet50 Epoch 2: 100%|██████████| 62/62 [00:32<00:00,  1.93it/s]


[ResNet50] Epoch 2: Train Acc: 96.68%, Val Acc: 97.75%


ResNet50 Epoch 3: 100%|██████████| 62/62 [00:32<00:00,  1.91it/s]


[ResNet50] Epoch 3: Train Acc: 95.96%, Val Acc: 98.57%


ResNet50 Epoch 4: 100%|██████████| 62/62 [00:32<00:00,  1.88it/s]


[ResNet50] Epoch 4: Train Acc: 97.08%, Val Acc: 98.16%


ResNet50 Epoch 5: 100%|██████████| 62/62 [00:32<00:00,  1.89it/s]


[ResNet50] Epoch 5: Train Acc: 98.21%, Val Acc: 99.39%


ResNet50 Epoch 6: 100%|██████████| 62/62 [00:32<00:00,  1.89it/s]


[ResNet50] Epoch 6: Train Acc: 98.11%, Val Acc: 98.98%


ResNet50 Epoch 7: 100%|██████████| 62/62 [00:32<00:00,  1.92it/s]


[ResNet50] Epoch 7: Train Acc: 97.03%, Val Acc: 99.59%


ResNet50 Epoch 8: 100%|██████████| 62/62 [00:32<00:00,  1.93it/s]


[ResNet50] Epoch 8: Train Acc: 98.87%, Val Acc: 98.98%


ResNet50 Epoch 9: 100%|██████████| 62/62 [00:32<00:00,  1.93it/s]


[ResNet50] Epoch 9: Train Acc: 98.01%, Val Acc: 98.98%


ResNet50 Epoch 10: 100%|██████████| 62/62 [00:32<00:00,  1.93it/s]


[ResNet50] Epoch 10: Train Acc: 97.80%, Val Acc: 99.59%


ResNet50 Epoch 11: 100%|██████████| 62/62 [00:32<00:00,  1.93it/s]


[ResNet50] Epoch 11: Train Acc: 97.85%, Val Acc: 98.98%


ResNet50 Epoch 12: 100%|██████████| 62/62 [00:31<00:00,  1.94it/s]


[ResNet50] Epoch 12: Train Acc: 97.85%, Val Acc: 99.59%
Early stopping ResNet50
Best Val Acc for ResNet50: 99.59%


  model1.load_state_dict(torch.load("CustomCNN_best.pth"))
  model2.load_state_dict(torch.load("MobileNetV2_best.pth"))
  model3.load_state_dict(torch.load("ResNet50_best.pth"))
Predicting Test Set: 100%|██████████| 11/11 [00:06<00:00,  1.72it/s]

Saved predictions to D:\Projects\iit_ropar_project\sample_submission.csv





In [23]:
import os
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
from tqdm import tqdm

# ✅ Setup
device = torch.device("cpu")
print("Device:", device)

# ✅ Paths
TEST_DIR = 'D:/Projects/iit_ropar_project/test'
TEST_CSV = 'D:/Projects/iit_ropar_project/test_ids.csv'
SUBMISSION_CSV = 'D:/Projects/iit_ropar_project/sample_submission.csv'
IMG_SIZE = 224
BATCH_SIZE = 16  # Reduced for safety on GPU

LABELS = ['Alluvial soil', 'Black Soil', 'Clay soil', 'Red soil']
label2idx = {label: i for i, label in enumerate(LABELS)}
idx2label = {i: label for label, i in label2idx.items()}

# ✅ Dataset Class
class SoilDataset(Dataset):
    def __init__(self, df, img_dir, labels=False, transform=None):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform
        self.has_labels = labels

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_id = self.df.iloc[idx]['image_id']
        img_path = os.path.join(self.img_dir, img_id)
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, img_id

# ✅ Transforms
val_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# ✅ Models
class CustomCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(), nn.AdaptiveAvgPool2d((1, 1))
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128, 64), nn.ReLU(),
            nn.Linear(64, len(LABELS))
        )

    def forward(self, x):
        x = self.conv(x)
        return self.fc(x)

def get_model(base='mobilenet'):
    if base == 'mobilenet':
        model = models.mobilenet_v2(pretrained=False)
        model.classifier[1] = nn.Linear(model.last_channel, len(LABELS))
    elif base == 'resnet':
        model = models.resnet50(pretrained=False)
        model.fc = nn.Linear(model.fc.in_features, len(LABELS))
    return model

# ✅ Predict Function
def predict_test(models):
    for model in models:
        model.eval()
        model.to(device)  # Send to CPU (already set)

    test_df = pd.read_csv(TEST_CSV)
    test_dataset = SoilDataset(test_df, TEST_DIR, labels=False, transform=val_transform)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

    all_preds, all_ids = [], []

    with torch.no_grad():
        for images, img_ids in tqdm(test_loader, desc="Predicting Test Set"):
            images = images.to(device)
            outputs = []
            for m in models:
                out = m(images)  # All models are already on CPU
                outputs.append(torch.softmax(out, dim=1))
            avg_output = sum(outputs) / len(outputs)
            preds = torch.argmax(avg_output, dim=1).numpy()
            all_preds.extend([idx2label[i] for i in preds])
            all_ids.extend(img_ids)

    submission = pd.DataFrame({'image_id': all_ids, 'label': all_preds})
    submission.to_csv(SUBMISSION_CSV, index=False)
    print(f"✅ Saved predictions to {SUBMISSION_CSV}")

# ✅ Run Prediction
if __name__ == '__main__':
    # Load models and weights
    model1 = CustomCNN()
    model1.load_state_dict(torch.load("CustomCNN_best.pth", map_location=device))

    model2 = get_model('mobilenet')
    model2.load_state_dict(torch.load("MobileNetV2_best.pth", map_location=device))

    model3 = get_model('resnet')
    model3.load_state_dict(torch.load("ResNet50_best.pth", map_location=device))

    # Run prediction
    predict_test([model1, model2, model3])


Device: cpu


  model1.load_state_dict(torch.load("CustomCNN_best.pth", map_location=device))
  model2.load_state_dict(torch.load("MobileNetV2_best.pth", map_location=device))
  model3.load_state_dict(torch.load("ResNet50_best.pth", map_location=device))
Predicting Test Set: 100%|██████████| 22/22 [00:20<00:00,  1.09it/s]

✅ Saved predictions to D:/Projects/iit_ropar_project/sample_submission.csv



