In [None]:
# Soil or Not-Soil Detection using Soil-Type Classifier
# import libraries
import torch
from torchvision import transforms, models
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from PIL import Image
import os
from tqdm import tqdm
import numpy as np


In [None]:
# Configurations for paths and model settings
# Root folder holding the competition data. The default is the location used
# when the notebook was written; set the SOIL_DATA_ROOT environment variable
# to run on another machine without editing this cell.
DATA_ROOT = os.environ.get(
    "SOIL_DATA_ROOT",
    "C:\\Users\\debdi\\Downloads\\soil_competition-2025",
)
TEST_DIR = os.path.join(DATA_ROOT, "test")                # folder containing the test images
TEST_IDS_CSV = os.path.join(DATA_ROOT, "test_ids.csv")    # CSV listing the test image ids
MODEL_PATH = os.path.join(DATA_ROOT, "best_model.pth")    # saved model weights
BATCH_SIZE = 32
NUM_CLASSES = 4  # width of the classifier head; equals len(CLASSES)
# Prefer the GPU when one is visible to PyTorch, otherwise run on CPU
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
CLASSES = ['Alluvial soil', 'Black Soil', 'Clay soil', 'Red soil']

# Preprocessing pipeline applied to each test image before it reaches the model
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]
_preprocess_steps = [
    transforms.Resize((224, 224)),                      # every image becomes 224x224
    transforms.ToTensor(),                              # PIL image -> PyTorch tensor
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),  # per-channel normalization (pretrained stats)
]
test_transform = transforms.Compose(_preprocess_steps)

# Custom dataset class for loading test images
class TestSoilDataset(Dataset):
    """Dataset of test images whose file names are listed in a CSV file.

    Each item is a pair ``(image, img_name)``: the RGB image (after the
    optional transform) and the id it was looked up by.
    """

    def __init__(self, img_dir, test_ids_csv, transform=None):
        """Read the id list and remember where the images live.

        Args:
            img_dir: Directory that contains the image files.
            test_ids_csv: Path to a CSV file with an ``image_id`` column;
                each value is joined onto ``img_dir`` to locate one image.
            transform: Optional callable applied to the loaded RGB image.

        Raises:
            KeyError: If the CSV has no ``image_id`` column.
        """
        self.img_dir = img_dir
        self.transform = transform
        self.test_ids_df = pd.read_csv(test_ids_csv)
        self.image_ids = self.test_ids_df['image_id'].tolist()

    def __len__(self):
        """Return the number of image ids read from the CSV."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Load the image at position ``idx``.

        Returns:
            Tuple ``(image, img_name)`` where ``image`` is the RGB image,
            passed through ``self.transform`` when one was given.
        """
        img_name = self.image_ids[idx]
        img_path = os.path.join(self.img_dir, img_name)
        # Image.open keeps the underlying file open; convert() returns an
        # independent in-memory image, so the file can be closed right away
        # instead of waiting for garbage collection.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, img_name

# Build the test dataset and an unshuffled batch loader over it
test_dataset = TestSoilDataset(
    img_dir=TEST_DIR,
    test_ids_csv=TEST_IDS_CSV,
    transform=test_transform,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,  # keep predictions in the same order as the id list
)

In [None]:
# The model was trained to classify 4 types of soil (Red, Black, Alluvial, Clay) for Challenge 1
# For Challenge 2, we repurpose this model to detect if an image is a soil image or not
# Build a ResNet-18 without pretrained weights; `weights=None` is the current
# spelling of the deprecated `pretrained=False` argument.
model = models.resnet18(weights=None)
# Replace the final layer so the head has NUM_CLASSES outputs, matching the checkpoint
model.fc = torch.nn.Linear(model.fc.in_features, NUM_CLASSES)
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source; consider passing weights_only=True if the installed PyTorch
# version supports it.
state_dict = torch.load(MODEL_PATH, map_location=DEVICE)
model.load_state_dict(state_dict)
model = model.to(DEVICE)
model.eval()  # inference mode: disables dropout and uses running batch-norm stats

# Predict Probabilities and Convert to Binary Labels
threshold = 0.90  # High confidence threshold for soil
predictions = []  # accumulates (image_id, label) pairs in loader order

with torch.no_grad():
    for images, image_ids in tqdm(test_loader, desc="Predicting"):
        logits = model(images.to(DEVICE))
        probs = torch.softmax(logits, dim=1)
        # Confidence of the most likely soil class for each image in the batch
        max_probs = probs.max(dim=1).values
        # 1 = soil, 0 = not soil
        labels = (max_probs > threshold).long().cpu().numpy()
        predictions.extend(zip(image_ids, labels))


Predicting: 100%|██████████| 31/31 [00:37<00:00,  1.21s/it]


In [15]:
# Save Submission
submission_df = pd.DataFrame(predictions, columns=["image_id", "label"])
# Write the file next to the test-ids CSV rather than repeating the absolute
# data folder here, so the output location follows the configured input paths.
submission_path = os.path.join(os.path.dirname(TEST_IDS_CSV), "submission.csv")
submission_df.to_csv(submission_path, index=False)
print(f"Inference complete. Submission saved to {submission_path}")

Inference complete. Submission saved to C:\Users\debdi\Downloads\soil_competition-2025\submission.csv
