In [None]:
'''
inference.ipynb

Title: Soil Type Classification - Inference Notebook
Team: Team Cygnus
Authors: Vaibhav Sharma, Shreya Khantal, Prasanna Saxena
Model: ResNet50
Best Model: 'best_resnet50.pth'

'''
# --- Step 1: Import Libraries ---
import os
import torch
import numpy as np
import pandas as pd
from PIL import Image
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

# --- Step 2: Setup ---
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

IMAGE_SIZE = 224   # images are resized to IMAGE_SIZE x IMAGE_SIZE
BATCH_SIZE = 32    # number of images per inference batch
NUM_CLASSES = 4    # size of the classifier output layer

# Class mapping: predicted class index -> soil type name.
# NOTE(review): the order presumably has to match the label encoding used
# during training -- confirm against the training notebook.
class_names = ['Alluvial soil', 'Black Soil', 'Clay soil', 'Red soil']
inv_label_mapping = dict(enumerate(class_names))

# --- Step 3: Define Dataset Class ---
class SoilDataset(Dataset):
    """Unlabelled image dataset driven by a DataFrame of file names.

    Args:
        df: DataFrame with an ``image_id`` column holding image file names.
        img_dir: Directory that the file names are resolved against.
        transform: Optional callable applied to each loaded RGB PIL image.
    """

    def __init__(self, df, img_dir, transform=None):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at positional row ``idx``.

        Returns:
            The transformed image when a transform was given, otherwise the
            RGB ``PIL.Image`` itself.
        """
        # Select the column first so pandas does not build a whole row Series.
        img_name = self.df['image_id'].iloc[idx]
        img_path = os.path.join(self.img_dir, img_name)
        # Image.open is lazy and keeps the file handle open; the context
        # manager releases it deterministically, even if convert() raises.
        # convert() returns an independent in-memory copy, so it outlives
        # the closed file.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        # Explicit None check: a callable transform must not be skipped just
        # because it happens to evaluate as falsy.
        if self.transform is not None:
            image = self.transform(image)
        return image

# --- Step 4a: Normalization statistics ---
# Per-channel mean / standard deviation handed to transforms.Normalize.
# NOTE (from the original author): replace with the actual values if known.
mean = [0.51927466, 0.41479487, 0.32805257]
std = [0.27258596, 0.25516909, 0.22726975]

# --- Step 4b: Deterministic transform used for inference ---
# Resize to a fixed square, convert to a tensor, then normalize.
_test_steps = [
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
]
test_transform = transforms.Compose(_test_steps)

# --- Step 4c: Preprocessing through resizing & random augmentation ---
# Same resize / tensor / normalize steps, with random flips, a rotation and
# a small translate+scale affine applied in between.
_augment_steps = [
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.3),
    transforms.RandomRotation(20),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
]
augment_transform = transforms.Compose(_augment_steps)


# --- Step 5: Load Test Data ---
DATA_DIR = "/kaggle/input/soil-classification/soil_classification-2025"
TEST_IMG_DIR = os.path.join(DATA_DIR, "test")
test_df = pd.read_csv(os.path.join(DATA_DIR, "test_ids.csv"))

# Ensure file extension.
# Decide per row instead of inspecting only the first id: this is safe on an
# empty CSV (no iloc[0] IndexError), never appends '.jpg' onto an id that
# already carries an extension, and still fixes extension-less ids when the
# first row happens to have one.
test_df['image_id'] = test_df['image_id'].map(
    lambda image_id: image_id if os.path.splitext(image_id)[1] else image_id + '.jpg'
)

# shuffle=False keeps predictions in the same order as the rows of test_df,
# which the submission step relies on.
test_dataset = SoilDataset(test_df, TEST_IMG_DIR, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# --- Step 6: Load Trained Model ---
def create_model(num_classes):
    """Build a ResNet50 whose final layer is Dropout(0.3) + Linear.

    The backbone is created with ``weights=None``, i.e. without pretrained
    weights; the caller is expected to load a state dict afterwards.

    Args:
        num_classes: Number of output units of the new linear layer.

    Returns:
        The ResNet50 module with its ``fc`` attribute replaced.
    """
    net = models.resnet50(weights=None)
    # Swap the stock classifier for a dropout-regularised linear head that
    # takes the same number of input features.
    head = torch.nn.Sequential(
        torch.nn.Dropout(0.3),
        torch.nn.Linear(net.fc.in_features, num_classes),
    )
    net.fc = head
    return net

model = create_model(NUM_CLASSES)
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source (newer PyTorch versions offer weights_only=True).
checkpoint_weights = torch.load('best_resnet50.pth', map_location=device)
model.load_state_dict(checkpoint_weights)
del checkpoint_weights  # weights now live in the model; drop the extra copy
# Move to the target device and switch to evaluation mode; both calls
# return the module itself.
model = model.to(device).eval()

# --- Step 7: Inference ---
def predict(model, dataloader):
    """Run the model over every batch and collect predicted class indices.

    Args:
        model: Callable module mapping an image batch to per-class scores.
        dataloader: Iterable yielding image batches (tensors only, no labels).

    Returns:
        A flat list with one predicted class index per image, in the order
        the dataloader produced them.

    Note:
        Batches are moved to the module-level ``device``. The function does
        not change the model's train/eval mode.
    """
    per_batch = []
    # Gradients are not needed for inference.
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Predicting"):
            logits = model(batch.to(device))
            class_ids = torch.softmax(logits, dim=1).argmax(dim=1)
            per_batch.append(class_ids.cpu().numpy())
    # Flatten the per-batch arrays into a single list of scalar indices.
    return [class_id for chunk in per_batch for class_id in chunk]

# Predict a class index per test image, then translate indices to names.
test_preds = predict(model, test_loader)
test_labels = list(map(inv_label_mapping.__getitem__, test_preds))

# --- Step 8: Create Submission ---
# One row per test image: its id next to the predicted soil type name.
submission_columns = {
    'image_id': test_df['image_id'],
    'soil_type': test_labels,
}
submission_df = pd.DataFrame(submission_columns)
submission_df.to_csv("submission.csv", index=False)
print("Submission saved to submission.csv")
# Preview of the first rows (rendered when this is the last cell expression).
submission_df.head()
