In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the competition input folder.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input/soil-classification-part-2/soil_competition-2025')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import os
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import OneClassSVM
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torchvision.models as models

from sklearn.ensemble import IsolationForest

# Soil-type name -> integer class index (indices follow the order listed here).
label_map = {
    soil_name: class_index
    for class_index, soil_name in enumerate(
        ("Alluvial soil", "Black Soil", "Clay soil", "Red soil")
    )
}

class SoilDataset(Dataset):
    """Map-style dataset that yields ``(image, label)`` pairs for soil images.

    Column 0 of ``dataframe`` is read as the image file name (joined onto
    ``root_dir``) and column 1 as the soil-type name, which is translated to
    an integer through the module-level ``label_map``.

    Args:
        dataframe: Table with one row per image (file name, soil-type name).
        root_dir: Directory that contains the image files.
        transform: Optional callable applied to the RGB image before it is
            returned.
    """

    def __init__(self, dataframe, root_dir, transform=None):
        self.data = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the underlying table."""
        return len(self.data)

    def __getitem__(self, index):
        """Load the ``index``-th image as RGB and return it with its class index.

        Raises:
            KeyError: If the row's soil-type name is not a key of ``label_map``.
        """
        img_path = os.path.join(self.root_dir, self.data.iloc[index, 0])
        # Open inside a context manager so the file handle is released right
        # away instead of lingering until garbage collection (this dataset is
        # typically read from several DataLoader workers at once). convert()
        # returns an independent in-memory image, so it stays valid afterwards.
        with Image.open(img_path) as source:
            image = source.convert("RGB")
        label = label_map[self.data.iloc[index, 1]]
        if self.transform is not None:
            image = self.transform(image)
        return image, label

def resize_and_centercrop224(img, size=224):
    """Scale ``img`` to cover a ``size`` x ``size`` square, then centre-crop it.

    The aspect ratio is preserved: the image is resized with LANCZOS
    resampling so that its shorter side equals ``size``, and the excess on the
    longer side is trimmed equally from both ends.

    Args:
        img: PIL image (any object exposing ``size``, ``resize`` and ``crop``).
        size: Edge length in pixels of the square output. Defaults to 224,
            which reproduces the behaviour the function name refers to.

    Returns:
        The image produced by ``img.resize(...).crop(...)``.

    Raises:
        ValueError: If ``size`` is not positive.
    """
    if size <= 0:
        raise ValueError(f"size must be a positive integer, got {size!r}")

    original_width, original_height = img.size
    target_width = target_height = size

    # Compare aspect ratios to decide which side becomes exactly `size`.
    original_aspect = original_width / original_height
    target_aspect = target_width / target_height  # square target -> 1.0

    if original_aspect > target_aspect:
        # Wider than the target: pin the height, let the width overflow.
        new_height = target_height
        new_width = int(new_height * original_aspect)
    else:
        # Taller than the target (or same aspect): pin the width instead.
        new_width = target_width
        new_height = int(new_width / original_aspect)

    img_resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

    # Centre the crop window inside the resized image.
    left = (new_width - target_width) / 2
    top = (new_height - target_height) / 2
    right = (new_width + target_width) / 2
    bottom = (new_height + target_height) / 2

    return img_resized.crop((left, top, right, bottom))


def preprocess_images(input_dir, output_dir, size=224):
    """Write a square, centre-cropped RGB copy of every file in ``input_dir``.

    Each entry of ``input_dir`` is opened as an image, converted to RGB,
    resized/cropped with ``resize_and_centercrop224`` and saved under the same
    file name in ``output_dir`` (created if missing).

    Args:
        input_dir: Directory containing the source images.
        output_dir: Directory that receives the processed images.
        size: Edge length in pixels of the square output. Previously this
            argument was accepted but ignored; it is now honoured.
    """
    os.makedirs(output_dir, exist_ok=True)
    for filename in os.listdir(input_dir):
        # Close the source file as soon as the pixels are in memory; convert()
        # hands back an independent image that outlives the `with` block.
        with Image.open(os.path.join(input_dir, filename)) as source:
            rgb = source.convert("RGB")
        resized = resize_and_centercrop224(rgb, size=size)
        resized.save(os.path.join(output_dir, filename))

def add_spatial_features(features):
    """Append normalised x/y coordinate channels to a feature map.

    Args:
        features: Tensor of shape [batch, channels, height, width]
                  (e.g., from ResNet's layer4 output: [64, 512, 7, 7])
    Returns:
        Tensor with spatial features added: [batch, channels+2, height, width].
        Channel ``channels`` holds the x coordinate (0 at the left column,
        1 at the right) and channel ``channels + 1`` holds the y coordinate
        (0 at the top row, 1 at the bottom).
    """
    batch, _, height, width = features.shape

    # Coordinates on a 0..1 scale, created directly on the input's device.
    xs = torch.linspace(0, 1, width, device=features.device)
    ys = torch.linspace(0, 1, height, device=features.device)

    # Broadcast each axis to a full [batch, 1, height, width] plane. The
    # previous repeat/permute construction produced a y plane of shape
    # [batch, 1, 1, height*width], which made torch.cat fail whenever
    # height > 1.
    x_coord = xs.view(1, 1, 1, width).expand(batch, 1, height, width)
    y_coord = ys.view(1, 1, height, 1).expand(batch, 1, height, width)

    # Concatenate with original features along the channel dimension
    return torch.cat([features, x_coord, y_coord], dim=1)

# Preprocess images
# Pipeline: embed every training (soil) image with a headless ResNet-18, fit a
# One-Class SVM on those embeddings, then label each test image "1" (inlier)
# or "0" (outlier) and write the result to submission.csv.
preprocess_images("/kaggle/input/soil-classification/soil_classification-2025/train", "train_resized")

# Load dataframe
df = pd.read_csv("/kaggle/input/soil-classification/soil_classification-2025/train_labels.csv")

# Use the GPU when one is available, otherwise fall back to the CPU so the
# cell still runs in CPU-only sessions instead of failing on .to("cuda").
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize ResNet without final layer
# NOTE(review): `pretrained=True` is the legacy torchvision spelling; newer
# releases prefer the `weights=` argument — switch once the target version is
# confirmed.
model = models.resnet18(pretrained=True)
model = nn.Sequential(*list(model.children())[:-1])  # Remove last FC layer
model = nn.DataParallel(model).to(device)
model.eval()

# Use all training data for feature extraction
full_dataset = SoilDataset(
    dataframe=df,
    root_dir="train_resized",
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
)

train_loader = DataLoader(full_dataset, batch_size=64, shuffle=False, num_workers=8)

# Extract features
soil_features = []
with torch.no_grad():
    for images, _ in tqdm(train_loader):
        images = images.to(device)
        features = model(images).flatten(1)  # [batch_size, 512]
        soil_features.append(features.cpu().numpy())

soil_features = np.concatenate(soil_features)

# Train One-Class SVM
svm = OneClassSVM(kernel='rbf', gamma='auto', nu=0.05)  # Tune nu parameter
svm.fit(soil_features)

# Inference on Test Set
# Resize/crop test images with the same routine that produced the training
# images (resize_and_centercrop224) rather than transforms.Resize/CenterCrop,
# so both sets go through the same resampling filter and geometry. The
# training images additionally went through a save/reload round-trip in
# preprocess_images; that difference is left as-is.
test_transform = transforms.Compose([
    resize_and_centercrop224,
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

submission = []
test_dir = "/kaggle/input/soil-classification-part-2/soil_competition-2025/test"

for filename in tqdm(sorted(os.listdir(test_dir))):
    # Load and preprocess; the context manager releases the file handle as
    # soon as the RGB copy is in memory.
    img_path = os.path.join(test_dir, filename)
    with Image.open(img_path) as raw_img:
        img = raw_img.convert("RGB")
    img_tensor = test_transform(img).unsqueeze(0).to(device)

    # Extract features
    with torch.no_grad():
        features = model(img_tensor).flatten(1).cpu().numpy()

    # Predict: OneClassSVM.predict returns +1 for inliers and -1 for outliers
    pred = svm.predict(features)[0]
    label = "1" if pred == 1 else "0"
    submission.append((filename, label))

# Save results
pd.DataFrame(submission, columns=["image_id", "label"]).to_csv("submission.csv", index=False)
print("Submission file generated!")

100%|██████████| 20/20 [00:02<00:00,  8.09it/s]
100%|██████████| 967/967 [00:11<00:00, 87.26it/s]

Submission file generated!



