In [21]:
import os
import cv2
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.svm import OneClassSVM
from sklearn.metrics import classification_report

from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn


In [22]:
train_image_dir = "/kaggle/input/binary-classification/soil_competition-2025/train"      # All soil images
test_image_dir = "/kaggle/input/binary-classification/soil_competition-2025/test"        # Soil and non-soil images
test_id_file = "/kaggle/input/binary-classification/soil_competition-2025/test_ids.csv" # Or .txt or other, update if needed


In [23]:
import pandas as pd

# Create dataframe for training: All images are soil => label = 1
train_files = [f for f in os.listdir(train_image_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
train_df = pd.DataFrame({'image_id': train_files})
train_df['label'] = 1

# Remove anything that's not a real image
test_files = [f for f in os.listdir(test_image_dir)
              if f.lower().endswith(('.png', '.jpg', '.jpeg')) and f != 'image_id']
test_df = pd.DataFrame({'image_id':test_files})

In [24]:
test_df

Unnamed: 0,image_id
0,465084323936570da664f0ca8dc90326.jpg
1,1aa0b12029d35e778dba5bff1255c638.jpg
2,6df2c3dcd4fb59298c7a73467ea72eeb.jpg
3,107f25ebd87f581ea57c630a2dcdf50c.jpg
4,dc35d58782615e4f9582c6b32c8b956e.jpg
...,...
962,ef98accfe0ea56499544211d9c96056b.jpg
963,31475ede49d15c279ef04d048c6f059c.jpg
964,b954b5ae3f475d399bdec4b036ad0628.jpg
965,f65a998dafe653e19762b202c0ee5815.jpg


In [25]:
import torch
from torch.utils.data import Dataset, DataLoader
import cv2
from torchvision import transforms

class SoilDataset(Dataset):
    def __init__(self, dataframe, image_dir, transform=None, include_label=True):
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.transform = transform
        self.include_label = include_label

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        img_path = os.path.join(self.image_dir, row.image_id)

        # Safely load image
        if not os.path.exists(img_path):
            raise FileNotFoundError(f"❌ Image not found: {img_path}")

        image = cv2.imread(img_path)
        if image is None:
            raise ValueError(f"❌ Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform:
            image = self.transform(image)

        if self.include_label:
            label = row.label
            return image, label
        else:
            return image, row.image_id


In [26]:
from sklearn.model_selection import train_test_split

image_size = 224

transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)

train_dataset = SoilDataset(train_df, train_image_dir, transform=transform)
val_dataset = SoilDataset(val_df, train_image_dir, transform=transform)
test_dataset = SoilDataset(test_df, test_image_dir, transform=transform, include_label=False)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [27]:
from torchvision import models
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pretrained ResNet18, remove final layer
resnet = models.resnet18(pretrained=True)
resnet.fc = nn.Identity()
resnet = resnet.to(device)
resnet.eval()




ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [28]:
import numpy as np
from sklearn.svm import OneClassSVM

# Feature extraction function
def extract_features(loader):
    features_list = []
    labels_list = []
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            features = resnet(images).cpu().numpy()
            features_list.append(features)
            labels_list.append(labels)
    return np.vstack(features_list), np.hstack(labels_list)

train_features, _ = extract_features(train_loader)

# Train One-Class SVM: soil = normal, everything else = outlier
svm_model = OneClassSVM(kernel='rbf', nu=0.1, gamma='scale')
svm_model.fit(train_features)
print("✅ One-Class SVM trained.")


✅ One-Class SVM trained.


In [29]:
test_predictions = []

with torch.no_grad():
    for images, image_ids in DataLoader(test_dataset, batch_size=32, shuffle=False):
        images = images.to(device)
        features = resnet(images).cpu().numpy()
        preds = svm_model.predict(features)  # 1 = in-class (Soil), -1 = outlier (Non-Soil)

        for img_id, pred in zip(image_ids, preds):
            label = '1' if pred == 1 else '0'
            test_predictions.append((img_id, label))

# Save predictions
submission = pd.DataFrame(test_predictions, columns=['image_id', 'label'])
submission.to_csv("submission.csv", index=False)
print("✅ Predictions saved to submission.csv")


✅ Predictions saved to submission.csv
