# In [None]:
# prediction.ipynb

import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from PIL import Image
import torch
from torchvision import transforms
import joblib

# Directory that holds the competition's test images
TEST_DIR = '/kaggle/input/soil-classification-part-2/soil_competition-2025/test'

# Use the GPU when torch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Fetch the 'dinov2_vits14' entry point from torch.hub, move it to the chosen
# device and switch it to evaluation mode
model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')
model = model.to(device)
model.eval()

# Preprocessing applied to every image: fixed 224x224 resize, conversion to a
# tensor, then per-channel normalization
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

def extract_dino_features(image_path):
    """Return the model's feature output for a single image as a NumPy array.

    The image is opened, converted to RGB, passed through the module-level
    ``transform``, and fed to ``model`` as a batch of one with gradient
    tracking disabled.

    Args:
        image_path: Path of the image file to embed.

    Returns:
        The model output moved to the CPU, with all size-1 axes (including
        the batch axis) squeezed out, as a NumPy array.

    Raises:
        Any error raised while opening, decoding or transforming the image
        propagates to the caller; nothing is caught here.
    """
    # Image.open is lazy and keeps the file open; the context manager closes
    # the handle deterministically. convert() returns an independent, fully
    # loaded image, so it stays usable after the file is closed.
    with Image.open(image_path) as img:
        rgb_img = img.convert("RGB")

    # unsqueeze(0) adds the batch axis the model call is given
    img_tensor = transform(rgb_img).unsqueeze(0).to(device)
    with torch.no_grad():
        feat = model(img_tensor)
    return feat.squeeze().cpu().numpy()

# Load the saved classifier and feature scaler.
# NOTE(review): joblib.load unpickles arbitrary Python objects; only load
# model files that come from a trusted source.
svm = joblib.load('../src/models/svm_dinov2_vits14.pkl')
scaler = joblib.load('../src/models/scaler_dinov2_vits14.pkl')

# Gather test image paths. os.listdir returns entries in arbitrary order, so
# sort them to keep the feature rows (and the resulting submission) in a
# reproducible order between runs.
test_fnames = sorted(os.listdir(TEST_DIR))
test_paths = [os.path.join(TEST_DIR, fname) for fname in test_fnames]

X_test = []        # one feature vector per successfully processed image
valid_fnames = []  # file names aligned index-for-index with X_test

print("Extracting features and predicting on test images...")
for path in tqdm(test_paths):
    try:
        feat = extract_dino_features(path)
    except Exception as e:
        # Deliberate best-effort: an unreadable entry is reported and skipped.
        # NOTE(review): skipped files get no row in the submission.
        print(f"Skipping {path}: {e}")
    else:
        X_test.append(feat)
        valid_fnames.append(os.path.basename(path))

X_test = np.array(X_test)

# With no extracted features the array is 1-D and empty, and the scaler would
# fail with an opaque shape error; stop here with an explicit message instead.
if X_test.size == 0:
    raise ValueError(
        "No test features were extracted; check the test directory and the "
        "'Skipping ...' messages above."
    )

X_test_scaled = scaler.transform(X_test)

preds = svm.predict(X_test_scaled)  # +1 = soil, -1 = non-soil
# Map the classifier's +1 / other outputs onto the 1 / 0 submission labels
labels = [1 if p == 1 else 0 for p in preds]

# Save predictions: one row per image that was successfully processed
output_df = pd.DataFrame({
    'image_id': valid_fnames,
    'label': labels,
})

output_df.to_csv('submission_dinov2_vits14.csv', index=False)
print("Predictions saved to submission_dinov2_vits14.csv")

