In [None]:
# training.ipynb

import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
from PIL import Image
import torch
from torchvision import transforms
import joblib

# Dataset locations on Kaggle (edit these when running elsewhere)
DATA_ROOT = '/kaggle/input/soil-classification-part-2/soil_competition-2025'
TRAIN_DIR = f'{DATA_ROOT}/train'
LABEL_CSV = f'{DATA_ROOT}/train_labels.csv'

# Run on the GPU when torch reports CUDA as available, otherwise on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the 'dinov2_vits14' entry from the DINOv2 torch.hub repo, move it to
# the chosen device and switch it to evaluation mode
model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')
model = model.to(device).eval()

# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalize each channel with the mean/std below
CHANNEL_MEAN = [0.485, 0.456, 0.406]
CHANNEL_STD = [0.229, 0.224, 0.225]
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=CHANNEL_MEAN, std=CHANNEL_STD),
])

def extract_dino_features(image_path):
    """Return the model's feature vector for a single image.

    The image is opened with PIL, converted to RGB, run through the
    module-level ``transform``, and fed to ``model`` on ``device`` as a
    batch of one with gradient tracking disabled.

    Args:
        image_path: Path of the image file to embed.

    Returns:
        The model output with all size-1 dimensions squeezed out, moved to
        the CPU and converted to a NumPy array.

    Raises:
        Nothing is caught here: errors from opening/decoding the image or
        from the model forward pass propagate to the caller.
    """
    # Use the context manager so the underlying file handle is closed as soon
    # as the pixels have been read; a bare Image.open() can leave it open
    # until garbage collection, which adds up when looping over a dataset.
    # convert() returns a new in-memory image, so it stays valid afterwards.
    with Image.open(image_path) as src:
        img = src.convert("RGB")

    # Add a leading batch dimension before sending the tensor to the device.
    img_tensor = transform(img).unsqueeze(0).to(device)
    with torch.no_grad():
        feat = model(img_tensor)
    return feat.squeeze().cpu().numpy()

# Load the label table and resolve every image id to a path under TRAIN_DIR
df = pd.read_csv(LABEL_CSV)
df['file_path'] = df['image_id'].apply(lambda x: os.path.join(TRAIN_DIR, x))

# Extract one feature vector per readable training image
X_train = []
failed_paths = []
print("Extracting features from training images...")
for path in tqdm(df['file_path']):
    try:
        feat = extract_dino_features(path)
    except Exception as e:
        # Deliberate best effort: one unreadable image must not abort the
        # whole run, so report it, remember it, and move on.
        print(f"Error reading {path}: {e}")
        failed_paths.append(path)
    else:
        X_train.append(feat)

# Skipped images mean the rows of X_train no longer line up with df; that is
# harmless here because the One-Class SVM below is fitted without labels.
if failed_paths:
    print(f"Skipped {len(failed_paths)} of {len(df)} images due to errors.")

# Fail early with a clear message instead of letting the scaler choke on an
# empty array (typically caused by a wrong TRAIN_DIR or LABEL_CSV).
if not X_train:
    raise RuntimeError(
        f"No features could be extracted from {len(df)} listed images; "
        f"check TRAIN_DIR ({TRAIN_DIR}) and LABEL_CSV ({LABEL_CSV})."
    )

# Stack the per-image vectors into one (n_images, n_features) array
X_train = np.stack(X_train)

# Normalize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Train One-Class SVM on the scaled features
svm = OneClassSVM(kernel='rbf', nu=0.05, gamma='scale')
svm.fit(X_train_scaled)

# Save the fitted SVM and scaler side by side (the folder is created if needed)
MODEL_DIR = '../src/models'
os.makedirs(MODEL_DIR, exist_ok=True)
joblib.dump(svm, os.path.join(MODEL_DIR, 'svm_dinov2_vits14.pkl'))
joblib.dump(scaler, os.path.join(MODEL_DIR, 'scaler_dinov2_vits14.pkl'))

print(f"Training complete. Models saved to {MODEL_DIR}/")
