```
Author: Annam.ai IIT Ropar
Team Name: RAV
Team Members: VIGNESH J, ASHWATH VINODKUMAR, RAHUL BHARGAV TALLADA
Leaderboard Rank: 13
```

# Training notebook for the soil classification model (zero-shot CLIP; no model weights are fine-tuned)

## Setup and Configuration

Installing required packages and importing necessary libraries for the soil classification task.

In [None]:
!pip install open-clip-torch pandas pillow scikit-learn matplotlib tqdm --quiet

import sys
import os
sys.path.append('../src')

import open_clip
import torch
import pandas as pd
import numpy as np
from PIL import Image
from pathlib import Path
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Import custom utilities
from preprocessing import load_and_preprocess_image, batch_preprocess_images, normalize_embeddings
from postprocessing import analyze_predictions, plot_confusion_matrix

# Run configuration: the tunable values used by the cells below
MODEL_NAME = "ViT-H-14"            # OpenCLIP architecture name
PRETRAINED = "laion2b_s32b_b79k"   # pretrained-weights tag for that architecture
BATCH_SIZE = 8                     # number of images encoded per batch
RANDOM_SEED = 42                   # seed passed to the train/validation split

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

print(f"Using device: {DEVICE}")

## Data Loading

Loading the training dataset for the soil classification task.

In [None]:
# Resolve the dataset root: use the local checkout when it exists,
# otherwise fall back to the Kaggle competition input directory
_local_data_dir = Path("../data")
_kaggle_data_dir = Path("/kaggle/input/soil-classification-part-2/soil_competition-2025")
DATA_DIR = _local_data_dir if _local_data_dir.exists() else _kaggle_data_dir

# Read the training label table
train_df = pd.read_csv(DATA_DIR.joinpath("train_labels.csv"))

print(f"Training samples: {len(train_df)}")
print(f"Sample data:")
display(train_df.head())

## Model Loading

Loading the CLIP model and preprocessing transforms.

In [None]:
# Build the OpenCLIP model together with its image transforms
print(f"Loading {MODEL_NAME} model with {PRETRAINED} weights...")
model, _unused_transform, preprocess = open_clip.create_model_and_transforms(
    model_name=MODEL_NAME,
    pretrained=PRETRAINED,
)

# Move the weights to the target device, then switch to evaluation mode
model = model.to(DEVICE)
model = model.eval()
print("Model loaded successfully!")

## Text Prompts for Zero-Shot Classification

Defining text prompts for soil and non-soil categories to use with CLIP model.

In [None]:
# Binary classification prompts.
# Each class is described by several phrasings; the text-embedding cell
# encodes every prompt and averages them into one embedding per class.

# Prompts describing images that DO show soil
soil_prompts = [
    "A photograph of soil, earth, dirt, or ground surface",
    "Agricultural soil in a field or garden",
    "Natural earth surface with visible soil texture",
    "Close-up view of soil, dirt, or earth material",
    "Ground surface showing soil composition",
    "Farmland soil or agricultural earth",
    "Natural soil formation with organic matter"
]

# Prompts describing images that do NOT show soil
non_soil_prompts = [
    "A photograph with no soil, dirt, or earth visible",
    "Indoor scene without any ground or soil",
    "Water, sky, buildings, or other non-soil objects",
    "Concrete, asphalt, or artificial surfaces",
    "Plants, rocks, or objects without visible soil",
    "Urban environment without natural earth",
    "Clean surfaces without dirt or soil material"
]

# Report how many prompts back each class
print(f"Number of soil prompts: {len(soil_prompts)}")
print(f"Number of non-soil prompts: {len(non_soil_prompts)}")

## Computing Text Embeddings

Precomputing text embeddings for the soil and non-soil categories.

In [None]:
# Precompute one averaged text embedding per class (soil / non-soil)
def _encode_prompts(prompts):
    """Tokenize and encode each prompt on its own; return the list of per-prompt text embeddings."""
    return [model.encode_text(open_clip.tokenize([p]).to(DEVICE)) for p in prompts]


with torch.no_grad():
    # Per-prompt embeddings for both classes
    soil_embeddings = _encode_prompts(soil_prompts)
    non_soil_embeddings = _encode_prompts(non_soil_prompts)

    # Average the prompts of each class into a single row vector, then pass it
    # through the project's normalize_embeddings helper
    # (presumably L2 normalization - confirm in preprocessing.py)
    soil_text_embedding = normalize_embeddings(
        torch.cat(soil_embeddings).mean(dim=0, keepdim=True)
    )
    non_soil_text_embedding = normalize_embeddings(
        torch.cat(non_soil_embeddings).mean(dim=0, keepdim=True)
    )

print("Text embeddings computed successfully!")

## Preparing Training Data

Building the list of training image paths and previewing a few sample images.

In [None]:
# Build the full path of every training image listed in the label table
train_dir = DATA_DIR / "train"
train_images = [train_dir / img_id for img_id in train_df["image_id"]]
print(f"Total training images: {len(train_images)}")

# Preview up to three images side by side; zip() stops early when fewer
# images than axes are available, leaving the remaining axes untouched
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
for i, (ax, sample_path) in enumerate(zip(axes, train_images)):
    img = Image.open(sample_path)
    ax.imshow(img)
    ax.set_title(f"Sample {i+1}")
    ax.axis('off')
plt.tight_layout()
plt.show()

## Computing Image Embeddings

Computing embeddings for the training images in batches to prevent memory issues.

In [None]:
# Function to get image embeddings in batches
def get_image_embeddings(image_paths):
    """
    Encode images with the CLIP image encoder, BATCH_SIZE images at a time.

    Batching keeps only a small number of images on the device at once,
    which prevents out-of-memory errors.

    Args:
        image_paths: Sequence of image file paths; must support len() and slicing.

    Returns:
        NumPy array holding one row of image features per input path,
        in the same order as image_paths.

    Raises:
        ValueError: If image_paths is empty.
    """
    # Fail early with a clear message instead of the opaque
    # "need at least one array to concatenate" error from np.concatenate
    if len(image_paths) == 0:
        raise ValueError("image_paths is empty; there is nothing to embed")

    embeddings = []
    for i in tqdm(range(0, len(image_paths), BATCH_SIZE), desc="Processing batches"):
        batch_paths = image_paths[i:i+BATCH_SIZE]

        # Open each file in a context manager so its handle is closed as soon
        # as the pixels are decoded; convert() returns an in-memory copy
        tensors = []
        for p in batch_paths:
            with Image.open(p) as img:
                tensors.append(preprocess(img.convert("RGB")))
        batch = torch.stack(tensors)

        with torch.no_grad():
            batch = batch.to(DEVICE)
            batch_emb = model.encode_image(batch)
            # Move the result to host memory right away
            embeddings.append(batch_emb.cpu().numpy())

        # Memory cleanup
        del batch, batch_emb
        torch.cuda.empty_cache()

    return np.concatenate(embeddings)

# Compute embeddings for training images
print("Computing image embeddings for training data...")
X_train = get_image_embeddings(train_images)
print(f"Computed embeddings shape: {X_train.shape}")

## Training Data Preparation

Preparing labels for the training data. Since all training images contain soil, they are labeled as 1.

In [None]:
# Every training image shows soil, so the label vector is all ones (soil = 1)
# For a more robust classifier, you might want to add negative examples (non-soil images)
n_samples = len(X_train)
y_train = np.ones(n_samples, dtype=int)

print(f"Training samples: {len(X_train)} (all labeled as soil=1)")

# Hold out 20% of the embeddings as a validation split, seeded for repeatability
split_parts = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=RANDOM_SEED,
)
X_train_split, X_val_split, y_train_split, y_val_split = split_parts

print(f"Training set: {len(X_train_split)} samples")
print(f"Validation set: {len(X_val_split)} samples")

## Zero-Shot Classification Function

Implementing the zero-shot classification function to predict whether an image contains soil or not.

In [None]:
# Shared scoring step used by both predictors below
def _soil_similarities(image_path):
    """
    Score one image against the soil and non-soil text embeddings.

    Both predict_is_soil and predict_is_soil_simple call this helper so
    they always score an image in exactly the same way.

    Args:
        image_path: Path of the image file (anything PIL's Image.open accepts).

    Returns:
        Tuple (soil_similarity, non_soil_similarity) of Python floats: the dot
        products of the image features with each class text embedding.
        NOTE(review): these are cosine similarities only if
        normalize_embeddings L2-normalizes its input - confirm in preprocessing.py.
    """
    # Close the file handle as soon as the pixels are decoded; convert()
    # returns an independent in-memory image
    with Image.open(image_path) as img:
        image = preprocess(img.convert("RGB")).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        # Get image features
        image_features = normalize_embeddings(model.encode_image(image))

        # Get text features (precomputed in the text-embedding cell)
        soil_features = soil_text_embedding.to(DEVICE)
        non_soil_features = non_soil_text_embedding.to(DEVICE)

        # Calculate similarities
        soil_similarity = (image_features @ soil_features.T).item()
        non_soil_similarity = (image_features @ non_soil_features.T).item()

    return soil_similarity, non_soil_similarity


# Function to predict if an image contains soil using zero-shot classification
def predict_is_soil(image_path, threshold=0.0, logit_scale=1.0):
    """
    Predict if image contains soil (1) or not (0)
    Uses zero-shot CLIP classification

    Args:
        image_path: Path of the image file to classify.
        threshold: Margin by which the soil probability must exceed the
            non-soil probability for the image to be labeled as soil.
        logit_scale: Positive multiplier applied to the similarities before
            the softmax. The default 1.0 reproduces the original behaviour.
            With unscaled similarities the two probabilities tend to sit close
            to 0.5; passing CLIP's learned temperature
            (e.g. model.logit_scale.exp().item()) gives the sharper
            probabilities of the usual CLIP zero-shot recipe. Any positive
            value leaves the prediction unchanged when threshold is 0.

    Returns:
        Tuple (is_soil, soil_probability): the 0/1 prediction and the softmax
        probability assigned to the soil class.
    """
    soil_similarity, non_soil_similarity = _soil_similarities(image_path)

    # Use softmax to get probabilities (index 0 = non-soil, index 1 = soil)
    logits = torch.tensor([non_soil_similarity, soil_similarity]) * logit_scale
    probs = torch.softmax(logits, dim=0)

    # Return 1 if soil probability > non-soil probability + threshold
    is_soil = 1 if probs[1] > (probs[0] + threshold) else 0

    return is_soil, probs[1].item()  # Return prediction and soil probability


# Simpler version - just compare similarities
def predict_is_soil_simple(image_path):
    """
    Simpler version - just compare soil vs non-soil similarity

    Args:
        image_path: Path of the image file to classify.

    Returns:
        1 if the image is more similar to the soil embedding than to the
        non-soil embedding, otherwise 0.
    """
    soil_sim, non_soil_sim = _soil_similarities(image_path)

    # Return 1 if more similar to soil than non-soil
    return 1 if soil_sim > non_soil_sim else 0

## Testing on Training Images

Testing the zero-shot classification function on a few training images.

In [None]:
# Sanity check: run both predictors on the first few training images
print("\nTesting on a few training images (should all be 1 since they contain soil):")
sample_images = train_images[:5]

fig, axes = plt.subplots(1, 5, figsize=(20, 4))
# sample_images holds at most five paths, so indexing axes by position is safe
for i, img_path in enumerate(sample_images):
    ax = axes[i]

    # Softmax-based prediction (with its soil probability) and the plain comparison
    prediction, confidence = predict_is_soil(img_path)
    simple_pred = predict_is_soil_simple(img_path)

    # Show the image with both predictions in the title
    img = Image.open(img_path)
    ax.imshow(img)
    ax.set_title(f"Pred={prediction} (conf={confidence:.3f})\nSimple={simple_pred}")
    ax.axis('off')

    print(f"Image {i+1}: Advanced={prediction} (conf={confidence:.3f}), Simple={simple_pred}")

plt.tight_layout()
plt.show()

## Model Evaluation

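No trained model is evaluated here: because the training data contains only positive (soil) examples, the zero-shot CLIP classifier is used as the final model, and its text embeddings are saved for the inference notebook.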

In [None]:
# Since we only have positive examples in our training data,
# we'll rely on zero-shot classification for the final model
print("Using zero-shot classification as our final model")

# Save the text embeddings for inference
embeddings_output = {
    "soil_embedding": soil_text_embedding.cpu().numpy(),
    "non_soil_embedding": non_soil_text_embedding.cpu().numpy()
}

# np.save does not create missing directories, so make sure the target
# folder exists first (otherwise the save fails with FileNotFoundError)
embeddings_path = Path("../model/text_embeddings.npy")
embeddings_path.parent.mkdir(parents=True, exist_ok=True)

# Save embeddings using numpy. The dict is stored as a pickled object array,
# so read it back with: np.load(path, allow_pickle=True).item()
np.save(embeddings_path, embeddings_output)
print("Text embeddings saved for inference")

## Conclusion

The training process is complete. We've created a zero-shot classifier using CLIP that can determine whether images contain soil or not. The model will be used for inference on the test set.

In [None]:
# Final status messages pointing the reader to the inference notebook
print("Training complete! The model is ready for inference.")
print("Please run the inference.ipynb notebook to generate predictions on the test set.")