In [1]:
# Essential imports for soil image analysis
import numpy as np                     # Matrix operations
import pandas as pd                    # Data handling
import os                              # File system operations
import matplotlib.pyplot as plt        # Plotting
from PIL import Image                  # Image processing

# Deep learning framework components
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models

# Model evaluation tools
from sklearn.metrics import f1_score

# Utility imports
from tqdm import tqdm                  # Progress visualization
import copy                            # Model state preservation
import time                            # Execution timing

# Hardware configuration: use CUDA when torch reports it as available, else CPU.
processing_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Processing will occur on: {processing_device}")

# Data paths configuration
training_images_path = '/content/drive/MyDrive/soil-classification/soil_classification-2025/train'
evaluation_images_path = '/content/drive/MyDrive/soil-classification/soil_classification-2025/test'

# Loading data annotations
training_annotations = pd.read_csv('/content/drive/MyDrive/soil-classification/soil_classification-2025/train_labels.csv')
# The evaluation split is described by test_ids.csv (the same file this script
# reads later as the reference order), not by the training label file.
evaluation_annotations = pd.read_csv('/content/drive/MyDrive/soil-classification/soil_classification-2025/test_ids.csv')

# Soil category encoding: class name -> integer class index used by the model.
soil_category_encoding = {
    'Alluvial soil': 0,
    'Black Soil': 1,
    'Clay soil': 2,
    'Red soil': 3
}

# Inverse lookup used to turn predicted class indices back into class names.
reverse_soil_encoding = {v: k for k, v in soil_category_encoding.items()}

# Apply encoding to training data
training_annotations['encoded_label'] = training_annotations['soil_type'].map(soil_category_encoding)

# `.map` yields NaN for any soil_type missing from the encoding; fail here with
# a clear message instead of letting NaN labels reach the loss function.
_unmapped_rows = training_annotations['encoded_label'].isna()
if _unmapped_rows.any():
    _unknown_types = sorted(training_annotations.loc[_unmapped_rows, 'soil_type'].astype(str).unique())
    raise ValueError(f"Unrecognized soil_type values in training labels: {_unknown_types}")

# Custom dataset handler
class SoilImageDataset(Dataset):
    """Dataset that loads RGB images named in a metadata table.

    The first column of ``metadata`` holds the image file name, which is
    joined onto ``base_path``. Items are ``(image, file_name)`` when
    ``evaluation_mode`` is true, otherwise ``(image, label)`` where the label
    is taken from the last column of the same row.

    Args:
        metadata: Table indexed positionally via ``.iloc`` (e.g. a DataFrame).
        base_path: Directory containing the image files.
        image_processor: Optional callable applied to each loaded image.
        evaluation_mode: Return file names instead of labels when true.
    """

    def __init__(self, metadata, base_path, image_processor=None, evaluation_mode=False):
        self.metadata, self.base_path = metadata, base_path
        self.image_processor, self.evaluation_mode = image_processor, evaluation_mode

    def __len__(self):
        """Return the number of rows in the metadata table."""
        return len(self.metadata)

    def __getitem__(self, index):
        """Load, optionally transform, and return the item at ``index``."""
        file_name = self.metadata.iloc[index, 0]
        image = Image.open(os.path.join(self.base_path, file_name)).convert('RGB')

        if self.image_processor:
            image = self.image_processor(image)

        if not self.evaluation_mode:
            # Training items carry the label stored in the row's last column.
            return image, self.metadata.iloc[index, -1]
        return image, file_name

# Image processing pipelines.
# Both pipelines resize to 224x224, convert to a tensor and normalize per
# channel (presumably the ImageNet statistics expected by the pretrained
# backbone; confirm). The training pipeline additionally applies random
# flip, rotation and colour-jitter augmentation before tensor conversion.
evaluation_processing = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

training_processing = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=15),
        transforms.ColorJitter(
            brightness=0.3,
            contrast=0.3,
            saturation=0.3,
        ),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Dataset preparation: labelled training items, and file-name-tagged
# evaluation items (evaluation_mode=True).
training_dataset = SoilImageDataset(
    training_annotations,
    training_images_path,
    image_processor=training_processing,
)
evaluation_dataset = SoilImageDataset(
    evaluation_annotations,
    evaluation_images_path,
    image_processor=evaluation_processing,
    evaluation_mode=True,
)

# Data loading configuration: only the training loader is shuffled.
training_data_loader = DataLoader(
    training_dataset,
    batch_size=32,
    shuffle=True,
)
evaluation_data_loader = DataLoader(
    evaluation_dataset,
    batch_size=32,
    shuffle=False,
)

# Model initialization: EfficientNet-B0 with pretrained weights. The explicit
# `weights=` enum replaces the deprecated `pretrained=True` flag and pins the
# checkpoint instead of relying on the legacy default.
soil_classifier = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)
# Swap the final linear layer for one with an output per soil category, so the
# head size cannot drift from the label encoding.
soil_classifier.classifier[1] = nn.Linear(
    soil_classifier.classifier[1].in_features,
    len(soil_category_encoding),
)
soil_classifier = soil_classifier.to(processing_device)

# Training configuration
loss_function = nn.CrossEntropyLoss()
model_optimizer = optim.AdamW(soil_classifier.parameters(), lr=0.0001)
# Halve the learning rate every 5 scheduler steps (one step per epoch in
# execute_training).
learning_adjuster = optim.lr_scheduler.StepLR(model_optimizer, step_size=5, gamma=0.5)

# Model training process
def execute_training(model, data_loader, total_epochs=20, early_stop_threshold=5,
                     optimizer=None, scheduler=None, criterion=None, device=None):
    """Train ``model`` and return it loaded with its best-scoring weights.

    After every epoch the per-class F1 scores are computed from the
    predictions made on the training batches of that epoch. The minimum
    per-class F1 is the selection metric: weights are snapshotted whenever it
    improves, and training stops early once it has failed to improve for
    ``early_stop_threshold`` consecutive epochs.

    NOTE(review): the metric is measured on the training data itself, so it
    does not detect overfitting; a held-out validation loader would be needed
    for that.

    Args:
        model: Network to train (already moved to the target device).
        data_loader: Iterable of ``(images, labels)`` batches.
        total_epochs: Maximum number of epochs.
        early_stop_threshold: Epochs without improvement before stopping.
        optimizer: Optimizer to step. Defaults to the module-level
            ``model_optimizer``; pass one explicitly when training a model
            other than the one that optimizer was built for.
        scheduler: Learning-rate scheduler stepped once per epoch. Defaults to
            the module-level ``learning_adjuster`` only when ``optimizer`` is
            also defaulted; with a custom optimizer and no scheduler, no
            scheduling is applied.
        criterion: Loss callable. Defaults to the module-level
            ``loss_function``.
        device: Device batches are moved to. Defaults to the module-level
            ``processing_device``.

    Returns:
        The same ``model`` object with the best recorded weights loaded.

    Raises:
        ValueError: If ``data_loader`` yields no batches in an epoch.
    """
    if optimizer is None:
        optimizer = model_optimizer
        # The global scheduler is bound to the global optimizer, so it is only
        # a valid default when that optimizer is the one being used.
        if scheduler is None:
            scheduler = learning_adjuster
    if criterion is None:
        criterion = loss_function
    if device is None:
        device = processing_device

    optimal_weights = copy.deepcopy(model.state_dict())
    best_performance = 0.0
    patience_tracker = 0

    for current_epoch in range(total_epochs):
        model.train()
        cumulative_loss = 0.0
        predicted_values = []
        actual_values = []

        for batch_images, batch_labels in tqdm(data_loader, desc=f"Training Epoch {current_epoch+1}/{total_epochs}"):
            batch_images, batch_labels = batch_images.to(device), batch_labels.to(device)
            optimizer.zero_grad()

            model_output = model(batch_images)
            batch_loss = criterion(model_output, batch_labels)
            batch_loss.backward()
            optimizer.step()

            _, batch_predictions = torch.max(model_output, 1)
            cumulative_loss += batch_loss.item()
            predicted_values.extend(batch_predictions.cpu().numpy())
            actual_values.extend(batch_labels.cpu().numpy())

        if not actual_values:
            # Without samples there is nothing to score; say so explicitly
            # instead of failing inside the metric computation.
            raise ValueError("data_loader yielded no batches; cannot train or score the model")

        # average=None returns one F1 score per class rather than an aggregate.
        performance_metrics = f1_score(actual_values, predicted_values, average=None)
        minimum_f1 = min(performance_metrics)
        print(f"Epoch {current_epoch+1} - Total Loss: {cumulative_loss:.4f} | Class F1 Scores: {performance_metrics} | Minimum F1: {minimum_f1:.4f}")

        if scheduler is not None:
            scheduler.step()

        if minimum_f1 > best_performance:
            best_performance = minimum_f1
            # Deep copy so later optimizer steps do not mutate the snapshot.
            optimal_weights = copy.deepcopy(model.state_dict())
            patience_tracker = 0
            print("▲ Improved model weights saved!")
        else:
            patience_tracker += 1
            print(f"▼ No improvement detected. Patience counter: {patience_tracker}/{early_stop_threshold}")
            if patience_tracker >= early_stop_threshold:
                print("✋ Early stopping activated.")
                break

    model.load_state_dict(optimal_weights)
    return model

# Execute training
soil_classifier = execute_training(
    soil_classifier,
    training_data_loader,
    total_epochs=20,
    early_stop_threshold=4,
)

# Prepare evaluation data: every PNG/JPEG file found in the evaluation folder
# (extension match is case-insensitive).
eval_image_list = [
    file_name
    for file_name in os.listdir(evaluation_images_path)
    if file_name.lower().endswith(('.png', '.jpg', '.jpeg'))
]
eval_metadata = pd.DataFrame({'image_identifier': eval_image_list})

eval_dataset = SoilImageDataset(
    eval_metadata,
    evaluation_images_path,
    image_processor=evaluation_processing,
    evaluation_mode=True,
)
eval_loader = DataLoader(eval_dataset, batch_size=32, shuffle=False)

# Prediction function
def generate_predictions(model, data_loader=None, device=None):
    """Predict a soil category for every image yielded by a loader.

    Args:
        model: Trained classifier; it is switched to eval mode.
        data_loader: Iterable of ``(images, image_ids)`` batches. Defaults to
            the module-level ``eval_loader``.
        device: Device the image batches are moved to. Defaults to the
            module-level ``processing_device``.

    Returns:
        DataFrame with columns ``image_id`` and ``predicted_soil`` (the
        category name looked up in ``reverse_soil_encoding``), in loader order.
    """
    if data_loader is None:
        data_loader = eval_loader
    if device is None:
        device = processing_device

    model.eval()
    result_predictions = []
    image_identifiers = []

    # Inference only: no gradients are needed.
    with torch.no_grad():
        for input_images, img_ids in tqdm(data_loader, desc="Generating Predictions"):
            input_images = input_images.to(device)
            model_results = model(input_images)
            _, predicted_classes = torch.max(model_results, 1)
            # tolist() yields plain Python ints, which are the key type of
            # reverse_soil_encoding.
            result_predictions.extend(predicted_classes.cpu().tolist())
            image_identifiers.extend(img_ids)

    predicted_categories = [reverse_soil_encoding[p] for p in result_predictions]
    return pd.DataFrame({'image_id': image_identifiers, 'predicted_soil': predicted_categories})

# Create results
results_dataframe = generate_predictions(soil_classifier)

# Organize results according to reference order
reference_order = pd.read_csv('/content/drive/MyDrive/soil-classification/soil_classification-2025/test_ids.csv')

reference_order['sort_index'] = range(len(reference_order))
organized_results = pd.merge(reference_order, results_dataframe, on='image_id', how='left')
organized_results = organized_results.sort_values(by='sort_index')

# A left merge keeps every reference row; rows without a matching prediction
# get NaN. Report them instead of writing blanks silently.
missing_predictions = int(organized_results['predicted_soil'].isna().sum())
if missing_predictions:
    print(f"⚠ {missing_predictions} reference image(s) have no prediction and will be blank in the output.")

# Save final output
output_directory = 'working'
os.makedirs(output_directory, exist_ok=True)

output_file = os.path.join(output_directory, 'soil_predictions.csv')
organized_results[['image_id', 'predicted_soil']].to_csv(output_file, index=False)
print("✔ Prediction results successfully saved.")

# Download results. The browser download helper only exists inside Google
# Colab; elsewhere the saved CSV is the final product, so do not crash.
try:
    from google.colab import files
except ImportError:
    print(f"Not running in Google Colab; results are available at {output_file}")
else:
    files.download(output_file)

Processing will occur on: cuda


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 93.6MB/s]
Training Epoch 1/20: 100%|██████████| 39/39 [15:04<00:00, 23.20s/it]


Epoch 1 - Total Loss: 33.9328 | Class F1 Scores: [0.80943026 0.70656371 0.66161616 0.7890625 ] | Minimum F1: 0.6616
▲ Improved model weights saved!


Training Epoch 2/20: 100%|██████████| 39/39 [00:22<00:00,  1.75it/s]


Epoch 2 - Total Loss: 13.5336 | Class F1 Scores: [0.91969407 0.90672451 0.84577114 0.95700935] | Minimum F1: 0.8458
▲ Improved model weights saved!


Training Epoch 3/20: 100%|██████████| 39/39 [00:22<00:00,  1.74it/s]


Epoch 3 - Total Loss: 8.3968 | Class F1 Scores: [0.94555874 0.92273731 0.8992629  0.95716946] | Minimum F1: 0.8993
▲ Improved model weights saved!


Training Epoch 4/20: 100%|██████████| 39/39 [00:22<00:00,  1.75it/s]


Epoch 4 - Total Loss: 6.2110 | Class F1 Scores: [0.95437262 0.94736842 0.91315136 0.97560976] | Minimum F1: 0.9132
▲ Improved model weights saved!


Training Epoch 5/20: 100%|██████████| 39/39 [00:21<00:00,  1.78it/s]


Epoch 5 - Total Loss: 5.3173 | Class F1 Scores: [0.96479543 0.96982759 0.91729323 0.98113208] | Minimum F1: 0.9173
▲ Improved model weights saved!


Training Epoch 6/20: 100%|██████████| 39/39 [00:22<00:00,  1.73it/s]


Epoch 6 - Total Loss: 5.4066 | Class F1 Scores: [0.9645933  0.96581197 0.94       0.96798493] | Minimum F1: 0.9400
▲ Improved model weights saved!


Training Epoch 7/20: 100%|██████████| 39/39 [00:22<00:00,  1.73it/s]


Epoch 7 - Total Loss: 5.1741 | Class F1 Scores: [0.9705603  0.97167756 0.945      0.96240602] | Minimum F1: 0.9450
▲ Improved model weights saved!


Training Epoch 8/20: 100%|██████████| 39/39 [00:21<00:00,  1.79it/s]


Epoch 8 - Total Loss: 3.8188 | Class F1 Scores: [0.98288973 0.98047722 0.97014925 0.9905482 ] | Minimum F1: 0.9701
▲ Improved model weights saved!


Training Epoch 9/20: 100%|██████████| 39/39 [00:22<00:00,  1.73it/s]


Epoch 9 - Total Loss: 3.4794 | Class F1 Scores: [0.98391675 0.97603486 0.96221662 0.97551789] | Minimum F1: 0.9622
▼ No improvement detected. Patience counter: 1/4


Training Epoch 10/20: 100%|██████████| 39/39 [00:22<00:00,  1.73it/s]


Epoch 10 - Total Loss: 4.1468 | Class F1 Scores: [0.97731569 0.98047722 0.96482412 0.9829222 ] | Minimum F1: 0.9648
▼ No improvement detected. Patience counter: 2/4


Training Epoch 11/20: 100%|██████████| 39/39 [00:22<00:00,  1.74it/s]


Epoch 11 - Total Loss: 4.2103 | Class F1 Scores: [0.9772296  0.96551724 0.96259352 0.9752381 ] | Minimum F1: 0.9626
▼ No improvement detected. Patience counter: 3/4


Training Epoch 12/20: 100%|██████████| 39/39 [00:21<00:00,  1.79it/s]


Epoch 12 - Total Loss: 3.2230 | Class F1 Scores: [0.97815764 0.98706897 0.96296296 0.98467433] | Minimum F1: 0.9630
▼ No improvement detected. Patience counter: 4/4
✋ Early stopping activated.


Generating Predictions: 100%|██████████| 11/11 [00:14<00:00,  1.31s/it]


✔ Prediction results successfully saved.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>