In [1]:
import sys
sys.path.append('/workspace/cropClassification')
sys.path.append('/workspace/cropClassification/model')
import os
# if using Apple MPS, fall back to CPU for unsupported ops
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
import torch
import pandas as pd
from torch.utils.data import DataLoader

from unet_uncertain import UNetWithUncertainty, UNetWithFiLM, UNetWithAttention, UNetWithAttentionDeep
from unet import originalUNet
from compiler import ModelCompiler
from dataloader import RoadsideCropImageDataset
from loss import AleatoricLoss, BalancedCrossEntropyLoss, BalancedCrossEntropyUncertaintyLoss

In [2]:
# Choose the compute backend in priority order: CUDA GPU, then Apple MPS
# (Metal Performance Shaders), and finally plain CPU as the fallback.
if torch.cuda.is_available():
    backend_name, backend_banner = "cuda", "Using CUDA"
elif torch.backends.mps.is_available():
    backend_name, backend_banner = "mps", "Using MPS"
else:
    backend_name, backend_banner = "cpu", "Using CPU"

# Build the device object once and report which backend was selected.
device = torch.device(backend_name)
print(backend_banner)

Using CUDA


In [3]:
# Experiment configuration: model, training, validation, dataset and evaluation
# settings gathered in a single nested dict.
config = {
    "model": {
        "type": "UNetWithUncertainty",  # NOTE(review): the model cell below instantiates UNetWithAttention — confirm which is intended
        "params": {
            "in_channels": 10,  # 10-channel input images (matches the 10-element mean/std vectors below)
            "out_channels": 3   # Number of output classes for segmentation
        }
    },
    "training": {
        "epochs": 100,
        "batch_size": 64,
        "learning_rate": 0.01,  # NOTE(review): differs from optimizer.params.lr (0.1) below — confirm which one is actually used
        "optimizer": {
            "type": "Adam",
            "params": {
                "lr": 0.1
            }
        },
        "scheduler": {
            "type": "StepLR",
            "params": {
                "step_size": 10,
                "gamma": 0.8
            }
        },
        "criterion": BalancedCrossEntropyLoss,  # Loss class itself (not an instance)
        "classwise_weights": [0.40847337, 0.24774314, 0.34378349],  # presumably one weight per output class — TODO confirm ordering
        "resume" : False,
        "resume_epoch" : None,
    },
    "validation": {
        "epochs": 50,
        "batch_size": 16
    },
    "dataset": {
        "train_csv": "/workspace/data/masked_data_csiss/training/train_chipping_csv_w_anc.csv",  # Path to the training DataFrame (includes npy file paths)
        "val_csv": "/workspace/data/masked_data_csiss/validation/validation_chipping_csv_w_anc.csv", # Path to the validation DataFrame (includes npy file paths)
        "train_root_path": "/workspace/data/masked_data_csiss/training",
        "val_root_path": "/workspace/data/masked_data_csiss/validation",
        "image_column": "img_chip_path",             # Column containing the image paths (npy files)
        "mask_column": "lbl_chip_path",              # Column containing the mask paths
        "train_mean": [93.35909, 112.25017, 77.58077, 113.76227, 209.35867, 
                       33.197624, 47.543148, 115.10287, 116.46991, 0.4089894],  # Mean values for training set normalization
        "train_std": [52.684902, 47.859867, 48.691204, 48.87318, 79.684715, 
                      55.719467, 22.692726, 52.473957, 49.270615, 0.18757315],   # Std values for training set normalization
        "val_mean": [86.912254, 111.033424, 76.70707, 111.44443, 222.71599, 
                     32.308598, 51.314415, 117.117516, 114.03275, 0.4002856],    # Mean values for validation set normalization
        "val_std": [53.11299, 48.662193, 49.91134, 49.917503, 60.49611, 
                    55.28856, 21.68052, 54.39602, 50.282692, 0.1894794],     # Std values for validation set normalization
        # Per-class normalization statistics; each mean/std list has 10 entries
        # (presumably one per input channel — TODO confirm).
        "classwise_norm": {
            "Other": {
                "mean": [110.14499, 138.28326, 117.67161, 138.71075, 233.10298, 95.24617, 
                         61.51174, 95.255585, 143.12497, 0.5121758],
                "std": [49.80741, 58.08042, 73.33471, 56.73531, 46.3197, 107.53641, 
                        27.30666, 48.58509, 61.8592, 0.22473]
            },
            "Maize": {
                "mean": [90.51625, 108.20322, 72.41352, 109.907, 203.1153, 24.78379, 
                         45.99272, 118.35542, 112.66669, 0.39319476],
                "std": [53.35697, 45.1693, 41.97862, 46.84034, 85.41561, 37.93971, 
                        21.92598, 52.69628, 46.29754, 0.1780796]
            },
            "Soybean": {
                "mean": [83.55291, 101.03232, 66.89771, 102.10976, 220.03397, 28.55189, 
                         47.94096, 116.89587, 103.48843, 0.36566228],
                "std": [49.91921, 47.52045, 41.91468, 48.74042, 67.89334, 47.91879, 
                        20.7751, 53.59672, 48.15541, 0.18223667]
            },
        }
    },
    "evaluation": {
        "filename": "csiss_street_view_crop_classification.csv",  # Presumably the CSV file name for evaluation output — TODO confirm against ModelCompiler
        # NOTE(review): class 0 is called "Background" here but "Other" in
        # classwise_norm above and in classify_and_evaluate — confirm the intended name.
        "class_mapping": {
            0: "Background",
            1: "Maize",
            2: "Soybean"
        }
    }
}

In [4]:
# Instantiate the attention U-Net with the channel/class counts from the config,
# then hand it to ModelCompiler together with the checkpoint to initialise from.
model_params = config['model']['params']
checkpoint_path = "/workspace/notebook/outputs-UWA-ep100-bceloss-clsweighted-lr0.01-gamma0.8/UNetWithAttention_ep100/chkpt/final_checkpoint.pth.tar"

model = UNetWithAttention(
    n_channels=model_params['in_channels'],
    n_classes=model_params['out_channels'],
    ancillary_data_dim=3,
)
model_comp = ModelCompiler(model=model, params_init=checkpoint_path)

---------- GPU (CUDA) available ----------
Loading model parameters from: /workspace/notebook/outputs-UWA-ep100-bceloss-clsweighted-lr0.01-gamma0.8/UNetWithAttention_ep100/chkpt/final_checkpoint.pth.tar
Model parameters loaded successfully.
Total number of trainable parameters: 13.7M


  checkpoint = torch.load(dir_params, map_location=torch.device('cpu'))


In [5]:
# mask = model_comp.simple_predict_and_display(image_path="/workspace/data/all_sv_imgs/IMG_2022_279.jpg",
#                                csv_path="/workspace/data/masked_data_csiss/validation/validation_chipping_csv_w_anc.csv")

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
import os
from tqdm import tqdm

def classify_and_evaluate(model_comp, validation_csv, image_dir, output_dir):
    """
    Perform segmentation-based image classification and build a confusion matrix.

    Every unique ``origin_img`` in the validation CSV is segmented with
    ``model_comp.simple_predict_and_display``. The most frequent value in the
    returned mask becomes the predicted class for that image, and the fraction
    of mask pixels carrying that value is recorded as the confidence. The
    ground-truth class is the ``crop_type`` of the first row for that image.

    Two files are written into ``output_dir`` (created if missing):
    ``classification_results.csv`` (one row per image) and
    ``confusion_matrix.csv``. A classification report is printed to stdout.

    Args:
        model_comp (ModelCompiler): Compiled model for inference; must provide
            ``simple_predict_and_display(image_path=..., csv_path=...)``.
        validation_csv (str): Path to the validation CSV file. Must contain the
            columns ``origin_img`` and ``crop_type``.
        image_dir (str): Directory containing ``<origin_img>.jpg`` images.
        output_dir (str): Directory to save classification results and confusion matrix.

    Returns:
        pd.DataFrame: Confusion matrix with true classes as rows and predicted
        classes as columns, ordered Other, Maize, Soybean.

    Raises:
        ValueError: If the model returns an empty mask for an image.
        KeyError: If the dominant mask value is not one of 0, 1 or 2.
    """
    # Numeric mask value -> class name. The insertion order also fixes the
    # row/column order of the confusion matrix and the classification report.
    label_map = {0: "Other", 1: "Maize", 2: "Soybean"}
    class_names = list(label_map.values())

    # Create the output directory up front so that a bad path fails before the
    # (expensive) inference loop instead of after it.
    os.makedirs(output_dir, exist_ok=True)

    # Load validation data
    validation_data = pd.read_csv(validation_csv)

    # One record per unique image, in first-appearance order. The crop type is
    # taken from the first chip of each image (assumed consistent across chips).
    per_image = validation_data.drop_duplicates(subset="origin_img")
    image_records = list(zip(per_image["origin_img"], per_image["crop_type"]))

    image_names = []
    y_true = []
    y_pred = []
    confidence_scores = []

    for img_name, crop_type in tqdm(image_records, desc="Processing Unique Images"):
        image_path = os.path.join(image_dir, f"{img_name}.jpg")

        # Perform segmentation inference
        mask = np.asarray(
            model_comp.simple_predict_and_display(image_path=image_path, csv_path=validation_csv)
        )
        if mask.size == 0:
            raise ValueError(f"Empty segmentation mask returned for image: {image_path}")

        # np.unique returns sorted values, and argmax picks the first maximum,
        # so ties resolve to the smallest class value.
        values, counts = np.unique(mask, return_counts=True)
        top = int(np.argmax(counts))
        dominant_class = label_map[values[top].item()]
        confidence = float(counts[top] / mask.size)

        # Store results
        image_names.append(img_name)
        y_true.append(crop_type)
        y_pred.append(dominant_class)
        confidence_scores.append(confidence)

    # Generate a confusion matrix (rows: true class, columns: predicted class)
    cm = confusion_matrix(y_true, y_pred, labels=class_names)
    cm_df = pd.DataFrame(cm, index=class_names, columns=class_names)

    # Save classification results and confusion matrix
    classification_results = pd.DataFrame({
        "origin_img": image_names,
        "true_class": y_true,
        "predicted_class": y_pred,
        "confidence": confidence_scores
    })
    classification_results.to_csv(os.path.join(output_dir, "classification_results.csv"), index=False)
    cm_df.to_csv(os.path.join(output_dir, "confusion_matrix.csv"))

    # Pass `labels` explicitly: without it sklearn orders the classes by sorted
    # label value, so `target_names` would be attached to the wrong rows, and
    # the call raises when fewer than three classes occur in the data.
    print("Classification Report:")
    print(classification_report(
        y_true,
        y_pred,
        labels=class_names,
        target_names=class_names,
        zero_division=0,
    ))

    return cm_df

In [None]:
# Collect the evaluation inputs in one place, then run the image-level
# evaluation and keep the resulting confusion matrix.
evaluation_kwargs = dict(
    model_comp=model_comp,
    validation_csv="/workspace/data/masked_data_csiss/validation/validation_chipping_csv_w_anc.csv",
    image_dir="/workspace/data/all_sv_imgs",
    output_dir="/workspace/notebook/results",
)
cm_df = classify_and_evaluate(**evaluation_kwargs)

In [None]:
# Print the confusion matrix returned by classify_and_evaluate as plain text
# (rows: true class, columns: predicted class).
print(cm_df)