In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix

def calculate_metric(df, category, dataset_name):
    print(f"Processing {dataset_name}...")
    true_labels = df['1'].fillna('Unknown')
    pred_labels = df['# of Shots 8'].fillna('NA_placeholder')
    
    if category == 'I':
        # Use F1 score for category I
        precision, recall, f1, _ = precision_recall_fscore_support(true_labels, pred_labels, average='weighted')
        return f1
    elif category == 'C':
        # Ordinal mapping for category C
        if dataset_name == 'FUSARIUM 22':
            ordinal_map = {
                'Highly Resistant': 1,
                'Resistant': 2,
                'Moderately Resistant': 3,
                'Susceptible': 4,
                'Highly Susceptible': 5,
            }
        elif dataset_name == 'Yellow Rust 19':
            ordinal_map = {
                'Resistant (R)': 1,
                'Moderately Resistant (MR)': 2,
                'MRMS': 3,
                'Moderately Susceptible (MS)': 4,
                'Susceptible (S)': 5,
                'No disease (0)': 0,
            }
        else:
            raise ValueError(f"Unknown dataset for category C: {dataset_name}")
        
        # Add unseen labels to the ordinal map with the worst score
        max_value = max(ordinal_map.values())
        all_labels = set(true_labels).union(set(pred_labels))
        for label in all_labels:
            if label not in ordinal_map:
                ordinal_map[label] = max_value + 1
        
        true_ordinal = true_labels.map(ordinal_map)
        pred_ordinal = pred_labels.map(ordinal_map)
        
        # Calculate mean absolute error
        mae = np.mean(np.abs(true_ordinal - pred_ordinal))
        
        return mae

# Define the datasets and their categories
datasets = {
    'Bean Leaf Lesions': 'I',
    'DeepWeeds': 'I',
    'Durum Wheat': 'I',
    'FUSARIUM 22': 'C',
    'IP02': 'I',
    'Mango Leaf Disease': 'I',
    'SBRD': 'I',
    'Soybean Seeds': 'I',
    'Yellow Rust 19': 'C'
}

# Get the list of model folders
model_folders = [f for f in os.listdir('results') if os.path.isdir(os.path.join('results', f))]

# Initialize results dictionary
results = {model: {} for model in model_folders}

# Process each dataset for each model
for model in model_folders:
    for dataset, category in datasets.items():
        try:
            df = pd.read_csv(f'results/{model}/{dataset}.csv', engine='python')
            metric = calculate_metric(df, category, dataset)
            results[model][dataset] = metric
        except FileNotFoundError:
            print(f"File not found: results/{model}/{dataset}.csv")
            results[model][dataset] = np.nan
        except Exception as e:
            print(f"Error processing {model}/{dataset}: {str(e)}")
            results[model][dataset] = np.nan

# Create the result table
result_table = pd.DataFrame(results).T  # Transpose to match the desired format

# Rename the index and columns
result_table.index.name = 'Model'
result_table.columns.name = 'Dataset'

# Round the results to 4 decimal places
result_table = result_table.round(4)

# Display the result table
print(result_table)

# Optionally, save the result table to a CSV file
result_table.to_csv('result_table_shot_8.csv')

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

File not found: results/Claude-3-haiku/IP02.csv


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


File not found: results/Claude-3-sonnet/FUSARIUM 22.csv
File not found: results/Claude-3-sonnet/IP02.csv


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Dataset           Bean Leaf Lesions  DeepWeeds  Durum Wheat  FUSARIUM 22  \
Model                                                                      
GPT-4o                       0.9666     0.4265       1.0000       0.9333   
LLaVA v1.6 34B               0.4835     0.0895       0.3959       3.0000   
Gemini-flash-1.5             0.8692     0.3611       0.6000       0.6667   
Claude-3-haiku               0.5541     0.3095       0.5967       0.8333   
Claude-3-sonnet              0.6379     0.0469       0.6581          NaN   

Dataset             IP02  Mango Leaf Disease    SBRD  Soybean Seeds  \
Model                                                                 
GPT-4o            0.1956              0.9143  0.5778         0.4334   
LLaVA v1.6 34B    0.0000              0.1905  0.1370         0.1877   
Gemini-flash-1.5  0.2547              0.6863  0.4370         0.3600   
Claude-3-haiku       NaN              0.4229  0.1690         0.4560   
Claude-3-sonnet      NaN              0.4