# In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr

# Assuming your DataFrame df contains columns:
# ['Pond', 'WaterHeight', 'GroundTruth', 'PoseModel1', 'PoseModel2', 'SAMPrediction', 'X_Coordinate', 'Y_Coordinate']

# Function to calculate Prawn-Level Metrics
def calculate_metrics(df):
    """Compute error metrics for each prediction column against 'GroundTruth'.

    The columns 'PoseModel1', 'PoseModel2' and 'SAMPrediction' are each
    compared with 'GroundTruth' over all rows of *df*.

    Args:
        df: DataFrame containing 'GroundTruth' and the three prediction
            columns.

    Returns:
        dict mapping each model name to a dict with the keys
        'MAE' (mean absolute error), 'RMSE' (root mean squared error) and
        'MPE' (mean of |error / ground truth| in percent, i.e. an absolute
        percentage error despite the key name). Rows whose ground truth is
        zero are excluded from 'MPE'; it is NaN when no row has a non-zero
        ground truth.
    """
    truth = df['GroundTruth']
    # A percentage error is undefined where the ground truth is zero;
    # dividing there would turn the whole mean into inf.
    nonzero = truth != 0
    has_nonzero = nonzero.any()

    metrics = {}
    for model in ['PoseModel1', 'PoseModel2', 'SAMPrediction']:
        predicted = df[model]
        mae = mean_absolute_error(truth, predicted)
        rmse = np.sqrt(mean_squared_error(truth, predicted))
        if has_nonzero:
            relative_error = (truth[nonzero] - predicted[nonzero]) / truth[nonzero]
            mpe = np.mean(np.abs(relative_error)) * 100
        else:
            mpe = np.nan
        metrics[model] = {'MAE': mae, 'RMSE': rmse, 'MPE': mpe}
    return metrics

# Function to aggregate metrics by pond
def aggregate_metrics_by_pond(df):
    """Compute the per-model metrics separately for every pond.

    Args:
        df: DataFrame with a 'Pond' column plus the columns that
            calculate_metrics reads.

    Returns:
        dict mapping each pond label to the result of calculate_metrics for
        that pond's rows, in order of first appearance. Rows whose 'Pond'
        value is missing belong to no pond and are skipped.
    """
    # groupby splits the frame in a single pass. It also leaves out rows
    # with a missing 'Pond': selecting them with `df['Pond'] == pond` gives
    # an empty subset (NaN == NaN is False) with nothing to compute on.
    # sort=False keeps first-appearance order; observed=True avoids empty
    # groups for unused categories when 'Pond' is categorical.
    return {
        pond: calculate_metrics(pond_df)
        for pond, pond_df in df.groupby('Pond', sort=False, observed=True)
    }

# Function to analyze height vs error
def height_vs_error_analysis(df):
    """Scatter-plot each model's absolute error against water height.

    For 'PoseModel1', 'PoseModel2' and 'SAMPrediction' the absolute
    difference to 'GroundTruth' is written into a new '<model>_Error' column
    of *df* (the caller's frame is modified in place), plotted against
    'WaterHeight' with points coloured by 'Pond', and shown with plt.show().

    Args:
        df: DataFrame with 'Pond', 'WaterHeight', 'GroundTruth' and the
            three prediction columns.
    """
    for model in ('PoseModel1', 'PoseModel2', 'SAMPrediction'):
        error_column = f'{model}_Error'
        # Stored on the frame itself so seaborn can look it up by name.
        df[error_column] = np.abs(df['GroundTruth'] - df[model])

        axes = sns.scatterplot(
            data=df,
            x='WaterHeight',
            y=error_column,
            hue='Pond',
        )
        axes.set_title(f'Height vs Error for {model}')
        axes.set_xlabel('Water Height')
        axes.set_ylabel(f'{model} Error')
        plt.show()

# Function to calculate metrics for molt validation
def calculate_molt_validation_metrics(molt_df):
    """Compute the per-model metrics for molt data and attach the locations.

    Args:
        molt_df: DataFrame with the columns calculate_metrics reads plus
            'X_Coordinate' and 'Y_Coordinate'.

    Returns:
        dict with one entry per model, exactly as returned by
        calculate_metrics(molt_df), plus a 'Location' entry holding the
        'X_Coordinate' / 'Y_Coordinate' values as a two-column array with
        one row per row of *molt_df*.
    """
    # Same metrics as the prawn-level evaluation, so reuse that function
    # instead of repeating its loop here.
    molt_metrics = calculate_metrics(molt_df)
    molt_metrics['Location'] = molt_df[['X_Coordinate', 'Y_Coordinate']].values
    return molt_metrics

# Function for side-by-side model comparisons
def side_by_side_comparisons(df):
    """Plot all three models' predictions against ground truth, per pond.

    For every distinct value in 'Pond' a 10x6 figure is opened, the
    predictions of each model are scattered against 'GroundTruth' for that
    pond's rows, a dashed identity line is drawn, and the figure is shown.

    Args:
        df: DataFrame with 'Pond', 'GroundTruth' and the three prediction
            columns.
    """
    # (prediction column, legend label) in drawing order.
    model_series = (
        ('PoseModel1', 'Pose Model 1'),
        ('PoseModel2', 'Pose Model 2'),
        ('SAMPrediction', 'SAM Prediction'),
    )

    for pond in df['Pond'].unique():
        pond_df = df[df['Pond'] == pond]
        pond_truth = pond_df['GroundTruth']

        plt.figure(figsize=(10, 6))
        for column, legend_label in model_series:
            sns.scatterplot(x=pond_truth, y=pond_df[column], label=legend_label)

        # The y = x reference line spans the ground-truth range of the whole
        # frame, not only of the current pond.
        lower = df['GroundTruth'].min()
        upper = df['GroundTruth'].max()
        plt.plot([lower, upper], [lower, upper], 'k--', lw=2)

        plt.title(f'Comparison of Models in Pond {pond}')
        plt.xlabel('Ground Truth')
        plt.ylabel('Predictions')
        plt.legend()
        plt.show()

# Function to analyze model performance across ponds
def model_performance_across_ponds(df):
    """Box-plot each model's absolute prediction error, grouped by pond.

    For 'PoseModel1', 'PoseModel2' and 'SAMPrediction' one box plot of
    |GroundTruth - prediction| per 'Pond' is drawn and shown. *df* itself is
    not modified.

    Args:
        df: DataFrame with 'Pond', 'GroundTruth' and the three prediction
            columns.
    """
    for model in ['PoseModel1', 'PoseModel2', 'SAMPrediction']:
        # The axis is labelled 'Prediction Error', so plot the error rather
        # than the raw prediction. It goes into a small side frame so the
        # caller's DataFrame stays untouched.
        error_frame = df[['Pond']].assign(
            abs_error=np.abs(df['GroundTruth'] - df[model])
        )
        sns.boxplot(x='Pond', y='abs_error', data=error_frame)
        plt.title(f'{model} Performance Across Ponds')
        plt.xlabel('Pond')
        plt.ylabel('Prediction Error')
        plt.show()

# Function for correlation and regression analysis
def correlation_regression_analysis(df):
    """Correlate each model with ground truth per pond and plot the fit.

    For every distinct 'Pond' value and each of 'PoseModel1', 'PoseModel2'
    and 'SAMPrediction', the Pearson correlation between 'GroundTruth' and
    the prediction is computed on that pond's rows and a seaborn regression
    plot is shown.

    Args:
        df: DataFrame with 'Pond', 'GroundTruth' and the three prediction
            columns.

    Returns:
        dict mapping (pond, model) to the Pearson correlation coefficient.
        Ponds with fewer than two rows get NaN for every model and no plot.
    """
    correlation_results = {}
    for pond in df['Pond'].unique():
        pond_df = df[df['Pond'] == pond]
        # A correlation needs at least two observations. Without this guard
        # a single-row pond (or the empty subset selected for a missing pond
        # label) would abort the analysis for all remaining ponds.
        too_few_rows = len(pond_df) < 2
        for model in ['PoseModel1', 'PoseModel2', 'SAMPrediction']:
            if too_few_rows:
                correlation_results[(pond, model)] = np.nan
                continue
            corr, _ = pearsonr(pond_df['GroundTruth'], pond_df[model])
            correlation_results[(pond, model)] = corr
            sns.regplot(x=pond_df['GroundTruth'], y=pond_df[model])
            plt.title(f'Regression Analysis: {model} in Pond {pond}')
            plt.xlabel('Ground Truth')
            plt.ylabel(model)
            plt.show()
    return correlation_results

# Example usage:
# prawn_metrics = calculate_metrics(df)
# pond_metrics = aggregate_metrics_by_pond(df)
# height_vs_error_analysis(df)
# molt_validation_metrics = calculate_molt_validation_metrics(molt_df)
# side_by_side_comparisons(df)
# model_performance_across_ponds(df)
# correlation_results = correlation_regression_analysis(df)
