## Evaluate Leave One Image Out analysis

In [1]:
import pathlib
import numpy as np
import pandas as pd

from sklearn.preprocessing import label_binarize
from sklearn.metrics import confusion_matrix, f1_score, precision_recall_curve, auc

In [2]:
def compute_avg_rank_and_pvalue(grouped_df):
    """Summarize how highly the true phenotype is ranked for each cell.

    For every ``Cell_UUID`` in ``grouped_df`` the rows are sorted by
    ``Predicted_Probability`` (highest first). The 1-based position of the row
    whose ``Model_Phenotypic_Class`` equals ``Mitocheck_Phenotypic_Class`` is
    that cell's rank, and the probability stored in that row is recorded
    alongside it.

    Note: the "p-value" naming is kept for compatibility with the output
    columns; the values are predicted probabilities of the true class, not
    statistical p-values.

    Parameters
    ----------
    grouped_df : pandas.DataFrame
        Must contain the columns ``Cell_UUID``, ``Mitocheck_Phenotypic_Class``,
        ``Model_Phenotypic_Class`` and ``Predicted_Probability``.

    Returns
    -------
    tuple
        ``(avg_rank, avg_p_value, min_iqr_rank, max_iqr_rank,
        min_iqr_p_value, max_iqr_p_value, count)`` where the ``min_iqr_*`` and
        ``max_iqr_*`` entries are the 25th and 75th percentiles and ``count``
        is the number of cells summarized. If ``grouped_df`` holds no cells,
        the six statistics are NaN and ``count`` is 0.

    Raises
    ------
    IndexError
        If a cell has no row whose predicted class equals its true class.
    """
    ranks = []
    p_values = []

    for cell_uuid, group in grouped_df.groupby("Cell_UUID"):
        # Sort predicted probabilities in descending order. A stable sort keeps
        # tied probabilities in their original row order so ranks are reproducible.
        sorted_probs = group.sort_values(
            by="Predicted_Probability", ascending=False, kind="stable"
        ).reset_index(drop=True)

        # Locate the row holding the prediction for the true class
        is_true_class = (
            sorted_probs["Mitocheck_Phenotypic_Class"]
            == sorted_probs["Model_Phenotypic_Class"]
        )
        true_class_positions = sorted_probs.index[is_true_class]
        if len(true_class_positions) == 0:
            raise IndexError(
                f"Cell_UUID {cell_uuid!r} has no prediction for its true class"
            )
        position = true_class_positions[0]

        # Rank is 1-based; the "p-value" is the predicted probability of the true class
        ranks.append(position + 1)
        p_values.append(sorted_probs.loc[position, "Predicted_Probability"])

    # Count number of comparisons
    count = len(ranks)

    # Nothing to summarize: avoid dividing by zero and taking percentiles of nothing
    if count == 0:
        return np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 0

    # Calculate average rank and p-value for the group
    avg_rank = sum(ranks) / count
    avg_p_value = sum(p_values) / count

    # Bounds of the interquartile range (25th and 75th percentiles)
    min_iqr_rank, max_iqr_rank = np.percentile(ranks, [25, 75])
    min_iqr_p_value, max_iqr_p_value = np.percentile(p_values, [25, 75])

    return avg_rank, avg_p_value, min_iqr_rank, max_iqr_rank, min_iqr_p_value, max_iqr_p_value, count

In [3]:
# Set I/O
# Directory holding the LOIO probability file read below and the summaries written later
proba_dir = pathlib.Path("evaluations") / "LOIO_probas"
loio_file = proba_dir / "compiled_LOIO_probabilites.tsv"

# Output locations for the two summary tables
output_summary_file = proba_dir / "LOIO_summary_ranks_allfeaturespaces.tsv"
output_summary_phenotype_file = proba_dir / "LOIO_summary_ranks_perphenotype_allfeaturespaces.tsv"

In [4]:
# Load the tab-separated LOIO probabilities; the first column of the file becomes the index
loio_df = pd.read_csv(loio_file, index_col=0, sep="\t")

print(loio_df.shape)
loio_df.head()

(858600, 10)


Unnamed: 0,Cell_UUID,Metadata_DNA,Mitocheck_Phenotypic_Class,Model_Type,Model_Feature_Type,Model_Balance_Type,Model_C,Model_l1_ratio,Model_Phenotypic_Class,Predicted_Probability
0,0008551d-e7f6-4351-b680-140c3661cb59,LT0109_38/LT0109_38_381_87.tif,Interphase,final,CP,balanced,1.0,0.8,ADCCM,8e-06
1,0008551d-e7f6-4351-b680-140c3661cb59,LT0109_38/LT0109_38_381_87.tif,Interphase,final,CP,unbalanced,10.0,0.2,ADCCM,6e-06
2,0008551d-e7f6-4351-b680-140c3661cb59,LT0109_38/LT0109_38_381_87.tif,Interphase,shuffled_baseline,CP,balanced,10.0,0.2,ADCCM,0.592082
3,0008551d-e7f6-4351-b680-140c3661cb59,LT0109_38/LT0109_38_381_87.tif,Interphase,shuffled_baseline,CP,unbalanced,0.01,0.0,ADCCM,0.027845
4,0008551d-e7f6-4351-b680-140c3661cb59,LT0109_38/LT0109_38_381_87.tif,Interphase,final,CP,balanced,1.0,0.8,Anaphase,0.275263


In [5]:
# Distinct Mitocheck phenotype labels, in order of first appearance in the table
phenotype_classes = loio_df["Mitocheck_Phenotypic_Class"].unique().tolist()
phenotype_classes

['Interphase',
 'MetaphaseAlignment',
 'OutOfFocus',
 'Elongated',
 'Apoptosis',
 'Prometaphase',
 'Large',
 'Polylobed',
 'SmallIrregular',
 'Binuclear',
 'Hole',
 'ADCCM',
 'Metaphase',
 'Anaphase',
 'Grape']

## Get average rank and predicted probability ("p value") of the correct prediction

- Per image
- Per model type (final vs. shuffled baseline)
- Per phenotype
- Per feature space
- Per balance type (balanced vs. unbalanced)

In [6]:
# Summarize the rank and predicted probability of the correct class per image (Metadata_DNA)
# Grouping keys: one summary row per unique combination of these columns
rank_groups = [
    "Metadata_DNA",
    "Model_Type",
    "Mitocheck_Phenotypic_Class",
    "Model_Feature_Type",
    "Model_Balance_Type",
]

# Names for the seven values returned by compute_avg_rank_and_pvalue, in order
output_data_columns = [
    "Average_Rank",
    "Average_P_Value",
    "Min_IQR_Rank",
    "Max_IQR_Rank",
    "Min_IQR_P_Value",
    "Max_IQR_P_Value",
    "Count",
]

# One tuple of summary statistics per group, held in a single column for now
avg_ranks = loio_df.groupby(rank_groups).apply(compute_avg_rank_and_pvalue).reset_index()
avg_ranks.columns = rank_groups + ["Average_Scores"]

# Expand the tuples into one column per statistic, placed beside the grouping keys
loio_scores_df = pd.concat(
    [
        avg_ranks[rank_groups],
        pd.DataFrame(avg_ranks["Average_Scores"].tolist(), columns=output_data_columns),
    ],
    axis=1,
)

loio_scores_df.to_csv(output_summary_file, sep="\t", index=False)

print(loio_scores_df.shape)
loio_scores_df.head()

(13180, 12)


Unnamed: 0,Metadata_DNA,Model_Type,Mitocheck_Phenotypic_Class,Model_Feature_Type,Model_Balance_Type,Average_Rank,Average_P_Value,Min_IQR_Rank,Max_IQR_Rank,Min_IQR_P_Value,Max_IQR_P_Value,Count
0,LT0003_40/LT0003_40_149_83.tif,final,Polylobed,CP,balanced,2.333333,0.591538,1.0,3.0,0.419277,0.887252,3
1,LT0003_40/LT0003_40_149_83.tif,final,Polylobed,CP,unbalanced,2.333333,0.653764,1.0,3.0,0.485112,0.980265,3
2,LT0003_40/LT0003_40_149_83.tif,final,Polylobed,CP_and_DP,balanced,2.0,0.629397,1.0,2.5,0.458653,0.943889,3
3,LT0003_40/LT0003_40_149_83.tif,final,Polylobed,CP_and_DP,unbalanced,2.0,0.65228,1.0,2.5,0.484353,0.97761,3
4,LT0003_40/LT0003_40_149_83.tif,final,Polylobed,CP_areashape_only,balanced,3.333333,0.107409,2.5,4.0,0.013583,0.161072,3


## Get average rank and predicted probability ("p value") per phenotype

- Per model type (final vs. shuffled baseline)
- Per phenotype
- Per feature space
- Per balance type (balanced vs. unbalanced)

(i.e., not on a per-image basis)

In [7]:
# Calculate average rank for each phenotype
# Drop the per-image key by rebinding to a new list rather than calling
# list.remove(): re-running this cell then stays a no-op instead of raising
# ValueError once "Metadata_DNA" is already gone.
rank_groups = [group for group in rank_groups if group != "Metadata_DNA"]

# One tuple of summary statistics per group, held in a single column for now
avg_ranks = (
    loio_df.groupby(rank_groups)
    .apply(compute_avg_rank_and_pvalue)
    .reset_index()
)

avg_ranks.columns = rank_groups + ["Average_Scores"]

# Expand the tuples into one column per statistic, placed beside the grouping keys
loio_scores_df = (
    pd.concat([
        avg_ranks.drop(columns="Average_Scores"),
        pd.DataFrame(
            avg_ranks.Average_Scores.tolist(),
            columns=output_data_columns
        )
    ], axis="columns")
)

loio_scores_df.to_csv(output_summary_phenotype_file, index=False, sep="\t")

print(loio_scores_df.shape)
loio_scores_df.head()

(300, 11)


Unnamed: 0,Model_Type,Mitocheck_Phenotypic_Class,Model_Feature_Type,Model_Balance_Type,Average_Rank,Average_P_Value,Min_IQR_Rank,Max_IQR_Rank,Min_IQR_P_Value,Max_IQR_P_Value,Count
0,final,ADCCM,CP,balanced,2.052632,0.521134,1.0,3.0,0.094614,0.873703,95
1,final,ADCCM,CP,unbalanced,2.263158,0.434702,1.0,3.0,0.039917,0.740537,95
2,final,ADCCM,CP_and_DP,balanced,2.084211,0.563454,1.0,2.0,0.125797,0.921281,95
3,final,ADCCM,CP_and_DP,unbalanced,2.252632,0.496073,1.0,3.0,0.090981,0.825156,95
4,final,ADCCM,CP_areashape_only,balanced,3.989474,0.19254,1.0,5.0,0.047352,0.317496,95
