# Calculate plate and data split evaluation results
Evaluation results include confusion matrices, precision-recall (PR) curves, precision, recall, F1 score, and accuracy.
These results are computed for each plate, and for all plates combined, within every data split.

In [1]:
import pathlib
from collections import defaultdict

import pandas as pd
from joblib import load
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_recall_curve,
    precision_score,
    recall_score,
)

## Find the root of the git repo on the host system

In [2]:
# Search the current working directory first, then each of its ancestors,
# for the first directory that contains a ".git" folder.
cwd = pathlib.Path.cwd()

root_dir = next(
    (folder for folder in (cwd, *cwd.parents) if (folder / ".git").is_dir()),
    None,
)

# Stop when neither the working directory nor any ancestor is a Git root
if root_dir is None:
    raise FileNotFoundError("No Git root directory found.")

## Define paths

### Input

In [3]:
# Folder, inside the repository, that holds the trained model artifacts
data_path = pathlib.Path(f"{root_dir}/1.train_models/classify_genotypes/data")

# Locations of the individual input files
eval_data_file = f"{data_path}/nf1_final_model_eval_data.parquet"
model_file = f"{data_path}/trained_nf1_model.joblib"
label_encoder_file = f"{data_path}/trained_nf1_model_label_encoder.joblib"

# Evaluation data, the trained model, and its label encoder
evaldf = pd.read_parquet(eval_data_file)
model = load(model_file)
le = load(label_encoder_file)

In [4]:
# Display the loaded evaluation data
evaldf

Unnamed: 0,datasplit,probability_WT,predicted_genotype,true_genotype,Metadata_Cells_Number_Object_Number,Metadata_ImageNumber,Metadata_Cells_Location_Center_Y,Metadata_Nuclei_Number_Object_Number,Metadata_WellRow,Metadata_Cytoplasm_Parent_Nuclei,...,Metadata_number_of_singlecells,Metadata_WellCol,Metadata_Plate,Metadata_Nuclei_Location_Center_Y,Metadata_Well,Metadata_Cells_Location_Center_X,Metadata_Nuclei_Location_Center_X,Metadata_Site,Metadata_gene_name,Metadata_Cytoplasm_Parent_Cells
0,shuffled_rest,0.865907,1,0,2.0,1177.0,317.145584,3.0,F,3.0,...,149,9,Plate_5,356.292837,F9,875.270222,902.972299,14,NF1,2.0
1,shuffled_rest,0.006202,0,0,5.0,32.0,318.523005,6.0,B,6.0,...,148,11,Plate_5,311.350248,B11,606.513594,615.165938,1,NF1,5.0
2,shuffled_rest,0.303340,0,0,1.0,290.0,564.884298,1.0,C,1.0,...,119,12,Plate_5,556.116579,C12,345.700749,394.213240,17,NF1,1.0
3,shuffled_rest,0.039801,0,0,9.0,1016.0,713.809386,9.0,F,9.0,...,125,12,Plate_5,700.850537,F12,276.998649,288.714100,6,NF1,9.0
4,shuffled_rest,0.057411,0,0,7.0,534.0,551.984447,9.0,D,9.0,...,106,12,Plate_5,582.018333,D12,924.846837,947.446667,16,NF1,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42735,test,0.544581,1,1,3.0,288.0,358.720822,4.0,C,4.0,...,95,11,Plate_4,376.879008,C11,708.548012,699.588793,9,NF1,3.0
42736,test,0.975218,1,1,3.0,270.0,669.784779,5.0,C,5.0,...,95,11,Plate_4,647.613644,C11,160.533679,137.917724,16,NF1,3.0
42737,test,0.347940,0,1,3.0,265.0,251.700852,5.0,C,5.0,...,95,11,Plate_4,221.622005,C11,563.757778,543.670216,11,NF1,3.0
42738,test,0.957261,1,1,2.0,277.0,226.483731,2.0,C,2.0,...,95,11,Plate_4,279.212354,C11,941.952479,898.655336,22,NF1,2.0


### Outputs

In [5]:
# Directory (relative to the working directory) that receives the evaluation outputs;
# it is created, together with any missing parents, when it does not exist yet
eval_path = pathlib.Path("model_evaluation_data")
eval_path.mkdir(parents=True, exist_ok=True)

In [6]:
gene_column = "true_genotype"

def down_sample_by_genotype(_df):
    """
    Down-sample the data so that every genotype has the same number of rows.

    Each genotype group is reduced to the size of the smallest group using a
    fixed random seed, so repeated calls on the same data select the same rows.

    Parameters
    ----------
    _df: Pandas Dataframe
        The data to be downsampled by the gene_column column.

    Returns
    -------
    Pandas Dataframe
        The data down-sampled by genotype, with the gene_column column retained.
        An empty dataframe is returned when there is nothing to sample.
    """

    min_gene = _df[gene_column].value_counts().min()

    # Sample each genotype group explicitly rather than through
    # groupby(...).apply(...): applying over the grouping column is deprecated
    # in recent pandas releases, whereas iterating over the groups always
    # yields frames that still contain gene_column.
    sampled_groups = [
        gene_df.sample(n=min_gene, random_state=0)
        for _, gene_df in _df.groupby(gene_column)
    ]

    # pd.concat raises on an empty list, so handle data without any group
    if not sampled_groups:
        return _df.iloc[0:0].copy()

    return pd.concat(sampled_groups)

## Calculate evaluation metrics

In [7]:
# Containers for the evaluation results, one per kind of output.
# "metrics" holds the precision, recall, accuracy, and f1 scores.
metric_groups = ("metrics", "precision_recall", "confusion_matrix")
eval_mets = {group: defaultdict(list) for group in metric_groups}

# Genotype names attached to the four flattened confusion matrix cells:
# the true label changes every two cells, the predicted label alternates.
first_class, second_class = le.classes_[0], le.classes_[1]

cm_true_labels = [first_class, first_class, second_class, second_class]

cm_pred_labels = [first_class, second_class] * 2

def compute_metrics(_df, _plate, _split):
    """
    Compute the evaluation results for one plate and data split.

    Nothing is returned; the results are appended to the module-level
    eval_mets dictionary ("metrics", "precision_recall", "confusion_matrix").

    Parameters
    ----------
    _df: Pandas Dataframe
        Model data to be evaluated. The gene_column, "predicted_genotype",
        and "probability_WT" columns are read.

    _plate: String
        Name of the plate for storing the metrics

    _split: String
        Name of the data split for storing the metric
    """

    y_true = _df[gene_column]
    y_pred = _df["predicted_genotype"]
    y_proba = _df["probability_WT"]

    # Store metrics
    scalar_mets = eval_mets["metrics"]
    scalar_mets["f1_score"].append(f1_score(y_true, y_pred))
    scalar_mets["precision"].append(precision_score(y_true, y_pred))
    scalar_mets["recall"].append(recall_score(y_true, y_pred))
    scalar_mets["accuracy"].append(accuracy_score(y_true, y_pred))
    scalar_mets["plate"].append(_plate)
    scalar_mets["datasplit"].append(_split)

    # Store precision and recall data (one row per point of the curve)
    precision, recall, _ = precision_recall_curve(y_true, y_proba)
    pr_size = precision.shape[0]
    pr_mets = eval_mets["precision_recall"]
    pr_mets["precision"].extend(precision.tolist())
    pr_mets["recall"].extend(recall.tolist())
    pr_mets["plate"].extend([_plate] * pr_size)
    pr_mets["datasplit"].extend([_split] * pr_size)

    # Store confusion matrices
    # The labels are fixed so the matrix is always 2x2 and lines up with the
    # four entries of cm_true_labels / cm_pred_labels, even when a subset
    # contains a single genotype in both the true and predicted labels.
    # NOTE(review): assumes genotypes are encoded as 0 and 1 in the order of
    # le.classes_ - confirm against the label encoder.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1]).flatten()
    cm_size = cm.shape[0]
    cm_mets = eval_mets["confusion_matrix"]
    cm_mets["confusion_values"].extend(cm.tolist())
    cm_mets["true_genotype"].extend(cm_true_labels)
    cm_mets["predicted_genotype"].extend(cm_pred_labels)
    cm_mets["plate"].extend([_plate] * cm_size)
    cm_mets["datasplit"].extend([_split] * cm_size)

In [8]:
# The set of plates does not depend on the split, so look it up once
plates = evaldf["Metadata_Plate"].unique()

# Iterate through each data split
for split in evaldf["datasplit"].unique():

    # Calculate metrics for all plates
    df_split = evaldf.loc[evaldf["datasplit"] == split]
    compute_metrics(df_split, "all_plates", split)

    # Calculate metrics for each plate
    for plate in plates:
        df_temp = df_split.loc[df_split["Metadata_Plate"] == plate]

        # A plate may have no rows in this split; there is nothing to evaluate
        if df_temp.empty:
            continue

        # Per-plate metrics are computed on genotype-balanced data
        df_temp = down_sample_by_genotype(df_temp)
        compute_metrics(df_temp, plate, split)

### Save evaluation metrics and model coefficients for plotting

In [9]:
# Write one parquet file per kind of evaluation result
for met, met_data in eval_mets.items():
    pd.DataFrame(met_data).to_parquet(f"{eval_path}/{met}_final_model.parquet")

# Save the model coefficients, flattened to one dimension, next to the feature names
pd.DataFrame(
    {
        "feature_names": model.feature_names_in_,
        "feature_importances": model.coef_.reshape(-1),
    }
).to_parquet(f"{eval_path}/feature_importances.parquet")