In [1]:
import pathlib
import sys

import numpy as np
import pandas as pd
import pyarrow.parquet as pq
import toml
import torch
from sklearn import preprocessing

sys.path.append("../..")
from MLP_utils.parameters import Parameters
from MLP_utils.utils import (
    Dataset_formatter,
    optimized_model_create,
    output_stats,
    parameter_set,
    results_output,
    test_optimized_model,
    un_nest,
)
from sklearn.metrics import (
    accuracy_score,
    auc,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
    roc_curve,
)

sys.path.append("../../..")

In [2]:
# Parameters
SHUFFLE_DATA = False
CELL_TYPE = "PBMC"
CONTROL_NAME = "DMSO_0.100_DMSO_0.025"
TREATMENT_NAME = "Thapsigargin_1.000_DMSO_0.025"
MODEL_NAME = "DMSO_0.025_vs_Thapsigargin_1"
SHUFFLE = False

In [3]:
ml_configs_file = pathlib.Path("../../MLP_utils/binary_config.toml").resolve(
    strict=True
)
ml_configs = toml.load(ml_configs_file)
params = Parameters()
mlp_params = parameter_set(params, ml_configs)

# overwrite mlp_params via command line arguments from papermill
mlp_params.CELL_TYPE = CELL_TYPE
mlp_params.MODEL_NAME = MODEL_NAME
mlp_params.CONTROL_NAME = CONTROL_NAME
mlp_params.TREATMENT_NAME = TREATMENT_NAME
mlp_params.MODEL_NAME = MODEL_NAME
mlp_params.SHUFFLE = SHUFFLE

In [4]:
# Import Data
# set data file path under pathlib path for multi-system use
file_path = pathlib.Path(
    f"../../../data/{mlp_params.CELL_TYPE}_preprocessed_sc_norm.parquet"
).resolve(strict=True)

df = pq.read_table(file_path).to_pandas()

In [5]:
def test_loop(df, output_name, title):
    # Code snippet for metadata extraction by Jenna Tomkinson
    df_metadata = list(df.columns[df.columns.str.startswith("Metadata")])

    # define which columns are data and which are descriptive
    df_descriptive = df[df_metadata]
    df_values = df.drop(columns=df_metadata)
    # Creating label encoder
    le = preprocessing.LabelEncoder()
    # Converting strings into numbers
    print(df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique().tolist())
    lst_of_treatments = (
        df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique().tolist()
    )

    df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"] = le.fit_transform(
        df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"]
    )
    print(df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique().tolist())
    lst_of_coded_treatments = (
        df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique().tolist()
    )
    # make a dictionary of the treatments and their corresponding codes to decode later
    dict_of_treatments = {}
    for i, j in zip(
        lst_of_coded_treatments,
        lst_of_treatments,
    ):
        dict_of_treatments[i] = j
    # split into X and Y where Y are the predictive column and x are the observable data
    df_values_X = df_values.drop(
        [
            "oneb_Metadata_Treatment_Dose_Inhibitor_Dose",
            "twob_Metadata_Treatment_Dose_Inhibitor_Dose",
            "threeb_Metadata_Treatment_Dose_Inhibitor_Dose",
            "fourb_Metadata_Treatment_Dose_Inhibitor_Dose",
        ],
        axis=1,
    )
    df_values_Y = df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"]
    test_data = Dataset_formatter(
        torch.FloatTensor(df_values_X.values), torch.FloatTensor(df_values_Y.values)
    )

    mlp_params.IN_FEATURES = df_values_X.shape[1]
    print("Number of in features: ", mlp_params.IN_FEATURES)
    if mlp_params.MODEL_TYPE == "Regression":
        mlp_params.OUT_FEATURES = 1
    else:
        mlp_params.OUT_FEATURES = len(
            df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique()
        )

    print("Number of out features: ", mlp_params.OUT_FEATURES)

    if mlp_params.OUT_FEATURES > 2:
        mlp_params.MODEL_TYPE = "Multi_Class"
    elif mlp_params.OUT_FEATURES == 2:
        mlp_params.OUT_FEATURES = mlp_params.OUT_FEATURES - 1
        mlp_params.MODEL_TYPE = "Binary_Classification"
    elif mlp_params.OUT_FEATURES == 1:
        mlp_params.MODEL_TYPE = "Regression"
    else:
        pass
    # convert data class into a dataloader to be compatible with pytorch
    test_loader = torch.utils.data.DataLoader(
        dataset=test_data, batch_size=1, shuffle=mlp_params.SHUFFLE
    )
    model, _ = optimized_model_create(mlp_params, mlp_params.MODEL_NAME)
    # calling the testing function and outputting list values of tested model
    if mlp_params.MODEL_TYPE == "Multi_Class" or mlp_params.MODEL_TYPE == "Regression":
        y_pred_list = test_optimized_model(
            model,
            test_loader,
            mlp_params,
            model_name=mlp_params.MODEL_NAME,
            shuffle=mlp_params.SHUFFLE,
        )
    elif mlp_params.MODEL_TYPE == "Binary_Classification":
        y_pred_list, y_pred_prob_list = test_optimized_model(
            model,
            test_loader,
            mlp_params,
            model_name=mlp_params.MODEL_NAME,
            shuffle=mlp_params.SHUFFLE,
        )
    else:
        raise Exception("Model type must be specified for proper model testing")

    # un-nest list if nested i.e. length of input data does not match length of output data
    if len(y_pred_list) != len(df_values_Y):
        y_pred_list = un_nest(y_pred_list)
        y_pred_prob_list = un_nest(y_pred_prob_list)
    else:
        pass

    stats, recall, precision, f1, precision_, recall_, threshold_ = output_stats(
        y_pred_list,
        df_values_Y,
        mlp_params,
        y_pred_prob_list,
        test_name=f"{output_name}_all_testing",
        model_name=mlp_params.MODEL_NAME,
        title=title,
        shuffle=mlp_params.SHUFFLE,
    )
    return (
        stats,
        recall,
        precision,
        f1,
        precision_,
        recall_,
        threshold_,
        dict_of_treatments,
    )

In [6]:
print(df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique().tolist())

['LPS_0.010_DMSO_0.025', 'LPS_0.100_DMSO_0.025', 'LPS_Nigericin_100.000_1.0_DMSO_0.025', 'LPS_Nigericin_100.000_3.0_DMSO_0.025', 'DMSO_0.100_DMSO_0.025', 'media_ctr_0_Media_ctr_0.0', 'DMSO_0.100_DMSO_1.0', 'LPS_1.000_DMSO_0.025', 'LPS_10.000_DMSO_0.025', 'LPS_Nigericin_100.000_10.0_DMSO_0.025', 'Disulfiram_0.100_DMSO_0.025', 'Thapsigargin_1.000_DMSO_0.025', 'Thapsigargin_10.000_DMSO_0.025', 'DMSO_0.100_Z-VAD-FMK_100.0', 'DMSO_0.100_Z-VAD-FMK_30.0', 'LPS_10.000_Disulfiram_0.1', 'LPS_10.000_Disulfiram_1.0', 'Disulfiram_1.000_DMSO_0.025', 'Disulfiram_2.5_DMSO_0.025', 'Disulfiram_2.500_DMSO_0.025', 'Topotecan_5.000_DMSO_0.025', 'Topotecan_10.000_DMSO_0.025', 'LPS_10.000_Disulfiram_2.5', 'LPS_10.000_Z-VAD-FMK_100.0', 'H2O2_100.000_DMSO_0.025', 'Topotecan_20.000_DMSO_0.025', 'LPS_100.000_DMSO_0.025', 'LPS_Nigericin_1.000_1.0_DMSO_0.025', 'H2O2_100.000_Disulfiram_1.0', 'H2O2_100.000_Z-VAD-FMK_100.0', 'LPS_Nigericin_1.000_3.0_DMSO_0.025', 'LPS_Nigericin_1.000_10.0_DMSO_0.025', 'Flagellin_0.100

In [7]:
paired_treatment_list = [
    ["DMSO_0.100_DMSO_0.025", "LPS_100.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "Thapsigargin_1.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "Thapsigargin_10.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_0.100_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_1.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_10.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_100.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "Flagellin_0.100_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "Flagellin_1.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "Flagellin_1.000_Disulfiram_1.0"],
    ["DMSO_0.100_DMSO_0.025", "LPS_Nigericin_100.000_1.0_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_Nigericin_100.000_3.0_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_Nigericin_100.000_10.0_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_Nigericin_1.000_1.0_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_Nigericin_1.000_3.0_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_Nigericin_1.000_10.0_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "H2O2_100.000_Z-VAD-FMK_100.0"],
    ["DMSO_0.100_DMSO_0.025", "H2O2_100.000_DMSO_0.025"],
    ["LPS_100.000_DMSO_0.025", "Thapsigargin_1.000_DMSO_0.025"],
    ["LPS_100.000_DMSO_0.025", "Thapsigargin_10.000_DMSO_0.025"],
    ["LPS_10.000_DMSO_0.025", "Thapsigargin_1.000_DMSO_0.025"],
    ["LPS_10.000_DMSO_0.025", "Thapsigargin_10.000_DMSO_0.025"],
    ["LPS_1.000_DMSO_0.025", "Thapsigargin_1.000_DMSO_0.025"],
    ["LPS_1.000_DMSO_0.025", "Thapsigargin_10.000_DMSO_0.025"],
    ["LPS_0.100_DMSO_0.025", "Thapsigargin_1.000_DMSO_0.025"],
    ["LPS_0.100_DMSO_0.025", "Thapsigargin_10.000_DMSO_0.025"],
    ["LPS_0.010_DMSO_0.025", "Thapsigargin_1.000_DMSO_0.025"],
    ["LPS_0.010_DMSO_0.025", "Thapsigargin_10.000_DMSO_0.025"],
]

In [8]:
# create a dataframe to store the model stats
model_stats_df = pd.DataFrame(
    columns=[
        "treatments_tested",
        "model",
        "group",
        "shuffled_data",
        "PR_Threshold",
        "Precision",
        "Recall",
    ]
)
model_stats_df

Unnamed: 0,treatments_tested,model,group,shuffled_data,PR_Threshold,Precision,Recall


In [9]:
for i in paired_treatment_list:
    # filter df to only include the two treatments to test
    test_df = df.query(
        f"oneb_Metadata_Treatment_Dose_Inhibitor_Dose == '{i[0]}' | oneb_Metadata_Treatment_Dose_Inhibitor_Dose == '{i[1]}'"
    )
    output_name = ("__").join(
        test_df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique()
    )

    print(output_name)

    title = f'{output_name.split("__")[0].split("_")[0]} vs {("__").join(output_name.split("__")[1].split("_")[:2])}'
    print(title)
    (
        stats,
        recall,
        precision,
        f1,
        precision_,
        recall_,
        threshold_,
        dict_of_treatments,
    ) = test_loop(test_df, output_name, title)
    print(recall, precision, f1)

    threshold_ = np.append(threshold_, None)
    stats_df = pd.DataFrame(
        {
            "PR_Threshold": threshold_,
            "Precision": precision_,
            "Recall": recall_,
        }
    )

    stats_df["treatments_tested"] = "0 vs 1"
    # make it so that the second treatment is always the one that is being tested as the positive label
    stats_df["treatments_tested"] = stats_df["treatments_tested"].replace(
        "0 vs 1", f"{dict_of_treatments[0]} vs {dict_of_treatments[1]}"
    )
    stats_df["model"] = mlp_params.MODEL_NAME
    stats_df["group"] = "test"
    stats_df["shuffled_data"] = mlp_params.SHUFFLE
    stats_df
    model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)

DMSO_0.100_DMSO_0.025__LPS_100.000_DMSO_0.025
DMSO vs LPS__100.000


['DMSO_0.100_DMSO_0.025', 'LPS_100.000_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.69      0.91      0.78    215130
           1       0.79      0.44      0.57    159435

    accuracy                           0.71    374565
   macro avg       0.74      0.68      0.67    374565
weighted avg       0.73      0.71      0.69    374565



0.439364004139618 0.7916864447006171 0.5651084838877527


DMSO_0.100_DMSO_0.025__Thapsigargin_1.000_DMSO_0.025
DMSO vs Thapsigargin__1.000


['DMSO_0.100_DMSO_0.025', 'Thapsigargin_1.000_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.87      0.91      0.89    215130
           1       0.87      0.81      0.83    148456

    accuracy                           0.87    363586
   macro avg       0.87      0.86      0.86    363586
weighted avg       0.87      0.87      0.87    363586



0.8054036212749905 0.8664338147377879 0.8348047686373078


DMSO_0.100_DMSO_0.025__Thapsigargin_10.000_DMSO_0.025
DMSO vs Thapsigargin__10.000


['DMSO_0.100_DMSO_0.025', 'Thapsigargin_10.000_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.86      0.91      0.89    215130
           1       0.88      0.81      0.84    166984

    accuracy                           0.87    382114
   macro avg       0.87      0.86      0.87    382114
weighted avg       0.87      0.87      0.87    382114



0.8115807502515211 0.8802751489090826 0.8445333507822408


LPS_0.100_DMSO_0.025__DMSO_0.100_DMSO_0.025
LPS vs DMSO__0.100


['LPS_0.100_DMSO_0.025', 'DMSO_0.100_DMSO_0.025']
[1, 0]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.75      0.91      0.82    215130
           1       0.80      0.52      0.63    138597

    accuracy                           0.76    353727
   macro avg       0.77      0.72      0.73    353727
weighted avg       0.77      0.76      0.75    353727



0.5215047944760709 0.796805238614942 0.6304097545659114


DMSO_0.100_DMSO_0.025__LPS_1.000_DMSO_0.025
DMSO vs LPS__1.000


['DMSO_0.100_DMSO_0.025', 'LPS_1.000_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.79      0.91      0.84    215130
           1       0.82      0.61      0.70    139433

    accuracy                           0.80    354563
   macro avg       0.80      0.76      0.77    354563
weighted avg       0.80      0.80      0.79    354563



0.6146106015075341 0.8229887927474575 0.7036976211395868


DMSO_0.100_DMSO_0.025__LPS_10.000_DMSO_0.025
DMSO vs LPS__10.000


['DMSO_0.100_DMSO_0.025', 'LPS_10.000_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.80      0.91      0.85    215130
           1       0.85      0.68      0.75    156490

    accuracy                           0.81    371620
   macro avg       0.82      0.80      0.80    371620
weighted avg       0.82      0.81      0.81    371620



0.6768036296248962 0.8517672604447304 0.7542720814713265


DMSO_0.100_DMSO_0.025__LPS_100.000_DMSO_0.025
DMSO vs LPS__100.000


['DMSO_0.100_DMSO_0.025', 'LPS_100.000_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.69      0.91      0.78    215130
           1       0.79      0.44      0.57    159435

    accuracy                           0.71    374565
   macro avg       0.74      0.68      0.67    374565
weighted avg       0.73      0.71      0.69    374565



0.439364004139618 0.7916864447006171 0.5651084838877527


DMSO_0.100_DMSO_0.025__Flagellin_0.100_DMSO_0.025
DMSO vs Flagellin__0.100


['DMSO_0.100_DMSO_0.025', 'Flagellin_0.100_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.79      0.91      0.85    215130
           1       0.71      0.46      0.56     95915

    accuracy                           0.77    311045
   macro avg       0.75      0.69      0.70    311045
weighted avg       0.77      0.77      0.76    311045



0.4603763749152896 0.7055073575228874 0.557172058749306


DMSO_0.100_DMSO_0.025__Flagellin_1.000_DMSO_0.025
DMSO vs Flagellin__1.000


['DMSO_0.100_DMSO_0.025', 'Flagellin_1.000_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.80      0.91      0.85    215130
           1       0.74      0.51      0.61    104547

    accuracy                           0.78    319677
   macro avg       0.77      0.71      0.73    319677
weighted avg       0.78      0.78      0.77    319677



0.5148784757094895 0.7449246481504546 0.608897787430433


DMSO_0.100_DMSO_0.025__Flagellin_1.000_Disulfiram_1.0
DMSO vs Flagellin__1.000


['DMSO_0.100_DMSO_0.025', 'Flagellin_1.000_Disulfiram_1.0']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.88      0.91      0.90    215130
           1       0.87      0.82      0.84    145552

    accuracy                           0.88    360682
   macro avg       0.87      0.87      0.87    360682
weighted avg       0.88      0.88      0.88    360682



0.8190680993734198 0.8660941961074908 0.8419249932027076


LPS_Nigericin_100.000_1.0_DMSO_0.025__DMSO_0.100_DMSO_0.025
LPS vs DMSO__0.100


['LPS_Nigericin_100.000_1.0_DMSO_0.025', 'DMSO_0.100_DMSO_0.025']
[1, 0]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.75      0.91      0.83    215130
           1       0.85      0.63      0.72    170659

    accuracy                           0.79    385789
   macro avg       0.80      0.77      0.77    385789
weighted avg       0.80      0.79      0.78    385789



0.6255163806186606 0.8527583837931971 0.7216714383739914


LPS_Nigericin_100.000_3.0_DMSO_0.025__DMSO_0.100_DMSO_0.025
LPS vs DMSO__0.100


['LPS_Nigericin_100.000_3.0_DMSO_0.025', 'DMSO_0.100_DMSO_0.025']
[1, 0]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.76      0.91      0.83    215130
           1       0.87      0.67      0.76    184854

    accuracy                           0.80    399984
   macro avg       0.82      0.79      0.80    399984
weighted avg       0.81      0.80      0.80    399984



0.6729851666720763 0.87095690162144 0.7592785864689188


DMSO_0.100_DMSO_0.025__LPS_Nigericin_100.000_10.0_DMSO_0.025
DMSO vs LPS__Nigericin


['DMSO_0.100_DMSO_0.025', 'LPS_Nigericin_100.000_10.0_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.70      0.91      0.80    215130
           1       0.84      0.53      0.65    176090

    accuracy                           0.74    391220
   macro avg       0.77      0.72      0.72    391220
weighted avg       0.76      0.74      0.73    391220



0.5308648986313816 0.8352991636285653 0.6491621585961208


DMSO_0.100_DMSO_0.025__LPS_Nigericin_1.000_1.0_DMSO_0.025
DMSO vs LPS__Nigericin


['DMSO_0.100_DMSO_0.025', 'LPS_Nigericin_1.000_1.0_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.64      0.91      0.75    215130
           1       0.75      0.33      0.46    166013

    accuracy                           0.66    381143
   macro avg       0.69      0.62      0.60    381143
weighted avg       0.69      0.66      0.62    381143



0.3289621897080349 0.747658945293248 0.4568952174585978


DMSO_0.100_DMSO_0.025__LPS_Nigericin_1.000_3.0_DMSO_0.025
DMSO vs LPS__Nigericin


['DMSO_0.100_DMSO_0.025', 'LPS_Nigericin_1.000_3.0_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.62      0.91      0.74    215130
           1       0.75      0.32      0.45    174261

    accuracy                           0.65    389391
   macro avg       0.69      0.62      0.60    389391
weighted avg       0.68      0.65      0.61    389391



0.31963548929479346 0.7513624345761615 0.4484828477453068


DMSO_0.100_DMSO_0.025__LPS_Nigericin_1.000_10.0_DMSO_0.025
DMSO vs LPS__Nigericin


['DMSO_0.100_DMSO_0.025', 'LPS_Nigericin_1.000_10.0_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.59      0.91      0.72    215130
           1       0.71      0.25      0.38    181166

    accuracy                           0.61    396296
   macro avg       0.65      0.58      0.55    396296
weighted avg       0.65      0.61      0.56    396296



0.25452347570736233 0.7144229428443053 0.3753301669861503


DMSO_0.100_DMSO_0.025__H2O2_100.000_Z-VAD-FMK_100.0
DMSO vs H2O2__100.000


['DMSO_0.100_DMSO_0.025', 'H2O2_100.000_Z-VAD-FMK_100.0']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.90      0.91      0.91    215130
           1       0.89      0.87      0.88    173437

    accuracy                           0.89    388567
   macro avg       0.89      0.89      0.89    388567
weighted avg       0.89      0.89      0.89    388567



0.8709444928129523 0.8912476546735424 0.880979111928917


DMSO_0.100_DMSO_0.025__H2O2_100.000_DMSO_0.025
DMSO vs H2O2__100.000


['DMSO_0.100_DMSO_0.025', 'H2O2_100.000_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.50      0.91      0.64    215130
           1       0.69      0.17      0.28    240536

    accuracy                           0.52    455666
   macro avg       0.60      0.54      0.46    455666
weighted avg       0.60      0.52      0.45    455666



0.1733545082648751 0.6934641609845336 0.2773709032614263


Thapsigargin_1.000_DMSO_0.025__LPS_100.000_DMSO_0.025
Thapsigargin vs LPS__100.000


['Thapsigargin_1.000_DMSO_0.025', 'LPS_100.000_DMSO_0.025']
[1, 0]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.76      0.56      0.64    159435
           1       0.63      0.81      0.71    148456

    accuracy                           0.68    307891
   macro avg       0.69      0.68      0.68    307891
weighted avg       0.70      0.68      0.67    307891



0.8054036212749905 0.6305710985829329 0.7073442717992859


Thapsigargin_10.000_DMSO_0.025__LPS_100.000_DMSO_0.025
Thapsigargin vs LPS__100.000


['Thapsigargin_10.000_DMSO_0.025', 'LPS_100.000_DMSO_0.025']
[1, 0]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.74      0.56      0.64    159435
           1       0.66      0.81      0.73    166984

    accuracy                           0.69    326419
   macro avg       0.70      0.69      0.68    326419
weighted avg       0.70      0.69      0.68    326419



0.8115807502515211 0.6592418191281845 0.7275221108292735


LPS_10.000_DMSO_0.025__Thapsigargin_1.000_DMSO_0.025
LPS vs Thapsigargin__1.000


['LPS_10.000_DMSO_0.025', 'Thapsigargin_1.000_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.64      0.32      0.43    156490
           1       0.53      0.81      0.64    148456

    accuracy                           0.56    304946
   macro avg       0.58      0.56      0.53    304946
weighted avg       0.58      0.56      0.53    304946



0.8054036212749905 0.5302776299450062 0.6395051559625177


LPS_10.000_DMSO_0.025__Thapsigargin_10.000_DMSO_0.025
LPS vs Thapsigargin__10.000


['LPS_10.000_DMSO_0.025', 'Thapsigargin_10.000_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.62      0.32      0.42    156490
           1       0.56      0.81      0.66    166984

    accuracy                           0.58    323474
   macro avg       0.59      0.57      0.54    323474
weighted avg       0.59      0.58      0.55    323474



0.8115807502515211 0.5613169644706214 0.6636387230729303


LPS_1.000_DMSO_0.025__Thapsigargin_1.000_DMSO_0.025
LPS vs Thapsigargin__1.000


['LPS_1.000_DMSO_0.025', 'Thapsigargin_1.000_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.65      0.39      0.48    139433
           1       0.58      0.81      0.68    148456

    accuracy                           0.60    287889
   macro avg       0.62      0.60      0.58    287889
weighted avg       0.62      0.60      0.58    287889



0.8054036212749905 0.5825035076779173 0.6760545063892345


LPS_1.000_DMSO_0.025__Thapsigargin_10.000_DMSO_0.025
LPS vs Thapsigargin__10.000


['LPS_1.000_DMSO_0.025', 'Thapsigargin_10.000_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.63      0.39      0.48    139433
           1       0.61      0.81      0.70    166984

    accuracy                           0.62    306417
   macro avg       0.62      0.60      0.59    306417
weighted avg       0.62      0.62      0.60    306417



0.8115807502515211 0.6126128976846369 0.6981983606472919


LPS_0.100_DMSO_0.025__Thapsigargin_1.000_DMSO_0.025
LPS vs Thapsigargin__1.000


['LPS_0.100_DMSO_0.025', 'Thapsigargin_1.000_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.70      0.48      0.57    138597
           1       0.62      0.81      0.70    148456

    accuracy                           0.65    287053
   macro avg       0.66      0.64      0.64    287053
weighted avg       0.66      0.65      0.64    287053



0.8054036212749905 0.623244685841769 0.7027111212981412


LPS_0.100_DMSO_0.025__Thapsigargin_10.000_DMSO_0.025
LPS vs Thapsigargin__10.000


['LPS_0.100_DMSO_0.025', 'Thapsigargin_10.000_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.68      0.48      0.56    138597
           1       0.65      0.81      0.72    166984

    accuracy                           0.66    305581
   macro avg       0.67      0.65      0.64    305581
weighted avg       0.66      0.66      0.65    305581



0.8115807502515211 0.6521703561116459 0.7231952271174864


LPS_0.010_DMSO_0.025__Thapsigargin_1.000_DMSO_0.025
LPS vs Thapsigargin__1.000


['LPS_0.010_DMSO_0.025', 'Thapsigargin_1.000_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.75      0.59      0.66    143080
           1       0.67      0.81      0.73    148456

    accuracy                           0.70    291536
   macro avg       0.71      0.70      0.70    291536
weighted avg       0.71      0.70      0.70    291536



0.8054036212749905 0.6714644323267946 0.7323604624454482


LPS_0.010_DMSO_0.025__Thapsigargin_10.000_DMSO_0.025
LPS vs Thapsigargin__10.000


['LPS_0.010_DMSO_0.025', 'Thapsigargin_10.000_DMSO_0.025']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.73      0.59      0.65    143080
           1       0.70      0.81      0.75    166984

    accuracy                           0.71    310064
   macro avg       0.71      0.70      0.70    310064
weighted avg       0.71      0.71      0.71    310064



0.8115807502515211 0.6984790462986347 0.7507943059275859


In [10]:
model_stats_df

Unnamed: 0,treatments_tested,model,group,shuffled_data,PR_Threshold,Precision,Recall
0,DMSO_0.100_DMSO_0.025 vs LPS_100.000_DMSO_0.025,DMSO_0.025_vs_Thapsigargin_1,test,False,0.0,0.425654,1.000000
1,DMSO_0.100_DMSO_0.025 vs LPS_100.000_DMSO_0.025,DMSO_0.025_vs_Thapsigargin_1,test,False,0.0,0.425716,0.999586
2,DMSO_0.100_DMSO_0.025 vs LPS_100.000_DMSO_0.025,DMSO_0.025_vs_Thapsigargin_1,test,False,0.0,0.425715,0.999580
3,DMSO_0.100_DMSO_0.025 vs LPS_100.000_DMSO_0.025,DMSO_0.025_vs_Thapsigargin_1,test,False,0.0,0.425716,0.999580
4,DMSO_0.100_DMSO_0.025 vs LPS_100.000_DMSO_0.025,DMSO_0.025_vs_Thapsigargin_1,test,False,0.0,0.425714,0.999573
...,...,...,...,...,...,...,...
278227,LPS_0.010_DMSO_0.025 vs Thapsigargin_10.000_DM...,DMSO_0.025_vs_Thapsigargin_1,test,False,1.0,0.772727,0.000102
278228,LPS_0.010_DMSO_0.025 vs Thapsigargin_10.000_DM...,DMSO_0.025_vs_Thapsigargin_1,test,False,1.0,0.761905,0.000096
278229,LPS_0.010_DMSO_0.025 vs Thapsigargin_10.000_DM...,DMSO_0.025_vs_Thapsigargin_1,test,False,1.0,0.750000,0.000090
278230,LPS_0.010_DMSO_0.025 vs Thapsigargin_10.000_DM...,DMSO_0.025_vs_Thapsigargin_1,test,False,1.0,0.764706,0.000078


In [11]:
# set path for the model training metrics
metrics_path = pathlib.Path(
    f"../../results/{mlp_params.MODEL_TYPE}/{mlp_params.MODEL_NAME}/{mlp_params.CELL_TYPE}"
)
metrics_path.mkdir(parents=True, exist_ok=True)
# check if the model training metrics file exists
metrics_file = pathlib.Path(f"{metrics_path}/testing_metrics.csv")
if metrics_file.exists():
    metrics_df = pd.read_csv(metrics_file)
    if len(metrics_df["shuffled_data"].unique()) > 1:
        pass
    elif metrics_df["shuffled_data"].unique() == mlp_params.SHUFFLE:
        pass
    else:
        metrics_df = pd.concat([metrics_df, model_stats_df], axis=0)
        metrics_df.to_csv(metrics_file, index=False)
else:
    model_stats_df.to_csv(metrics_file, index=False)