In [1]:
import argparse
import ast
import pathlib
import sys

import numpy as np
import pandas as pd
import pyarrow.parquet as pq
import toml
import torch
from sklearn import preprocessing

sys.path.append("../..")
from MLP_utils.parameters import Parameters
from MLP_utils.utils import (
    Dataset_formatter,
    optimized_model_create,
    output_stats,
    parameter_set,
    results_output,
    test_optimized_model,
    un_nest,
)
from sklearn.metrics import (
    accuracy_score,
    auc,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
    roc_curve,
)

sys.path.append("../../..")

In [None]:
# set up the argument parser
argparser = argparse.ArgumentParser(description="MLP binary classification testing")

argparser.add_argument(
    "--cell_type",
    type=str,
    default="cells",
    help="The type of data to be used.",
)   
argparser.add_argument(
    "--control_name",
    type=str,
    default="control",
    help="The name of the control condition.",
)
argparser.add_argument(
    "--treatment_name",
    type=str,
    default="treatment",
    help="The name of the treatment condition.",
)
argparser.add_argument(
    "--shuffle",
    type=str
    default="False",
    help="Whether to shuffle the data before training.",
)

# get the arguments
args = argparser.parse_args()

CELL_TYPE = args.cell_type
CONTROL_NAME = args.control_name
TREATMENT_NAME = args.treatment_name
SHUFFLE = ast.literal_eval(args.shuffle)
print(f"CELL_TYPE: {CELL_TYPE} CONTROL_NAME: {CONTROL_NAME} TREATMENT_NAME: {TREATMENT_NAME} SHUFFLE: {SHUFFLE}")

In [3]:
MODEL_NAME = CONTROL_NAME + "_vs_" + TREATMENT_NAME

In [4]:
ml_configs_file = pathlib.Path("../../MLP_utils/binary_config.toml").resolve(
    strict=True
)
ml_configs = toml.load(ml_configs_file)
params = Parameters()
mlp_params = parameter_set(params, ml_configs)

# overwrite mlp_params via command line arguments from papermill
mlp_params.CELL_TYPE = CELL_TYPE
mlp_params.MODEL_NAME = MODEL_NAME
mlp_params.CONTROL_NAME = CONTROL_NAME
mlp_params.TREATMENT_NAME = TREATMENT_NAME
mlp_params.MODEL_NAME = MODEL_NAME
mlp_params.SHUFFLE = SHUFFLE

In [5]:
# Import Data
# set data file path under pathlib path for multi-system use
file_path = pathlib.Path(
    f"../../../data/{mlp_params.CELL_TYPE}_preprocessed_sc_norm.parquet"
).resolve(strict=True)

df = pq.read_table(file_path).to_pandas()

In [6]:
def test_loop(df, output_name, title, mlp_params):
    # Code snippet for metadata extraction by Jenna Tomkinson
    df_metadata = list(df.columns[df.columns.str.startswith("Metadata")])

    # define which columns are data and which are descriptive
    df_descriptive = df[df_metadata]
    df_values = df.drop(columns=df_metadata)
    # Creating label encoder
    le = preprocessing.LabelEncoder()
    # Converting strings into numbers
    print(df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique().tolist())
    lst_of_treatments = (
        df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique().tolist()
    )

    df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"] = le.fit_transform(
        df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"]
    )
    print(df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique().tolist())
    lst_of_coded_treatments = (
        df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique().tolist()
    )
    # make a dictionary of the treatments and their corresponding codes to decode later
    dict_of_treatments = {}
    for i, j in zip(
        lst_of_coded_treatments,
        lst_of_treatments,
    ):
        dict_of_treatments[i] = j
    # split into X and Y where Y are the predictive column and x are the observable data
    df_values_X = df_values.drop(
        [
            "oneb_Metadata_Treatment_Dose_Inhibitor_Dose",
            "twob_Metadata_Treatment_Dose_Inhibitor_Dose",
            "threeb_Metadata_Treatment_Dose_Inhibitor_Dose",
            "fourb_Metadata_Treatment_Dose_Inhibitor_Dose",
        ],
        axis=1,
    )
    df_values_Y = df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"]
    test_data = Dataset_formatter(
        torch.FloatTensor(df_values_X.values), torch.FloatTensor(df_values_Y.values)
    )

    mlp_params.IN_FEATURES = df_values_X.shape[1]
    print("Number of in features: ", mlp_params.IN_FEATURES)
    if mlp_params.MODEL_TYPE == "Regression":
        mlp_params.OUT_FEATURES = 1
    else:
        mlp_params.OUT_FEATURES = len(
            df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique()
        )

    print("Number of out features: ", mlp_params.OUT_FEATURES)

    if mlp_params.OUT_FEATURES > 2:
        mlp_params.MODEL_TYPE = "Multi_Class"
    elif mlp_params.OUT_FEATURES == 2:
        mlp_params.OUT_FEATURES = mlp_params.OUT_FEATURES - 1
        mlp_params.MODEL_TYPE = "Binary_Classification"
    elif mlp_params.OUT_FEATURES == 1:
        mlp_params.MODEL_TYPE = "Regression"
    else:
        pass
    # convert data class into a dataloader to be compatible with pytorch
    test_loader = torch.utils.data.DataLoader(
        dataset=test_data, batch_size=1, shuffle=mlp_params.SHUFFLE
    )
    model, _ = optimized_model_create(mlp_params, mlp_params.MODEL_NAME)
    # calling the testing function and outputting list values of tested model
    if mlp_params.MODEL_TYPE == "Multi_Class" or mlp_params.MODEL_TYPE == "Regression":
        y_pred_list = test_optimized_model(
            model,
            test_loader,
            mlp_params,
            model_name=mlp_params.MODEL_NAME,
            shuffle=mlp_params.SHUFFLE,
        )
    elif mlp_params.MODEL_TYPE == "Binary_Classification":
        y_pred_list, y_pred_prob_list = test_optimized_model(
            model,
            test_loader,
            mlp_params,
            model_name=mlp_params.MODEL_NAME,
            shuffle=mlp_params.SHUFFLE,
        )
    else:
        raise Exception("Model type must be specified for proper model testing")

    # un-nest list if nested i.e. length of input data does not match length of output data
    if len(y_pred_list) != len(df_values_Y):
        y_pred_list = un_nest(y_pred_list)
        y_pred_prob_list = un_nest(y_pred_prob_list)
    else:
        pass

    stats, recall, precision, f1, precision_, recall_, threshold_ = output_stats(
        y_pred_list,
        df_values_Y,
        mlp_params,
        y_pred_prob_list,
        test_name=f"{output_name}_all_testing",
        model_name=mlp_params.MODEL_NAME,
        title=title,
        shuffle=mlp_params.SHUFFLE,
    )
    return (
        stats,
        recall,
        precision,
        f1,
        precision_,
        recall_,
        threshold_,
        dict_of_treatments,
    )

In [7]:
print(df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique().tolist())

['media_ctr_0_Media_ctr_0.0', 'DMSO_0.100_DMSO_1.0', 'DMSO_0.100_Z-VAD-FMK_100.0', 'DMSO_0.100_Z-VAD-FMK_30.0', 'DMSO_0.100_DMSO_0.025', 'Thapsigargin_1.000_DMSO_0.025', 'Thapsigargin_10.000_DMSO_0.025', 'Topotecan_5.000_DMSO_0.025', 'Topotecan_10.000_DMSO_0.025', 'Topotecan_20.000_DMSO_0.025', 'LPS_0.010_DMSO_0.025', 'LPS_0.100_DMSO_0.025', 'LPS_1.000_DMSO_0.025', 'LPS_10.000_DMSO_0.025', 'LPS_10.000_Disulfiram_0.1', 'LPS_10.000_Disulfiram_1.0', 'LPS_10.000_Disulfiram_2.5', 'LPS_Nigericin_100.000_1.0_DMSO_0.025', 'LPS_Nigericin_100.000_3.0_DMSO_0.025', 'LPS_Nigericin_100.000_10.0_DMSO_0.025', 'Disulfiram_0.100_DMSO_0.025', 'Disulfiram_1.000_DMSO_0.025', 'Disulfiram_2.500_DMSO_0.025', 'H2O2_100.000_DMSO_0.025', 'LPS_10.000_Z-VAD-FMK_100.0', 'LPS_100.000_DMSO_0.025', 'LPS_Nigericin_1.000_1.0_DMSO_0.025', 'LPS_Nigericin_1.000_3.0_DMSO_0.025', 'LPS_Nigericin_1.000_10.0_DMSO_0.025', 'LPS_Nigericin_1.000_10.0_Disulfiram_1.0', 'LPS_Nigericin_1.000_10.0_Z-VAD-FMK_100.0', 'H2O2_100.000_Disulfi

In [8]:
paired_treatment_list = [
    ["DMSO_0.100_DMSO_0.025", "LPS_100.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "Thapsigargin_1.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "Thapsigargin_10.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_0.100_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_1.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_10.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_100.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "Flagellin_0.100_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "Flagellin_1.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "Flagellin_1.000_Disulfiram_1.0"],
    ["DMSO_0.100_DMSO_0.025", "LPS_Nigericin_100.000_1.0_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_Nigericin_100.000_3.0_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_Nigericin_100.000_10.0_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_Nigericin_1.000_1.0_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_Nigericin_1.000_3.0_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_Nigericin_1.000_10.0_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "H2O2_100.000_Z-VAD-FMK_100.0"],
    ["DMSO_0.100_DMSO_0.025", "H2O2_100.000_DMSO_0.025"],
    ["LPS_100.000_DMSO_0.025", "Thapsigargin_1.000_DMSO_0.025"],
    ["LPS_100.000_DMSO_0.025", "Thapsigargin_10.000_DMSO_0.025"],
    ["LPS_10.000_DMSO_0.025", "Thapsigargin_1.000_DMSO_0.025"],
    ["LPS_10.000_DMSO_0.025", "Thapsigargin_10.000_DMSO_0.025"],
    ["LPS_1.000_DMSO_0.025", "Thapsigargin_1.000_DMSO_0.025"],
    ["LPS_1.000_DMSO_0.025", "Thapsigargin_10.000_DMSO_0.025"],
    ["LPS_0.100_DMSO_0.025", "Thapsigargin_1.000_DMSO_0.025"],
    ["LPS_0.100_DMSO_0.025", "Thapsigargin_10.000_DMSO_0.025"],
    ["LPS_0.010_DMSO_0.025", "Thapsigargin_1.000_DMSO_0.025"],
    ["LPS_0.010_DMSO_0.025", "Thapsigargin_10.000_DMSO_0.025"],
]

In [9]:
# create a dataframe to store the model stats
model_stats_df = pd.DataFrame(
    columns=[
        "treatments_tested",
        "model",
        "group",
        "shuffled_data",
        "PR_Threshold",
        "Precision",
        "Recall",
    ]
)
model_stats_df

Unnamed: 0,treatments_tested,model,group,shuffled_data,PR_Threshold,Precision,Recall


In [10]:
for i in paired_treatment_list:
    # filter df to only include the two treatments to test
    test_df = df.query(
        f"oneb_Metadata_Treatment_Dose_Inhibitor_Dose == '{i[0]}' | oneb_Metadata_Treatment_Dose_Inhibitor_Dose == '{i[1]}'"
    )
    output_name = ("__").join(
        test_df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique()
    )

    print(output_name)

    title = f'{output_name.split("__")[0].split("_")[0]} vs {("__").join(output_name.split("__")[1].split("_")[:2])}'
    print(title)
    (
        stats,
        recall,
        precision,
        f1,
        precision_,
        recall_,
        threshold_,
        dict_of_treatments,
    ) = test_loop(test_df, output_name, title, mlp_params)
    print(recall, precision, f1)

    threshold_ = np.append(threshold_, None)
    stats_df = pd.DataFrame(
        {
            "PR_Threshold": threshold_,
            "Precision": precision_,
            "Recall": recall_,
        }
    )

    stats_df["treatments_tested"] = "0 vs 1"
    # make it so that the second treatment is always the one that is being tested as the positive label
    stats_df["treatments_tested"] = stats_df["treatments_tested"].replace(
        "0 vs 1", f"{dict_of_treatments[0]} vs {dict_of_treatments[1]}"
    )
    stats_df["model"] = mlp_params.MODEL_NAME
    stats_df["group"] = "test"
    stats_df["shuffled_data"] = mlp_params.SHUFFLE
    stats_df
    model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)

DMSO_0.100_DMSO_0.025__LPS_100.000_DMSO_0.025
DMSO vs LPS__100.000
['DMSO_0.100_DMSO_0.025', 'LPS_100.000_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.69      0.69      0.69     35643
           1       0.31      0.31      0.31     15987

    accuracy                           0.57     51630
   macro avg       0.50      0.50      0.50     51630
weighted avg       0.57      0.57      0.57     51630

0.3102520798148496 0.3090342679127726 0.3096419764647127
DMSO_0.100_DMSO_0.025__Thapsigargin_1.000_DMSO_0.025
DMSO vs Thapsigargin__1.000
['DMSO_0.100_DMSO_0.025', 'Thapsigargin_1.000_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shu

  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


DMSO_0.100_DMSO_0.025__Thapsigargin_10.000_DMSO_0.025
DMSO vs Thapsigargin__10.000
['DMSO_0.100_DMSO_0.025', 'Thapsigargin_10.000_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.73      0.97      0.83     35643
           1       0.26      0.03      0.05     13212

    accuracy                           0.72     48855
   macro avg       0.50      0.50      0.44     48855
weighted avg       0.60      0.72      0.62     48855

0.02512867090523766 0.2637013502779984 0.045884873194665186


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


DMSO_0.100_DMSO_0.025__LPS_0.100_DMSO_0.025
DMSO vs LPS__0.100
['DMSO_0.100_DMSO_0.025', 'LPS_0.100_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.67      0.97      0.79     35643
           1       0.33      0.03      0.06     17510

    accuracy                           0.66     53153
   macro avg       0.50      0.50      0.43     53153
weighted avg       0.56      0.66      0.55     53153

0.03409480296973158 0.3325905292479109 0.06184926184926185


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


DMSO_0.100_DMSO_0.025__LPS_1.000_DMSO_0.025
DMSO vs LPS__1.000
['DMSO_0.100_DMSO_0.025', 'LPS_1.000_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.68      0.97      0.80     35643
           1       0.32      0.03      0.06     16458

    accuracy                           0.67     52101
   macro avg       0.50      0.50      0.43     52101
weighted avg       0.57      0.67      0.57     52101

0.03147405517073763 0.315468940316687 0.05723756906077348


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


DMSO_0.100_DMSO_0.025__LPS_10.000_DMSO_0.025
DMSO vs LPS__10.000
['DMSO_0.100_DMSO_0.025', 'LPS_10.000_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.68      0.97      0.80     35643
           1       0.32      0.03      0.06     16810

    accuracy                           0.67     52453
   macro avg       0.50      0.50      0.43     52453
weighted avg       0.56      0.67      0.56     52453

0.03271861986912552 0.31554790590935167 0.05928960275966151


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


DMSO_0.100_DMSO_0.025__LPS_100.000_DMSO_0.025
DMSO vs LPS__100.000
['DMSO_0.100_DMSO_0.025', 'LPS_100.000_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.69      0.69      0.69     35643
           1       0.31      0.31      0.31     15987

    accuracy                           0.57     51630
   macro avg       0.50      0.50      0.50     51630
weighted avg       0.57      0.57      0.57     51630

0.31144054544317257 0.31021806853582556 0.3108281050035896


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


DMSO_0.100_DMSO_0.025__Flagellin_0.100_DMSO_0.025
DMSO vs Flagellin__0.100
['DMSO_0.100_DMSO_0.025', 'Flagellin_0.100_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.70      0.97      0.82     35643
           1       0.28      0.03      0.05     14928

    accuracy                           0.69     50571
   macro avg       0.49      0.50      0.43     50571
weighted avg       0.58      0.69      0.59     50571

0.029139871382636656 0.283203125 0.05284256559766764


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


DMSO_0.100_DMSO_0.025__Flagellin_1.000_DMSO_0.025
DMSO vs Flagellin__1.000
['DMSO_0.100_DMSO_0.025', 'Flagellin_1.000_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.69      0.96      0.80     35643
           1       0.30      0.04      0.07     15809

    accuracy                           0.68     51452
   macro avg       0.50      0.50      0.44     51452
weighted avg       0.57      0.68      0.58     51452

0.03966095262192422 0.3045167557066537 0.07018132975151108


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


DMSO_0.100_DMSO_0.025__Flagellin_1.000_Disulfiram_1.0
DMSO vs Flagellin__1.000
['DMSO_0.100_DMSO_0.025', 'Flagellin_1.000_Disulfiram_1.0']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.73      0.96      0.83     35643
           1       0.27      0.04      0.07     12885

    accuracy                           0.72     48528
   macro avg       0.50      0.50      0.45     48528
weighted avg       0.61      0.72      0.63     48528

0.037252619324796274 0.269209197980931 0.06544859558221978


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


DMSO_0.100_DMSO_0.025__LPS_Nigericin_100.000_1.0_DMSO_0.025
DMSO vs LPS__Nigericin
['DMSO_0.100_DMSO_0.025', 'LPS_Nigericin_100.000_1.0_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.71      0.78      0.74     35643
           1       0.29      0.22      0.25     14690

    accuracy                           0.61     50333
   macro avg       0.50      0.50      0.49     50333
weighted avg       0.58      0.61      0.60     50333

0.21688223281143634 0.28630481667864843 0.24680455496165465


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


DMSO_0.100_DMSO_0.025__LPS_Nigericin_100.000_3.0_DMSO_0.025
DMSO vs LPS__Nigericin
['DMSO_0.100_DMSO_0.025', 'LPS_Nigericin_100.000_3.0_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.68      0.71      0.69     35643
           1       0.32      0.29      0.31     16726

    accuracy                           0.58     52369
   macro avg       0.50      0.50      0.50     52369
weighted avg       0.57      0.58      0.57     52369

0.2920004782972617 0.3192574192704929 0.3050212340744442


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


DMSO_0.100_DMSO_0.025__LPS_Nigericin_100.000_10.0_DMSO_0.025
DMSO vs LPS__Nigericin
['DMSO_0.100_DMSO_0.025', 'LPS_Nigericin_100.000_10.0_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.72      0.71      0.72     35643
           1       0.27      0.28      0.27     13677

    accuracy                           0.59     49320
   macro avg       0.50      0.50      0.50     49320
weighted avg       0.60      0.59      0.59     49320

0.2776193609709732 0.27160228898426325 0.2745778645550855


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


DMSO_0.100_DMSO_0.025__LPS_Nigericin_1.000_1.0_DMSO_0.025
DMSO vs LPS__Nigericin
['DMSO_0.100_DMSO_0.025', 'LPS_Nigericin_1.000_1.0_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.69      0.92      0.79     35643
           1       0.30      0.08      0.13     16218

    accuracy                           0.66     51861
   macro avg       0.50      0.50      0.46     51861
weighted avg       0.57      0.66      0.58     51861

0.0797878899987668 0.3046856604662114 0.1264598094307354


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


DMSO_0.100_DMSO_0.025__LPS_Nigericin_1.000_3.0_DMSO_0.025
DMSO vs LPS__Nigericin
['DMSO_0.100_DMSO_0.025', 'LPS_Nigericin_1.000_3.0_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.72      0.79      0.76     35643
           1       0.28      0.21      0.24     13652

    accuracy                           0.63     49295
   macro avg       0.50      0.50      0.50     49295
weighted avg       0.60      0.63      0.61     49295

0.20531790213888074 0.2752356637863315 0.23519046819936232


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


DMSO_0.100_DMSO_0.025__LPS_Nigericin_1.000_10.0_DMSO_0.025
DMSO vs LPS__Nigericin
['DMSO_0.100_DMSO_0.025', 'LPS_Nigericin_1.000_10.0_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.71      0.84      0.77     35643
           1       0.28      0.15      0.20     14402

    accuracy                           0.64     50045
   macro avg       0.49      0.50      0.48     50045
weighted avg       0.59      0.64      0.61     50045

0.1519927787807249 0.27913797500637594 0.19681711922316128


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


DMSO_0.100_DMSO_0.025__H2O2_100.000_Z-VAD-FMK_100.0
DMSO vs H2O2__100.000
['DMSO_0.100_DMSO_0.025', 'H2O2_100.000_Z-VAD-FMK_100.0']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.71      0.98      0.82     35643
           1       0.31      0.02      0.04     14747

    accuracy                           0.70     50390
   macro avg       0.51      0.50      0.43     50390
weighted avg       0.59      0.70      0.59     50390

0.021360276666440633 0.3073170731707317 0.039944204920111595


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


DMSO_0.100_DMSO_0.025__H2O2_100.000_DMSO_0.025
DMSO vs H2O2__100.000
['DMSO_0.100_DMSO_0.025', 'H2O2_100.000_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.57      0.95      0.71     35643
           1       0.42      0.04      0.08     26900

    accuracy                           0.56     62543
   macro avg       0.49      0.50      0.40     62543
weighted avg       0.51      0.56      0.44     62543

0.04464684014869889 0.42051820728291317 0.0807232154859524


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


Thapsigargin_1.000_DMSO_0.025__LPS_100.000_DMSO_0.025
Thapsigargin vs LPS__100.000
['Thapsigargin_1.000_DMSO_0.025', 'LPS_100.000_DMSO_0.025']
[1, 0]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.53      0.47      0.50     15987
           1       0.46      0.53      0.49     13766

    accuracy                           0.49     29753
   macro avg       0.50      0.50      0.49     29753
weighted avg       0.50      0.49      0.49     29753

0.5270957431352608 0.45953134895503484 0.49100013533631076
Thapsigargin_10.000_DMSO_0.025__LPS_100.000_DMSO_0.025
Thapsigargin vs LPS__100.000
['Thapsigargin_10.000_DMSO_0.025', 'LPS_100.000_DMSO_0.025']
[1, 0]


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.55      0.46      0.50     15987
           1       0.45      0.54      0.49     13212

    accuracy                           0.50     29199
   macro avg       0.50      0.50      0.50     29199
weighted avg       0.51      0.50      0.50     29199

0.5389040266424463 0.45387900809587556 0.4927506142081041
Thapsigargin_1.000_DMSO_0.025__LPS_10.000_DMSO_0.025
Thapsigargin vs LPS__10.000
['Thapsigargin_1.000_DMSO_0.025', 'LPS_10.000_DMSO_0.025']
[1, 0]


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.55      0.95      0.70     16810
           1       0.45      0.05      0.09     13766

    accuracy                           0.54     30576
   macro avg       0.50      0.50      0.39     30576
weighted avg       0.50      0.54      0.42     30576

0.048452709574313524 0.449763991908294 0.08748114630467571
Thapsigargin_10.000_DMSO_0.025__LPS_10.000_DMSO_0.025
Thapsigargin vs LPS__10.000
['Thapsigargin_10.000_DMSO_0.025', 'LPS_10.000_DMSO_0.025']
[1, 0]


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.56      0.95      0.71     16810
           1       0.45      0.05      0.08     13212

    accuracy                           0.56     30022
   macro avg       0.50      0.50      0.40     30022
weighted avg       0.51      0.56      0.43     30022

0.04692703602785347 0.4492753623188406 0.0849780701754386
Thapsigargin_1.000_DMSO_0.025__LPS_1.000_DMSO_0.025
Thapsigargin vs LPS__1.000
['Thapsigargin_1.000_DMSO_0.025', 'LPS_1.000_DMSO_0.025']
[1, 0]


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.54      0.95      0.69     16458
           1       0.44      0.04      0.08     13766

    accuracy                           0.54     30224
   macro avg       0.49      0.50      0.39     30224
weighted avg       0.49      0.54      0.41     30224

0.04373093127996513 0.4356005788712012 0.07948243992606284
Thapsigargin_10.000_DMSO_0.025__LPS_1.000_DMSO_0.025
Thapsigargin vs LPS__1.000


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


['Thapsigargin_10.000_DMSO_0.025', 'LPS_1.000_DMSO_0.025']
[1, 0]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.55      0.96      0.70     16458
           1       0.43      0.04      0.08     13212

    accuracy                           0.55     29670
   macro avg       0.49      0.50      0.39     29670
weighted avg       0.50      0.55      0.42     29670

0.04178019981834696 0.43158717748240816 0.07618521841142778
Thapsigargin_1.000_DMSO_0.025__LPS_0.100_DMSO_0.025
Thapsigargin vs LPS__0.100
['Thapsigargin_1.000_DMSO_0.025', 'LPS_0.100_DMSO_0.025']
[1, 0]


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.56      0.95      0.70     17510
           1       0.43      0.05      0.09     13766

    accuracy                           0.55     31276
   macro avg       0.50      0.50      0.40     31276
weighted avg       0.50      0.55      0.43     31276

0.048162138602353625 0.4319218241042345 0.08666100254885302
Thapsigargin_10.000_DMSO_0.025__LPS_0.100_DMSO_0.025
Thapsigargin vs LPS__0.100
['Thapsigargin_10.000_DMSO_0.025', 'LPS_0.100_DMSO_0.025']


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


[1, 0]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.57      0.95      0.71     17510
           1       0.45      0.05      0.09     13212

    accuracy                           0.57     30722
   macro avg       0.51      0.50      0.40     30722
weighted avg       0.52      0.57      0.45     30722

0.04874356645473812 0.44972067039106145 0.08795411089866156
Thapsigargin_1.000_DMSO_0.025__LPS_0.010_DMSO_0.025
Thapsigargin vs LPS__0.010
['Thapsigargin_1.000_DMSO_0.025', 'LPS_0.010_DMSO_0.025']
[1, 0]


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.54      0.97      0.69     15859
           1       0.49      0.03      0.06     13766

    accuracy                           0.53     29625
   macro avg       0.51      0.50      0.38     29625
weighted avg       0.51      0.53      0.40     29625

0.03174487868661921 0.48936170212765956 0.05962207517566001
Thapsigargin_10.000_DMSO_0.025__LPS_0.010_DMSO_0.025
Thapsigargin vs LPS__0.010
['Thapsigargin_10.000_DMSO_0.025', 'LPS_0.010_DMSO_0.025']
[1, 0]


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
../../trained_models/model_save_states/Binary_Classification/SHSY5Y
DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025_shuffle
              precision    recall  f1-score   support

           0       0.55      0.97      0.70     15859
           1       0.48      0.03      0.05     13212

    accuracy                           0.54     29071
   macro avg       0.51      0.50      0.38     29071
weighted avg       0.51      0.54      0.41     29071

0.028458976687859523 0.4759493670886076 0.05370661334095129


  model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)


In [11]:
model_stats_df

Unnamed: 0,treatments_tested,model,group,shuffled_data,PR_Threshold,Precision,Recall
0,DMSO_0.100_DMSO_0.025 vs LPS_100.000_DMSO_0.025,DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025,test,True,0.0,0.309646,1.000000
1,DMSO_0.100_DMSO_0.025 vs LPS_100.000_DMSO_0.025,DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025,test,True,0.0,0.309655,0.999875
2,DMSO_0.100_DMSO_0.025 vs LPS_100.000_DMSO_0.025,DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025,test,True,0.0,0.309661,0.999875
3,DMSO_0.100_DMSO_0.025 vs LPS_100.000_DMSO_0.025,DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025,test,True,0.0,0.309647,0.999812
4,DMSO_0.100_DMSO_0.025 vs LPS_100.000_DMSO_0.025,DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025,test,True,0.0,0.309653,0.999812
...,...,...,...,...,...,...,...
28440,LPS_0.010_DMSO_0.025 vs Thapsigargin_10.000_DM...,DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025,test,True,0.827824,0.467227,0.021041
28441,LPS_0.010_DMSO_0.025 vs Thapsigargin_10.000_DM...,DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025,test,True,0.830851,0.468013,0.021041
28442,LPS_0.010_DMSO_0.025 vs Thapsigargin_10.000_DM...,DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025,test,True,0.831917,0.468803,0.021041
28443,LPS_0.010_DMSO_0.025 vs Thapsigargin_10.000_DM...,DMSO_0.100_DMSO_0.025_vs_LPS_100.000_DMSO_0.025,test,True,0.835869,0.469595,0.021041


In [12]:
# set path for the model training metrics
metrics_path = pathlib.Path(
    f"../../results/{mlp_params.MODEL_TYPE}/{mlp_params.MODEL_NAME}/{mlp_params.CELL_TYPE}"
)
metrics_path.mkdir(parents=True, exist_ok=True)
# check if the model training metrics file exists
metrics_file = pathlib.Path(f"{metrics_path}/testing_metrics.csv")
if metrics_file.exists():
    metrics_df = pd.read_csv(metrics_file)
    if len(metrics_df["shuffled_data"].unique()) > 1:
        pass
    elif metrics_df["shuffled_data"].unique() == mlp_params.SHUFFLE:
        pass
    else:
        metrics_df = pd.concat([metrics_df, model_stats_df], axis=0)
        metrics_df.to_csv(metrics_file, index=False)
else:
    model_stats_df.to_csv(metrics_file, index=False)