In [1]:
import pathlib
import sys

import numpy as np
import pandas as pd
import pyarrow.parquet as pq
import toml
import torch
from sklearn import preprocessing

sys.path.append("../..")
from MLP_utils.parameters import Parameters
from MLP_utils.utils import (
    Dataset_formatter,
    optimized_model_create,
    output_stats,
    parameter_set,
    results_output,
    test_optimized_model,
    un_nest,
)
from sklearn.metrics import (
    accuracy_score,
    auc,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
    roc_curve,
)

sys.path.append("../../..")

In [2]:
# Parameters
CELL_TYPE = "SHSY5Y"
CONTROL_NAME = "DMSO_0.100_DMSO_0.025"
TREATMENT_NAME = "LPS_Nigericin_1.000_10.0_DMSO_0.025"
SHUFFLE = True

In [3]:
MODEL_NAME = CONTROL_NAME + "_vs_" + TREATMENT_NAME

In [4]:
ml_configs_file = pathlib.Path("../../MLP_utils/binary_config.toml").resolve(
    strict=True
)
ml_configs = toml.load(ml_configs_file)
params = Parameters()
mlp_params = parameter_set(params, ml_configs)

# overwrite mlp_params via command line arguments from papermill
mlp_params.CELL_TYPE = CELL_TYPE
mlp_params.MODEL_NAME = MODEL_NAME
mlp_params.CONTROL_NAME = CONTROL_NAME
mlp_params.TREATMENT_NAME = TREATMENT_NAME
mlp_params.MODEL_NAME = MODEL_NAME
mlp_params.SHUFFLE = SHUFFLE

In [5]:
# Import Data
# set data file path under pathlib path for multi-system use
file_path = pathlib.Path(
    f"../../../data/{mlp_params.CELL_TYPE}_preprocessed_sc_norm.parquet"
).resolve(strict=True)

df = pq.read_table(file_path).to_pandas()

In [6]:
def test_loop(df, output_name, title):
    # Code snippet for metadata extraction by Jenna Tomkinson
    df_metadata = list(df.columns[df.columns.str.startswith("Metadata")])

    # define which columns are data and which are descriptive
    df_descriptive = df[df_metadata]
    df_values = df.drop(columns=df_metadata)
    # Creating label encoder
    le = preprocessing.LabelEncoder()
    # Converting strings into numbers
    print(df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique().tolist())
    lst_of_treatments = (
        df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique().tolist()
    )

    df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"] = le.fit_transform(
        df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"]
    )
    print(df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique().tolist())
    lst_of_coded_treatments = (
        df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique().tolist()
    )
    # make a dictionary of the treatments and their corresponding codes to decode later
    dict_of_treatments = {}
    for i, j in zip(
        lst_of_coded_treatments,
        lst_of_treatments,
    ):
        dict_of_treatments[i] = j
    # split into X and Y where Y are the predictive column and x are the observable data
    df_values_X = df_values.drop(
        [
            "oneb_Metadata_Treatment_Dose_Inhibitor_Dose",
            "twob_Metadata_Treatment_Dose_Inhibitor_Dose",
            "threeb_Metadata_Treatment_Dose_Inhibitor_Dose",
            "fourb_Metadata_Treatment_Dose_Inhibitor_Dose",
        ],
        axis=1,
    )
    df_values_Y = df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"]
    test_data = Dataset_formatter(
        torch.FloatTensor(df_values_X.values), torch.FloatTensor(df_values_Y.values)
    )

    mlp_params.IN_FEATURES = df_values_X.shape[1]
    print("Number of in features: ", mlp_params.IN_FEATURES)
    if mlp_params.MODEL_TYPE == "Regression":
        mlp_params.OUT_FEATURES = 1
    else:
        mlp_params.OUT_FEATURES = len(
            df_values["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique()
        )

    print("Number of out features: ", mlp_params.OUT_FEATURES)

    if mlp_params.OUT_FEATURES > 2:
        mlp_params.MODEL_TYPE = "Multi_Class"
    elif mlp_params.OUT_FEATURES == 2:
        mlp_params.OUT_FEATURES = mlp_params.OUT_FEATURES - 1
        mlp_params.MODEL_TYPE = "Binary_Classification"
    elif mlp_params.OUT_FEATURES == 1:
        mlp_params.MODEL_TYPE = "Regression"
    else:
        pass
    # convert data class into a dataloader to be compatible with pytorch
    test_loader = torch.utils.data.DataLoader(
        dataset=test_data, batch_size=1, shuffle=mlp_params.SHUFFLE
    )
    model, _ = optimized_model_create(mlp_params, mlp_params.MODEL_NAME)
    # calling the testing function and outputting list values of tested model
    if mlp_params.MODEL_TYPE == "Multi_Class" or mlp_params.MODEL_TYPE == "Regression":
        y_pred_list = test_optimized_model(
            model,
            test_loader,
            mlp_params,
            model_name=mlp_params.MODEL_NAME,
            shuffle=mlp_params.SHUFFLE,
        )
    elif mlp_params.MODEL_TYPE == "Binary_Classification":
        y_pred_list, y_pred_prob_list = test_optimized_model(
            model,
            test_loader,
            mlp_params,
            model_name=mlp_params.MODEL_NAME,
            shuffle=mlp_params.SHUFFLE,
        )
    else:
        raise Exception("Model type must be specified for proper model testing")

    # un-nest list if nested i.e. length of input data does not match length of output data
    if len(y_pred_list) != len(df_values_Y):
        y_pred_list = un_nest(y_pred_list)
        y_pred_prob_list = un_nest(y_pred_prob_list)
    else:
        pass

    stats, recall, precision, f1, precision_, recall_, threshold_ = output_stats(
        y_pred_list,
        df_values_Y,
        mlp_params,
        y_pred_prob_list,
        test_name=f"{output_name}_all_testing",
        model_name=mlp_params.MODEL_NAME,
        title=title,
        shuffle=mlp_params.SHUFFLE,
    )
    return (
        stats,
        recall,
        precision,
        f1,
        precision_,
        recall_,
        threshold_,
        dict_of_treatments,
    )

In [7]:
print(df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique().tolist())

['media_ctr_0_Media_ctr_0.0', 'DMSO_0.100_DMSO_1.0', 'DMSO_0.100_Z-VAD-FMK_100.0', 'DMSO_0.100_Z-VAD-FMK_30.0', 'DMSO_0.100_DMSO_0.025', 'Thapsigargin_1.000_DMSO_0.025', 'Thapsigargin_10.000_DMSO_0.025', 'Topotecan_5.000_DMSO_0.025', 'Topotecan_10.000_DMSO_0.025', 'Topotecan_20.000_DMSO_0.025', 'LPS_0.010_DMSO_0.025', 'LPS_0.100_DMSO_0.025', 'LPS_1.000_DMSO_0.025', 'LPS_10.000_DMSO_0.025', 'LPS_10.000_Disulfiram_0.1', 'LPS_10.000_Disulfiram_1.0', 'LPS_10.000_Disulfiram_2.5', 'LPS_Nigericin_100.000_1.0_DMSO_0.025', 'LPS_Nigericin_100.000_3.0_DMSO_0.025', 'LPS_Nigericin_100.000_10.0_DMSO_0.025', 'Disulfiram_0.100_DMSO_0.025', 'Disulfiram_1.000_DMSO_0.025', 'Disulfiram_2.500_DMSO_0.025', 'H2O2_100.000_DMSO_0.025', 'LPS_10.000_Z-VAD-FMK_100.0', 'LPS_100.000_DMSO_0.025', 'LPS_Nigericin_1.000_1.0_DMSO_0.025', 'LPS_Nigericin_1.000_3.0_DMSO_0.025', 'LPS_Nigericin_1.000_10.0_DMSO_0.025', 'LPS_Nigericin_1.000_10.0_Disulfiram_1.0', 'LPS_Nigericin_1.000_10.0_Z-VAD-FMK_100.0', 'H2O2_100.000_Disulfi

In [8]:
paired_treatment_list = [
    ["DMSO_0.100_DMSO_0.025", "LPS_100.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "Thapsigargin_1.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "Thapsigargin_10.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_0.100_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_1.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_10.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_100.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "Flagellin_0.100_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "Flagellin_1.000_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "Flagellin_1.000_Disulfiram_1.0"],
    ["DMSO_0.100_DMSO_0.025", "LPS_Nigericin_100.000_1.0_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_Nigericin_100.000_3.0_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_Nigericin_100.000_10.0_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_Nigericin_1.000_1.0_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_Nigericin_1.000_3.0_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "LPS_Nigericin_1.000_10.0_DMSO_0.025"],
    ["DMSO_0.100_DMSO_0.025", "H2O2_100.000_Z-VAD-FMK_100.0"],
    ["DMSO_0.100_DMSO_0.025", "H2O2_100.000_DMSO_0.025"],
    ["LPS_100.000_DMSO_0.025", "Thapsigargin_1.000_DMSO_0.025"],
    ["LPS_100.000_DMSO_0.025", "Thapsigargin_10.000_DMSO_0.025"],
    ["LPS_10.000_DMSO_0.025", "Thapsigargin_1.000_DMSO_0.025"],
    ["LPS_10.000_DMSO_0.025", "Thapsigargin_10.000_DMSO_0.025"],
    ["LPS_1.000_DMSO_0.025", "Thapsigargin_1.000_DMSO_0.025"],
    ["LPS_1.000_DMSO_0.025", "Thapsigargin_10.000_DMSO_0.025"],
    ["LPS_0.100_DMSO_0.025", "Thapsigargin_1.000_DMSO_0.025"],
    ["LPS_0.100_DMSO_0.025", "Thapsigargin_10.000_DMSO_0.025"],
    ["LPS_0.010_DMSO_0.025", "Thapsigargin_1.000_DMSO_0.025"],
    ["LPS_0.010_DMSO_0.025", "Thapsigargin_10.000_DMSO_0.025"],
]

In [9]:
# create a dataframe to store the model stats
model_stats_df = pd.DataFrame(
    columns=[
        "treatments_tested",
        "model",
        "group",
        "shuffled_data",
        "PR_Threshold",
        "Precision",
        "Recall",
    ]
)
model_stats_df

Unnamed: 0,treatments_tested,model,group,shuffled_data,PR_Threshold,Precision,Recall


In [10]:
for i in paired_treatment_list:
    # filter df to only include the two treatments to test
    test_df = df.query(
        f"oneb_Metadata_Treatment_Dose_Inhibitor_Dose == '{i[0]}' | oneb_Metadata_Treatment_Dose_Inhibitor_Dose == '{i[1]}'"
    )
    output_name = ("__").join(
        test_df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique()
    )

    print(output_name)

    title = f'{output_name.split("__")[0].split("_")[0]} vs {("__").join(output_name.split("__")[1].split("_")[:2])}'
    print(title)
    (
        stats,
        recall,
        precision,
        f1,
        precision_,
        recall_,
        threshold_,
        dict_of_treatments,
    ) = test_loop(test_df, output_name, title)
    print(recall, precision, f1)

    threshold_ = np.append(threshold_, None)
    stats_df = pd.DataFrame(
        {
            "PR_Threshold": threshold_,
            "Precision": precision_,
            "Recall": recall_,
        }
    )

    stats_df["treatments_tested"] = "0 vs 1"
    # make it so that the second treatment is always the one that is being tested as the positive label
    stats_df["treatments_tested"] = stats_df["treatments_tested"].replace(
        "0 vs 1", f"{dict_of_treatments[0]} vs {dict_of_treatments[1]}"
    )
    stats_df["model"] = mlp_params.MODEL_NAME
    stats_df["group"] = "test"
    stats_df["shuffled_data"] = mlp_params.SHUFFLE
    stats_df
    model_stats_df = pd.concat([model_stats_df, stats_df], axis=0)

DMSO_0.100_DMSO_0.025__LPS_100.000_DMSO_0.025
DMSO vs LPS__100.000


['DMSO_0.100_DMSO_0.025', 'LPS_100.000_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.69      0.98      0.81     35643
           1       0.31      0.02      0.04     15987

    accuracy                           0.68     51630
   macro avg       0.50      0.50      0.43     51630
weighted avg       0.57      0.68      0.57     51630

0.022956151873397137 0.3144815766923736 0.04278885391162412


DMSO_0.100_DMSO_0.025__Thapsigargin_1.000_DMSO_0.025
DMSO vs Thapsigargin__1.000
['DMSO_0.100_DMSO_0.025', 'Thapsigargin_1.000_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.72      0.99      0.83     35643
           1       0.29      0.01      0.02     13766

    accuracy                           0.72     49409
   macro avg       0.51      0.50      0.43     49409
weighted avg       0.60      0.72      0.61     49409

0.009952055789626617 0.2908704883227176 0.019245627590082186


DMSO_0.100_DMSO_0.025__Thapsigargin_10.000_DMSO_0.025
DMSO vs Thapsigargin__10.000
['DMSO_0.100_DMSO_0.025', 'Thapsigargin_10.000_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.73      0.99      0.84     35643
           1       0.26      0.01      0.02     13212

    accuracy                           0.72     48855
   macro avg       0.49      0.50      0.43     48855
weighted avg       0.60      0.72      0.62     48855

0.012110202845897668 0.2601626016260163 0.0231431257684241


DMSO_0.100_DMSO_0.025__LPS_0.100_DMSO_0.025
DMSO vs LPS__0.100
['DMSO_0.100_DMSO_0.025', 'LPS_0.100_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.67      0.99      0.80     35643
           1       0.34      0.01      0.01     17510

    accuracy                           0.67     53153
   macro avg       0.51      0.50      0.41     53153
weighted avg       0.56      0.67      0.54     53153

0.007024557395773843 0.3416666666666667 0.013766088416340236


DMSO_0.100_DMSO_0.025__LPS_1.000_DMSO_0.025
DMSO vs LPS__1.000
['DMSO_0.100_DMSO_0.025', 'LPS_1.000_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.68      0.99      0.81     35643
           1       0.33      0.01      0.02     16458

    accuracy                           0.68     52101
   macro avg       0.51      0.50      0.41     52101
weighted avg       0.57      0.68      0.56     52101

0.009600194434317658 0.33263157894736844 0.018661784680800805


DMSO_0.100_DMSO_0.025__LPS_10.000_DMSO_0.025
DMSO vs LPS__10.000
['DMSO_0.100_DMSO_0.025', 'LPS_10.000_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.68      0.99      0.81     35643
           1       0.31      0.01      0.02     16810

    accuracy                           0.68     52453
   macro avg       0.50      0.50      0.41     52453
weighted avg       0.56      0.68      0.55     52453

0.010232004759071982 0.31272727272727274 0.019815668202764977


DMSO_0.100_DMSO_0.025__LPS_100.000_DMSO_0.025
DMSO vs LPS__100.000
['DMSO_0.100_DMSO_0.025', 'LPS_100.000_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.69      0.98      0.81     35643
           1       0.34      0.02      0.05     15987

    accuracy                           0.68     51630
   macro avg       0.52      0.50      0.43     51630
weighted avg       0.58      0.68      0.57     51630

0.024895227372239946 0.34104541559554413 0.046403171272006535


DMSO_0.100_DMSO_0.025__Flagellin_0.100_DMSO_0.025
DMSO vs Flagellin__0.100
['DMSO_0.100_DMSO_0.025', 'Flagellin_0.100_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.71      0.99      0.82     35643
           1       0.31      0.02      0.03     14928

    accuracy                           0.70     50571
   macro avg       0.51      0.50      0.43     50571
weighted avg       0.59      0.70      0.59     50571

0.01527331189710611 0.3106267029972752 0.029115055548461242


DMSO_0.100_DMSO_0.025__Flagellin_1.000_DMSO_0.025
DMSO vs Flagellin__1.000
['DMSO_0.100_DMSO_0.025', 'Flagellin_1.000_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.69      0.98      0.81     35643
           1       0.31      0.02      0.03     15809

    accuracy                           0.69     51452
   macro avg       0.50      0.50      0.42     51452
weighted avg       0.57      0.69      0.57     51452

0.016066797393889557 0.30639324487334135 0.030532515927395122


DMSO_0.100_DMSO_0.025__Flagellin_1.000_Disulfiram_1.0
DMSO vs Flagellin__1.000
['DMSO_0.100_DMSO_0.025', 'Flagellin_1.000_Disulfiram_1.0']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.73      0.98      0.84     35643
           1       0.28      0.02      0.03     12885

    accuracy                           0.73     48528
   macro avg       0.51      0.50      0.44     48528
weighted avg       0.61      0.73      0.63     48528

0.018315871168024835 0.2796208530805687 0.034379780027678636


DMSO_0.100_DMSO_0.025__LPS_Nigericin_100.000_1.0_DMSO_0.025
DMSO vs LPS__Nigericin
['DMSO_0.100_DMSO_0.025', 'LPS_Nigericin_100.000_1.0_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.71      0.96      0.81     35643
           1       0.30      0.04      0.08     14690

    accuracy                           0.69     50333
   macro avg       0.50      0.50      0.45     50333
weighted avg       0.59      0.69      0.60     50333

0.04472430224642614 0.30096197892808063 0.07787589640253659


DMSO_0.100_DMSO_0.025__LPS_Nigericin_100.000_3.0_DMSO_0.025
DMSO vs LPS__Nigericin
['DMSO_0.100_DMSO_0.025', 'LPS_Nigericin_100.000_3.0_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.68      0.86      0.76     35643
           1       0.33      0.14      0.20     16726

    accuracy                           0.63     52369
   macro avg       0.51      0.50      0.48     52369
weighted avg       0.57      0.63      0.58     52369

0.1441468372593567 0.3307270233196159 0.20078281145902732


DMSO_0.100_DMSO_0.025__LPS_Nigericin_100.000_10.0_DMSO_0.025
DMSO vs LPS__Nigericin
['DMSO_0.100_DMSO_0.025', 'LPS_Nigericin_100.000_10.0_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.72      0.75      0.74     35643
           1       0.28      0.25      0.26     13677

    accuracy                           0.61     49320
   macro avg       0.50      0.50      0.50     49320
weighted avg       0.60      0.61      0.61     49320

0.24990860568838197 0.27949955024940715 0.2638770941094727


DMSO_0.100_DMSO_0.025__LPS_Nigericin_1.000_1.0_DMSO_0.025
DMSO vs LPS__Nigericin
['DMSO_0.100_DMSO_0.025', 'LPS_Nigericin_1.000_1.0_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.69      0.79      0.73     35643
           1       0.31      0.21      0.25     16218

    accuracy                           0.61     51861
   macro avg       0.50      0.50      0.49     51861
weighted avg       0.57      0.61      0.58     51861

0.21340485879886545 0.3147794452023647 0.2543637232205196


DMSO_0.100_DMSO_0.025__LPS_Nigericin_1.000_3.0_DMSO_0.025
DMSO vs LPS__Nigericin
['DMSO_0.100_DMSO_0.025', 'LPS_Nigericin_1.000_3.0_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.72      0.73      0.72     35643
           1       0.27      0.26      0.26     13652

    accuracy                           0.60     49295
   macro avg       0.49      0.49      0.49     49295
weighted avg       0.59      0.60      0.60     49295

0.2585701728684442 0.2664553140096618 0.26245353159851303


DMSO_0.100_DMSO_0.025__LPS_Nigericin_1.000_10.0_DMSO_0.025
DMSO vs LPS__Nigericin
['DMSO_0.100_DMSO_0.025', 'LPS_Nigericin_1.000_10.0_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.71      0.71      0.71     35643
           1       0.29      0.28      0.28     14402

    accuracy                           0.59     50045
   macro avg       0.50      0.50      0.50     50045
weighted avg       0.59      0.59      0.59     50045

0.2827385085404805 0.2854539081668419 0.28408971988697806


DMSO_0.100_DMSO_0.025__H2O2_100.000_Z-VAD-FMK_100.0
DMSO vs H2O2__100.000
['DMSO_0.100_DMSO_0.025', 'H2O2_100.000_Z-VAD-FMK_100.0']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.71      0.99      0.83     35643
           1       0.28      0.00      0.01     14747

    accuracy                           0.71     50390
   macro avg       0.49      0.50      0.42     50390
weighted avg       0.58      0.71      0.59     50390

0.004814538550213603 0.2784313725490196 0.009465404612718307


DMSO_0.100_DMSO_0.025__H2O2_100.000_DMSO_0.025
DMSO vs H2O2__100.000
['DMSO_0.100_DMSO_0.025', 'H2O2_100.000_DMSO_0.025']
[0, 1]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.57      0.96      0.72     35643
           1       0.45      0.04      0.07     26900

    accuracy                           0.57     62543
   macro avg       0.51      0.50      0.40     62543
weighted avg       0.52      0.57      0.44     62543

0.04037174721189591 0.44931733553992553 0.07408670737114983


Thapsigargin_1.000_DMSO_0.025__LPS_100.000_DMSO_0.025
Thapsigargin vs LPS__100.000
['Thapsigargin_1.000_DMSO_0.025', 'LPS_100.000_DMSO_0.025']
[1, 0]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.54      0.96      0.69     15987
           1       0.46      0.04      0.08     13766

    accuracy                           0.53     29753
   macro avg       0.50      0.50      0.38     29753
weighted avg       0.50      0.53      0.41     29753

0.0420601481911957 0.46025437201907793 0.07707667731629392
Thapsigargin_10.000_DMSO_0.025__LPS_100.000_DMSO_0.025
Thapsigargin vs LPS__100.000


['Thapsigargin_10.000_DMSO_0.025', 'LPS_100.000_DMSO_0.025']
[1, 0]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.55      0.95      0.69     15987
           1       0.45      0.05      0.09     13212

    accuracy                           0.54     29199
   macro avg       0.50      0.50      0.39     29199
weighted avg       0.50      0.54      0.42     29199

0.047229791099000905 0.4450784593437946 0.08539756397974545


Thapsigargin_1.000_DMSO_0.025__LPS_10.000_DMSO_0.025
Thapsigargin vs LPS__10.000
['Thapsigargin_1.000_DMSO_0.025', 'LPS_10.000_DMSO_0.025']
[1, 0]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.55      0.98      0.70     16810
           1       0.43      0.02      0.04     13766

    accuracy                           0.55     30576
   macro avg       0.49      0.50      0.37     30576
weighted avg       0.50      0.55      0.40     30576

0.02012203980822316 0.43213728549141966 0.03845352953425419
Thapsigargin_10.000_DMSO_0.025__LPS_10.000_DMSO_0.025
Thapsigargin vs LPS__10.000


['Thapsigargin_10.000_DMSO_0.025', 'LPS_10.000_DMSO_0.025']
[1, 0]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.56      0.97      0.71     16810
           1       0.45      0.03      0.05     13212

    accuracy                           0.56     30022
   macro avg       0.51      0.50      0.38     30022
weighted avg       0.51      0.56      0.42     30022

0.02686951256433545 0.45222929936305734 0.050725155390440806


Thapsigargin_1.000_DMSO_0.025__LPS_1.000_DMSO_0.025
Thapsigargin vs LPS__1.000
['Thapsigargin_1.000_DMSO_0.025', 'LPS_1.000_DMSO_0.025']
[1, 0]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.54      0.98      0.70     16458
           1       0.45      0.02      0.04     13766

    accuracy                           0.54     30224
   macro avg       0.50      0.50      0.37     30224
weighted avg       0.50      0.54      0.40     30224

0.018451256719453725 0.44876325088339225 0.03544515768908735
Thapsigargin_10.000_DMSO_0.025__LPS_1.000_DMSO_0.025
Thapsigargin vs LPS__1.000


['Thapsigargin_10.000_DMSO_0.025', 'LPS_1.000_DMSO_0.025']
[1, 0]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.55      0.98      0.71     16458
           1       0.45      0.02      0.05     13212

    accuracy                           0.55     29670
   macro avg       0.50      0.50      0.38     29670
weighted avg       0.51      0.55      0.41     29670

0.023917650620647896 0.4450704225352113 0.04539577646889815
Thapsigargin_1.000_DMSO_0.025__LPS_0.100_DMSO_0.025
Thapsigargin vs LPS__0.100


['Thapsigargin_1.000_DMSO_0.025', 'LPS_0.100_DMSO_0.025']
[1, 0]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.56      0.99      0.71     17510
           1       0.44      0.01      0.03     13766

    accuracy                           0.56     31276
   macro avg       0.50      0.50      0.37     31276
weighted avg       0.51      0.56      0.41     31276

0.014310620369025135 0.43680709534368073 0.027713300977702752


Thapsigargin_10.000_DMSO_0.025__LPS_0.100_DMSO_0.025
Thapsigargin vs LPS__0.100
['Thapsigargin_10.000_DMSO_0.025', 'LPS_0.100_DMSO_0.025']
[1, 0]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.57      0.98      0.72     17510
           1       0.43      0.02      0.04     13212

    accuracy                           0.57     30722
   macro avg       0.50      0.50      0.38     30722
weighted avg       0.51      0.57      0.43     30722

0.019452013321223132 0.4319327731092437 0.03722749330049975
Thapsigargin_1.000_DMSO_0.025__LPS_0.010_DMSO_0.025
Thapsigargin vs LPS__0.010


['Thapsigargin_1.000_DMSO_0.025', 'LPS_0.010_DMSO_0.025']
[1, 0]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.54      0.98      0.69     15859
           1       0.47      0.02      0.03     13766

    accuracy                           0.53     29625
   macro avg       0.50      0.50      0.36     29625
weighted avg       0.50      0.53      0.39     29625

0.017506901060584047 0.4679611650485437 0.03375113787549891
Thapsigargin_10.000_DMSO_0.025__LPS_0.010_DMSO_0.025
Thapsigargin vs LPS__0.010


['Thapsigargin_10.000_DMSO_0.025', 'LPS_0.010_DMSO_0.025']
[1, 0]
Number of in features:  1251
Number of out features:  2
DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_10.0_DMSO_0.025_shuffle


              precision    recall  f1-score   support

           0       0.55      0.98      0.70     15859
           1       0.45      0.02      0.04     13212

    accuracy                           0.54     29071
   macro avg       0.50      0.50      0.37     29071
weighted avg       0.50      0.54      0.40     29071

0.022328186497123828 0.44764795144157815 0.04253478480282604


In [11]:
model_stats_df

Unnamed: 0,treatments_tested,model,group,shuffled_data,PR_Threshold,Precision,Recall
0,DMSO_0.100_DMSO_0.025 vs LPS_100.000_DMSO_0.025,DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_1...,test,True,0.0,0.309646,1.000000
1,DMSO_0.100_DMSO_0.025 vs LPS_100.000_DMSO_0.025,DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_1...,test,True,0.0,0.309630,0.999562
2,DMSO_0.100_DMSO_0.025 vs LPS_100.000_DMSO_0.025,DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_1...,test,True,0.0,0.309617,0.999500
3,DMSO_0.100_DMSO_0.025 vs LPS_100.000_DMSO_0.025,DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_1...,test,True,0.0,0.309623,0.999500
4,DMSO_0.100_DMSO_0.025 vs LPS_100.000_DMSO_0.025,DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_1...,test,True,0.0,0.309629,0.999500
...,...,...,...,...,...,...,...
28210,LPS_0.010_DMSO_0.025 vs Thapsigargin_10.000_DM...,DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_1...,test,True,0.999999,0.333333,0.000227
28211,LPS_0.010_DMSO_0.025 vs Thapsigargin_10.000_DM...,DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_1...,test,True,0.999999,0.375000,0.000227
28212,LPS_0.010_DMSO_0.025 vs Thapsigargin_10.000_DM...,DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_1...,test,True,1.0,0.333333,0.000151
28213,LPS_0.010_DMSO_0.025 vs Thapsigargin_10.000_DM...,DMSO_0.100_DMSO_0.025_vs_LPS_Nigericin_1.000_1...,test,True,1.0,0.400000,0.000151


In [None]:
# set path for the model training metrics
metrics_path = pathlib.Path(
    f"../../results/{mlp_params.MODEL_TYPE}/{mlp_params.MODEL_NAME}/{mlp_params.CELL_TYPE}"
)
metrics_path.mkdir(parents=True, exist_ok=True)
# check if the model training metrics file exists
metrics_file = pathlib.Path(f"{metrics_path}/testing_metrics.csv")
if metrics_file.exists():
    metrics_df = pd.read_csv(metrics_file)
    if len(metrics_df["shuffled_data"].unique()) > 1:
        pass
    elif metrics_df["shuffled_data"].unique() == mlp_params.SHUFFLE:
        pass
    else:
        metrics_df = pd.concat([metrics_df, model_stats_df], axis=0)
        metrics_df.to_csv(metrics_file, index=False)
else:
    model_stats_df.to_csv(metrics_file, index=False)