In [None]:
import os
import sys

import numpy as np
import pandas as pd
import seaborn as sns
import yaml
from scipy import stats
pd.options.mode.chained_assignment = None

sys.path.append('../python/')


In [None]:
# Load the analysis parameters (directory paths and file names used by the
# cells below) from YAML. The context manager closes the file handle as soon
# as parsing finishes instead of leaving it open for the life of the kernel.
with open("../parameters/july_2024_data_parameters.yaml", "r") as parameters_file:
    dict_parameters = yaml.safe_load(parameters_file)

In [None]:
# Read the plate-data-with-locations CSV from the output directory, using the
# first CSV column as the index. The path is handed to pandas directly so
# that pandas opens and closes the file itself (no leaked file handle).
pd_df_plate_data = pd.read_csv(
    os.path.join(
        dict_parameters["output directory path"],
        dict_parameters["plate data with locations file name"],
    ),
    index_col=0,
)

In [None]:
# Read the estimated-concentrations CSV from the output directory. The path
# is handed to pandas directly so that pandas opens and closes the file
# itself (no leaked file handle).
pd_df_estimated_concentrations = pd.read_csv(
    os.path.join(
        dict_parameters["output directory path"],
        dict_parameters["estimated concentrations file name"],
    )
)

In [None]:
# Read the quality-control concentrations CSV from the data directory. The
# path is handed to pandas directly so that pandas opens and closes the file
# itself (no leaked file handle).
pd_df_quality_control_concentrations = pd.read_csv(
    os.path.join(
        dict_parameters["data directory path"],
        dict_parameters["quality control concentrations file name"],
    )
)

In [None]:
# Display the loaded plate data for visual inspection.
pd_df_plate_data

In [None]:
# Display the loaded quality-control concentrations for visual inspection.
pd_df_quality_control_concentrations

In [None]:
# Display the loaded estimated concentrations for visual inspection.
pd_df_estimated_concentrations

In [None]:
def perform_t_test_on_paired_wells(pd_group):
    """Return the two-sided p-value of an unequal-variance t-test for a pair of rows.

    The test is computed from summary statistics only, read from the columns
    "IFN-gamma Trimmed Mean", "IFN-gamma Trimmed Standard Deviation" and
    "IFN-gamma Count" of ``pd_group``.

    Parameters
    ----------
    pd_group : pandas.DataFrame
        One group of rows; a p-value is only computed when it holds exactly
        two rows.

    Returns
    -------
    float
        The p-value from ``scipy.stats.ttest_ind_from_stats`` with
        ``equal_var=False``, or ``numpy.nan`` when the group does not contain
        exactly two rows.
    """
    if len(pd_group) == 2:
        trimmed_means = pd_group["IFN-gamma Trimmed Mean"].values
        trimmed_std_devs = pd_group["IFN-gamma Trimmed Standard Deviation"].values
        counts = pd_group["IFN-gamma Count"].values
        # equal_var=False: the two rows are not assumed to share a variance.
        test_result = stats.ttest_ind_from_stats(
            mean1=trimmed_means[0],
            std1=trimmed_std_devs[0],
            nobs1=counts[0],
            mean2=trimmed_means[1],
            std2=trimmed_std_devs[1],
            nobs2=counts[1],
            equal_var=False,
        )
        return test_result.pvalue
    # Anything other than a pair cannot be compared.
    return np.nan

In [None]:
# One p-value per (sample name plate, plate number) group, computed by
# perform_t_test_on_paired_wells.
plate_group_keys = ["sample name plate", "plate number"]
pd_df_pair_p_values = (
    pd_df_plate_data
    .groupby(plate_group_keys)
    .apply(perform_t_test_on_paired_wells, include_groups=False)
    .reset_index()
)
pd_df_pair_p_values.columns = plate_group_keys + ["t test p value"]

# Left-merge the summary statistics back on, so every plate-data row of a
# group appears alongside that group's p-value.
pd_df_tested = pd_df_pair_p_values.merge(
    pd_df_plate_data[
        plate_group_keys
        + [
            "IFN-gamma Trimmed Mean",
            "IFN-gamma Trimmed Standard Deviation",
            "IFN-gamma Count",
        ]
    ],
    on=plate_group_keys,
    how="left",
)

In [None]:
# Display the p-values merged with the per-row summary statistics.
pd_df_tested

In [None]:
# Keep a single row per (sample name plate, plate number) group so that each
# group's p-value is represented once.
pd_df_tested_unique = pd_df_tested.drop_duplicates(subset = ["sample name plate", "plate number"])

In [None]:
# Display the de-duplicated table (one row per group).
pd_df_tested_unique

In [None]:
# Histogram of the per-group t-test p-values, in 20 bins. The trailing
# semicolon keeps the Axes repr out of the cell output.
sns.histplot(pd_df_tested_unique, x = "t test p value", bins = 20);

In [None]:
def calculate_paired_intra_plate_cv(pd_group):
    """Return the coefficient of variation of a pair of concentration estimates.

    The two values are read from the "estimated concentration IFN-gamma"
    column. The CV is the square root of the summed squared deviations from
    the pair mean (i.e. the n-1 standard deviation, since n-1 is 1 for a
    pair) divided by the pair mean.

    Parameters
    ----------
    pd_group : pandas.DataFrame
        One group of rows; a CV is only computed when it holds exactly two
        rows.

    Returns
    -------
    float
        Standard deviation divided by mean, or ``numpy.nan`` when the group
        does not contain exactly two rows.

    Notes
    -----
    A pair mean of zero is not guarded against; the division is performed
    as-is.
    """
    if len(pd_group) == 2:
        first, second = pd_group["estimated concentration IFN-gamma"].values
        pair_mean = (first + second) / 2
        summed_squared_deviations = (first - pair_mean)**2 + (second - pair_mean)**2
        return np.sqrt(summed_squared_deviations) / pair_mean
    # Anything other than a pair has no paired CV.
    return np.nan


In [None]:
# One intra-plate CV per (sample name annotations, plate number) group,
# computed by calculate_paired_intra_plate_cv.
concentration_group_keys = ["sample name annotations", "plate number"]
pd_df_pair_cvs = (
    pd_df_estimated_concentrations
    .groupby(concentration_group_keys)
    .apply(calculate_paired_intra_plate_cv, include_groups=False)
    .reset_index()
)
pd_df_pair_cvs.columns = concentration_group_keys + ["intra plate CV"]

# Left-merge the individual estimates back on, so every estimate of a group
# appears alongside that group's CV.
pd_df_estimated_concentrations_checked = pd_df_pair_cvs.merge(
    pd_df_estimated_concentrations[
        concentration_group_keys + ["estimated concentration IFN-gamma"]
    ],
    on=concentration_group_keys,
    how="left",
)

In [None]:
# Display the estimates together with their group's intra-plate CV.
pd_df_estimated_concentrations_checked

In [None]:
# Print, for plates 1-16 and 18, the root-mean-square of the intra-plate CVs
# on that plate; NaN CVs are ignored by nanmean.
for plate_number in [*range(1, 17), 18]:
    is_on_plate = pd_df_estimated_concentrations_checked["plate number"] == plate_number
    plate_cvs = pd_df_estimated_concentrations_checked.loc[is_on_plate, "intra plate CV"].values
    print(plate_number, np.sqrt(np.nanmean(plate_cvs**2)))

In [None]:
# Show the rows whose intra-plate CV exceeds 0.2.
pd_df_estimated_concentrations_checked.loc[
    pd_df_estimated_concentrations_checked["intra plate CV"] > 0.2
]