In [None]:
import sys
import os
import yaml
import numpy as np
from scipy import stats

import pandas as pd
# Turn off pandas' chained-assignment (SettingWithCopy) warning for this session.
pd.options.mode.chained_assignment = None

# Put the sibling ../python/ directory on the module search path
# (presumably for project-local modules; no such import is visible in this section).
sys.path.append('../python/')


In [None]:
# Load the run parameters from YAML. The context manager closes the file
# handle as soon as parsing finishes instead of leaving it open until
# garbage collection.
with open("../parameters/july_2024_data_parameters.yaml", "r") as parameters_file:
    dict_parameters = yaml.safe_load(parameters_file)

In [None]:
# Build the CSV path from the two parameter entries, then read it with the
# first column as the index. The file is opened in binary mode and handed to
# pandas inside a context manager so the handle is always closed, even if
# parsing raises.
plate_data_path = os.path.join(
    dict_parameters["output directory path"],
    dict_parameters["plate data with locations file name"],
)
with open(plate_data_path, "rb") as plate_data_file:
    pd_df_plate_data = pd.read_csv(plate_data_file, index_col=0)

In [None]:
# Bare expression as the last line of the cell: shows the loaded frame as the cell output.
pd_df_plate_data

In [None]:
def perform_t_test_on_paired_wells(pd_group):
    """Return the unequal-variance t-test p-value for a two-row group.

    The test is computed from summary statistics rather than raw samples,
    using ``scipy.stats.ttest_ind_from_stats`` with ``equal_var=False``
    (Welch's t-test).

    Parameters
    ----------
    pd_group : pandas.DataFrame
        Group holding the columns "IFN-gamma Mean", "IFN-gamma Std Dev" and
        "IFN-gamma Count". Exactly two rows are required (presumably one row
        per well of a pair, going by the function name).

    Returns
    -------
    float
        The p-value of the test, or ``np.nan`` when the group does not
        contain exactly two rows.
    """
    if len(pd_group) == 2:
        # Each column yields one value per row; unpack them side by side.
        first_mean, second_mean = pd_group["IFN-gamma Mean"].to_numpy()
        first_std, second_std = pd_group["IFN-gamma Std Dev"].to_numpy()
        first_count, second_count = pd_group["IFN-gamma Count"].to_numpy()

        welch_result = stats.ttest_ind_from_stats(
            mean1=first_mean,
            std1=first_std,
            nobs1=first_count,
            mean2=second_mean,
            std2=second_std,
            nobs2=second_count,
            equal_var=False,
        )
        return welch_result.pvalue

    # Anything other than a pair cannot be compared.
    return np.nan

# Apply the paired-well t-test to every (sample name plate, plate number)
# group, then flatten the result into a three-column frame with the p-value
# column labelled explicitly.
pd_df_tested = (
    pd_df_plate_data
    .groupby(["sample name plate", "plate number"])
    .apply(perform_t_test_on_paired_wells, include_groups=False)
    .reset_index()
    .set_axis(
        ["sample name plate", "plate number", "t_test_p_val"],
        axis="columns",
    )
)


In [None]:
import seaborn as sns

# Histogram of the per-group t-test p-values, split into 100 bins.
sns.histplot(data=pd_df_tested, x="t_test_p_val", bins=100)