In [1]:
import gc
import pathlib

import numpy as np
import pandas as pd
from statsmodels.stats.multitest import multipletests

In [2]:
# Cell type to process; interpolated into both the input directory
# (../results/<cell_type>/) and the combined output file name below.
cell_type = "PBMC"

In [3]:
# Empty placeholder frame carrying the eight result column names.
# `final_df` is rebound further down, when the result files are concatenated.
placeholder_columns = [
    "group1",
    "group2",
    "meandiff",
    "lower",
    "upper",
    "p-adj",
    "reject",
    "features",
]
final_df = pd.DataFrame(columns=placeholder_columns)

In [4]:
# Directory holding the result files for this cell type.
# strict=True makes a missing directory fail right here instead of later on.
data_dir = pathlib.Path(f"../results/{cell_type}/").resolve(strict=True)

# Destination of the combined table (written by the save cell at the end).
output_file_path = pathlib.Path(f"../results/{cell_type}_combined.parquet")

# iterdir() yields entries in arbitrary, filesystem-dependent order; sorting
# keeps the row order of the combined frame reproducible between runs.
file_list = sorted(x for x in data_dir.iterdir() if x.is_file())
if not file_list:
    # Without this guard pd.concat fails with a generic
    # "No objects to concatenate" message that does not name the directory.
    raise ValueError(f"No result files found in {data_dir}")

# Read every file and stack the rows under a fresh 0..n-1 index.
list_of_dfs = [pd.read_parquet(file) for file in file_list]
final_df = pd.concat(list_of_dfs, ignore_index=True)

final_df.shape

(7224, 11)

In [5]:
# Correct for multiple testing: Benjamini-Hochberg FDR across all rows at once.
FDR_ALPHA = 0.001  # FDR level handed to multipletests

# Guard the input before correcting it. Series.between is False for NaN, so
# this rejects missing values as well as anything outside [0, 1].
# NOTE(review): in the output previously rendered for this cell, the "lower"
# values look like p-values while "upper"/"p-adj" bracket "meandiff" like a
# confidence interval -- confirm the column order written by the upstream step.
raw_pvals = final_df["p-adj"]
invalid_mask = ~raw_pvals.between(0, 1)
if invalid_mask.any():
    raise ValueError(
        f"'p-adj' must contain only values in [0, 1]; found "
        f"{int(invalid_mask.sum())} invalid value(s) - "
        "check the column order of the input files"
    )

# multipletests returns (reject, corrected p-values, alphacSidak, alphacBonf);
# only the first two are needed here.
fdr_reject, fdr_pvals = multipletests(
    raw_pvals, alpha=FDR_ALPHA, method="fdr_bh"
)[:2]

# "reject" already exists in the loaded data and is replaced by the FDR
# decision; the corrected p-values go into a new column.
final_df["reject"] = fdr_reject
final_df["p-adj_fdr_bh"] = fdr_pvals
final_df.head()

Unnamed: 0,group1,group2,meandiff,lower,upper,p-adj,reject,features,p-adj_abs,pos_neg,shuffled,p-adj_fdr_bh
0,apoptosis,healthy,0.0114,0.0997,-0.0016,0.0245,False,Cytoplasm_RadialDistribution_ZernikeMagnitude_...,0.0245,positive,True,0.031116
1,apoptosis,pyroptosis,0.0098,0.1871,-0.0033,0.023,False,Cytoplasm_RadialDistribution_ZernikeMagnitude_...,0.023,positive,True,0.029533
2,healthy,pyroptosis,-0.0016,0.7992,-0.0075,0.0043,False,Cytoplasm_RadialDistribution_ZernikeMagnitude_...,0.0043,positive,True,0.01927
3,apoptosis,healthy,0.0131,0.0238,0.0014,0.0248,False,Cytoplasm_AreaShape_Zernike_5_3,0.0248,positive,False,0.031414
4,apoptosis,pyroptosis,0.0009,0.9838,-0.0109,0.0127,False,Cytoplasm_AreaShape_Zernike_5_3,0.0127,positive,False,0.021959


In [6]:
# Persist the combined, FDR-corrected table as a single parquet file.
final_df.to_parquet(path=output_file_path)