In [1]:
import argparse
import pathlib
import random
import sys

import numpy as np
import pandas as pd

# check if in a jupyter notebook
try:
    cfg = get_ipython().config
    in_notebook = True
except NameError:
    in_notebook = False
import warnings

import pycytominer.aggregate
import tqdm

# Suppress all RuntimeWarnings
warnings.filterwarnings("ignore", category=RuntimeWarning)


sys.path.append("../utils")
from mAP_utils import run_mAP_across_time

In [None]:
if not in_notebook:
    # setup the argument parser
    parser = argparse.ArgumentParser(
        description="Generate a map for differing cell counts"
    )

    parser.add_argument(
        "--percentage", type=float, help="Percentage of wells to use for the map file"
    )
    parser.add_argument("--seed", type=int, help="Seed for the random number generator")
    parser.add_argument(
        "--shuffle", action="store_true", help="Shuffle the order of the wells"
    )
    # parse the arguments
    args = parser.parse_args()
    percentage = args.percentage
    set_seed = args.seed
    shuffle = args.shuffle
else:
    percentage = 0.1
    set_seed = 0
    shuffle = False

output_file = pathlib.Path(
    f"../results/mAP_cell_percentages/{percentage}_{set_seed}_{shuffle}.parquet"
)
output_file.parent.mkdir(exist_ok=True, parents=True)

In [3]:
data_file_path = pathlib.Path(
    "../../data/CP_feature_select/profiles/features_selected_profile.parquet"
).resolve(strict=True)
df = pd.read_parquet(data_file_path)

df.head()

Unnamed: 0,Metadata_plate,Metadata_Well,Metadata_number_of_singlecells,Metadata_compound,Metadata_dose,Metadata_control,Metadata_ImageNumber,Metadata_FOV,Metadata_Time,Metadata_Cells_Number_Object_Number,...,Nuclei_Texture_DifferenceVariance_CL_488_1_3_00_256,Nuclei_Texture_DifferenceVariance_CL_488_2_3_01_256,Nuclei_Texture_DifferenceVariance_CL_561_3_02_256,Nuclei_Texture_InverseDifferenceMoment_CL_488_1_3_01_256,Nuclei_Texture_InverseDifferenceMoment_CL_561_3_01_256,Nuclei_Texture_InverseDifferenceMoment_DNA_3_00_256,Nuclei_Texture_SumAverage_CL_488_1_3_03_256,Nuclei_Texture_SumAverage_CL_488_2_3_03_256,Nuclei_Texture_SumAverage_CL_561_3_02_256,Nuclei_Texture_SumAverage_DNA_3_00_256
0,1,D-02,192,Staurosporine,0.0,negative,1,1,0.0,2,...,1.439092,0.891046,1.500406,0.587907,0.617193,-3.721488,-0.232102,0.381277,-0.044157,-3.411162
1,1,D-02,192,Staurosporine,0.0,negative,1,1,0.0,23,...,-1.048757,0.629287,0.600013,-0.178073,0.46453,0.297776,0.356853,0.381277,0.040152,0.158581
2,1,D-02,192,Staurosporine,0.0,negative,1,1,0.0,31,...,0.480107,0.891046,0.380827,0.448919,0.390155,0.297776,-0.232102,0.381277,0.078288,0.158581
3,1,D-02,192,Staurosporine,0.0,negative,1,1,0.0,35,...,-0.616834,0.891046,0.863686,0.217271,0.565594,0.297776,-0.030175,0.381277,-0.002435,0.158581
4,1,D-02,192,Staurosporine,0.0,negative,1,1,0.0,40,...,1.439092,-1.464788,-0.659766,0.587907,0.10422,0.297776,-0.232102,-1.424332,0.471232,0.158581


In [4]:
random.seed(set_seed)
subset_df = df.groupby(["Metadata_Time", "Metadata_Well"]).apply(
    lambda x: x.sample(frac=percentage, random_state=set_seed),
    include_groups=True,
)

  subset_df = df.groupby(["Metadata_Time", "Metadata_Well"]).apply(


In [5]:
subset_df.reset_index(drop=True, inplace=True)
if shuffle:
    # permutate the data
    for col in subset_df.columns:
        if col == "Metadata_Time":
            continue
        if col == "Metadata_dose":
            continue
        if col == "Metadata_Well":
            continue
        subset_df[col] = np.random.permutation(subset_df[col])
metadata_cols = [cols for cols in subset_df.columns if "Metadata" in cols]
features_cols = [cols for cols in subset_df.columns if "Metadata" not in cols]
features_cols = features_cols + ["Metadata_number_of_singlecells"]
aggregate_df = pycytominer.aggregate(
    population_df=subset_df,
    strata=["Metadata_Well", "Metadata_Time"],
    features=features_cols,
    operation="median",
)

  population_df = population_df.median().reset_index()
  population_df = population_df.median().reset_index()


In [6]:
metadata_df = subset_df[metadata_cols]
metadata_df = metadata_df.drop_duplicates(subset=["Metadata_Well", "Metadata_Time"])
metadata_df = metadata_df.reset_index(drop=True)
aggregate_df = pd.merge(
    metadata_df, aggregate_df, on=["Metadata_Well", "Metadata_Time"]
)
aggregate_df.head()

Unnamed: 0,Metadata_plate,Metadata_Well,Metadata_number_of_singlecells_x,Metadata_compound,Metadata_dose,Metadata_control,Metadata_ImageNumber,Metadata_FOV,Metadata_Time,Metadata_Cells_Number_Object_Number,...,Nuclei_Texture_DifferenceVariance_CL_488_2_3_01_256,Nuclei_Texture_DifferenceVariance_CL_561_3_02_256,Nuclei_Texture_InverseDifferenceMoment_CL_488_1_3_01_256,Nuclei_Texture_InverseDifferenceMoment_CL_561_3_01_256,Nuclei_Texture_InverseDifferenceMoment_DNA_3_00_256,Nuclei_Texture_SumAverage_CL_488_1_3_03_256,Nuclei_Texture_SumAverage_CL_488_2_3_03_256,Nuclei_Texture_SumAverage_CL_561_3_02_256,Nuclei_Texture_SumAverage_DNA_3_00_256,Metadata_number_of_singlecells_y
0,1,C-02,201,Staurosporine,0.0,test,11,1,0.0,50,...,0.088992,-0.721353,-0.220752,0.097002,0.297776,-0.152855,-1.424332,0.034072,0.158581,192.0
1,1,C-03,189,Staurosporine,0.61,test,11,2,0.0,31,...,-1.464788,-0.064935,0.103997,0.268528,0.297776,-0.232102,0.381277,0.09865,0.158581,189.0
2,1,C-04,132,Staurosporine,1.22,positive,13,3,0.0,47,...,-1.024244,-0.815722,0.239337,0.217239,0.297776,-0.232102,0.381277,0.146068,0.158581,195.0
3,1,C-05,197,Staurosporine,2.44,test,2,1,0.0,35,...,-1.464788,-0.43272,0.183577,0.156433,0.297776,-0.186962,0.381277,0.031514,0.158581,192.0
4,1,C-06,97,Staurosporine,4.88,test,3,1,0.0,153,...,-1.464788,-0.813846,-0.086482,0.3041,0.297776,-0.232102,-1.424332,0.177911,0.158581,186.0


In [7]:
dict_of_map_dfs = run_mAP_across_time(
    aggregate_df,
    seed=set_seed,
    time_column="Metadata_Time",
    reference_column_name="Metadata_dose",
    reference_group=aggregate_df["Metadata_dose"].min(),
)


output_df = pd.concat(dict_of_map_dfs.values(), keys=dict_of_map_dfs.keys())
output_df.reset_index(inplace=True)
output_df.rename(columns={"level_0": "Metadata_Time"}, inplace=True)
# add the percentage of cells to the keys
output_df["percentage_of_cells"] = percentage
output_df["seed"] = set_seed
output_df["shuffle"] = shuffle
output_df.reset_index(drop=True, inplace=True)
output_df.to_parquet(output_file)
output_df.head()

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

Unnamed: 0,Metadata_Time,Metadata_dose,Metadata_reference_index,mean_average_precision,indices,p_value,corrected_p_value,below_p,below_corrected_p,-log10(p-value),percentage_of_cells,seed,shuffle
0,0.0,0.61,-1,0.944444,"[0, 9, 18]",0.0998809,0.099881,False,False,1.000518,0.1,0,True
1,0.0,1.22,-1,0.9,"[1, 10, 19]",0.0998809,0.099881,False,False,1.000518,0.1,0,True
2,0.0,2.44,-1,1.0,"[2, 11, 20]",9.99999e-07,1e-06,True,True,5.890856,0.1,0,True
3,0.0,4.88,-1,1.0,"[3, 12, 21]",9.99999e-07,1e-06,True,True,5.890856,0.1,0,True
4,0.0,9.77,-1,1.0,"[4, 13, 22]",9.99999e-07,1e-06,True,True,5.890856,0.1,0,True
