In [19]:
import sys
import pathlib
import pandas as pd
from copairs import map
from pycytominer import annotate

# adding src module
sys.path.append("../../")
from src.io_utils import load_config
from src.data_utils import split_meta_and_features

Setting up paths and creating directories

In [2]:
# setting up data directory path (strict=True raises if the path does not exist)
data_dir_path = pathlib.Path("../data").resolve(strict=True)

# setting up metadata paths
metadata_dir = (data_dir_path / "metadata").resolve(strict=True)
barcode_path = (metadata_dir / "barcode_platemap.csv").resolve(strict=True)
first_batch_platemap = (metadata_dir / "Target_Selective_Library_Screen_Plate_1.csv").resolve(strict=True)

# setting profile paths, b1 means "batch 1"
# glob() returns files in a filesystem-dependent order, so the paths are sorted
# to make the plate-id -> file pairing deterministic across machines and runs
# NOTE(review): assumes the sorted parquet file names follow plate order — confirm
plate_id_paths = ["b1_plate1", "b1_plate2", "b1_plate3", "b1_plate4"]
plate_paths = dict(zip(plate_id_paths, sorted(data_dir_path.glob("*.parquet"))))

# creating module directories (relative to the current working directory)
figures_dir_path = pathlib.Path("figures").resolve()
figures_dir_path.mkdir(exist_ok=True)
results_dir_path = pathlib.Path("results").resolve()
results_dir_path.mkdir(exist_ok=True)

# config path
config_path = pathlib.Path("../config.yaml").resolve(strict=True)

Next, we load the datasets, barcodes, and configs.

In [4]:
# read the analysis configuration from the config file
configs = load_config(config_path)

# read the barcode table and the first-batch platemap
barcodes = pd.read_csv(barcode_path)
platemap = pd.read_csv(first_batch_platemap)

# read every plate profile into memory, keyed by its plate id
loaded_plates = {
    plate_id: pd.read_parquet(plate_path)
    for plate_id, plate_path in plate_paths.items()
}

In [5]:
# pull out the first batch-1 plate for a quick sanity check
plate1 = loaded_plates["b1_plate1"]
# report (rows, columns), then render the frame as the cell output
print(plate1.shape)
plate1

(12745, 2016)


Unnamed: 0,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y,Metadata_Cells_Location_Center_X,Metadata_Cells_Location_Center_Y,Metadata_Image_Count_Cells,Metadata_ImageNumber,Image_Metadata_Plate,Image_Metadata_Site,Image_Metadata_Well,Metadata_Cells_Number_Object_Number,...,Nuclei_Texture_Variance_Hoechst_3_02_256,Nuclei_Texture_Variance_Hoechst_3_03_256,Nuclei_Texture_Variance_Mitochondria_3_00_256,Nuclei_Texture_Variance_Mitochondria_3_01_256,Nuclei_Texture_Variance_Mitochondria_3_02_256,Nuclei_Texture_Variance_Mitochondria_3_03_256,Nuclei_Texture_Variance_PM_3_00_256,Nuclei_Texture_Variance_PM_3_01_256,Nuclei_Texture_Variance_PM_3_02_256,Nuclei_Texture_Variance_PM_3_03_256
0,398.108488,397.079770,399.047591,355.078157,13,1168,localhost240928120001,f17,G10,1,...,48.271194,48.357799,6.607844,6.058847,6.126076,5.887714,4.601669,4.478892,4.545796,4.506606
1,855.599578,277.977481,809.932735,268.256144,9,2,localhost240928120001,f04,B02,1,...,56.966002,57.149212,20.243446,20.848104,20.011861,19.960999,20.807437,21.028336,21.053395,20.743538
2,704.260681,247.533802,657.274482,248.769434,5,24,localhost240928120001,f19,B03,1,...,24.981832,23.618909,4.517895,4.695692,4.753353,4.246232,19.246264,19.563467,19.389173,18.360261
3,580.929897,165.856701,596.663683,151.208875,28,50,localhost240928120001,f03,B05,1,...,41.851450,40.962070,5.569402,5.639497,5.799303,5.354111,16.738937,16.593750,16.539002,16.276492
4,421.926235,231.551148,408.041391,256.919071,16,51,localhost240928120001,f08,B05,1,...,81.325721,79.541384,22.942340,21.064127,21.897403,21.347913,11.048527,10.706962,10.848263,10.811911
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13714,441.507487,855.208556,456.691378,836.639357,29,279,localhost240928120001,f07,C09,28,...,81.767509,79.230039,107.852211,100.952735,107.587682,103.530447,26.953325,26.830194,26.554698,26.459866
13715,381.097667,697.486706,400.728484,720.539830,33,967,localhost240928120001,f00,G02,29,...,30.028738,28.932088,2.296707,2.147155,2.128027,2.034617,6.368800,6.477607,6.520405,6.368960
13717,553.885398,726.953540,528.487782,724.737483,33,967,localhost240928120001,f00,G02,30,...,13.058381,12.996540,1.891702,1.830740,1.926370,1.874141,7.205823,7.339064,7.484730,7.407145
13719,869.983996,849.125828,863.799983,832.516160,33,967,localhost240928120001,f00,G02,32,...,31.243526,30.775413,3.665414,3.380208,3.728568,3.632304,4.404563,4.331136,4.417653,4.472548


In [18]:
# column pair used to merge the platemap onto the profiles
# NOTE(review): presumably [platemap well column, profile well column] — confirm
# against the pycytominer `annotate` documentation
well_join_keys = ["Metadata_well_position", "Image_Metadata_Well"]

# attach the platemap metadata to the plate 1 profiles
annotated_df = annotate(plate1, platemap=platemap, join_on=well_join_keys)

# report the annotated dimensions and preview the first rows
print("annoated_df shape:", annotated_df.shape)
annotated_df.head()

annoated_df shape: (12745, 2022)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_heart_number,Metadata_cell_type,Metadata_heart_failure_type,Metadata_treatment,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y,Metadata_Cells_Location_Center_X,Metadata_Cells_Location_Center_Y,...,Nuclei_Texture_Variance_Hoechst_3_02_256,Nuclei_Texture_Variance_Hoechst_3_03_256,Nuclei_Texture_Variance_Mitochondria_3_00_256,Nuclei_Texture_Variance_Mitochondria_3_01_256,Nuclei_Texture_Variance_Mitochondria_3_02_256,Nuclei_Texture_Variance_Mitochondria_3_03_256,Nuclei_Texture_Variance_PM_3_00_256,Nuclei_Texture_Variance_PM_3_01_256,Nuclei_Texture_Variance_PM_3_02_256,Nuclei_Texture_Variance_PM_3_03_256
0,B,2,7,healthy,,DMSO,855.599578,277.977481,809.932735,268.256144,...,56.966002,57.149212,20.243446,20.848104,20.011861,19.960999,20.807437,21.028336,21.053395,20.743538
1,B,2,7,healthy,,DMSO,613.937003,209.544343,640.790427,204.845356,...,51.93089,52.569618,12.654575,11.418443,11.870022,12.312151,18.134861,17.340252,16.949959,17.628858
2,B,2,7,healthy,,DMSO,916.37971,280.023188,927.500461,333.104966,...,76.757518,74.898656,166.959274,165.712135,168.537202,173.963916,36.551473,36.859759,36.356569,35.920687
3,B,2,7,healthy,,DMSO,530.411492,80.037145,515.843583,88.534307,...,62.276713,58.399535,10.612893,10.005814,10.029249,9.635498,18.82235,18.391346,18.130655,18.047497
4,B,2,7,healthy,,DMSO,958.301695,69.265085,955.244723,96.955249,...,123.857196,117.177343,183.806064,183.609077,204.218008,190.040806,132.779819,134.273476,140.564727,132.735221


## Applying mAP

### Compound level mAP

In [22]:
# pull the compound-level settings for the AP and mAP steps out of the config
comp_ap_configs = configs["compound_ap_configs"]
comp_map_configs = configs["compound_map_configs"]

# separate the column names into metadata columns and feature columns
meta_cols, feat_cols = split_meta_and_features(annotated_df)

# metadata stays a DataFrame; the feature columns become a plain array
meta_df = annotated_df.loc[:, meta_cols]
profiles = annotated_df.loc[:, feat_cols].to_numpy()

[[2.41410000e+04 1.01156000e+05 9.61000000e+02 ... 2.10283361e+01
  2.10533954e+01 2.07435377e+01]
 [2.48760000e+04 8.80720000e+04 8.28000000e+02 ... 1.73402524e+01
  1.69499587e+01 1.76288584e+01]
 [1.61440000e+04 4.88520000e+04 1.00400000e+03 ... 3.68597593e+01
  3.63565692e+01 3.59206873e+01]
 ...
 [2.40540000e+04 3.76830000e+04 6.32000000e+02 ... 5.02681987e+00
  4.95176511e+00 5.04024498e+00]
 [1.02690000e+04 2.37160000e+04 8.54000000e+02 ... 3.19973113e+00
  3.29545166e+00 3.36857040e+00]
 [1.99320000e+04 4.24000000e+04 6.58000000e+02 ... 6.38905239e+00
  6.72199511e+00 6.69437656e+00]]


In [None]:
# executing ap calculations
# the metadata frame and the feature array are passed positionally: copairs
# names these parameters `meta` and `feats`, so the previous `metadata=` /
# `profiles=` keywords are rejected as unexpected keyword arguments
compounds_aps = map.average_precision(
    meta_df,
    profiles,
    pos_sameby=comp_ap_configs["pos_sameby"],
    pos_diffby=comp_ap_configs["pos_diffby"],
    neg_sameby=comp_ap_configs["neg_sameby"],
    neg_diffby=comp_ap_configs["neg_diffby"],
)

# executing mAP calculations
# the grouping columns are passed positionally as well: the copairs parameter
# is called `sameby`, not `pos_sameby`
# NOTE(review): the config key was previously spelled "pos_samby"; it is
# corrected here to match the "pos_sameby" spelling used by the AP config —
# confirm that config.yaml uses this key under `compound_map_configs`
replicate_maps = map.mean_average_precision(
    compounds_aps,
    comp_map_configs["pos_sameby"],
    null_size=10000,
    threshold=0.05,
    seed=0,
)

In [24]:
# display the metadata column names returned by split_meta_and_features
meta_cols

['Metadata_WellRow',
 'Metadata_WellCol',
 'Metadata_heart_number',
 'Metadata_cell_type',
 'Metadata_heart_failure_type',
 'Metadata_treatment',
 'Metadata_Nuclei_Location_Center_X',
 'Metadata_Nuclei_Location_Center_Y',
 'Metadata_Cells_Location_Center_X',
 'Metadata_Cells_Location_Center_Y',
 'Metadata_Image_Count_Cells',
 'Metadata_ImageNumber',
 'Metadata_Plate',
 'Metadata_Well',
 'Metadata_Cells_Number_Object_Number',
 'Metadata_Cytoplasm_Parent_Cells',
 'Metadata_Cytoplasm_Parent_Nuclei',
 'Metadata_Nuclei_Number_Object_Number',
 'Image_Metadata_Site']