# Metadata location by pairwise replicate correlation

Identify the most reproducible compounds (those with the highest median pairwise replicate correlation), and determine their plate and well locations.
In other words, identify perturbations (and where they're located) that are suitable for an example image.

In [1]:
import pathlib
import pandas as pd

In [2]:
# Read the per-compound score table (tab-separated) written to the
# paper-figures results folder
results_dir = pathlib.Path("..") / "6.paper_figures" / "results"
scores_file = results_dir / "compound_scores.tsv"

scores_df = pd.read_csv(scores_file, sep="\t")

In [3]:
# Keep the L1000 rows that were scored with non_spherized normalization,
# then rank them from highest to lowest median replicate correlation
l1000_scores_df = (
    scores_df
    .loc[
        lambda scores: (scores["assay"] == "L1000")
        & (scores["normalization"] == "non_spherized")
    ]
    .sort_values(by="median_score", ascending=False)
    .reset_index(drop=True)
)

print(l1000_scores_df.shape)
l1000_scores_df.head()

(7566, 12)


Unnamed: 0,compound,no_of_compounds,well,dose_recode,median_score,p_value,assay,normalization,category,pass_thresh,neg_log_10_p_val,dose
0,resminostat,3,C07,6,0.832323,0.0,L1000,non_spherized,all_data,True,3.5,10 uM
1,cx-5461,3,D13,6,0.794879,0.0,L1000,non_spherized,all_data,True,3.5,10 uM
2,azacitidine,2,L13,6,0.793985,0.0,L1000,non_spherized,all_data,True,3.5,10 uM
3,bardoxolone-methyl,2,H14,5,0.77931,0.0,L1000,non_spherized,all_data,True,3.5,3.33 uM
4,ots167,3,O10,3,0.768271,0.0,L1000,non_spherized,all_data,True,3.5,0.37 uM


In [4]:
# Keep the Cell Painting rows that were scored with spherized normalization,
# then rank them from highest to lowest median replicate correlation
cp_scores_df = (
    scores_df
    .loc[
        lambda scores: (scores["assay"] == "Cell Painting")
        & (scores["normalization"] == "spherized")
    ]
    .sort_values(by="median_score", ascending=False)
    .reset_index(drop=True)
)

print(cp_scores_df.shape)
cp_scores_df.head()

(7572, 12)


Unnamed: 0,compound,no_of_compounds,well,dose_recode,median_score,p_value,assay,normalization,category,pass_thresh,neg_log_10_p_val,dose
0,kpt-330,5,E14,5,0.92348,0.0,Cell Painting,spherized,all_data,True,3.5,3.33 uM
1,kpt-330,5,E15,4,0.903616,0.0,Cell Painting,spherized,all_data,True,3.5,1.11 uM
2,menadione,5,M07,6,0.891492,0.0,Cell Painting,spherized,all_data,True,3.5,10 uM
3,cepharanthine,5,D19,6,0.881482,0.0,Cell Painting,spherized,all_data,True,3.5,10 uM
4,bardoxolone-methyl,5,H13,6,0.8788,0.0,Cell Painting,spherized,all_data,True,3.5,10 uM


In [5]:
# Folder holding the platemap metadata (plate location) of both assays
plate_metadata_dir = pathlib.Path("Profiles_level4") / "plate_position_effects" / "data"

# Read the L1000 platemap metadata (tab-separated)
l1000_plate_metadata_file = plate_metadata_dir / "L1000_platemap_metadata.tsv.gz"
l1000_plate_metadata_df = pd.read_csv(l1000_plate_metadata_file, sep="\t")

print(l1000_plate_metadata_df.shape)
l1000_plate_metadata_df.head()

(27837, 8)


Unnamed: 0,plate,well_position,plate_map,replicate_id,dose,Metadata_broad_sample,pert_iname,moa
0,REP.A001_A549_24H_X1_B27,A03,REP.A001_A549_24H,REP.A001_A549_24H_X1_B27:A03,0,DMSO,dmso,control vehicle
1,REP.A001_A549_24H_X2_B27,A03,REP.A001_A549_24H,REP.A001_A549_24H_X2_B27:A03,0,DMSO,dmso,control vehicle
2,REP.A001_A549_24H_X3_B27,A03,REP.A001_A549_24H,REP.A001_A549_24H_X3_B27:A03,0,DMSO,dmso,control vehicle
3,REP.A001_A549_24H_X1_B27,A04,REP.A001_A549_24H,REP.A001_A549_24H_X1_B27:A04,0,DMSO,dmso,control vehicle
4,REP.A001_A549_24H_X2_B27,A04,REP.A001_A549_24H,REP.A001_A549_24H_X2_B27:A04,0,DMSO,dmso,control vehicle


In [6]:
# Read the Cell Painting platemap metadata (tab-separated) from the same
# metadata folder used for L1000
cp_plate_metadata_file = plate_metadata_dir / "CellPainting_platemap_metadata.tsv.gz"
cp_plate_metadata_df = pd.read_csv(cp_plate_metadata_file, sep="\t")

print(cp_plate_metadata_df.shape)
cp_plate_metadata_df.head()

(51833, 8)


Unnamed: 0,Metadata_Assay_Plate_Barcode,Metadata_Well,Metadata_Plate_Map_Name,replicate_name,Metadata_dose_recode,Metadata_broad_sample,pert_iname,moa
0,SQ00015211,A01,C-7161-01-LM6-027,replicate_0,0,DMSO,dmso,control vehicle
1,SQ00015211,A02,C-7161-01-LM6-027,replicate_1,0,DMSO,dmso,control vehicle
2,SQ00015211,A03,C-7161-01-LM6-027,replicate_2,0,DMSO,dmso,control vehicle
3,SQ00015211,A04,C-7161-01-LM6-027,replicate_3,0,DMSO,dmso,control vehicle
4,SQ00015211,A05,C-7161-01-LM6-027,replicate_4,0,DMSO,dmso,control vehicle


In [7]:
# Merge scores and metadata to map plate location to reproducibility.
# Rows are matched on compound name, recoded dose, and well position; the join
# is an inner join, so scores without a metadata match are dropped.
# Both tables carry a "dose" column, so pandas suffixes them: "dose_x" is the
# dose label from the scores (e.g. "10 uM") and "dose_y" is the recoded dose
# from the platemap metadata. They are renamed to make that explicit.
l1000_scores_location_df = l1000_scores_df.merge(
    l1000_plate_metadata_df,
    how="inner",
    left_on=["compound", "dose_recode", "well"],
    right_on=["pert_iname", "dose", "well_position"]
).rename(columns={"dose_x": "dose", "dose_y": "Metadata_dose_recode"})

# Output file
output_file = pathlib.Path("results", "L1000_compound_metadata_plate_location_with_reproducibility.tsv.gz")

# to_csv does not create missing parent directories; make sure the output
# folder exists so the notebook also runs when "results" is absent
output_file.parent.mkdir(parents=True, exist_ok=True)
l1000_scores_location_df.to_csv(output_file, sep="\t", index=False)

print(l1000_scores_location_df.shape)
l1000_scores_location_df.head(2)

(22488, 20)


Unnamed: 0,compound,no_of_compounds,well,dose_recode,median_score,p_value,assay,normalization,category,pass_thresh,neg_log_10_p_val,dose,plate,well_position,plate_map,replicate_id,Metadata_dose_recode,Metadata_broad_sample,pert_iname,moa
0,resminostat,3,C07,6,0.832323,0.0,L1000,non_spherized,all_data,True,3.5,10 uM,REP.A009_A549_24H_X1_B32,C07,REP.A009_A549_24H,REP.A009_A549_24H_X1_B32:C07,6,BRD-K28822270-001-01-1,resminostat,hdac inhibitor
1,resminostat,3,C07,6,0.832323,0.0,L1000,non_spherized,all_data,True,3.5,10 uM,REP.A009_A549_24H_X2_B32,C07,REP.A009_A549_24H,REP.A009_A549_24H_X2_B32:C07,6,BRD-K28822270-001-01-1,resminostat,hdac inhibitor


In [8]:
# Merge scores and metadata to map plate location to reproducibility.
# Rows are matched on compound name, recoded dose, and well position; the join
# is an inner join, so scores without a metadata match are dropped.
cp_scores_location_df = cp_scores_df.merge(
    cp_plate_metadata_df,
    how="inner",
    left_on=["compound", "dose_recode", "well"],
    right_on=["pert_iname", "Metadata_dose_recode", "Metadata_Well"]
)

# Output file
output_file = pathlib.Path("results", "CellPainting_compound_metadata_plate_location_with_reproducibility.tsv.gz")

# to_csv does not create missing parent directories; make sure the output
# folder exists so the notebook also runs when "results" is absent
output_file.parent.mkdir(parents=True, exist_ok=True)
cp_scores_location_df.to_csv(output_file, sep="\t", index=False)

print(cp_scores_location_df.shape)
cp_scores_location_df.head(2)

(36641, 20)


Unnamed: 0,compound,no_of_compounds,well,dose_recode,median_score,p_value,assay,normalization,category,pass_thresh,neg_log_10_p_val,dose,Metadata_Assay_Plate_Barcode,Metadata_Well,Metadata_Plate_Map_Name,replicate_name,Metadata_dose_recode,Metadata_broad_sample,pert_iname,moa
0,kpt-330,5,E14,5,0.92348,0.0,Cell Painting,spherized,all_data,True,3.5,3.33 uM,SQ00015157,E14,C-7161-01-LM6-004,replicate_27690,5,BRD-K21361524-001-01-1,kpt-330,exportin antagonist
1,kpt-330,5,E14,5,0.92348,0.0,Cell Painting,spherized,all_data,True,3.5,3.33 uM,SQ00015159,E14,C-7161-01-LM6-004,replicate_27696,5,BRD-K21361524-001-01-1,kpt-330,exportin antagonist
