## Generate Cell Health Profiles

**Gregory Way, 2019**

Use [pycytominer](https://github.com/cytomining/pycytominer) to process Cell Painting data and generate profiles for the Cell Health Project.

In [1]:
import os
import pandas as pd

from pycytominer.aggregate import AggregateProfiles
from pycytominer.annotate import annotate

In [2]:
# Batch identifier; it names the per-batch subfolders resolved below
batch = "CRISPR_PILOT_B1"

# Root of the project workspace, assembled from its path components
workspace_parts = [
    "/home",
    "ubuntu",
    "bucket",
    "projects",
    "2015_07_01_Cell_Health_Vazquez_Cancer_Broad",
    "workspace",
]
bucket_dir = os.path.join(*workspace_parts)

# <workspace>/backend/<batch> and <workspace>/metadata/<batch>
backend_dir, metadata_dir = (
    os.path.join(bucket_dir, subfolder, batch)
    for subfolder in ("backend", "metadata")
)

In [3]:
# Read the table that maps each assay plate barcode to its platemap name
barcode_platemap_file = os.path.join(metadata_dir, "barcode_platemap.csv")
barcode_platemap_df = pd.read_csv(filepath_or_buffer=barcode_platemap_file)

# Report (rows, columns), then preview the first five rows
print(barcode_platemap_df.shape)
barcode_platemap_df.head(n=5)

(9, 2)


Unnamed: 0,Assay_Plate_Barcode,Plate_Map_Name
0,SQ00014610,DEPENDENCIES1_A549
1,SQ00014611,DEPENDENCIES1_A549
2,SQ00014612,DEPENDENCIES1_A549
3,SQ00014613,DEPENDENCIES1_ES2
4,SQ00014614,DEPENDENCIES1_ES2


## Create Profiles

In [9]:
# Cell counts for every plate are written here; create the folder up front so
# `to_csv` does not fail on a fresh checkout.
os.makedirs("results", exist_ok=True)

# `os.listdir` returns entries in arbitrary order; sort so plates are processed
# (and logged) in a reproducible order.
for plate in sorted(os.listdir(backend_dir)):
    # Log progress, one plate barcode per line
    print(plate)

    # Location of this plate's SQLite file inside the batch backend folder
    plate_dir = os.path.join(backend_dir, plate)
    sqlite_file = "sqlite:////{}/{}.sqlite".format(plate_dir, plate)

    # Load specific platemap: look up the platemap name assigned to this plate
    # barcode, then read the matching file from the metadata folder.
    platemap = barcode_platemap_df.query("Assay_Plate_Barcode == @plate").Plate_Map_Name.values[0]
    platemap_file = os.path.join(metadata_dir, "platemap", "{}.csv".format(platemap))
    platemap_df = pd.read_csv(platemap_file)

    # Prepare sql file for processing, grouping by plate and well
    ap = AggregateProfiles(sqlite_file, strata=["Image_Metadata_Plate", "Image_Metadata_Well"])

    # Count cells and output. The merge is an inner join (pandas default) of
    # the counts' well column against the platemap's `well_position`; the
    # platemap's positional columns are dropped once the join is done.
    cell_count_file = os.path.join("results", "{}_cell_count.tsv".format(plate))
    cell_count_df = (
        ap.count_cells()
        .merge(platemap_df,
               left_on="Image_Metadata_Well",
               right_on="well_position")
        .drop(["WellRow", "WellCol", "well_position"],
              axis="columns")
    )
    cell_count_df.to_csv(cell_count_file, sep='\t', index=False)

    # Begin processing profiles: one output folder per plate under
    # data/profiles/<batch>/<plate>
    output_dir = os.path.join("data", "profiles", batch, plate)
    os.makedirs(output_dir, exist_ok=True)

    # Aggregate single cells into well profiles
    out_file = os.path.join(output_dir, "{}.csv".format(plate))
    ap.aggregate_profiles(output_file=out_file)
    
    

SQ00014610
SQ00014611
SQ00014612
SQ00014613
SQ00014614
SQ00014615
SQ00014616
SQ00014617
SQ00014618


In [12]:
# Rebuild the aggregator for the plate whose `sqlite_file` was set most
# recently by the profile-generation loop, grouping by plate and well.
grouping_columns = ["Image_Metadata_Plate", "Image_Metadata_Well"]
ap = AggregateProfiles(sqlite_file, strata=grouping_columns)

In [17]:
# Count cells for the current plate and attach its platemap metadata.
# `ap`, `plate` and `platemap_file` are taken from the cells above; the
# platemap itself has to be read here because nothing else loads it.
platemap_df = pd.read_csv(platemap_file)

# Inner join (pandas default) of the counts' well column against the
# platemap's `well_position`, then drop the platemap's positional columns.
cell_count_df = (
    ap.count_cells()
    .merge(platemap_df,
           left_on="Image_Metadata_Well",
           right_on="well_position")
    .drop(["WellRow", "WellCol", "well_position"],
          axis="columns")
)

# Make sure the output folder exists before writing the tab-separated counts
os.makedirs("results", exist_ok=True)
cell_count_file = os.path.join("results", "{}_cell_count.tsv".format(plate))
cell_count_df.to_csv(cell_count_file, sep='\t', index=False)

cell_count_df.head(2)

Unnamed: 0,Image_Metadata_Well,SQ00014618,gene_name,pert_name,broad_sample,cell_line
0,A01,2603,EMPTY,EMPTY,,HCC44
1,A02,2112,MCL1,MCL1-5,,HCC44


In [19]:
# Per-plate output folder: data/profiles/<batch>/<plate>
profile_path_parts = ("data", "profiles", batch, plate)
output_dir = os.path.join(*profile_path_parts)

# exist_ok=True keeps this cell re-runnable once the folder exists
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Write the aggregated profiles for this plate to <output_dir>/<plate>.csv
profile_filename = "{}.csv".format(plate)
out_file = os.path.join(output_dir, profile_filename)
ap.aggregate_profiles(output_file=out_file)

In [None]:
# Annotate Profiles
# Combine the aggregated profiles in `out_file` with the plate's platemap and
# save the result next to them as "<plate>_augmented.csv".
# Relies on `out_file`, `output_dir`, `plate` and `platemap_file` from the
# cells above, and on `annotate` being imported with the other pycytominer
# names in the imports cell.
anno_file = os.path.join(output_dir, "{}_augmented.csv".format(plate))
annotate(
    profiles=out_file,
    platemap=platemap_file,
    # Column names used to match platemap rows to profile rows
    join_on=["Metadata_well_position", "Metadata_Well"],
    output_file=anno_file,
)