In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import os
import pandas as pd
import matplotlib.pyplot as plt

from utils import (
    read_from_gallery,
    create_image_paths,
    filter_out_edge_single_cells,
    extract_single_cell_samples,
    viz_pooled,
)

In [2]:
# Root folder for all generated figures. The visualization loop saves each
# figure to <resultsDir>/<batch>/<gene>/<guide>.png.
resultsDir = "./outputs/"

## Set parameters

In [3]:
# Experiment to visualize; one of "A549", "HeLa_DMEM", "HeLa_HPLM".
batch = "HeLa_DMEM"

# ---- Figure parameters ----
# Number of cells to return.
n_cells = 2
# Size, in pixels, of the bounding box cropped around each cell.
box_size = 200
# DPI passed to savefig (500 was used for the paper images).
dpi_quality = 100

# How the returned cells are picked: "random" or "representative".
cell_selection_method = "representative"

# Channels to return.
channels = ["DNA", "Mito", "Phalloidin", "WGA", "ER"]

# Genes to return; "nontargeting" selects the control guides listed below.
lst_2gen = ["ATP6V1A", "ATP6V1B2", "ATP6V1G1", "nontargeting"]

# Guide sequences used for the "nontargeting" entry instead of a lookup in
# the barcode table.
nt_guides = [
    "TCCCTGCATTCATGGTTTTA", "ACCCATGAGTTAAGTTTTCT",
    "TTATATGGTTTTAAACGGCT", "CAGTCGTTTCTATGGGATCT",
    "TTCGCACGATTGCACCTTGG", "ATAAGCCACACTACCCGCCT",
    "ACTATCATGGCACCCAATTG", "GACTGAAATCCAAGGACTGT",
    "ATGTCTAGACCTAATCGTTT", "GTAAACTTTGTCTGGAGTAT",
]

## Project-specific paths

In [4]:
# Directories/parameters used when reading directly from the Cell Painting Gallery.
rootDir = "cpg0021-periscope/broad/"

# Batch folder for every supported experiment. Both HeLa conditions live in the
# same batch folder; their plates are told apart by `platefilter` below.
# (The second key used to repeat "HeLa_DMEM", which silently dropped the
# "HeLa_HPLM" entry and made that experiment fail with a KeyError.)
batch_dict = {
    "HeLa_DMEM": "20210422_6W_CP257",
    "HeLa_HPLM": "20210422_6W_CP257",
    "A549": "20200805_A549_WG_Screen",
}
batch_code = batch_dict[batch]

# Plates belonging to the selected experiment; False when no plate list is
# defined for it (the A549 case).
if batch == "HeLa_DMEM":
    platefilter = ["CP257A", "CP257B", "CP257D", "CP257F", "CP257H"]
elif batch == "HeLa_HPLM":
    platefilter = ["CP257J", "CP257K", "CP257L", "CP257N"]
else:
    platefilter = False


# Folder holding the per-guide single-cell profiles. It is built from rootDir so
# the gallery prefix is defined in exactly one place; profiles are grouped by
# the part of `batch` before the first underscore (e.g. "HeLa" for "HeLa_DMEM").
sc_files_dir = (
    f"{rootDir}workspace/profiles/{batch.split('_')[0]}/single_cell_by_guide/"
)

# Image size handed to the edge-cell filter and to the plotting helper
# (presumably the image side length in pixels — TODO confirm).
im_size = 5500

# Barcode library reference table; the visualization loop uses its
# "gene_symbol" and "sgRNA" columns to find the guides of each gene.
metadata_orig = pd.read_csv("../common_files/Barcodes.csv")

# Gallery root and batch folder of the selected experiment.
paths = dict(root_dir=rootDir, batch_folder=batch_code)

# Names of the metadata columns holding site, well and plate.
meta_cols = dict(
    site="Metadata_Foci_site_location",
    well="Metadata_Foci_well",
    plate="Metadata_Foci_plate",
)

# Columns used for visualization: the x/y cell-center columns and the
# prefixes of the image path / file name columns.
viz_cols = dict(
    center_indicator_columns_x_y=[
        "Cells_AreaShape_Center_X",
        "Cells_AreaShape_Center_Y",
    ],
    image_path_column_prefix="PathName_Corr",
    image_name_column_prefix="FileName_Corr",
)

# Bundle passed to create_image_paths.custom_create_image_path_cols.
fix_paths_params = dict(paths=paths, meta_cols=meta_cols, viz_cols=viz_cols)

## Loop over visualization script

In [5]:
# Distance limit handed to viz_pooled.extract_neighbor_cells_center. It does not
# change between iterations, so it is set once here (presumably in pixels —
# TODO confirm against viz_pooled).
max_dist_of_neigh = 400

# For every requested gene, and every guide of that gene, load the single-cell
# profiles, pick `n_cells` cells and save one figure per guide under
# <resultsDir>/<batch>/<gene>/<guide>.png.
for input_gene in lst_2gen:
    print(f"Creating figures for {input_gene}")

    # Guides of the gene: the control guides are listed explicitly, everything
    # else is looked up in the barcode library reference table.
    if input_gene == "nontargeting":
        gene_guids_ls = nt_guides.copy()
    else:
        gene_guids_ls = metadata_orig.loc[
            metadata_orig["gene_symbol"] == input_gene, "sgRNA"
        ].tolist()

    # Output folder for this gene (created only once a figure is ready).
    resdir = os.path.join(resultsDir, batch, input_gene)

    for gi in gene_guids_ls:
        df_p_s = read_from_gallery.read_csv_gzip(
            os.path.join(
                sc_files_dir,
                f"{batch_code}_single_cell_normalized_ALLBATCHES__{gi}_{input_gene}.csv.gz",
            )
        )

        df_p_s = create_image_paths.custom_create_image_path_cols(
            fix_paths_params, df_p_s, channels
        )

        # Expose the cell centers under the Nuclei_Location_Center_* names
        # (presumably the columns the helpers below read — TODO confirm).
        df_p_s["Nuclei_Location_Center_X"] = df_p_s["Cells_AreaShape_Center_X"]
        df_p_s["Nuclei_Location_Center_Y"] = df_p_s["Cells_AreaShape_Center_Y"]

        # Filter edge cells using half the crop box as the margin.
        df_p_s, _ = filter_out_edge_single_cells.edgeCellFilter(
            df_p_s, im_size, box_size / 2
        )

        # Nothing left to plot for this guide.
        if df_p_s.shape[0] == 0:
            continue

        (
            df_samples,
            cp_features_analysis,
        ) = extract_single_cell_samples.extract_single_cell_samples(
            df_p_s, n_cells, cell_selection_method, platefilter
        )

        neigh_center_ls = viz_pooled.extract_neighbor_cells_center(
            df_p_s, df_samples, max_dist_of_neigh, channels
        )

        # Build the "<well>-<site>" label from the sampled rows themselves.
        # Taking the columns from df_p_s instead relies on pandas index
        # alignment and mislabels (or NaN-fills) the samples whenever the
        # sampler returns a re-indexed frame.
        df_samples["label"] = (
            df_samples["Metadata_Foci_well"]
            + "-"
            + df_samples["Metadata_Foci_site_location"].astype(str)
        )

        fig = viz_pooled.visualize_n_SingleCell_pooled(
            channels,
            df_samples,
            box_size,
            im_size,
            outline=True,
            color=True,
            title=input_gene + "_" + cell_selection_method,
            neigh_center_ls=neigh_center_ls,
        )

        # Create the folder without going through a shell: portable, safe for
        # unusual characters in the path, and raises on failure.
        os.makedirs(resdir, exist_ok=True)
        fig.savefig(os.path.join(resdir, f"{gi}.png"), dpi=dpi_quality)
        # Release figure memory before the next guide.
        plt.close("all")

Creating figure for ATP6V1A
