In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gc


from utils import (
    read_from_gallery,
    create_image_paths,
    filter_out_edge_single_cells,
    extract_single_cell_samples,
    viz_pooled,
)

## Set parameters

In [None]:
# ---- Batch selection -------------------------------------------------------
# The A549 screen is active; switch to "20210422_6W_CP257" to process HeLa.
batch = "20200805_A549_WG_Screen"

# ---- Figure output ---------------------------------------------------------
# Generated images are saved under this folder (one sub-folder per gene).
resultsDir = "./outputs/"
dpi_quality = 100  # 500 used for paper images

# ---- Sampling options ------------------------------------------------------
# n_cells: number of single cells requested from the sampler per guide.
# box_size: crop box size handed to the edge filter and the visualization.
n_cells, box_size = 10, 100

# Cell selection strategy; one of "random" or "representative".
cell_selection_method = "representative"

# ---- Genes/guides subject to single-cell visualization ---------------------
lst_2gen = [
    "ATP6V1A", "ATP6V1B2", "ATP6V1G1",
    "NUS1", "TMTC3", "DPM1",
    "SAMM50", "TOMM22", "TOMM20",
    "RIMS1", "ACTN4", "WDR1",
    "POLD2", "POLE", "POLD4",
    "nontargeting",
]

# Guide sequences used in place of a barcode-table lookup when the
# "nontargeting" entry of lst_2gen is processed.
nt_guides = """
    TCCCTGCATTCATGGTTTTA
    ACCCATGAGTTAAGTTTTCT
    TTATATGGTTTTAAACGGCT
    CAGTCGTTTCTATGGGATCT
    TTCGCACGATTGCACCTTGG
    ATAAGCCACACTACCCGCCT
    ACTATCATGGCACCCAATTG
    GACTGAAATCCAAGGACTGT
    ATGTCTAGACCTAATCGTTT
    GTAAACTTTGTCTGGAGTAT
""".split()

## Project-specific paths

In [None]:
# ---- Directories/params for reading directly from the CP gallery ----------
rootDir = "cpg0021-periscope/broad/"

# Each batch id maps to the alternative name used in the path to that
# batch's data.
batch_multi_name_dict = {
    "20210422_6W_CP257": "HeLa",
    "20200805_A549_WG_Screen": "A549",
}
batch_alter_name = batch_multi_name_dict[batch]

# ---- Path to the single-cell profiles (CP gallery layout) ------------------
# {0} is the gallery root, {1} is the batch's alternative name.
sc_files_dir_formatter = "{0}workspace/profiles/{1}/single_cell_by_guide/"
sc_files_dir = sc_files_dir_formatter.format(rootDir, batch_alter_name)

# Passed to the edge-cell filter and the visualization call.
# NOTE(review): presumably the image side length in pixels -- confirm.
im_size = 5500

# ---- Channels to visualize -------------------------------------------------
channels = ["DNA", "Mito", "Phalloidin", "WGA", "ER"]

# ---- Barcode library reference table ---------------------------------------
# Used downstream to look up the guides (sgRNA) belonging to a gene symbol.
metadata_orig = pd.read_csv("../common_files/Barcodes.csv")

# ---- Parameter bundle handed to create_image_paths -------------------------
paths = dict(root_dir=rootDir, batch_folder=batch)

meta_cols = dict(
    site="Metadata_Foci_site_location",
    well="Metadata_Foci_well",
    plate="Metadata_Foci_plate",
)

viz_cols = dict(
    center_indicator_columns_x_y=[
        "Cells_AreaShape_Center_X",
        "Cells_AreaShape_Center_Y",
    ],
    image_path_column_prefix="PathName_Corr",
    image_name_column_prefix="FileName_Corr",
)

fix_paths_params = dict(paths=paths, meta_cols=meta_cols, viz_cols=viz_cols)

## Loop over genes and guides to generate single-cell visualizations

In [None]:
# For every gene in lst_2gen and every guide of that gene: load the guide's
# single-cell profiles, attach image paths, drop cells too close to the image
# border, sample n_cells cells, and save one figure per guide under
# <resultsDir>/<gene>/<guide>.png.

# Distance threshold handed to viz_pooled.extract_neighbor_cells_center.
# It is loop-invariant, so it is set once up front.
max_dist_of_neigh = 400

for input_gene in lst_2gen:
    # Guides for the gene: the fixed nt_guides list for the "nontargeting"
    # entry, otherwise the sgRNA values listed for that gene symbol in the
    # barcode library reference table.
    if input_gene == "nontargeting":
        gene_guids_ls = nt_guides.copy()
    else:
        gene_guids_ls = metadata_orig.loc[
            metadata_orig["gene_symbol"] == input_gene, "sgRNA"
        ].tolist()

    for gi in gene_guids_ls:
        # One gzipped CSV of single-cell profiles per (guide, gene) pair.
        profile_name = (
            f"{batch}_single_cell_normalized_ALLBATCHES__{gi}_{input_gene}.csv.gz"
        )
        df_p_s = read_from_gallery.read_csv_gzip(sc_files_dir + profile_name)

        df_p_s = create_image_paths.custom_create_image_path_cols(
            fix_paths_params, df_p_s, channels
        )

        # Expose the cell centers under the Nuclei_Location_Center_* names.
        # NOTE(review): presumably these are the columns the downstream utils
        # read -- confirm against utils.
        df_p_s["Nuclei_Location_Center_X"] = df_p_s["Cells_AreaShape_Center_X"]
        df_p_s["Nuclei_Location_Center_Y"] = df_p_s["Cells_AreaShape_Center_Y"]

        df_p_s, _ = filter_out_edge_single_cells.edgeCellFilter(
            df_p_s, im_size, box_size / 2
        )

        # Nothing left to draw for this guide after edge filtering.
        if df_p_s.shape[0] == 0:
            continue

        (
            df_samples,
            cp_features_analysis,
        ) = extract_single_cell_samples.extract_single_cell_samples(
            df_p_s, n_cells, cell_selection_method
        )

        # Create the per-gene output folder without shelling out; this is
        # portable and safe for names containing shell metacharacters.
        resdir = os.path.join(resultsDir, input_gene)
        os.makedirs(resdir, exist_ok=True)

        neigh_center_ls = viz_pooled.extract_neighbor_cells_center(
            df_p_s, df_samples, max_dist_of_neigh
        )

        # Build "<well>-<site>" labels from the sampled rows themselves, so
        # the result does not depend on df_samples sharing its index with
        # df_p_s.
        # NOTE(review): assumes df_samples keeps df_p_s' metadata columns --
        # confirm against extract_single_cell_samples.
        df_samples["label"] = (
            df_samples["Metadata_Foci_well"]
            + "-"
            + df_samples["Metadata_Foci_site_location"].astype(str)
        )

        fig = viz_pooled.visualize_n_SingleCell_pooled(
            channels,
            df_samples,
            box_size,
            im_size,
            outline=True,
            color=True,
            title=input_gene + "_" + cell_selection_method,
            neigh_center_ls=neigh_center_ls,
        )
        fig.savefig(os.path.join(resdir, gi + ".png"), dpi=dpi_quality)
        # Release figure memory before the next guide.
        plt.close("all")