# Process single-cell morphology features from CellProfiler readouts - CellProfiler IC with Cellpose plugin method

## Import Libraries

In [1]:
# Standard library / third-party imports used throughout the notebook
import pathlib
import pandas as pd

# Pycytominer: profile processing (normalize, feature_select) plus the
# SingleCells merger (cells) and the file writer (output)
from pycytominer import normalize, feature_select
from pycytominer.cyto_utils import cells, output

# Extend the module search path before importing the local utility module.
# NOTE(review): presumably sc_count_add_save_util.py lives two directories up
# from this notebook - confirm; the import below relies on this path entry.
import sys
sys.path.append("../../")
import sc_count_add_save_util as sc_util

## Set up paths to CellProfiler directory and outputs

In [2]:
# Directory constants (relative paths, resolved from the notebook's working directory)
output_dir = "../../data/Plate2/CellProfiler"  # destination for the processed single-cell files
cp_dir = "../../../CellProfiler_pipelines"  # root holding the CellProfiler analysis output and metadata

## Set up paths to sqlite files and outputs

### CellProfiler IC with Cellpose plugin Method

In [3]:
# Inputs: the CellProfiler SQLite database (addressed as a SQLAlchemy-style URL) and the plate map
sql_file_cpcellpose = "NF1_data_cellprofileric_cellpose_plate2.sqlite"
single_cell_file_cpcellpose = f"sqlite:///{cp_dir}/Analysis_Output/Plate2_Output/{sql_file_cpcellpose}"
platemap_file = f"{cp_dir}/Metadata/platemap_NF1_CP_Plate2.csv"

# Outputs: one gzipped CSV per processing stage (merged, normalized, normalized + feature selected)
sc_output_file_cpcellpose = pathlib.Path(output_dir, "nf1_sc_cellprofileric_cellpose.csv.gz")
sc_norm_output_file_cpcellpose = pathlib.Path(output_dir, "nf1_sc_norm_cellprofileric_cellpose.csv.gz")
sc_norm_fs_output_file_cpcellpose = pathlib.Path(output_dir, "nf1_sc_norm_fs_cellprofileric_cellpose.csv.gz")

## Set up names for linking columns between tables in the database file

In [4]:
# Custom linking columns between compartment tables.
# Outer key: a compartment table; inner mapping: the other compartment table and
# the column in the outer table used to link to it.
linking_cols = {
    "Per_Cytoplasm": {
        "Per_Cells": "Cytoplasm_Parent_Cells",
        "Per_Nuclei": "Cytoplasm_Parent_Nuclei",
    },
    "Per_Cells": {
        "Per_Cytoplasm": "Cells_Number_Object_Number",
    },
    "Per_Nuclei": {
        "Per_Cytoplasm": "Nuclei_Number_Object_Number",
    },
}

## Load in platemap

In [5]:
# Read the plate map CSV and preview its first rows
platemap_df = pd.read_csv(filepath_or_buffer=platemap_file)
platemap_df.head(5)

Unnamed: 0,WellRow,WellCol,well_position,gene_name,genotype
0,A,1,A1,NF1,WT
1,A,6,A6,NF1,WT
2,A,7,A7,NF1,Null
3,A,12,A12,NF1,Null
4,B,1,B1,NF1,WT


## Set up `SingleCells` class from Pycytominer

In [6]:
# Instantiate the SingleCells class against the CellProfiler SQLite database
sc_cpcellpose = cells.SingleCells(
    sql_file=single_cell_file_cpcellpose,
    # Compartment tables to merge, joined through the custom linking columns
    compartments=["Per_Cells", "Per_Cytoplasm", "Per_Nuclei"],
    compartment_linking_cols=linking_cols,
    image_table_name="Per_Image",
    strata=["Image_Metadata_Well", "Image_Metadata_Plate"],
    merge_cols=["ImageNumber"],
    # Passed as a list (not a bare string) to match the documented
    # `list of str` type and to stay consistent with `merge_cols`
    image_cols=["ImageNumber"],
    load_image_data=True,
)



## Merge single cells

In [7]:
# Columns used to join the plate map onto the merged profiles.
# NOTE(review): presumably the first entry is the platemap well column and the
# second the image-table well column - confirm against pycytominer's `join_on`.
anno_kwargs = dict(join_on=["Metadata_well_position", "Image_Metadata_Well"])

# Merge the compartments into one row per single cell, annotated with the plate map
sc_df_cpcellpose = sc_cpcellpose.merge_single_cells(platemap=platemap_file, **anno_kwargs)

# Write the merged (level 2) single-cell data to disk as a csv
output(sc_df_cpcellpose, sc_output_file_cpcellpose)

# Report the dimensions, then preview the first rows
print(sc_df_cpcellpose.shape)
sc_df_cpcellpose.head()

(1757, 1207)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_gene_name,Metadata_genotype,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_Cells_Number_Object_Number,...,Nuclei_Texture_Variance_DAPI_3_02_256,Nuclei_Texture_Variance_DAPI_3_03_256,Nuclei_Texture_Variance_GFP_3_00_256,Nuclei_Texture_Variance_GFP_3_01_256,Nuclei_Texture_Variance_GFP_3_02_256,Nuclei_Texture_Variance_GFP_3_03_256,Nuclei_Texture_Variance_RFP_3_00_256,Nuclei_Texture_Variance_RFP_3_01_256,Nuclei_Texture_Variance_RFP_3_02_256,Nuclei_Texture_Variance_RFP_3_03_256
0,A,1,NF1,WT,8,1,A1,1,5,1,...,1093.070214,1079.311705,580.758117,537.462263,553.897511,527.086017,62.665736,63.68995,63.107709,61.913081
1,A,1,NF1,WT,8,1,A1,2,8,2,...,399.623133,401.234592,346.296405,345.287094,331.281251,327.218494,46.77955,44.785469,46.493762,47.267042
2,A,1,NF1,WT,8,1,A1,3,7,3,...,873.912673,891.411424,262.248684,257.68102,276.816239,256.228447,33.590363,31.968422,32.30821,31.978696
3,A,1,NF1,WT,8,1,A1,4,9,4,...,301.230999,285.319856,80.34594,70.427325,71.994455,72.141145,11.481529,10.739958,10.825473,11.11952
4,A,1,NF1,WT,8,1,A1,5,11,5,...,599.492139,609.713374,476.092746,486.676778,488.214079,460.007369,82.573745,83.273873,83.468478,83.714484


## Normalize data

In [8]:
# Standardize the merged single-cell profiles
normalize_sc_cpcellpose = normalize(sc_df_cpcellpose, method="standardize")

# Persist the normalized profiles
output(normalize_sc_cpcellpose, sc_norm_output_file_cpcellpose)

# Report the dimensions, then preview the first rows
print(normalize_sc_cpcellpose.shape)
normalize_sc_cpcellpose.head()

(1757, 1207)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_gene_name,Metadata_genotype,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_Cells_Number_Object_Number,...,Nuclei_Texture_Variance_DAPI_3_02_256,Nuclei_Texture_Variance_DAPI_3_03_256,Nuclei_Texture_Variance_GFP_3_00_256,Nuclei_Texture_Variance_GFP_3_01_256,Nuclei_Texture_Variance_GFP_3_02_256,Nuclei_Texture_Variance_GFP_3_03_256,Nuclei_Texture_Variance_RFP_3_00_256,Nuclei_Texture_Variance_RFP_3_01_256,Nuclei_Texture_Variance_RFP_3_02_256,Nuclei_Texture_Variance_RFP_3_03_256
0,A,1,NF1,WT,8,1,A1,1,5,1,...,-0.144344,-0.138047,0.073091,0.034282,0.030681,0.009522,-0.233773,-0.222703,-0.232712,-0.231201
1,A,1,NF1,WT,8,1,A1,2,8,2,...,-0.5786,-0.57414,-0.282463,-0.263345,-0.30744,-0.298484,-0.305637,-0.308507,-0.308217,-0.298329
2,A,1,NF1,WT,8,1,A1,3,7,3,...,-0.281587,-0.258892,-0.409919,-0.399022,-0.390164,-0.407884,-0.3653,-0.366681,-0.372685,-0.368402
3,A,1,NF1,WT,8,1,A1,4,9,4,...,-0.640216,-0.648688,-0.685768,-0.689027,-0.701257,-0.691572,-0.465312,-0.463033,-0.470317,-0.464007
4,A,1,NF1,WT,8,1,A1,5,11,5,...,-0.453436,-0.440061,-0.085631,-0.044371,-0.069082,-0.09385,-0.143716,-0.133815,-0.140179,-0.131277


## Feature Selection

In [9]:
# Feature selection operations handed to pycytominer's feature_select, in this order
feature_select_ops = ["variance_threshold", "correlation_threshold", "blocklist"]

# Run feature selection on the normalized profiles
feature_select_norm_sc_cpcellpose = feature_select(normalize_sc_cpcellpose, operation=feature_select_ops)

# Persist the normalized, feature-selected profiles
output(feature_select_norm_sc_cpcellpose, sc_norm_fs_output_file_cpcellpose)

# Report the dimensions, then preview the first rows
print(feature_select_norm_sc_cpcellpose.shape)
feature_select_norm_sc_cpcellpose.head()

(1757, 406)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_gene_name,Metadata_genotype,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_Cells_Number_Object_Number,...,Nuclei_Texture_InfoMeas2_RFP_3_00_256,Nuclei_Texture_InfoMeas2_RFP_3_01_256,Nuclei_Texture_InfoMeas2_RFP_3_02_256,Nuclei_Texture_InfoMeas2_RFP_3_03_256,Nuclei_Texture_InverseDifferenceMoment_DAPI_3_00_256,Nuclei_Texture_InverseDifferenceMoment_GFP_3_01_256,Nuclei_Texture_InverseDifferenceMoment_RFP_3_02_256,Nuclei_Texture_SumEntropy_DAPI_3_02_256,Nuclei_Texture_SumVariance_GFP_3_03_256,Nuclei_Texture_SumVariance_RFP_3_03_256
0,A,1,NF1,WT,8,1,A1,1,5,1,...,0.306991,-0.007365,-0.039747,0.073103,-0.142482,-0.753251,-0.292874,0.653397,-0.018765,-0.224062
1,A,1,NF1,WT,8,1,A1,2,8,2,...,0.973939,0.902178,1.102743,1.312551,-0.064042,-0.871329,1.786631,-0.571268,-0.297746,-0.234098
2,A,1,NF1,WT,8,1,A1,3,7,3,...,-0.409051,-0.813421,-0.988922,-0.500032,-0.194744,0.68035,-0.091834,0.40083,-0.409966,-0.337452
3,A,1,NF1,WT,8,1,A1,4,9,4,...,-1.361902,0.181611,-0.217133,-1.271409,-0.03161,1.133536,2.262297,-0.994862,-0.661344,-0.417266
4,A,1,NF1,WT,8,1,A1,5,11,5,...,-0.036696,-0.384456,-0.312975,0.466853,-0.092237,-0.633493,-1.066135,-0.478234,-0.139506,-0.11057


## Add single cell count metadata and save csv

In [10]:
# Run the single-cell count helper on each saved file, in the order the files were written
for profile_path in (
    sc_output_file_cpcellpose,
    sc_norm_output_file_cpcellpose,
    sc_norm_fs_output_file_cpcellpose,
):
    sc_util.add_sc_count_metadata(profile_path)

---

### Visualize basic count statistics

In [11]:
# Number of single cells per genotype
sc_df_cpcellpose["Metadata_genotype"].value_counts()

Null    1030
WT       727
Name: Metadata_genotype, dtype: int64

In [12]:
# Single-cell counts broken down by genotype (rows) and well (columns)
pd.crosstab(
    index=sc_df_cpcellpose["Metadata_genotype"],
    columns=sc_df_cpcellpose["Metadata_Well"],
)

Metadata_Well,A1,A12,A6,A7,B1,B12,B6,B7,C1,C12,...,F6,F7,G1,G12,G6,G7,H1,H12,H6,H7
Metadata_genotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Null,0,51,0,59,0,50,0,65,0,66,...,0,79,0,54,0,68,0,53,0,62
WT,54,0,52,0,54,0,52,0,57,0,...,41,0,59,0,38,0,30,0,28,0
