# Process single-cell morphology features from CellProfiler readouts - CellProfiler IC with Cellpose plugin method

## Import Libraries

In [1]:
import pathlib
import pandas as pd

from pycytominer import normalize, feature_select
from pycytominer.cyto_utils import cells, output

## Set up paths to CellProfiler directory and outputs

In [2]:
# Relative directories used throughout the notebook:
#   output_dir - where the processed single-cell CSVs are written
#   cp_dir     - root of the CellProfiler pipelines folder (inputs are read from here)
output_dir = "../../data/Plate2/CellProfiler"
cp_dir = "../../../CellProfiler_pipelines"

## Set up paths to sqlite files and outputs

### CellProfiler IC with Cellpose plugin Method

In [3]:
# Name of the CellProfiler .sqlite output, the `sqlite:///` URL pointing at it
# (passed to SingleCells as `sql_file`), and the path to the platemap metadata
sql_file_cpcellpose = "NF1_data_cellprofileric_cellpose_plate2.sqlite"
single_cell_file_cpcellpose = f"sqlite:///{cp_dir}/Analysis_Output/Plate2_Output/{sql_file_cpcellpose}"
platemap_file = f"{cp_dir}/Metadata/platemap_NF1_CP.csv"

# Create the output directory up front so the later `output(...)` calls do not
# fail on a fresh checkout where it does not exist yet (no-op if already present)
pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

# Set path with name for outputted data:
# merged single cells, normalized, and normalized + feature selected
sc_output_file_cpcellpose = pathlib.Path(f"{output_dir}/nf1_sc_cellprofileric_cellpose.csv.gz")
sc_norm_output_file_cpcellpose = pathlib.Path(f"{output_dir}/nf1_sc_norm_cellprofileric_cellpose.csv.gz")
sc_norm_fs_output_file_cpcellpose = pathlib.Path(f"{output_dir}/nf1_sc_norm_fs_cellprofileric_cellpose.csv.gz")

## Set up names for linking columns between tables in the database file

In [4]:
# Custom linking columns between compartment tables.
# Outer keys are compartment table names; each inner dict maps a partner
# table to the column name used to link the two (passed to SingleCells as
# `compartment_linking_cols`).
linking_cols = {
    "Per_Cytoplasm": {"Per_Cells": "Cytoplasm_Parent_Cells", "Per_Nuclei": "Cytoplasm_Parent_Nuclei"},
    "Per_Cells": {
        "Per_Cytoplasm": "Cells_Number_Object_Number",
    },
    "Per_Nuclei": {
        "Per_Cytoplasm": "Nuclei_Number_Object_Number",
    },
}

## Load in platemap

In [5]:
# Read the platemap CSV into a DataFrame and display it for inspection
platemap_df = pd.read_csv(filepath_or_buffer=platemap_file)
platemap_df

Unnamed: 0,WellRow,WellCol,well_position,gene_name,genotype
0,C,6,C6,NF1,WT
1,C,7,C7,NF1,Null
2,D,6,D6,NF1,WT
3,D,7,D7,NF1,Null
4,E,6,E6,NF1,WT
5,E,7,E7,NF1,Null
6,F,6,F6,NF1,WT
7,F,7,F7,NF1,Null


## Set up `SingleCells` class from Pycytominer

In [6]:
# Build the pycytominer SingleCells object for the CellProfiler IC + Cellpose database
sc_cpcellpose = cells.SingleCells(
    # Database location and image-level table settings
    sql_file=single_cell_file_cpcellpose,
    image_table_name="Per_Image",
    image_cols="ImageNumber",
    load_image_data=True,
    strata=["Image_Metadata_Well", "Image_Metadata_Plate"],
    # Compartment tables and the columns used to link/merge them
    compartments=["Per_Cells", "Per_Cytoplasm", "Per_Nuclei"],
    compartment_linking_cols=linking_cols,
    merge_cols=["ImageNumber"],
)



## Merge single cells

In [7]:
# Extra keyword arguments handed to merge_single_cells alongside the platemap;
# `join_on` names the two well columns to match on
# (presumably platemap column first, image-table column second -- confirm against pycytominer docs)
anno_kwargs = dict(join_on=["Metadata_well_position", "Image_Metadata_Well"])

# Merge the compartment tables into one single-cell DataFrame with platemap metadata
sc_df_cpcellpose = sc_cpcellpose.merge_single_cells(platemap=platemap_file, **anno_kwargs)

# Write the merged (level 2) single-cell data to file
output(sc_df_cpcellpose, sc_output_file_cpcellpose)

# Report dimensions and preview the first rows
print(sc_df_cpcellpose.shape)
sc_df_cpcellpose.head()

(493, 1207)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_gene_name,Metadata_genotype,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_Cells_Number_Object_Number,...,Nuclei_Texture_Variance_DAPI_3_02_256,Nuclei_Texture_Variance_DAPI_3_03_256,Nuclei_Texture_Variance_GFP_3_00_256,Nuclei_Texture_Variance_GFP_3_01_256,Nuclei_Texture_Variance_GFP_3_02_256,Nuclei_Texture_Variance_GFP_3_03_256,Nuclei_Texture_Variance_RFP_3_00_256,Nuclei_Texture_Variance_RFP_3_01_256,Nuclei_Texture_Variance_RFP_3_02_256,Nuclei_Texture_Variance_RFP_3_03_256
0,C,6,NF1,WT,41,1,C6,1,3,1,...,475.176806,525.549525,72.335203,73.637889,72.81023,73.826151,6.340537,6.313753,6.347326,6.393154
1,C,6,NF1,WT,41,1,C6,2,2,2,...,1142.399359,1164.087891,223.207129,194.464502,201.624954,211.771776,28.077448,28.650253,28.983728,27.757807
2,C,6,NF1,WT,41,1,C6,3,7,3,...,192.44441,184.7037,42.660254,44.629217,42.76429,40.824722,13.467157,13.262453,13.492308,13.463394
3,C,6,NF1,WT,41,1,C6,4,4,4,...,387.991757,374.218708,147.924292,143.979505,132.998389,123.425426,23.940806,24.765532,24.998769,23.596867
4,C,6,NF1,WT,41,1,C6,5,6,5,...,519.982522,539.456825,230.84832,233.710128,260.537238,236.023464,19.883753,19.600219,20.333677,20.429527


## Normalize data

In [8]:
# Apply pycytominer's "standardize" normalization to the merged single-cell data
normalize_sc_cpcellpose = normalize(profiles=sc_df_cpcellpose, method="standardize")

# Save the normalized profiles to file
output(normalize_sc_cpcellpose, sc_norm_output_file_cpcellpose)

# Report dimensions and preview the first rows
print(normalize_sc_cpcellpose.shape)
normalize_sc_cpcellpose.head()

(493, 1207)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_gene_name,Metadata_genotype,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_Cells_Number_Object_Number,...,Nuclei_Texture_Variance_DAPI_3_02_256,Nuclei_Texture_Variance_DAPI_3_03_256,Nuclei_Texture_Variance_GFP_3_00_256,Nuclei_Texture_Variance_GFP_3_01_256,Nuclei_Texture_Variance_GFP_3_02_256,Nuclei_Texture_Variance_GFP_3_03_256,Nuclei_Texture_Variance_RFP_3_00_256,Nuclei_Texture_Variance_RFP_3_01_256,Nuclei_Texture_Variance_RFP_3_02_256,Nuclei_Texture_Variance_RFP_3_03_256
0,C,6,NF1,WT,41,1,C6,1,3,1,...,-0.600172,-0.564639,-0.73514,-0.723655,-0.738908,-0.727782,-0.481423,-0.475323,-0.480486,-0.475626
1,C,6,NF1,WT,41,1,C6,2,2,2,...,-0.205587,-0.176265,-0.534927,-0.560322,-0.566601,-0.54016,-0.408394,-0.400515,-0.404554,-0.403411
2,C,6,NF1,WT,41,1,C6,3,7,3,...,-0.767376,-0.771949,-0.774519,-0.762869,-0.779098,-0.772667,-0.45748,-0.452051,-0.456519,-0.451728
3,C,6,NF1,WT,41,1,C6,4,4,4,...,-0.651732,-0.656682,-0.63483,-0.628567,-0.658398,-0.660321,-0.422292,-0.413525,-0.417921,-0.417476
4,C,6,NF1,WT,41,1,C6,5,6,5,...,-0.573675,-0.55618,-0.524787,-0.50727,-0.487798,-0.507175,-0.435922,-0.430825,-0.43357,-0.428182


## Feature Selection

In [9]:
# Feature selection operations to run on the normalized profiles
feature_select_ops = ["variance_threshold", "correlation_threshold", "blocklist"]

# Run the selected operations on the normalized single-cell data
feature_select_norm_sc_cpcellpose = feature_select(
    profiles=normalize_sc_cpcellpose,
    operation=feature_select_ops,
)

# Save the normalized + feature-selected profiles to file
output(feature_select_norm_sc_cpcellpose, sc_norm_fs_output_file_cpcellpose)

# Report dimensions and preview the first rows
print(feature_select_norm_sc_cpcellpose.shape)
feature_select_norm_sc_cpcellpose.head()

  c /= stddev[:, None]
  c /= stddev[None, :]


(493, 427)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_gene_name,Metadata_genotype,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_Cells_Number_Object_Number,...,Nuclei_Texture_InfoMeas2_RFP_3_03_256,Nuclei_Texture_InverseDifferenceMoment_DAPI_3_00_256,Nuclei_Texture_InverseDifferenceMoment_GFP_3_01_256,Nuclei_Texture_InverseDifferenceMoment_RFP_3_00_256,Nuclei_Texture_InverseDifferenceMoment_RFP_3_01_256,Nuclei_Texture_InverseDifferenceMoment_RFP_3_02_256,Nuclei_Texture_InverseDifferenceMoment_RFP_3_03_256,Nuclei_Texture_SumEntropy_DAPI_3_02_256,Nuclei_Texture_SumVariance_GFP_3_03_256,Nuclei_Texture_SumVariance_RFP_3_03_256
0,C,6,NF1,WT,41,1,C6,1,3,1,...,-0.735204,-0.129843,1.600541,2.281911,2.385328,1.991274,4.01365,-0.514384,-0.681634,-0.422132
1,C,6,NF1,WT,41,1,C6,2,2,2,...,0.333543,-0.436712,-0.077195,0.658621,0.65463,1.110779,2.084356,0.730651,-0.492825,-0.354399
2,C,6,NF1,WT,41,1,C6,3,7,3,...,-1.164611,0.267985,1.257991,1.089733,1.421804,1.755227,2.200269,-1.754412,-0.724768,-0.404119
3,C,6,NF1,WT,41,1,C6,4,4,4,...,-0.033666,0.114998,0.600287,0.889225,0.512373,0.580296,2.0471,-1.182628,-0.639175,-0.372603
4,C,6,NF1,WT,41,1,C6,5,6,5,...,-1.588432,-0.075899,0.800679,1.742889,0.623152,0.482617,0.994045,-0.622814,-0.467766,-0.388658


---

### Visualize basic count statistics

In [10]:
# Count the rows of the merged single-cell table per genotype
sc_df_cpcellpose["Metadata_genotype"].value_counts()

Null    323
WT      170
Name: Metadata_genotype, dtype: int64

In [11]:
# Cross-tabulate row counts: genotype (rows) by well (columns)
pd.crosstab(
    index=sc_df_cpcellpose["Metadata_genotype"],
    columns=sc_df_cpcellpose["Metadata_Well"],
)

Metadata_Well,C6,C7,D6,D7,E6,E7,F6,F7
Metadata_genotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Null,0,85,0,79,0,80,0,79
WT,33,0,56,0,40,0,41,0
