# Process single-cell morphology features from CellProfiler readouts - CellProfiler IC with Cellpose plugin method

## Import Libraries

In [1]:
import pathlib
import pandas as pd

from pycytominer import normalize, feature_select
from pycytominer.cyto_utils import cells, output

import sys
sys.path.append("../../")
import sc_count_add_save_util as sc_util

## Set up paths to CellProfiler directory and outputs

In [2]:
# Directory constants used to build every path in this notebook (both are relative paths)
cp_dir, output_dir = (
    "../../../CellProfiler_pipelines",  # CellProfiler directory
    "../../data/Plate1/CellProfiler",  # directory that receives the output files
)

## Set up paths to sqlite files and outputs

### CellProfiler IC with Cellpose plugin Method

In [3]:
# Name of the .sqlite file and the sqlite:/// URL that points at it inside the CellProfiler directory
sql_file_cpcellpose = "NF1_data_cellprofileric_cellpose_plate1.sqlite"
single_cell_file_cpcellpose = f"sqlite:///{cp_dir}/Analysis_Output/Plate1_Output/{sql_file_cpcellpose}"

# Path to the platemap metadata csv
platemap_file = f"{cp_dir}/Metadata/platemap_NF1_CP.csv"

# Create the output directory up front so that writing the files below does not fail
# when the notebook is run on a checkout where the directory does not exist yet
pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

# Paths (with names) for the outputted data: merged, normalized, and normalized + feature selected
sc_output_file_cpcellpose = pathlib.Path(f"{output_dir}/nf1_sc_cellprofileric_cellpose.csv.gz")
sc_norm_output_file_cpcellpose = pathlib.Path(f"{output_dir}/nf1_sc_norm_cellprofileric_cellpose.csv.gz")
sc_norm_fs_output_file_cpcellpose = pathlib.Path(f"{output_dir}/nf1_sc_norm_fs_cellprofileric_cellpose.csv.gz")

## Set up names for linking columns between tables in the database file

In [4]:
# Custom linking columns: for each compartment table, the column used to link it
# to each of the other compartment tables it is joined with
linking_cols = {
    # Cytoplasm rows point at their parent cell and parent nucleus
    "Per_Cytoplasm": {"Per_Cells": "Cytoplasm_Parent_Cells", "Per_Nuclei": "Cytoplasm_Parent_Nuclei"},
    # Cells and nuclei link to the cytoplasm table through their object number
    "Per_Cells": {
        "Per_Cytoplasm": "Cells_Number_Object_Number",
    },
    "Per_Nuclei": {
        "Per_Cytoplasm": "Nuclei_Number_Object_Number",
    },
}

## Load in platemap

In [5]:
# Read the platemap csv into a DataFrame and show it as the cell output
platemap_df = pd.read_csv(filepath_or_buffer=platemap_file)
platemap_df

Unnamed: 0,WellRow,WellCol,well_position,gene_name,genotype
0,C,6,C6,NF1,WT
1,C,7,C7,NF1,Null
2,D,6,D6,NF1,WT
3,D,7,D7,NF1,Null
4,E,6,E6,NF1,WT
5,E,7,E7,NF1,Null
6,F,6,F6,NF1,WT
7,F,7,F7,NF1,Null


## Set up `SingleCells` class from Pycytominer

In [6]:
# Create the Pycytominer SingleCells object for the CellProfiler IC + Cellpose sqlite file
sc_cpcellpose = cells.SingleCells(
    # Database location and the tables to read from it
    sql_file=single_cell_file_cpcellpose,
    image_table_name="Per_Image",
    compartments=["Per_Cells", "Per_Cytoplasm", "Per_Nuclei"],
    # Columns used to link compartments, merge tables, and group the data
    compartment_linking_cols=linking_cols,
    merge_cols=["ImageNumber"],
    image_cols="ImageNumber",
    strata=["Image_Metadata_Well", "Image_Metadata_Plate"],
    load_image_data=True,
)



## Merge single cells

In [7]:
# Extra keyword arguments passed through merge_single_cells; join_on names the
# platemap column and the profile column to match on
# (presumably consumed by the annotation step - confirm against Pycytominer)
anno_kwargs = dict(join_on=["Metadata_well_position", "Image_Metadata_Well"])

# Merge single cells across compartments using the platemap file
sc_df_cpcellpose = sc_cpcellpose.merge_single_cells(platemap=platemap_file, **anno_kwargs)

# Write the merged (level 2) data to the csv output path
output(sc_df_cpcellpose, sc_output_file_cpcellpose)

# Report the dimensions and preview the first rows
print(sc_df_cpcellpose.shape)
sc_df_cpcellpose.head()

(257, 1207)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_gene_name,Metadata_genotype,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_Cells_Number_Object_Number,...,Nuclei_Texture_Variance_DAPI_3_02_256,Nuclei_Texture_Variance_DAPI_3_03_256,Nuclei_Texture_Variance_GFP_3_00_256,Nuclei_Texture_Variance_GFP_3_01_256,Nuclei_Texture_Variance_GFP_3_02_256,Nuclei_Texture_Variance_GFP_3_03_256,Nuclei_Texture_Variance_RFP_3_00_256,Nuclei_Texture_Variance_RFP_3_01_256,Nuclei_Texture_Variance_RFP_3_02_256,Nuclei_Texture_Variance_RFP_3_03_256
0,C,6,NF1,WT,1,1,C6,1,6,1,...,1371.205915,1327.671091,463.681053,425.174641,423.634369,384.280727,76.0638,72.227848,81.856261,81.88001
1,C,6,NF1,WT,1,1,C6,2,5,2,...,1471.60243,1388.983737,386.316701,364.651611,369.681769,340.842867,64.987271,66.324054,71.630671,64.746156
2,C,6,NF1,WT,1,1,C6,3,8,3,...,899.066265,874.641357,204.193102,180.874928,179.586682,181.669577,112.426465,112.414962,110.523332,109.615389
3,C,6,NF1,WT,1,1,C6,4,9,4,...,1344.733717,1321.723403,342.448361,330.435041,338.098718,312.926523,503.704992,506.642578,495.063448,496.033616
4,C,6,NF1,WT,1,1,C6,5,10,5,...,1024.572972,946.064289,227.067426,230.876499,249.01011,219.396316,94.106501,94.467485,97.724948,93.603366


## Normalize data

In [8]:
# Normalize the merged single cell data with the "standardize" method
normalize_sc_cpcellpose = normalize(sc_df_cpcellpose, method="standardize")

# Write the normalized data to file
output(normalize_sc_cpcellpose, sc_norm_output_file_cpcellpose)

# Report the dimensions and preview the first rows
print(normalize_sc_cpcellpose.shape)
normalize_sc_cpcellpose.head()

(257, 1207)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_gene_name,Metadata_genotype,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_Cells_Number_Object_Number,...,Nuclei_Texture_Variance_DAPI_3_02_256,Nuclei_Texture_Variance_DAPI_3_03_256,Nuclei_Texture_Variance_GFP_3_00_256,Nuclei_Texture_Variance_GFP_3_01_256,Nuclei_Texture_Variance_GFP_3_02_256,Nuclei_Texture_Variance_GFP_3_03_256,Nuclei_Texture_Variance_RFP_3_00_256,Nuclei_Texture_Variance_RFP_3_01_256,Nuclei_Texture_Variance_RFP_3_02_256,Nuclei_Texture_Variance_RFP_3_03_256
0,C,6,NF1,WT,1,1,C6,1,6,1,...,-0.952489,-0.934866,-0.008782,-0.035372,-0.073675,-0.106998,0.142291,0.112963,0.193601,0.200409
1,C,6,NF1,WT,1,1,C6,2,5,2,...,-0.858995,-0.875179,-0.126563,-0.129052,-0.15544,-0.173501,0.042631,0.05981,0.101546,0.045395
2,C,6,NF1,WT,1,1,C6,3,8,3,...,-1.392171,-1.375884,-0.40383,-0.413508,-0.443528,-0.417193,0.469461,0.474779,0.451674,0.451339
3,C,6,NF1,WT,1,1,C6,4,9,4,...,-0.977142,-0.940656,-0.193349,-0.182013,-0.203304,-0.216241,3.989952,4.024119,3.913472,3.947375
4,C,6,NF1,WT,1,1,C6,5,10,5,...,-1.275293,-1.306355,-0.369006,-0.336114,-0.338317,-0.359434,0.304629,0.313193,0.336458,0.306474


## Feature Selection

In [9]:
# Feature selection operations to apply to the normalized data
feature_select_ops = ["variance_threshold", "correlation_threshold", "blocklist"]

# Run feature selection on the normalized single cell data
feature_select_norm_sc_cpcellpose = feature_select(normalize_sc_cpcellpose, operation=feature_select_ops)

# Write the normalized, feature selected data to file
output(feature_select_norm_sc_cpcellpose, sc_norm_fs_output_file_cpcellpose)

# Report the dimensions and preview the first rows
print(feature_select_norm_sc_cpcellpose.shape)
feature_select_norm_sc_cpcellpose.head()

  c /= stddev[:, None]
  c /= stddev[None, :]


(257, 450)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_gene_name,Metadata_genotype,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_Cells_Number_Object_Number,...,Nuclei_Texture_InfoMeas2_DAPI_3_03_256,Nuclei_Texture_InfoMeas2_GFP_3_00_256,Nuclei_Texture_InfoMeas2_GFP_3_02_256,Nuclei_Texture_InfoMeas2_RFP_3_00_256,Nuclei_Texture_InfoMeas2_RFP_3_01_256,Nuclei_Texture_InfoMeas2_RFP_3_03_256,Nuclei_Texture_InverseDifferenceMoment_GFP_3_00_256,Nuclei_Texture_SumEntropy_DAPI_3_03_256,Nuclei_Texture_SumVariance_DAPI_3_03_256,Nuclei_Texture_SumVariance_RFP_3_01_256
0,C,6,NF1,WT,1,1,C6,1,6,1,...,0.130646,0.455761,0.616154,-0.621821,-0.562829,0.935871,-0.43297,0.830093,-0.795787,0.039039
1,C,6,NF1,WT,1,1,C6,2,5,2,...,0.470856,0.341203,0.950283,-0.66442,0.077871,0.019161,-0.910537,0.984223,-0.745304,0.030197
2,C,6,NF1,WT,1,1,C6,3,8,3,...,-0.094306,0.128028,-0.203957,0.027574,0.739001,0.506915,0.310293,0.551522,-1.311842,0.456768
3,C,6,NF1,WT,1,1,C6,4,9,4,...,0.420691,-0.032242,0.320852,0.89641,1.044477,1.442472,-0.529534,0.828875,-0.790665,3.497173
4,C,6,NF1,WT,1,1,C6,5,10,5,...,-0.00352,0.006622,0.408656,0.304764,0.599886,0.389306,0.005647,0.675189,-1.247019,0.299256


## Add single-cell count metadata and save CSV files

In [10]:
# Add the single cell count metadata to each of the three saved files, in the
# same order they were written (merged, normalized, normalized + feature selected)
for profile_file in (
    sc_output_file_cpcellpose,
    sc_norm_output_file_cpcellpose,
    sc_norm_fs_output_file_cpcellpose,
):
    sc_util.add_sc_count_metadata(profile_file)

---

### Visualize basic count statistics

In [11]:
# Number of single cells per genotype in the merged data
sc_df_cpcellpose["Metadata_genotype"].value_counts()

Null    181
WT       76
Name: Metadata_genotype, dtype: int64

In [12]:
# Cross-tabulate single cell counts: genotype (rows) by well (columns)
pd.crosstab(
    index=sc_df_cpcellpose["Metadata_genotype"],
    columns=sc_df_cpcellpose["Metadata_Well"],
)

Metadata_Well,C6,C7,D6,D7,E6,E7,F6,F7
Metadata_genotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Null,0,23,0,32,0,59,0,67
WT,26,0,7,0,17,0,26,0
