# Process single-cell morphology features from CellProfiler readouts - CellProfiler IC and CellProfiler Cellpose plugin method

## Import Libraries

In [1]:
import pathlib
import pandas as pd

from pycytominer import normalize, feature_select
from pycytominer.cyto_utils import cells, output

## Set up paths to CellProfiler directory and outputs

In [2]:
# Relative directory constants used throughout the notebook:
#   cp_dir     -> folder holding the CellProfiler pipelines
#   output_dir -> folder that receives the processed Plate 2 CellProfiler data
cp_dir, output_dir = (
    "../../../CellProfiler_pipelines",
    "../../data/Plate2/CellProfiler",
)

## Set up paths to SQLite files and outputs

### CellProfiler IC with Cellpose plugin Method

In [3]:
# Name of the CellProfiler .sqlite output, the "sqlite:///" connection string
# pointing at it, and the path to the platemap metadata CSV
sql_file_cpcellpose = "NF1_data_cellprofileric_cellpose_plate2.sqlite"
single_cell_file_cpcellpose = f"sqlite:///{cp_dir}/Analysis_Output/Plate2_Output/{sql_file_cpcellpose}"
platemap_file = f"{cp_dir}/Metadata/platemap_NF1_CP_Plate2.csv"

# Create the output directory up front (no-op when it already exists) so the
# writes later in the notebook do not fail on a fresh checkout / clean run
pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

# Destination files for each processing stage:
# merged single cells -> normalized -> normalized + feature selected
sc_output_file_cpcellpose = pathlib.Path(f"{output_dir}/nf1_sc_cellprofileric_cellpose.csv.gz")
sc_norm_output_file_cpcellpose = pathlib.Path(f"{output_dir}/nf1_sc_norm_cellprofileric_cellpose.csv.gz")
sc_norm_fs_output_file_cpcellpose = pathlib.Path(f"{output_dir}/nf1_sc_norm_fs_cellprofileric_cellpose.csv.gz")

## Set up names for linking columns between tables in the database file

In [4]:
# Custom linking columns between compartment tables: for each table, the
# column used to link it to the named partner table
linking_cols = dict(
    Per_Cytoplasm=dict(
        Per_Cells="Cytoplasm_Parent_Cells",
        Per_Nuclei="Cytoplasm_Parent_Nuclei",
    ),
    Per_Cells=dict(Per_Cytoplasm="Cells_Number_Object_Number"),
    Per_Nuclei=dict(Per_Cytoplasm="Nuclei_Number_Object_Number"),
)

## Load in platemap

In [5]:
# Read the platemap CSV into a DataFrame and display it for a quick visual check
platemap_df = pd.read_csv(filepath_or_buffer=platemap_file)
platemap_df

Unnamed: 0,WellRow,WellCol,well_position,gene_name,genotype
0,A,1,A1,NF1,WT
1,A,6,A6,NF1,WT
2,A,7,A7,NF1,Null
3,A,12,A12,NF1,Null
4,B,1,B1,NF1,WT
5,B,6,B6,NF1,WT
6,B,7,B7,NF1,Null
7,B,12,B12,NF1,Null
8,C,1,C1,NF1,WT
9,C,6,C6,NF1,WT


## Set up `SingleCells` class from Pycytominer

In [6]:
# Build the pycytominer SingleCells object for the CellProfiler SQLite output
sc_cpcellpose = cells.SingleCells(
    # Database connection string
    sql_file=single_cell_file_cpcellpose,
    # Image table settings
    image_table_name="Per_Image",
    image_cols="ImageNumber",
    load_image_data=True,
    # Compartment tables and the custom columns that link them together
    compartments=["Per_Cells", "Per_Cytoplasm", "Per_Nuclei"],
    compartment_linking_cols=linking_cols,
    # Grouping (strata) and merge columns
    strata=["Image_Metadata_Well", "Image_Metadata_Plate"],
    merge_cols=["ImageNumber"],
)



## Merge single cells

In [7]:
# Extra keyword arguments forwarded to merge_single_cells: the pair of columns to join on
anno_kwargs = dict(join_on=["Metadata_well_position", "Image_Metadata_Well"])

# Merge the compartment tables into one single cell DataFrame, passing the platemap file
sc_df_cpcellpose = sc_cpcellpose.merge_single_cells(platemap=platemap_file, **anno_kwargs)

# Write the merged (level 2) single cell data to the compressed csv path
output(sc_df_cpcellpose, sc_output_file_cpcellpose)

# Report dimensions, then preview the first rows
print(sc_df_cpcellpose.shape)
sc_df_cpcellpose.head()

(1757, 1207)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_gene_name,Metadata_genotype,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_Cells_Number_Object_Number,...,Nuclei_Texture_Variance_DAPI_3_02_256,Nuclei_Texture_Variance_DAPI_3_03_256,Nuclei_Texture_Variance_GFP_3_00_256,Nuclei_Texture_Variance_GFP_3_01_256,Nuclei_Texture_Variance_GFP_3_02_256,Nuclei_Texture_Variance_GFP_3_03_256,Nuclei_Texture_Variance_RFP_3_00_256,Nuclei_Texture_Variance_RFP_3_01_256,Nuclei_Texture_Variance_RFP_3_02_256,Nuclei_Texture_Variance_RFP_3_03_256
0,A,1,NF1,WT,5,1,A1,1,2,1,...,546.566238,545.311482,179.355278,170.285271,190.127059,171.917906,33.451818,33.475314,34.898182,34.62205
1,A,1,NF1,WT,5,1,A1,2,8,2,...,283.203327,271.588888,77.415127,72.579388,70.084109,71.93685,42.654557,43.833944,43.929823,42.424157
2,A,1,NF1,WT,5,1,A1,3,5,3,...,376.172521,347.224223,82.439961,75.946605,88.647043,82.828155,23.788771,24.347087,23.64394,23.427712
3,A,1,NF1,WT,5,1,A1,4,6,4,...,583.858595,591.669958,111.904961,113.233117,120.380393,117.977028,27.902491,28.080917,27.944528,27.849656
4,A,1,NF1,WT,5,1,A1,5,7,5,...,319.863538,314.19419,51.135861,52.972429,52.862503,51.59115,48.756821,45.531392,47.211641,49.60698


## Normalize data

In [8]:
# Apply the "standardize" normalization method to the merged single cell data
normalize_sc_cpcellpose = normalize(sc_df_cpcellpose, method="standardize")

# Write the normalized data to its output file
output(normalize_sc_cpcellpose, sc_norm_output_file_cpcellpose)

# Report dimensions, then preview the first rows
print(normalize_sc_cpcellpose.shape)
normalize_sc_cpcellpose.head()

(1757, 1207)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_gene_name,Metadata_genotype,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_Cells_Number_Object_Number,...,Nuclei_Texture_Variance_DAPI_3_02_256,Nuclei_Texture_Variance_DAPI_3_03_256,Nuclei_Texture_Variance_GFP_3_00_256,Nuclei_Texture_Variance_GFP_3_01_256,Nuclei_Texture_Variance_GFP_3_02_256,Nuclei_Texture_Variance_GFP_3_03_256,Nuclei_Texture_Variance_RFP_3_00_256,Nuclei_Texture_Variance_RFP_3_01_256,Nuclei_Texture_Variance_RFP_3_02_256,Nuclei_Texture_Variance_RFP_3_03_256
0,A,1,NF1,WT,5,1,A1,1,2,1,...,-0.48658,-0.48148,-0.535624,-0.534374,-0.521832,-0.537811,-0.365926,-0.359841,-0.360915,-0.356286
1,A,1,NF1,WT,5,1,A1,2,8,2,...,-0.651505,-0.657519,-0.690213,-0.685694,-0.704159,-0.691887,-0.324297,-0.312825,-0.319869,-0.320526
2,A,1,NF1,WT,5,1,A1,3,5,3,...,-0.593285,-0.608876,-0.682593,-0.680479,-0.675964,-0.675103,-0.409639,-0.401273,-0.412062,-0.407594
3,A,1,NF1,WT,5,1,A1,4,6,4,...,-0.463226,-0.451665,-0.63791,-0.622733,-0.627766,-0.620936,-0.39103,-0.384325,-0.392517,-0.387327
4,A,1,NF1,WT,5,1,A1,5,7,5,...,-0.628547,-0.630118,-0.730064,-0.71606,-0.730316,-0.72324,-0.296692,-0.305121,-0.304954,-0.287605


## Feature Selection

In [9]:
# Feature selection operations handed to pycytominer's feature_select
feature_select_ops = ["variance_threshold", "correlation_threshold", "blocklist"]

# Run feature selection on the normalized single cell data
feature_select_norm_sc_cpcellpose = feature_select(normalize_sc_cpcellpose, operation=feature_select_ops)

# Write the normalized, feature selected data to its output file
output(feature_select_norm_sc_cpcellpose, sc_norm_fs_output_file_cpcellpose)

# Report dimensions, then preview the first rows
print(feature_select_norm_sc_cpcellpose.shape)
feature_select_norm_sc_cpcellpose.head()

(1757, 406)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_gene_name,Metadata_genotype,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_Cells_Number_Object_Number,...,Nuclei_Texture_InfoMeas2_RFP_3_00_256,Nuclei_Texture_InfoMeas2_RFP_3_01_256,Nuclei_Texture_InfoMeas2_RFP_3_02_256,Nuclei_Texture_InfoMeas2_RFP_3_03_256,Nuclei_Texture_InverseDifferenceMoment_DAPI_3_00_256,Nuclei_Texture_InverseDifferenceMoment_GFP_3_01_256,Nuclei_Texture_InverseDifferenceMoment_RFP_3_02_256,Nuclei_Texture_SumEntropy_DAPI_3_02_256,Nuclei_Texture_SumVariance_GFP_3_03_256,Nuclei_Texture_SumVariance_RFP_3_03_256
0,A,1,NF1,WT,5,1,A1,1,2,1,...,-0.955233,-1.16647,-0.214206,0.552115,0.000607,1.09218,0.385774,-0.518238,-0.538285,-0.312174
1,A,1,NF1,WT,5,1,A1,2,8,2,...,1.105931,1.175079,1.249703,1.325019,0.331432,0.834473,3.239866,-0.933359,-0.655349,-0.25551
2,A,1,NF1,WT,5,1,A1,3,5,3,...,-0.454435,1.048243,0.22542,-0.569954,-0.085186,0.995599,1.304649,-0.768525,-0.637812,-0.366693
3,A,1,NF1,WT,5,1,A1,4,6,4,...,0.5395,-1.847431,-2.272379,-1.619573,-0.130778,0.893694,-0.677914,-0.059198,-0.59707,-0.375866
4,A,1,NF1,WT,5,1,A1,5,7,5,...,0.876531,-0.830452,-0.895053,-0.103098,0.248717,1.015055,-0.569338,-0.806808,-0.691142,-0.259737


---

### Summarize basic count statistics

In [10]:
# Number of single cells per genotype
sc_df_cpcellpose["Metadata_genotype"].value_counts()

Null    1030
WT       727
Name: Metadata_genotype, dtype: int64

In [11]:
# Single cell counts per genotype (rows) and well (columns)
pd.crosstab(sc_df_cpcellpose["Metadata_genotype"], sc_df_cpcellpose["Metadata_Well"])

Metadata_Well,A1,A12,A6,A7,B1,B12,B6,B7,C1,C12,...,F6,F7,G1,G12,G6,G7,H1,H12,H6,H7
Metadata_genotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Null,0,51,0,59,0,50,0,65,0,66,...,0,79,0,54,0,68,0,53,0,62
WT,54,0,52,0,54,0,52,0,57,0,...,41,0,59,0,38,0,30,0,28,0
