# Process single-cell morphology features from CellProfiler readouts - PyBaSiC and CellProfiler Cellpose plugin method

## Import Libraries

In [1]:
import pathlib
import pandas as pd

from pycytominer import normalize, feature_select
from pycytominer.cyto_utils import cells, output

import sys
sys.path.append("../../")
import sc_count_add_save_util as sc_util

## Set up paths to CellProfiler directory and outputs

In [2]:
# Directory constants shared by the rest of the notebook:
#   output_dir - folder that receives the processed single-cell files
#   cp_dir     - root of the CellProfiler pipelines repository (sqlite + platemap live here)
output_dir = "../../data/Plate2/CellProfiler"
cp_dir = "../../../CellProfiler_pipelines"

## Set up paths to sqlite files and outputs

In [3]:
# Input files: the CellProfiler .sqlite database (addressed through a
# "sqlite:///" connection string) and the platemap csv holding well metadata
sql_file_pbcellpose = "NF1_data_pybasic_cellpose_plate2.sqlite"
single_cell_file_pbcellpose = (
    f"sqlite:///{cp_dir}/Analysis_Output/Plate2_Output/{sql_file_pbcellpose}"
)
platemap_file = f"{cp_dir}/Metadata/platemap_NF1_CP_Plate2.csv"

# Output files, one per processing stage:
# merged single cells -> normalized -> normalized + feature selected
sc_output_file_pbcellpose = pathlib.Path(
    f"{output_dir}/nf1_sc_pybasic_cellpose.csv.gz"
)
sc_norm_output_file_pbcellpose = pathlib.Path(
    f"{output_dir}/nf1_sc_norm_pybasic_cellpose.csv.gz"
)
sc_norm_fs_output_file_pbcellpose = pathlib.Path(
    f"{output_dir}/nf1_sc_norm_fs_pybasic_cellpose.csv.gz"
)

## Set up names for linking columns between tables in the database file

In [4]:
# Custom linking columns between compartment tables.
# Outer key: a compartment table; inner key: the table it links to;
# inner value: the column used for that link.
linking_cols = {
    "Per_Cytoplasm": {
        "Per_Cells": "Cytoplasm_Parent_Cells",
        "Per_Nuclei": "Cytoplasm_Parent_Nuclei",
    },
    "Per_Cells": {
        "Per_Cytoplasm": "Cells_Number_Object_Number",
    },
    "Per_Nuclei": {
        "Per_Cytoplasm": "Nuclei_Number_Object_Number",
    },
}

## Load in platemap

In [5]:
# Read the platemap csv into a DataFrame and preview its first five rows
platemap_df = pd.read_csv(filepath_or_buffer=platemap_file)
platemap_df.head(n=5)

Unnamed: 0,WellRow,WellCol,well_position,gene_name,genotype
0,A,1,A1,NF1,WT
1,A,6,A6,NF1,WT
2,A,7,A7,NF1,Null
3,A,12,A12,NF1,Null
4,B,1,B1,NF1,WT


## Set up `SingleCells` class from Pycytominer

In [6]:
# Instantiate the pycytominer SingleCells class on the CellProfiler sqlite output.
# - compartments / compartment_linking_cols: the three per-object tables and the
#   custom columns (defined in linking_cols) used to link them together
# - image_table_name / strata: image-level table and the well + plate columns read from it
# - merge_cols / image_cols: both are passed as lists of column names, which is the
#   type pycytominer documents for these parameters (image_cols was a bare string before)
sc_pbcellpose = cells.SingleCells(
    sql_file=single_cell_file_pbcellpose,
    compartments=["Per_Cells", "Per_Cytoplasm", "Per_Nuclei"],
    compartment_linking_cols=linking_cols,
    image_table_name="Per_Image",
    strata=["Image_Metadata_Well", "Image_Metadata_Plate"],
    merge_cols=["ImageNumber"],
    image_cols=["ImageNumber"],
    load_image_data=True,
)



## Merge single cells 

In [7]:
# Merge single cells across compartments and annotate them with the platemap.
# join_on names the two well columns used to attach the platemap annotations
# (presumably platemap side first, then single-cell side - confirm against pycytominer annotate).
anno_kwargs = {"join_on": ["Metadata_well_position", "Image_Metadata_Well"]}

sc_df_pbcellpose = sc_pbcellpose.merge_single_cells(
    platemap=platemap_file,
    **anno_kwargs,
)

# Make sure the output folder exists before the first write; without it the
# csv export fails when the data directory has not been created yet
sc_output_file_pbcellpose.parent.mkdir(parents=True, exist_ok=True)

# Save level 2 data as a csv
output(sc_df_pbcellpose, sc_output_file_pbcellpose)

print(sc_df_pbcellpose.shape)
sc_df_pbcellpose.head()

(1741, 1207)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_gene_name,Metadata_genotype,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_Cells_Number_Object_Number,...,Nuclei_Texture_Variance_DAPI_3_02_256,Nuclei_Texture_Variance_DAPI_3_03_256,Nuclei_Texture_Variance_GFP_3_00_256,Nuclei_Texture_Variance_GFP_3_01_256,Nuclei_Texture_Variance_GFP_3_02_256,Nuclei_Texture_Variance_GFP_3_03_256,Nuclei_Texture_Variance_RFP_3_00_256,Nuclei_Texture_Variance_RFP_3_01_256,Nuclei_Texture_Variance_RFP_3_02_256,Nuclei_Texture_Variance_RFP_3_03_256
0,A,1,NF1,WT,6,1,A1,1,1,1,...,602.697166,595.49861,119.301644,101.646828,97.827848,98.423505,29.803689,28.960385,31.196463,31.548274
1,A,1,NF1,WT,6,1,A1,2,4,2,...,501.536876,468.256611,97.638189,111.762728,117.795551,94.45048,51.46291,51.272491,52.117749,51.550885
2,A,1,NF1,WT,6,1,A1,3,2,3,...,475.463505,462.056318,169.23108,167.313315,176.850622,176.279607,27.864464,28.638993,27.903653,28.235352
3,A,1,NF1,WT,6,1,A1,4,3,4,...,425.666168,424.39526,93.068826,82.513298,83.184941,84.086413,51.345956,50.969933,50.290894,50.525207
4,A,1,NF1,WT,6,1,A1,5,5,5,...,343.964329,318.061708,95.125235,104.730039,105.725569,88.664175,33.542562,32.000222,31.910026,32.225173


## Normalize data

In [8]:
# Standardize the merged single-cell profiles
normalize_sc_pbcellpose = normalize(sc_df_pbcellpose, method="standardize")

# Write the normalized profiles to their own file
output(normalize_sc_pbcellpose, sc_norm_output_file_pbcellpose)

# Report the dimensions, then preview the first rows
print(normalize_sc_pbcellpose.shape)
normalize_sc_pbcellpose.head()

(1741, 1207)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_gene_name,Metadata_genotype,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_Cells_Number_Object_Number,...,Nuclei_Texture_Variance_DAPI_3_02_256,Nuclei_Texture_Variance_DAPI_3_03_256,Nuclei_Texture_Variance_GFP_3_00_256,Nuclei_Texture_Variance_GFP_3_01_256,Nuclei_Texture_Variance_GFP_3_02_256,Nuclei_Texture_Variance_GFP_3_03_256,Nuclei_Texture_Variance_RFP_3_00_256,Nuclei_Texture_Variance_RFP_3_01_256,Nuclei_Texture_Variance_RFP_3_02_256,Nuclei_Texture_Variance_RFP_3_03_256
0,A,1,NF1,WT,6,1,A1,1,1,1,...,-0.433429,-0.43073,-0.575551,-0.600653,-0.625108,-0.610136,-0.461844,-0.459639,-0.458837,-0.451769
1,A,1,NF1,WT,6,1,A1,2,4,2,...,-0.50102,-0.518009,-0.62063,-0.579053,-0.583352,-0.618531,-0.368219,-0.362614,-0.367705,-0.363731
2,A,1,NF1,WT,6,1,A1,3,2,3,...,-0.518441,-0.522262,-0.471652,-0.460442,-0.459858,-0.445624,-0.470227,-0.461037,-0.47318,-0.46635
3,A,1,NF1,WT,6,1,A1,4,3,4,...,-0.551713,-0.548095,-0.630139,-0.641507,-0.655729,-0.640431,-0.368725,-0.363929,-0.375663,-0.368245
4,A,1,NF1,WT,6,1,A1,5,5,5,...,-0.606303,-0.621032,-0.62586,-0.59407,-0.608593,-0.630758,-0.445683,-0.44642,-0.455728,-0.448789


## Feature selection

In [9]:
# Feature selection operations applied to the normalized profiles, in this order
feature_select_ops = ["variance_threshold", "correlation_threshold", "blocklist"]

# Run feature selection on the normalized single-cell profiles
feature_select_norm_sc_pbcellpose = feature_select(
    normalize_sc_pbcellpose, operation=feature_select_ops
)

# Write the feature-selected profiles to their own file
output(feature_select_norm_sc_pbcellpose, sc_norm_fs_output_file_pbcellpose)

# Report the dimensions, then preview the first rows
print(feature_select_norm_sc_pbcellpose.shape)
feature_select_norm_sc_pbcellpose.head()

(1741, 407)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_gene_name,Metadata_genotype,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_Cells_Number_Object_Number,...,Nuclei_Texture_InfoMeas2_RFP_3_00_256,Nuclei_Texture_InfoMeas2_RFP_3_01_256,Nuclei_Texture_InfoMeas2_RFP_3_02_256,Nuclei_Texture_InfoMeas2_RFP_3_03_256,Nuclei_Texture_InverseDifferenceMoment_DAPI_3_00_256,Nuclei_Texture_InverseDifferenceMoment_GFP_3_02_256,Nuclei_Texture_InverseDifferenceMoment_RFP_3_02_256,Nuclei_Texture_SumEntropy_DAPI_3_02_256,Nuclei_Texture_SumVariance_GFP_3_03_256,Nuclei_Texture_SumVariance_RFP_3_03_256
0,A,1,NF1,WT,6,1,A1,1,1,1,...,0.982895,0.401427,0.274274,0.435056,-0.400704,0.652682,1.232191,-0.145207,-0.592446,-0.397477
1,A,1,NF1,WT,6,1,A1,2,4,2,...,-0.286733,-0.407529,0.106162,0.800195,0.234834,1.606299,0.291835,-0.664826,-0.624545,-0.308666
2,A,1,NF1,WT,6,1,A1,3,2,3,...,-2.066634,0.647804,-2.442599,-2.339242,-0.131006,1.751438,-0.204811,-0.092976,-0.41696,-0.458684
3,A,1,NF1,WT,6,1,A1,4,3,4,...,1.148035,1.040197,0.658763,0.638214,0.09956,1.059569,1.216109,-0.270424,-0.600853,-0.318831
4,A,1,NF1,WT,6,1,A1,5,5,5,...,0.707742,0.896977,1.068174,0.834874,0.019335,1.674967,3.767631,-0.613818,-0.597974,-0.387275


## Add single cell count metadata and save csv

In [10]:
# Apply the single-cell count helper to each saved file, in processing order
# (merged, normalized, normalized + feature selected)
for sc_file in (
    sc_output_file_pbcellpose,
    sc_norm_output_file_pbcellpose,
    sc_norm_fs_output_file_pbcellpose,
):
    sc_util.add_sc_count_metadata(sc_file)

---

### Visualize basic count statistics

In [11]:
# Number of single cells per genotype
sc_df_pbcellpose["Metadata_genotype"].value_counts()

Null    1024
WT       717
Name: Metadata_genotype, dtype: int64

In [12]:
# Single-cell counts broken down by genotype (rows) and well (columns)
pd.crosstab(
    index=sc_df_pbcellpose["Metadata_genotype"],
    columns=sc_df_pbcellpose["Metadata_Well"],
)

Metadata_Well,A1,A12,A6,A7,B1,B12,B6,B7,C1,C12,...,F6,F7,G1,G12,G6,G7,H1,H12,H6,H7
Metadata_genotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Null,0,51,0,58,0,50,0,62,0,66,...,0,78,0,54,0,68,0,52,0,62
WT,53,0,51,0,53,0,51,0,56,0,...,41,0,58,0,38,0,30,0,28,0
