# Process CFReT single cell morphology features from CellProfiler readout

## Import libraries

In [1]:
import pathlib
import pandas as pd

from pycytominer import normalize
from pycytominer.cyto_utils import cells, output

## Set up paths

In [2]:
# Set file and directory constants
cp_dir = "../2.cellprofiler_processing"
output_dir = "data"

# Create the output directory up front so that the later writes of the
# single-cell files cannot fail because the directory is missing
pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

# Set paths for plate localhost220512140003_KK22-05-198
# (the plate name is shared by the SQLite input and both output files)
plate_name1 = "localhost220512140003_KK22-05-198"
sql_file1 = f"{plate_name1}.sqlite"
single_cell_file1 = f"sqlite:///{cp_dir}/CellProfiler_output/{sql_file1}"
platemap_file1 = "metadata/plate_1_CFReT.csv"
sc_output_file1 = pathlib.Path(output_dir, f"{plate_name1}_sc_cellprofiler.csv.gz")
sc_norm_output_file1 = pathlib.Path(output_dir, f"{plate_name1}_sc_norm_cellprofiler.csv.gz")

# Set paths for plate localhost220513100001_KK22-05-198_FactinAdjusted
plate_name2 = "localhost220513100001_KK22-05-198_FactinAdjusted"
sql_file2 = f"{plate_name2}.sqlite"
single_cell_file2 = f"sqlite:///{cp_dir}/CellProfiler_output/{sql_file2}"
platemap_file2 = "metadata/plate_2_CFReT.csv"
sc_output_file2 = pathlib.Path(output_dir, f"{plate_name2}_sc_cellprofiler.csv.gz")
sc_norm_output_file2 = pathlib.Path(output_dir, f"{plate_name2}_sc_norm_cellprofiler.csv.gz")

## Set up names for linking columns between tables in the database file

In [3]:
# Names of the per-compartment tables in the database file
cells_table = "Per_Cells"
cytoplasm_table = "Per_Cytoplasm"
nuclei_table = "Per_Nuclei"

# Custom linking columns between compartments: for each compartment table,
# the column used to link it to each of the other listed tables
linking_cols = {
    cytoplasm_table: {
        cells_table: "Cytoplasm_Parent_Cells",
        nuclei_table: "Cytoplasm_Parent_OrigNuclei",
    },
    cells_table: {cytoplasm_table: "Cells_Number_Object_Number"},
    nuclei_table: {cytoplasm_table: "Nuclei_Number_Object_Number"},
}

## Load and view the platemap file for each plate

### Plate localhost220512140003_KK22-05-198

In [4]:
# Read the platemap CSV for plate localhost220512140003_KK22-05-198 into a
# DataFrame, then display it as the cell output
platemap_df1 = pd.read_csv(filepath_or_buffer=platemap_file1)
platemap_df1

Unnamed: 0,WellRow,WellCol,well_position,heart_number,treatment,dose
0,A,1,A01,3,drug_x,5uM
1,A,2,A02,3,drug_x,10uM
2,A,3,A03,3,drug_x,10uM
3,A,4,A04,3,DMSO,0uM
4,A,5,A05,8,drug_x,5uM
...,...,...,...,...,...,...
59,H,4,H04,3,DMSO,0uM
60,H,5,H05,8,drug_x,5uM
61,H,6,H06,8,drug_x,0.005uM
62,H,7,H07,8,drug_x,0.005uM


### Plate localhost220513100001_KK22-05-198_FactinAdjusted

In [5]:
# Read the platemap CSV for plate
# localhost220513100001_KK22-05-198_FactinAdjusted into a DataFrame, then
# display it as the cell output
platemap_df2 = pd.read_csv(filepath_or_buffer=platemap_file2)
platemap_df2

Unnamed: 0,WellRow,WellCol,well_position,heart_number,treatment,dose
0,A,9,A09,9,drug_x,5uM
1,A,10,A10,9,drug_x,10uM
2,A,11,A11,9,drug_x,10uM
3,A,12,A12,9,DMSO,0uM
4,B,9,B09,9,drug_x,5uM
5,B,10,B10,9,drug_x,3.33uM
6,B,11,B11,9,drug_x,3.33uM
7,B,12,B12,9,DMSO,0uM
8,C,9,C09,9,drug_x,5uM
9,C,10,C10,9,drug_x,1.11uM


## Set up `SingleCells` class from Pycytominer

### Plate localhost220512140003_KK22-05-198

In [6]:
# Instantiate the SingleCells class for plate localhost220512140003_KK22-05-198
# using the custom table names and linking columns of this database file
sc1 = cells.SingleCells(
    sql_file=single_cell_file1,
    compartments=["Per_Cells", "Per_Cytoplasm", "Per_Nuclei"],
    compartment_linking_cols=linking_cols,
    image_table_name="Per_Image",
    strata=["Image_Metadata_Well", "Image_Metadata_Plate"],
    merge_cols=["ImageNumber"],
    # pycytominer documents image_cols as a list of column names, so pass a
    # list rather than a bare string
    image_cols=["ImageNumber"],
    load_image_data=True,
)



### Plate localhost220513100001_KK22-05-198_FactinAdjusted

In [7]:
# Instantiate the SingleCells class for plate
# localhost220513100001_KK22-05-198_FactinAdjusted using the custom table
# names and linking columns of this database file
sc2 = cells.SingleCells(
    sql_file=single_cell_file2,
    compartments=["Per_Cells", "Per_Cytoplasm", "Per_Nuclei"],
    compartment_linking_cols=linking_cols,
    image_table_name="Per_Image",
    strata=["Image_Metadata_Well", "Image_Metadata_Plate"],
    merge_cols=["ImageNumber"],
    # pycytominer documents image_cols as a list of column names, so pass a
    # list rather than a bare string
    image_cols=["ImageNumber"],
    load_image_data=True,
)



## Merge single cells

### Plate localhost220512140003_KK22-05-198

In [8]:
# Columns used to join the platemap onto the single cells
# (presumably platemap well column first, image-table well column second;
# confirm against the pycytominer annotate documentation)
join_columns1 = ["Metadata_well_position", "Image_Metadata_Well"]
anno_kwargs1 = {"join_on": join_columns1}

# Merge single cells across compartments and attach the platemap metadata
sc_df1 = sc1.merge_single_cells(platemap=platemap_df1, **anno_kwargs1)

# Write the merged (level 2) single-cell data to file
output(df=sc_df1, output_filename=sc_output_file1)

# Report the dimensions and preview the first rows
print(sc_df1.shape)
sc_df1.head()

(30849, 2013)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_heart_number,Metadata_treatment,Metadata_dose,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_OrigNuclei,...,Nuclei_Texture_Variance_Golgi_3_02_256,Nuclei_Texture_Variance_Golgi_3_03_256,Nuclei_Texture_Variance_Hoechst_3_00_256,Nuclei_Texture_Variance_Hoechst_3_01_256,Nuclei_Texture_Variance_Hoechst_3_02_256,Nuclei_Texture_Variance_Hoechst_3_03_256,Nuclei_Texture_Variance_Mitochondria_3_00_256,Nuclei_Texture_Variance_Mitochondria_3_01_256,Nuclei_Texture_Variance_Mitochondria_3_02_256,Nuclei_Texture_Variance_Mitochondria_3_03_256
0,A,1,3,drug_x,5uM,1,localhost220512140003,A01,1,4,...,6.542603,6.886314,27.74657,26.991794,27.23615,25.985893,3.508829,3.042764,3.162068,3.174969
1,A,1,3,drug_x,5uM,1,localhost220512140003,A01,2,5,...,2.461162,2.464008,40.36296,38.84844,38.528401,38.782049,5.996352,5.167944,5.492333,5.74792
2,A,1,3,drug_x,5uM,1,localhost220512140003,A01,3,6,...,6.382669,6.27088,85.124157,84.17784,86.421311,84.296525,6.619767,6.146135,6.316895,5.875562
3,A,1,3,drug_x,5uM,1,localhost220512140003,A01,4,7,...,70.93582,66.783987,77.782193,74.600695,73.983101,73.431257,72.212991,81.291925,80.023265,67.658017
4,A,1,3,drug_x,5uM,1,localhost220512140003,A01,5,8,...,4.836999,4.924028,56.105261,56.914222,56.576967,55.723829,4.408239,4.320621,4.412594,4.410985


### Plate localhost220513100001_KK22-05-198_FactinAdjusted

In [9]:
# Columns used to join the platemap onto the single cells
# (presumably platemap well column first, image-table well column second;
# confirm against the pycytominer annotate documentation)
join_columns2 = ["Metadata_well_position", "Image_Metadata_Well"]
anno_kwargs2 = {"join_on": join_columns2}

# Merge single cells across compartments and attach the platemap metadata
sc_df2 = sc2.merge_single_cells(platemap=platemap_df2, **anno_kwargs2)

# Write the merged (level 2) single-cell data to file
output(df=sc_df2, output_filename=sc_output_file2)

# Report the dimensions and preview the first rows
print(sc_df2.shape)
sc_df2.head()

(13040, 2013)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_heart_number,Metadata_treatment,Metadata_dose,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_OrigNuclei,...,Nuclei_Texture_Variance_Golgi_3_02_256,Nuclei_Texture_Variance_Golgi_3_03_256,Nuclei_Texture_Variance_Hoechst_3_00_256,Nuclei_Texture_Variance_Hoechst_3_01_256,Nuclei_Texture_Variance_Hoechst_3_02_256,Nuclei_Texture_Variance_Hoechst_3_03_256,Nuclei_Texture_Variance_Mitochondria_3_00_256,Nuclei_Texture_Variance_Mitochondria_3_01_256,Nuclei_Texture_Variance_Mitochondria_3_02_256,Nuclei_Texture_Variance_Mitochondria_3_03_256
0,A,9,9,drug_x,5uM,1,localhost220513100001,A09,1,3,...,21.462864,21.98625,141.944618,144.416879,153.297639,147.088477,22.590302,21.109874,23.395531,23.982229
1,A,9,9,drug_x,5uM,1,localhost220513100001,A09,2,4,...,84.689343,68.808177,58.920732,58.970008,62.194088,59.808636,134.447371,141.676296,170.853277,132.043724
2,A,9,9,drug_x,5uM,1,localhost220513100001,A09,3,7,...,4.768415,4.502388,25.119253,23.805631,23.594505,23.416711,3.989105,3.670133,3.598784,3.504328
3,A,9,9,drug_x,5uM,1,localhost220513100001,A09,4,8,...,3.331168,3.287838,48.649403,49.917739,51.161775,49.409916,1.738423,1.74506,2.113953,1.764217
4,A,9,9,drug_x,5uM,1,localhost220513100001,A09,5,9,...,6.13536,6.087052,106.518692,105.244083,104.559167,109.88906,7.972479,7.282319,8.542481,8.403054


## Normalize data

### Plate localhost220512140003_KK22-05-198

In [10]:
# Standardize the merged single-cell profiles of plate
# localhost220512140003_KK22-05-198
normalize_sc_df1 = normalize(profiles=sc_df1, method="standardize")

# Write the normalized profiles to file
output(df=normalize_sc_df1, output_filename=sc_norm_output_file1)

# Report the dimensions and preview the first rows
print(normalize_sc_df1.shape)
normalize_sc_df1.head()

(30849, 2013)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_heart_number,Metadata_treatment,Metadata_dose,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_OrigNuclei,...,Nuclei_Texture_Variance_Golgi_3_02_256,Nuclei_Texture_Variance_Golgi_3_03_256,Nuclei_Texture_Variance_Hoechst_3_00_256,Nuclei_Texture_Variance_Hoechst_3_01_256,Nuclei_Texture_Variance_Hoechst_3_02_256,Nuclei_Texture_Variance_Hoechst_3_03_256,Nuclei_Texture_Variance_Mitochondria_3_00_256,Nuclei_Texture_Variance_Mitochondria_3_01_256,Nuclei_Texture_Variance_Mitochondria_3_02_256,Nuclei_Texture_Variance_Mitochondria_3_03_256
0,A,1,3,drug_x,5uM,1,localhost220512140003,A01,1,4,...,-0.239985,-0.235165,-0.305502,-0.307567,-0.308565,-0.310889,-0.382969,-0.381023,-0.384161,-0.37853
1,A,1,3,drug_x,5uM,1,localhost220512140003,A01,2,5,...,-0.268248,-0.265923,-0.249705,-0.255572,-0.258603,-0.254945,-0.375255,-0.374351,-0.376955,-0.370509
2,A,1,3,drug_x,5uM,1,localhost220512140003,A01,3,6,...,-0.241092,-0.239446,-0.051744,-0.056785,-0.046704,-0.055959,-0.373322,-0.37128,-0.374405,-0.370111
3,A,1,3,drug_x,5uM,1,localhost220512140003,A01,4,7,...,0.205934,0.181434,-0.084215,-0.098785,-0.101736,-0.103461,-0.169921,-0.13537,-0.146478,-0.177506
4,A,1,3,drug_x,5uM,1,localhost220512140003,A01,5,8,...,-0.251796,-0.248813,-0.180083,-0.176346,-0.178748,-0.180877,-0.38018,-0.377011,-0.380294,-0.374676


### Plate localhost220513100001_KK22-05-198_FactinAdjusted

In [11]:
# Standardize the merged single-cell profiles of plate
# localhost220513100001_KK22-05-198_FactinAdjusted
normalize_sc_df2 = normalize(profiles=sc_df2, method="standardize")

# Write the normalized profiles to file
output(df=normalize_sc_df2, output_filename=sc_norm_output_file2)

# Report the dimensions and preview the first rows
print(normalize_sc_df2.shape)
normalize_sc_df2.head()

(13040, 2013)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_heart_number,Metadata_treatment,Metadata_dose,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_OrigNuclei,...,Nuclei_Texture_Variance_Golgi_3_02_256,Nuclei_Texture_Variance_Golgi_3_03_256,Nuclei_Texture_Variance_Hoechst_3_00_256,Nuclei_Texture_Variance_Hoechst_3_01_256,Nuclei_Texture_Variance_Hoechst_3_02_256,Nuclei_Texture_Variance_Hoechst_3_03_256,Nuclei_Texture_Variance_Mitochondria_3_00_256,Nuclei_Texture_Variance_Mitochondria_3_01_256,Nuclei_Texture_Variance_Mitochondria_3_02_256,Nuclei_Texture_Variance_Mitochondria_3_03_256
0,A,9,9,drug_x,5uM,1,localhost220513100001,A09,1,3,...,-0.027555,-0.017413,-0.006634,0.001776,0.026727,0.009673,-0.074731,-0.081263,-0.069309,-0.062066
1,A,9,9,drug_x,5uM,1,localhost220513100001,A09,2,4,...,0.68299,0.517145,-0.250717,-0.249044,-0.241509,-0.246707,0.659287,0.724034,0.898689,0.656439
2,A,9,9,drug_x,5uM,1,localhost220513100001,A09,3,7,...,-0.215169,-0.217024,-0.35009,-0.352265,-0.355157,-0.353607,-0.196794,-0.197748,-0.199267,-0.198224
3,A,9,9,drug_x,5uM,1,localhost220513100001,A09,4,8,...,-0.231321,-0.23089,-0.280914,-0.275615,-0.273991,-0.277253,-0.211563,-0.210606,-0.209014,-0.209794
4,A,9,9,drug_x,5uM,1,localhost220513100001,A09,5,9,...,-0.199808,-0.198932,-0.110783,-0.113211,-0.116774,-0.099598,-0.170655,-0.173621,-0.166813,-0.165652


---

## Visualize basic count statistics

### Plate localhost220512140003_KK22-05-198

In [12]:
# Number of single cells per dose on plate localhost220512140003_KK22-05-198
sc_df1["Metadata_dose"].value_counts()

0uM        7980
5uM        7548
0.04uM     2223
0.014uM    2086
3.33uM     2032
1.11uM     1963
0.12uM     1906
0.37uM     1774
0.005uM    1696
10uM       1641
Name: Metadata_dose, dtype: int64

In [13]:
# Cross-tabulate single-cell counts: one row per dose, one column per well
pd.crosstab(index=sc_df1["Metadata_dose"], columns=sc_df1["Metadata_Well"])

Metadata_Well,A01,A02,A03,A04,A05,A06,A07,A08,B01,B02,...,G07,G08,H01,H02,H03,H04,H05,H06,H07,H08
Metadata_dose,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.005uM,0,0,0,0,0,0,0,0,0,0,...,0,0,0,503,379,0,0,413,401,0
0.014uM,0,0,0,0,0,0,0,0,0,0,...,610,0,0,0,0,0,0,0,0,0
0.04uM,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0.12uM,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0.37uM,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0uM,0,0,0,361,0,0,0,355,0,0,...,0,634,0,0,0,514,0,0,0,457
1.11uM,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10uM,0,480,390,0,0,391,380,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3.33uM,0,0,0,0,0,0,0,0,0,433,...,0,0,0,0,0,0,0,0,0,0
5uM,449,0,0,0,458,0,0,0,452,0,...,0,0,393,0,0,0,383,0,0,0


### Plate localhost220513100001_KK22-05-198_FactinAdjusted

In [14]:
# Number of single cells per dose on plate
# localhost220513100001_KK22-05-198_FactinAdjusted
sc_df2["Metadata_dose"].value_counts()

5uM        3513
0uM        2814
0.014uM    1223
0.04uM     1176
0.12uM     1157
0.37uM      902
0.005uM     765
1.11uM      523
3.33uM      501
10uM        466
Name: Metadata_dose, dtype: int64

In [15]:
# Cross-tabulate single-cell counts: one row per dose, one column per well
pd.crosstab(index=sc_df2["Metadata_dose"], columns=sc_df2["Metadata_Well"])

Metadata_Well,A09,A10,A11,A12,B09,B10,B11,B12,C09,C10,...,F11,F12,G09,G10,G11,G12,H09,H10,H11,H12
Metadata_dose,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.005uM,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,471,294,0
0.014uM,0,0,0,0,0,0,0,0,0,0,...,0,0,0,685,538,0,0,0,0,0
0.04uM,0,0,0,0,0,0,0,0,0,0,...,538,0,0,0,0,0,0,0,0,0
0.12uM,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0.37uM,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0uM,0,0,0,157,0,0,0,249,0,0,...,0,425,0,0,0,492,0,0,0,351
1.11uM,0,0,0,0,0,0,0,0,0,271,...,0,0,0,0,0,0,0,0,0,0
10uM,0,243,223,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3.33uM,0,0,0,0,0,234,267,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5uM,254,0,0,0,335,0,0,0,337,0,...,0,0,560,0,0,0,359,0,0,0
