# Process single cell morphology features for CellProfiler readouts

## Import Libraries

In [1]:
import pathlib
import pandas as pd

from pycytominer.cyto_utils import cells, output

## Set up paths to CellProfiler directory and outputs

In [2]:
# Directory constants: where merged outputs are written and where the
# CellProfiler analysis lives (both relative to this notebook)
output_dir = "data"
cp_dir = "../2.cellprofiler_analysis"

## Set paths to the SQLite file, platemap, and output file

In [3]:
# Name of the CellProfiler SQLite database. The output file name below is
# derived from this name so the input and output cannot drift apart.
sql_file = "interstellar_wave1_dilate100.sqlite"

# Connection string (sqlite:///<path>) that is passed to pycytominer as `sql_file`
single_cell_file = f"sqlite:///{cp_dir}/analysis_output/{sql_file}"

# Platemap CSV with the per-well metadata (well, treatment, dose, ...)
platemap_file = "../../metadata/Interstellar_platemap.csv"

# Destination of the merged single-cell table: <output_dir>/<sqlite stem>_sc.csv.gz
sc_output_file = pathlib.Path(output_dir, f"{pathlib.Path(sql_file).stem}_sc.csv.gz")

## Set up names for linking columns between tables in the database file

In [4]:
# Custom linking columns between compartment tables, keyed as
# {table: {other_table: column_in_table_used_for_the_link}}
linking_cols = {}

# Columns on Per_TranslocatedNuclei that point at the other two tables
linking_cols["Per_TranslocatedNuclei"] = {
    "Per_DilatedNuclei": "TranslocatedNuclei_Parent_DilatedNuclei",
    "Per_Nuclei": "TranslocatedNuclei_Parent_Nuclei",
}

# Columns on the other two tables that point back at Per_TranslocatedNuclei
linking_cols["Per_DilatedNuclei"] = {
    "Per_TranslocatedNuclei": "DilatedNuclei_Number_Object_Number"
}
linking_cols["Per_Nuclei"] = {
    "Per_TranslocatedNuclei": "Nuclei_Number_Object_Number"
}

## Load and view platemap file

In [5]:
# Read the platemap CSV into a dataframe and preview its first rows
platemap_df = pd.read_csv(filepath_or_buffer=platemap_file)
platemap_df.head(n=5)

Unnamed: 0,well,wellName,row,col,alias,treatment,dose
0,A01,A1,1,1,1,LPS,10µg/ml
1,B01,B1,2,1,2,LPS,1µg/ml
2,C01,C1,3,1,3,H2O2,500µM
3,D01,D1,4,1,4,H2O2,50µM
4,E01,E1,5,1,5,ATP,1mM


## Set up `SingleCells` class from Pycytominer

In [6]:
# Build the pycytominer SingleCells object that reads the CellProfiler SQLite tables.
# NOTE(review): `image_cols` is passed as a bare string while `merge_cols` and
# `strata` are lists — confirm pycytominer treats both forms the same way.
sc = cells.SingleCells(
    # Database connection string and the tables to read from it
    sql_file=single_cell_file,
    image_table_name="Per_Image",
    compartments=["Per_DilatedNuclei", "Per_TranslocatedNuclei", "Per_Nuclei"],
    compartment_linking_cols=linking_cols,
    # Image-table columns used for selecting, merging, and grouping
    image_cols="ImageNumber",
    merge_cols=["ImageNumber"],
    strata=["Image_Metadata_Well", "Image_Metadata_Plate"],
    # Request that image-table data be loaded when the object is created
    load_image_data=True,
)



## Merge single cells across compartments and save the output

In [7]:
# Merge single cells across compartments and annotate them with the platemap.
# `join_on` names the platemap-side and image-side well columns to match on;
# platemap columns show up with a "Metadata_" prefix in the merged output.
anno_kwargs = {"join_on": ["Metadata_well", "Image_Metadata_Well"]}

sc_df = sc.merge_single_cells(
    platemap=platemap_df,
    **anno_kwargs,
)

# NOTE(review): in the recorded output, consecutive rows share identical
# Nuclei_* feature values while their TranslocatedNuclei parent ids differ —
# confirm the linking columns are not matching one nucleus to several objects.

# Create the output directory if needed so the write below does not fail on a
# fresh checkout where it has not been created yet
sc_output_file.parent.mkdir(parents=True, exist_ok=True)

# Save level 2 data to the .csv.gz path defined above
output(sc_df, sc_output_file)

print(sc_df.shape)
sc_df.head()

(61843, 841)


Unnamed: 0,Metadata_wellName,Metadata_row,Metadata_col,Metadata_alias,Metadata_treatment,Metadata_dose,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_TranslocatedNuclei_Parent_DilatedNuclei,...,Nuclei_Texture_SumVariance_CorrGasderminD_3_02_256,Nuclei_Texture_SumVariance_CorrGasderminD_3_03_256,Nuclei_Texture_Variance_CorrDNA_3_00_256,Nuclei_Texture_Variance_CorrDNA_3_01_256,Nuclei_Texture_Variance_CorrDNA_3_02_256,Nuclei_Texture_Variance_CorrDNA_3_03_256,Nuclei_Texture_Variance_CorrGasderminD_3_00_256,Nuclei_Texture_Variance_CorrGasderminD_3_01_256,Nuclei_Texture_Variance_CorrGasderminD_3_02_256,Nuclei_Texture_Variance_CorrGasderminD_3_03_256
0,A3,1,3,1,LPS,10µg/ml,1,70117_20230118MM1_Ab Test_V2,A03,1,...,0.142435,0.166532,3.561029,3.446556,3.438397,3.519586,0.078367,0.054412,0.060766,0.064295
1,A3,1,3,1,LPS,10µg/ml,1,70117_20230118MM1_Ab Test_V2,A03,11,...,0.142435,0.166532,3.561029,3.446556,3.438397,3.519586,0.078367,0.054412,0.060766,0.064295
2,A3,1,3,1,LPS,10µg/ml,1,70117_20230118MM1_Ab Test_V2,A03,4,...,0.436902,0.41142,2.937372,2.769628,2.853175,2.808121,0.176701,0.171388,0.172782,0.170238
3,A3,1,3,1,LPS,10µg/ml,1,70117_20230118MM1_Ab Test_V2,A03,10,...,0.436902,0.41142,2.937372,2.769628,2.853175,2.808121,0.176701,0.171388,0.172782,0.170238
4,A3,1,3,1,LPS,10µg/ml,1,70117_20230118MM1_Ab Test_V2,A03,5,...,0.186918,0.126192,9.808492,9.669385,9.645344,9.55359,0.06385,0.066355,0.069811,0.059171


## View info of the dataframe

In [8]:
# Print a summary of the merged dataframe (index range, column count, dtypes, memory).
# NOTE(review): the recorded output lists 838 object-dtype columns and only 3
# int64 columns — confirm the numeric feature columns are not stored as strings.
sc_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 61843 entries, 0 to 61842
Columns: 841 entries, Metadata_wellName to Nuclei_Texture_Variance_CorrGasderminD_3_03_256
dtypes: int64(3), object(838)
memory usage: 397.3+ MB


---

### View basic count statistics per treatment and well

In [9]:
# Count how many single-cell rows belong to each treatment
sc_df["Metadata_treatment"].value_counts()

ATP                9816
Flagellin          9012
LPS                7736
Thapsi             7201
LPS + Nigericin    7152
Disulfiram         6850
H2O2               4814
DMSO 0.1%          4734
Media only         4528
Name: Metadata_treatment, dtype: int64

In [10]:
# Cross-tabulate single-cell row counts: treatments as rows, wells as columns
pd.crosstab(index=sc_df["Metadata_treatment"], columns=sc_df["Metadata_Well"])

Metadata_Well,A03,A04,A09,A10,B03,B04,B09,B10,C03,C04,...,F09,F10,G03,G04,G09,G10,H03,H04,H09,H10
Metadata_treatment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ATP,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
DMSO 0.1%,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,2282,2452,0,0,0,0
Disulfiram,0,0,1113,1188,0,0,2229,2320,0,0,...,0,0,0,0,0,0,0,0,0,0
Flagellin,0,0,0,0,0,0,0,0,0,0,...,0,0,2099,2250,0,0,2309,2354,0,0
H2O2,0,0,0,0,0,0,0,0,128,31,...,0,0,0,0,0,0,0,0,0,0
LPS,1189,1971,0,0,2292,2284,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
LPS + Nigericin,0,0,0,0,0,0,0,0,0,0,...,2169,2333,0,0,0,0,0,0,0,0
Media only,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,2101,2427
Thapsi,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
