# Process image features from CellProfiler readout

## Import Libraries

In [1]:
import pathlib
import pandas as pd

from pycytominer import annotate
from pycytominer.cyto_utils import output

import sys
sys.path.append("../../utils")
import extract_image_features_utils as extract_utils

## Set up paths to CellProfiler directory and outputs

In [2]:
# Directory constants used to build the input and output paths below
output_dir = "data"  # folder that receives the extracted image features
cp_dir = "../3.cellprofiler_analysis"  # CellProfiler analysis directory, relative to this notebook

## Set paths to the SQLite file, platemap metadata, and output file

In [3]:
# Inputs: platemap CSV, the table to read, and the SQLite file written by CellProfiler
platemap_file = "../../metadata/Interstellar_platemap.csv"
image_table_name = "Per_Image"
sql_file = "interstellar_wave3_imagequality.sqlite"
# URL-style location (sqlite:/// prefix) of the file under the CellProfiler output folder
single_cell_file = f"sqlite:///{cp_dir}/analysis_output/{sql_file}"

# Output: gzip-compressed CSV holding the extracted image features
image_features_output_file = pathlib.Path(
    f"{output_dir}/interstellar_wave3_imagequality.csv.gz"
)

## Set variables for extracting image features

In [4]:
# The CellProfiler pipeline only extracts image quality measurements,
# so ImageQuality is the single feature category requested
image_feature_categories = ["Image_ImageQuality"]

# Remaining arguments handed to extract_image_features further down
strata = ["Image_Metadata_Well", "Image_Metadata_Plate"]
image_cols = "ImageNumber"

## Load and view platemap file

In [5]:
# Read the platemap CSV into a dataframe and preview its first five rows
platemap_df = pd.read_csv(filepath_or_buffer=platemap_file)
platemap_df.head(5)

Unnamed: 0,well,wellName,row,col,alias,treatment,dose
0,A01,A1,1,1,1,LPS,10µg/ml
1,B01,B1,2,1,2,LPS,1µg/ml
2,C01,C1,3,1,3,H2O2,500µM
3,D01,D1,4,1,4,H2O2,50µM
4,E01,E1,5,1,5,ATP,1mM


## Load the image table from the SQLite file

In [6]:
# Load the table named by image_table_name from the SQLite file via the project helper
image_df = extract_utils.load_sqlite_as_df(
    single_cell_file,
    image_table_name,
)

# Print (rows, columns), then preview the first five rows
print(image_df.shape)
image_df.head(5)

(960, 309)


Unnamed: 0,ImageNumber,Image_ExecutionTime_01LoadData,Image_ExecutionTime_02CorrectIlluminationApply,Image_ExecutionTime_03MeasureImageQuality,Image_FileName_IllumDNA,Image_FileName_IllumER,Image_FileName_IllumGasdermin,Image_FileName_IllumMito,Image_FileName_IllumPM,Image_FileName_IllumRNA,...,Image_Width_IllumGasdermin,Image_Width_IllumMito,Image_Width_IllumPM,Image_Width_IllumRNA,Image_Width_OrigDNA,Image_Width_OrigER,Image_Width_OrigGasdermin,Image_Width_OrigMito,Image_Width_OrigPM,Image_Width_OrigRNA
0,1,3.08,0.09,21.65,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,...,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160
1,2,5.04,0.7,49.37,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,...,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160
2,3,5.0,0.89,49.69,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,...,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160
3,4,5.39,0.88,48.1,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,...,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160
4,5,4.66,1.02,50.02,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,70117_20230118MM1_CellPainting_A700_20X_V1_Ill...,...,2160,2160,2160,2160,2160,2160,2160,2160,2160,2160


## Extract image features from sqlite file

In [7]:
# Reduce the image table to the requested feature categories using the project helper;
# the displayed result keeps ImageNumber and the plate/well metadata columns
image_features_df = extract_utils.extract_image_features(
    image_feature_categories,
    image_df,
    image_cols,
    strata,
)

# Print (rows, columns), then preview the first five rows
print(image_features_df.shape)
image_features_df.head(5)

(960, 195)


Unnamed: 0,ImageNumber,Image_Metadata_Plate,Image_Metadata_Well,Image_ImageQuality_Correlation_CorrDNA_20,Image_ImageQuality_Correlation_CorrER_20,Image_ImageQuality_Correlation_CorrGasdermin_20,Image_ImageQuality_Correlation_CorrMito_20,Image_ImageQuality_Correlation_CorrPM_20,Image_ImageQuality_Correlation_CorrRNA_20,Image_ImageQuality_Correlation_OrigDNA_20,...,Image_ImageQuality_TotalIntensity_CorrGasdermin,Image_ImageQuality_TotalIntensity_CorrMito,Image_ImageQuality_TotalIntensity_CorrPM,Image_ImageQuality_TotalIntensity_CorrRNA,Image_ImageQuality_TotalIntensity_OrigDNA,Image_ImageQuality_TotalIntensity_OrigER,Image_ImageQuality_TotalIntensity_OrigGasdermin,Image_ImageQuality_TotalIntensity_OrigMito,Image_ImageQuality_TotalIntensity_OrigPM,Image_ImageQuality_TotalIntensity_OrigRNA
0,1,70117_20230118MM1_CellPainting_A700_20X_V1,A13,0.403882,0.434506,0.542825,0.573529,0.597844,0.314772,0.402339,...,48879.050781,34841.144531,48721.179688,47153.792969,63551.707031,107106.304688,68543.257812,48249.867188,84069.125,82692.421875
1,2,70117_20230118MM1_CellPainting_A700_20X_V1,A13,0.291322,0.541198,0.600939,0.599302,0.66698,0.399502,0.308004,...,57369.070312,39998.722656,63881.996094,58803.9375,81538.945312,124476.578125,80636.5,55319.164062,111208.203125,104272.976562
2,3,70117_20230118MM1_CellPainting_A700_20X_V1,A13,0.371845,0.252259,0.009633,0.505273,0.589347,0.441325,0.374518,...,54433.957031,38906.957031,54597.066406,51266.492188,71519.992188,111797.804688,76578.726562,53959.65625,95429.734375,90756.078125
3,4,70117_20230118MM1_CellPainting_A700_20X_V1,A13,0.375755,0.817931,0.277198,0.60473,0.613117,0.766828,0.394779,...,55847.441406,38892.527344,53088.859375,55621.476562,74082.757812,137921.578125,79080.132812,54155.644531,92845.429688,99988.484375
4,5,70117_20230118MM1_CellPainting_A700_20X_V1,A13,0.401941,0.13197,0.50704,0.480633,0.624985,0.364825,0.413676,...,46019.976562,32674.126953,43813.699219,40466.992188,55326.621094,92031.796875,64688.136719,45247.652344,76171.96875,71476.976562


## Merge platemap metadata with extracted image features

In [8]:
# Use pycytominer's annotate to merge the platemap into the image features and
# reorder the dataframe. Platemap columns appear with a "Metadata_" prefix in the
# result, so the platemap key is given as "Metadata_well"; the image features are
# joined on "Image_Metadata_Well".
#
# output_file is deliberately not passed: the library default returns the annotated
# dataframe instead of writing it. Older pycytominer releases use the string "none"
# as that default and newer ones use None, so hard-coding "none" risks being treated
# as a filename on newer releases.
#
# NOTE(review): this rebinds image_features_df, and the annotated frame shows
# Metadata_Well rather than Image_Metadata_Well, so re-running this cell without
# first re-running the extraction cell will likely fail on the join key.
image_features_df = annotate(
    profiles=image_features_df,
    platemap=platemap_df,
    join_on=["Metadata_well", "Image_Metadata_Well"],
)

## Save image features data frame as `csv.gz` file

In [9]:
# Make sure the output directory exists so the write does not fail on a fresh checkout
image_features_output_file.parent.mkdir(parents=True, exist_ok=True)

# Save the annotated image features to the csv.gz path using pycytominer's output helper
output(image_features_df, image_features_output_file)

# Print (rows, columns) of the saved dataframe, then preview its first rows
print(image_features_df.shape)
image_features_df.head()

(960, 201)


Unnamed: 0,Metadata_wellName,Metadata_row,Metadata_col,Metadata_alias,Metadata_treatment,Metadata_dose,Metadata_Plate,Metadata_Well,ImageNumber,Image_ImageQuality_Correlation_CorrDNA_20,...,Image_ImageQuality_TotalIntensity_CorrGasdermin,Image_ImageQuality_TotalIntensity_CorrMito,Image_ImageQuality_TotalIntensity_CorrPM,Image_ImageQuality_TotalIntensity_CorrRNA,Image_ImageQuality_TotalIntensity_OrigDNA,Image_ImageQuality_TotalIntensity_OrigER,Image_ImageQuality_TotalIntensity_OrigGasdermin,Image_ImageQuality_TotalIntensity_OrigMito,Image_ImageQuality_TotalIntensity_OrigPM,Image_ImageQuality_TotalIntensity_OrigRNA
0,A13,1,13,1,LPS,10µg/ml,70117_20230118MM1_CellPainting_A700_20X_V1,A13,1,0.403882,...,48879.050781,34841.144531,48721.179688,47153.792969,63551.707031,107106.304688,68543.257812,48249.867188,84069.125,82692.421875
1,A13,1,13,1,LPS,10µg/ml,70117_20230118MM1_CellPainting_A700_20X_V1,A13,2,0.291322,...,57369.070312,39998.722656,63881.996094,58803.9375,81538.945312,124476.578125,80636.5,55319.164062,111208.203125,104272.976562
2,A13,1,13,1,LPS,10µg/ml,70117_20230118MM1_CellPainting_A700_20X_V1,A13,3,0.371845,...,54433.957031,38906.957031,54597.066406,51266.492188,71519.992188,111797.804688,76578.726562,53959.65625,95429.734375,90756.078125
3,A13,1,13,1,LPS,10µg/ml,70117_20230118MM1_CellPainting_A700_20X_V1,A13,4,0.375755,...,55847.441406,38892.527344,53088.859375,55621.476562,74082.757812,137921.578125,79080.132812,54155.644531,92845.429688,99988.484375
4,A13,1,13,1,LPS,10µg/ml,70117_20230118MM1_CellPainting_A700_20X_V1,A13,5,0.401941,...,46019.976562,32674.126953,43813.699219,40466.992188,55326.621094,92031.796875,64688.136719,45247.652344,76171.96875,71476.976562


## View info of the dataframe

In [10]:
# Summarize the annotated dataframe: index range, column count, dtypes, and memory usage
image_features_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 960 entries, 0 to 959
Columns: 201 entries, Metadata_wellName to Image_ImageQuality_TotalIntensity_OrigRNA
dtypes: float64(192), int64(4), object(5)
memory usage: 1.5+ MB
