# Process image features from CellProfiler readout

## Import Libraries

In [1]:
import pathlib
import pandas as pd

from pycytominer import annotate
from pycytominer.cyto_utils import output

import sys
sys.path.append("../../utils")
import extract_image_features_utils as extract_utils

## Set up paths to CellProfiler directory and outputs

In [2]:
# Directory constants, given relative to the notebook's working directory.
# output_dir: folder that receives the processed image-feature file
output_dir = "data"
# cp_dir: CellProfiler analysis step; the sqlite readout is looked up beneath it
cp_dir = "../2.cellprofiler_analysis"

## Set paths to the sqlite file, platemap metadata, and output file

In [3]:
# Name of the table to read from the sqlite file
image_table_name = "Per_Image"

# Platemap metadata (csv)
platemap_file = "../../metadata/Interstellar_platemap.csv"

# CellProfiler sqlite output, written as a "sqlite:///" connection string
# (this is what gets handed to extract_utils.load_sqlite_as_df)
sql_file = "interstellar_wave2.sqlite"
single_cell_file = f"sqlite:///{cp_dir}/analysis_output/{sql_file}"

# Destination path for the processed image features
image_features_output_file = pathlib.Path(f"{output_dir}/interstellar_wave2.csv.gz")

## Set variables for extracting image features

In [4]:
# Feature categories to extract; they are based on the measurement modules
# run in the CellProfiler pipeline
image_feature_categories = [
    "Image_Correlation",
    "Image_Granularity",
    "Image_ImageQuality",
    "Image_Intensity",
]
# Image identifier column and well/plate metadata columns, both passed to
# extract_utils.extract_image_features alongside the categories
image_cols = "ImageNumber"
strata = ["Image_Metadata_Well", "Image_Metadata_Plate"]

## Load and view platemap file

In [5]:
# Read the platemap csv into a dataframe, then preview its first five rows
platemap_df = pd.read_csv(platemap_file)
platemap_df.head(5)

Unnamed: 0,well,wellName,row,col,alias,treatment,dose
0,A01,A1,1,1,1,LPS,10µg/ml
1,B01,B1,2,1,2,LPS,1µg/ml
2,C01,C1,3,1,3,H2O2,500µM
3,D01,D1,4,1,4,H2O2,50µM
4,E01,E1,5,1,5,ATP,1mM


## Load in sqlite file

In [6]:
# Read the image table from the sqlite file into a dataframe
image_df = extract_utils.load_sqlite_as_df(single_cell_file, image_table_name)

# Report (rows, columns), then preview the first five rows
print(image_df.shape)
image_df.head(5)

(1152, 262)


Unnamed: 0,ImageNumber,Image_Correlation_Correlation_CorrASC_Nothing_CorrASC_Spillover,Image_Correlation_Correlation_CorrASC_Nothing_CorrDNA,Image_Correlation_Correlation_CorrASC_Nothing_CorrMito,Image_Correlation_Correlation_CorrASC_Spillover_CorrDNA,Image_Correlation_Correlation_CorrASC_Spillover_CorrMito,Image_Correlation_Correlation_CorrDNA_CorrMito,Image_Correlation_Costes_CorrASC_Nothing_CorrASC_Spillover,Image_Correlation_Costes_CorrASC_Nothing_CorrDNA,Image_Correlation_Costes_CorrASC_Nothing_CorrMito,...,Image_URL_OrigDNA,Image_URL_OrigMito,Image_Width_IllumASC_Nothing,Image_Width_IllumASC_Spillover,Image_Width_IllumDNA,Image_Width_IllumMito,Image_Width_OrigASC_Nothing,Image_Width_OrigASC_Spillover,Image_Width_OrigDNA,Image_Width_OrigMito
0,1,0.326864,0.199036,0.339608,0.4499,0.791279,0.632607,1.0,0.188597,1.0,...,file:/home/jenna/Interstellar_Project/1.wave2_...,file:/home/jenna/Interstellar_Project/1.wave2_...,2160,2160,2160,2160,2160,2160,2160,2160
1,2,0.350382,0.185641,0.353833,0.457126,0.836053,0.588348,1.0,1.0,1.0,...,file:/home/jenna/Interstellar_Project/1.wave2_...,file:/home/jenna/Interstellar_Project/1.wave2_...,2160,2160,2160,2160,2160,2160,2160,2160
2,3,0.368232,0.181695,0.364433,0.473028,0.861336,0.589386,1.0,1.0,0.34019,...,file:/home/jenna/Interstellar_Project/1.wave2_...,file:/home/jenna/Interstellar_Project/1.wave2_...,2160,2160,2160,2160,2160,2160,2160,2160
3,4,0.367947,0.191055,0.364856,0.489311,0.852192,0.629633,0.345266,0.17885,0.352046,...,file:/home/jenna/Interstellar_Project/1.wave2_...,file:/home/jenna/Interstellar_Project/1.wave2_...,2160,2160,2160,2160,2160,2160,2160,2160
4,5,0.334973,0.210069,0.338661,0.626342,0.983291,0.584879,1.0,1.0,1.0,...,file:/home/jenna/Interstellar_Project/1.wave2_...,file:/home/jenna/Interstellar_Project/1.wave2_...,2160,2160,2160,2160,2160,2160,2160,2160


## Extract image features from sqlite file

In [7]:
# Build the image-feature dataframe from the full image table.
# NOTE(review): extract_image_features is defined in ../../utils; judging by the
# displayed result it keeps the ImageNumber / plate / well columns plus the
# columns belonging to the requested categories. Confirm against the helper.
image_features_df = extract_utils.extract_image_features(
    image_feature_categories,
    image_df,
    image_cols,
    strata,
)

# Report (rows, columns), then preview the first five rows
print(image_features_df.shape)
image_features_df.head(5)

(1152, 173)


Unnamed: 0,ImageNumber,Image_Metadata_Plate,Image_Metadata_Well,Image_Correlation_Correlation_CorrASC_Nothing_CorrASC_Spillover,Image_Correlation_Correlation_CorrASC_Nothing_CorrDNA,Image_Correlation_Correlation_CorrASC_Nothing_CorrMito,Image_Correlation_Correlation_CorrASC_Spillover_CorrDNA,Image_Correlation_Correlation_CorrASC_Spillover_CorrMito,Image_Correlation_Correlation_CorrDNA_CorrMito,Image_Correlation_Costes_CorrASC_Nothing_CorrASC_Spillover,...,Image_Intensity_TotalArea_CorrDNA,Image_Intensity_TotalArea_CorrMito,Image_Intensity_TotalIntensity_CorrASC_Nothing,Image_Intensity_TotalIntensity_CorrASC_Spillover,Image_Intensity_TotalIntensity_CorrDNA,Image_Intensity_TotalIntensity_CorrMito,Image_Intensity_UpperQuartileIntensity_CorrASC_Nothing,Image_Intensity_UpperQuartileIntensity_CorrASC_Spillover,Image_Intensity_UpperQuartileIntensity_CorrDNA,Image_Intensity_UpperQuartileIntensity_CorrMito
0,1,70117_20230118MM1_Ab Test_ASC,I05,0.326864,0.199036,0.339608,0.4499,0.791279,0.632607,1.0,...,4665600,4665600,36515.609375,104517.195312,59313.171875,67360.164062,0.008125,0.029968,0.012307,0.019521
1,2,70117_20230118MM1_Ab Test_ASC,I05,0.350382,0.185641,0.353833,0.457126,0.836053,0.588348,1.0,...,4665600,4665600,35387.605469,103477.421875,59061.085938,66604.078125,0.007877,0.029796,0.007854,0.019221
2,3,70117_20230118MM1_Ab Test_ASC,I05,0.368232,0.181695,0.364433,0.473028,0.861336,0.589386,1.0,...,4665600,4665600,34720.488281,102763.734375,58926.574219,66208.085938,0.00773,0.029346,0.008051,0.018951
3,4,70117_20230118MM1_Ab Test_ASC,I05,0.367947,0.191055,0.364856,0.489311,0.852192,0.629633,0.345266,...,4665600,4665600,34208.0625,102403.734375,58863.046875,65895.007812,0.007616,0.028611,0.01323,0.018597
4,5,70117_20230118MM1_Ab Test_ASC,I05,0.334973,0.210069,0.338661,0.626342,0.983291,0.584879,1.0,...,4665600,4665600,36979.464844,122280.648438,85388.195312,81200.867188,0.00823,0.034008,0.029648,0.022478


## Merge platemap metadata with extracted image features

In [8]:
# Merge the platemap with the image features using pycytominer's annotate,
# which also reorders the dataframe so the metadata columns come first.
#
# join_on is [platemap column, profiles column]. The platemap's "well" column is
# referenced as "Metadata_well" because annotate prefixes platemap columns with
# "Metadata_" before merging.
#
# output_file is deliberately left at its default so the annotated dataframe is
# returned instead of written to disk. The former explicit output_file="none"
# relied on a string sentinel used by older pycytominer releases; newer releases
# appear to use None instead, where "none" would be read as a file name.
#
# NOTE: this cell overwrites image_features_df with the annotated result (the
# well/plate columns come back renamed to Metadata_Well / Metadata_Plate), so
# re-run the extraction cell before re-running this one.
image_features_df = annotate(
    profiles=image_features_df,
    platemap=platemap_df,
    join_on=["Metadata_well", "Image_Metadata_Well"],
)

## Add condition as a metadata column to the dataframe

In [9]:
# Plate column number -> condition code.
# NOTE(review): any column number missing from this mapping would become NaN;
# what condition codes 7 and 8 stand for is not stated in this notebook.
col_to_condition = {5: 8, 8: 8, 6: 7, 7: 7}

# Create (or overwrite) the condition column; a new column lands at the end
image_features_df["Metadata_condition"] = image_features_df["Metadata_col"].map(col_to_condition)

# Pull the column back out so it can be re-inserted at zero-based position 3,
# which places it directly after Metadata_col among the metadata columns
condition_column = image_features_df.pop("Metadata_condition")
image_features_df.insert(loc=3, column="Metadata_condition", value=condition_column)

# Report (rows, columns), then preview the first five rows
print(image_features_df.shape)
image_features_df.head(5)

(1152, 180)


Unnamed: 0,Metadata_wellName,Metadata_row,Metadata_col,Metadata_condition,Metadata_alias,Metadata_treatment,Metadata_dose,Metadata_Plate,Metadata_Well,ImageNumber,...,Image_Intensity_TotalArea_CorrDNA,Image_Intensity_TotalArea_CorrMito,Image_Intensity_TotalIntensity_CorrASC_Nothing,Image_Intensity_TotalIntensity_CorrASC_Spillover,Image_Intensity_TotalIntensity_CorrDNA,Image_Intensity_TotalIntensity_CorrMito,Image_Intensity_UpperQuartileIntensity_CorrASC_Nothing,Image_Intensity_UpperQuartileIntensity_CorrASC_Spillover,Image_Intensity_UpperQuartileIntensity_CorrDNA,Image_Intensity_UpperQuartileIntensity_CorrMito
0,I5,9,5,8,1,LPS,10µg/ml,70117_20230118MM1_Ab Test_ASC,I05,1,...,4665600,4665600,36515.609375,104517.195312,59313.171875,67360.164062,0.008125,0.029968,0.012307,0.019521
1,I5,9,5,8,1,LPS,10µg/ml,70117_20230118MM1_Ab Test_ASC,I05,2,...,4665600,4665600,35387.605469,103477.421875,59061.085938,66604.078125,0.007877,0.029796,0.007854,0.019221
2,I5,9,5,8,1,LPS,10µg/ml,70117_20230118MM1_Ab Test_ASC,I05,3,...,4665600,4665600,34720.488281,102763.734375,58926.574219,66208.085938,0.00773,0.029346,0.008051,0.018951
3,I5,9,5,8,1,LPS,10µg/ml,70117_20230118MM1_Ab Test_ASC,I05,4,...,4665600,4665600,34208.0625,102403.734375,58863.046875,65895.007812,0.007616,0.028611,0.01323,0.018597
4,I5,9,5,8,1,LPS,10µg/ml,70117_20230118MM1_Ab Test_ASC,I05,5,...,4665600,4665600,36979.464844,122280.648438,85388.195312,81200.867188,0.00823,0.034008,0.029648,0.022478


In [10]:
# Sanity check: list the distinct condition codes assigned above
image_features_df["Metadata_condition"].unique()

array([8, 7])

## Save image features data frame as `csv.gz` file

In [11]:
# Save image feature data as a csv
output(image_features_df, image_features_output_file)

print(image_features_df.shape)
image_features_df.head()

(1152, 180)


Unnamed: 0,Metadata_wellName,Metadata_row,Metadata_col,Metadata_condition,Metadata_alias,Metadata_treatment,Metadata_dose,Metadata_Plate,Metadata_Well,ImageNumber,...,Image_Intensity_TotalArea_CorrDNA,Image_Intensity_TotalArea_CorrMito,Image_Intensity_TotalIntensity_CorrASC_Nothing,Image_Intensity_TotalIntensity_CorrASC_Spillover,Image_Intensity_TotalIntensity_CorrDNA,Image_Intensity_TotalIntensity_CorrMito,Image_Intensity_UpperQuartileIntensity_CorrASC_Nothing,Image_Intensity_UpperQuartileIntensity_CorrASC_Spillover,Image_Intensity_UpperQuartileIntensity_CorrDNA,Image_Intensity_UpperQuartileIntensity_CorrMito
0,I5,9,5,8,1,LPS,10µg/ml,70117_20230118MM1_Ab Test_ASC,I05,1,...,4665600,4665600,36515.609375,104517.195312,59313.171875,67360.164062,0.008125,0.029968,0.012307,0.019521
1,I5,9,5,8,1,LPS,10µg/ml,70117_20230118MM1_Ab Test_ASC,I05,2,...,4665600,4665600,35387.605469,103477.421875,59061.085938,66604.078125,0.007877,0.029796,0.007854,0.019221
2,I5,9,5,8,1,LPS,10µg/ml,70117_20230118MM1_Ab Test_ASC,I05,3,...,4665600,4665600,34720.488281,102763.734375,58926.574219,66208.085938,0.00773,0.029346,0.008051,0.018951
3,I5,9,5,8,1,LPS,10µg/ml,70117_20230118MM1_Ab Test_ASC,I05,4,...,4665600,4665600,34208.0625,102403.734375,58863.046875,65895.007812,0.007616,0.028611,0.01323,0.018597
4,I5,9,5,8,1,LPS,10µg/ml,70117_20230118MM1_Ab Test_ASC,I05,5,...,4665600,4665600,36979.464844,122280.648438,85388.195312,81200.867188,0.00823,0.034008,0.029648,0.022478


## View info of the dataframe

In [12]:
# Summarise the final dataframe: index range, column count, dtype counts and memory usage
image_features_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1152 entries, 0 to 1151
Columns: 180 entries, Metadata_wellName to Image_Intensity_UpperQuartileIntensity_CorrMito
dtypes: float64(166), int64(9), object(5)
memory usage: 1.6+ MB
