# Process image features from CellProfiler readout - Only Factin_Adjusted

After discussion, we decided to focus on the Factin_Adjusted plate because it uses the better protocols for the actin channel, which makes it the data we are most interested in.
In this notebook, we create two different CSV outputs, one with the image features (e.g., Granularity, Texture, etc.) and one for the image quality metrics (e.g. blur).

## Import Libraries

In [1]:
import pathlib
import pandas as pd

from pycytominer import annotate
from pycytominer.cyto_utils import output

import sys
sys.path.append("../../utils")
import extraction_utils as extract_utils

## Set up paths to CellProfiler directory and outputs

In [2]:
# Set file and directory constants
cp_dir = "../2.cellprofiler_processing"
output_dir = "data"

# Make sure the output directory exists before anything is written to it;
# writing a CSV into a missing directory raises an error, and this call is a
# no-op when the directory is already there.
pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

## Set paths to the sqlite file, platemap, and output files

In [3]:
# CellProfiler .sqlite output for this plate and the sqlite:/// URL used to read it
sql_file = "localhost220513100001_KK22-05-198_FactinAdjusted.sqlite"
single_cell_file = f"sqlite:///{cp_dir}/CellProfiler_output/{sql_file}"

# Name of the table inside the sqlite file that is loaded in this notebook
image_table_name = "Per_Image"

# Platemap metadata (plate_2 is synonymous with the Factin_Adjusted plate)
platemap_file = "metadata/plate_2_CFReT.csv"

# Destinations of the two compressed CSV files written by this notebook
image_features_output_file = pathlib.Path(output_dir, "image_features_Factin_Adjusted.csv.gz")
image_quality_output_file = pathlib.Path(output_dir, "image_quality_Factin_Adjusted.csv.gz")

## Set variables for extracting image measurements

In [4]:
# Column-name prefixes of the image feature measurements to extract.
# They follow the measurement modules run in the CellProfiler pipeline;
# Texture is included here (the extracted dataframe contains Image_Texture columns).
image_feature_categories = [
    "Image_Correlation",
    "Image_Granularity",
    "Image_Intensity",
    "Image_Texture",
]

# Column-name prefix of the image quality measurements (kept in a separate output)
image_quality_category = ["Image_ImageQuality"]

# Per-image identifier and object-count columns carried along with every extraction
image_cols = [
    "ImageNumber",
    "Image_Count_Cells",
    "Image_Count_Cytoplasm",
    "Image_Count_Nuclei",
]

# Well and plate metadata columns carried along with every extraction
strata = [
    "Image_Metadata_Well",
    "Image_Metadata_Plate",
]

## Load and view platemap file

In [5]:
# Read the platemap (well position, heart number, treatment, and dose per well) and preview it
platemap_df = pd.read_csv(filepath_or_buffer=platemap_file)
platemap_df.head(n=5)

Unnamed: 0,WellRow,WellCol,well_position,heart_number,treatment,dose
0,A,9,A09,9,drug_x,5uM
1,A,10,A10,9,drug_x,10uM
2,A,11,A11,9,drug_x,10uM
3,A,12,A12,9,DMSO,0uM
4,B,9,B09,9,drug_x,5uM


## Load in sqlite file

In [6]:
# Load the CellProfiler image table from the sqlite file into a dataframe
image_df = extract_utils.load_sqlite_as_df(
    single_cell_file,
    image_table_name,
)

# Show (rows, columns), then preview the first rows
print((len(image_df.index), len(image_df.columns)))
image_df.head(n=5)

(512, 614)


Unnamed: 0,ImageNumber,Image_Correlation_Correlation_Actin_ER,Image_Correlation_Correlation_Actin_Golgi,Image_Correlation_Correlation_Actin_Hoechst,Image_Correlation_Correlation_Actin_Mitochondria,Image_Correlation_Correlation_ER_Golgi,Image_Correlation_Correlation_ER_Hoechst,Image_Correlation_Correlation_ER_Mitochondria,Image_Correlation_Correlation_Golgi_Hoechst,Image_Correlation_Correlation_Golgi_Mitochondria,...,Image_URL_Actin,Image_URL_ER,Image_URL_Golgi,Image_URL_Hoechst,Image_URL_Mitochondria,Image_Width_Actin,Image_Width_ER,Image_Width_Golgi,Image_Width_Hoechst,Image_Width_Mitochondria
0,1,0.319439,0.321517,0.085523,0.383154,0.85758,0.359548,0.844906,0.629203,0.916118,...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,1104,1104,1104,1104,1104
1,2,0.316284,0.453749,0.145243,0.434937,0.855621,0.458719,0.866483,0.690948,0.913332,...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,1104,1104,1104,1104,1104
2,3,0.424681,0.416474,0.167123,0.467472,0.925546,0.507574,0.935513,0.668837,0.943327,...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,1104,1104,1104,1104,1104
3,4,0.254302,0.232419,0.03866,0.296697,0.868859,0.475344,0.893075,0.724819,0.920282,...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,1104,1104,1104,1104,1104
4,5,0.223079,0.265874,0.035197,0.308074,0.90086,0.329486,0.895757,0.560036,0.941737,...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,file:/home/jenna/CFReT_data/1.preprocessing-da...,1104,1104,1104,1104,1104


## Extract image features from sqlite file

In [7]:
# Keep the identifier/count columns, the well/plate metadata, and every column
# belonging to the requested image feature categories
image_features_df = extract_utils.extract_image_features(
    image_feature_categories,
    image_df,
    image_cols,
    strata,
)

# Show (rows, columns), then preview the first rows
print((len(image_features_df.index), len(image_features_df.columns)))
image_features_df.head(n=5)

(512, 511)


Unnamed: 0,ImageNumber,Image_Count_Cells,Image_Count_Cytoplasm,Image_Count_Nuclei,Image_Metadata_Plate,Image_Metadata_Well,Image_Correlation_Correlation_Actin_ER,Image_Correlation_Correlation_Actin_Golgi,Image_Correlation_Correlation_Actin_Hoechst,Image_Correlation_Correlation_Actin_Mitochondria,...,Image_Texture_Variance_Golgi_3_02_256,Image_Texture_Variance_Golgi_3_03_256,Image_Texture_Variance_Hoechst_3_00_256,Image_Texture_Variance_Hoechst_3_01_256,Image_Texture_Variance_Hoechst_3_02_256,Image_Texture_Variance_Hoechst_3_03_256,Image_Texture_Variance_Mitochondria_3_00_256,Image_Texture_Variance_Mitochondria_3_01_256,Image_Texture_Variance_Mitochondria_3_02_256,Image_Texture_Variance_Mitochondria_3_03_256
0,1,34,34,44,localhost220513100001,A09,0.319439,0.321517,0.085523,0.383154,...,52.706348,52.74987,91.719909,91.54343,91.470904,91.54182,65.783776,65.846346,65.777592,65.846211
1,2,23,23,37,localhost220513100001,A09,0.316284,0.453749,0.145243,0.434937,...,38.904857,38.935283,60.510194,60.549699,60.527953,60.549702,44.81865,44.84646,44.806184,44.846421
2,3,16,16,25,localhost220513100001,A09,0.424681,0.416474,0.167123,0.467472,...,38.207461,38.226081,38.139204,38.106845,38.133159,38.106841,50.20008,50.285655,50.230629,50.285723
3,4,30,30,44,localhost220513100001,A09,0.254302,0.232419,0.03866,0.296697,...,50.203604,50.256349,106.29729,106.425268,106.365254,106.430593,56.979249,57.034142,56.989758,57.034399
4,5,20,20,29,localhost220513100001,A09,0.223079,0.265874,0.035197,0.308074,...,64.981442,65.074773,56.298831,56.222616,56.305175,56.21861,93.277867,93.388141,93.274971,93.388373


## Extract image quality from sqlite file

In [8]:
# Same extraction as for the image features, but restricted to the image quality category
image_quality_df = extract_utils.extract_image_features(
    image_quality_category,
    image_df,
    image_cols,
    strata,
)

# Show (rows, columns), then preview the first rows
print((len(image_quality_df.index), len(image_quality_df.columns)))
image_quality_df.head(n=5)

(512, 31)


Unnamed: 0,ImageNumber,Image_Count_Cells,Image_Count_Cytoplasm,Image_Count_Nuclei,Image_Metadata_Plate,Image_Metadata_Well,Image_ImageQuality_Correlation_Actin_20,Image_ImageQuality_Correlation_ER_20,Image_ImageQuality_Correlation_Golgi_20,Image_ImageQuality_Correlation_Hoechst_20,...,Image_ImageQuality_PowerLogLogSlope_Actin,Image_ImageQuality_PowerLogLogSlope_ER,Image_ImageQuality_PowerLogLogSlope_Golgi,Image_ImageQuality_PowerLogLogSlope_Hoechst,Image_ImageQuality_PowerLogLogSlope_Mitochondria,Image_ImageQuality_Scaling_Actin,Image_ImageQuality_Scaling_ER,Image_ImageQuality_Scaling_Golgi,Image_ImageQuality_Scaling_Hoechst,Image_ImageQuality_Scaling_Mitochondria
0,1,34,34,44,localhost220513100001,A09,0.237135,0.465877,0.431789,0.240001,...,-1.334364,-2.108479,-2.053092,-1.888484,-1.908435,255.0,255.0,255.0,255.0,255.0
1,2,23,23,37,localhost220513100001,A09,0.429162,0.489337,0.697063,0.417105,...,-1.652742,-2.600385,-2.600577,-2.806157,-2.19933,255.0,255.0,255.0,255.0,255.0
2,3,16,16,25,localhost220513100001,A09,0.628649,0.552359,0.477154,0.482263,...,-1.670223,-2.762701,-2.558045,-2.768658,-2.280276,255.0,255.0,255.0,255.0,255.0
3,4,30,30,44,localhost220513100001,A09,0.292559,0.397774,0.350588,0.313631,...,-1.418383,-2.170335,-2.142809,-2.207902,-2.006711,255.0,255.0,255.0,255.0,255.0
4,5,20,20,29,localhost220513100001,A09,0.274312,0.46267,0.487833,0.384392,...,-1.379144,-2.261329,-2.368629,-1.815138,-2.082494,255.0,255.0,255.0,255.0,255.0


## Merge platemap metadata with extracted image features and image quality

In [9]:
def _annotate_with_platemap(profiles_df):
    """Merge the platemap metadata onto an extracted image dataframe.

    Uses pycytominer's annotate to join the platemap on its well position
    column against the well column of the image data, and returns the
    annotated dataframe instead of writing it to disk.

    Parameters
    ----------
    profiles_df : pandas.DataFrame
        Extracted image measurements that include an Image_Metadata_Well column.

    Returns
    -------
    pandas.DataFrame
        The input measurements with the platemap columns added as metadata.
    """
    return annotate(
        profiles=profiles_df,
        platemap=platemap_df,
        join_on=["Metadata_well_position", "Image_Metadata_Well"],
        output_file="none",
    )


# NOTE(review): both names are overwritten with their annotated versions, and the
# annotated frames appear to no longer carry Image_Metadata_Well, so re-running this
# cell without re-running the extraction cells presumably fails -- confirm.
image_features_df = _annotate_with_platemap(image_features_df)
image_quality_df = _annotate_with_platemap(image_quality_df)

## Save image features data frame as `csv.gz` file

In [10]:
# Write the annotated image features to their compressed CSV output file
output(image_features_df, image_features_output_file)

# Show (rows, columns), then preview the first rows of what was saved
print((len(image_features_df.index), len(image_features_df.columns)))
image_features_df.head(n=5)

(512, 516)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_heart_number,Metadata_treatment,Metadata_dose,Metadata_Plate,Metadata_Well,ImageNumber,Image_Count_Cells,Image_Count_Cytoplasm,...,Image_Texture_Variance_Golgi_3_02_256,Image_Texture_Variance_Golgi_3_03_256,Image_Texture_Variance_Hoechst_3_00_256,Image_Texture_Variance_Hoechst_3_01_256,Image_Texture_Variance_Hoechst_3_02_256,Image_Texture_Variance_Hoechst_3_03_256,Image_Texture_Variance_Mitochondria_3_00_256,Image_Texture_Variance_Mitochondria_3_01_256,Image_Texture_Variance_Mitochondria_3_02_256,Image_Texture_Variance_Mitochondria_3_03_256
0,A,9,9,drug_x,5uM,localhost220513100001,A09,1,34,34,...,52.706348,52.74987,91.719909,91.54343,91.470904,91.54182,65.783776,65.846346,65.777592,65.846211
1,A,9,9,drug_x,5uM,localhost220513100001,A09,2,23,23,...,38.904857,38.935283,60.510194,60.549699,60.527953,60.549702,44.81865,44.84646,44.806184,44.846421
2,A,9,9,drug_x,5uM,localhost220513100001,A09,3,16,16,...,38.207461,38.226081,38.139204,38.106845,38.133159,38.106841,50.20008,50.285655,50.230629,50.285723
3,A,9,9,drug_x,5uM,localhost220513100001,A09,4,30,30,...,50.203604,50.256349,106.29729,106.425268,106.365254,106.430593,56.979249,57.034142,56.989758,57.034399
4,A,9,9,drug_x,5uM,localhost220513100001,A09,5,20,20,...,64.981442,65.074773,56.298831,56.222616,56.305175,56.21861,93.277867,93.388141,93.274971,93.388373


## Save image quality dataframe as `csv.gz` file

In [11]:
# Write the annotated image quality metrics to their compressed CSV output file
output(image_quality_df, image_quality_output_file)

# Show (rows, columns), then preview the first rows of what was saved
print((len(image_quality_df.index), len(image_quality_df.columns)))
image_quality_df.head(n=5)

(512, 36)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_heart_number,Metadata_treatment,Metadata_dose,Metadata_Plate,Metadata_Well,ImageNumber,Image_Count_Cells,Image_Count_Cytoplasm,...,Image_ImageQuality_PowerLogLogSlope_Actin,Image_ImageQuality_PowerLogLogSlope_ER,Image_ImageQuality_PowerLogLogSlope_Golgi,Image_ImageQuality_PowerLogLogSlope_Hoechst,Image_ImageQuality_PowerLogLogSlope_Mitochondria,Image_ImageQuality_Scaling_Actin,Image_ImageQuality_Scaling_ER,Image_ImageQuality_Scaling_Golgi,Image_ImageQuality_Scaling_Hoechst,Image_ImageQuality_Scaling_Mitochondria
0,A,9,9,drug_x,5uM,localhost220513100001,A09,1,34,34,...,-1.334364,-2.108479,-2.053092,-1.888484,-1.908435,255.0,255.0,255.0,255.0,255.0
1,A,9,9,drug_x,5uM,localhost220513100001,A09,2,23,23,...,-1.652742,-2.600385,-2.600577,-2.806157,-2.19933,255.0,255.0,255.0,255.0,255.0
2,A,9,9,drug_x,5uM,localhost220513100001,A09,3,16,16,...,-1.670223,-2.762701,-2.558045,-2.768658,-2.280276,255.0,255.0,255.0,255.0,255.0
3,A,9,9,drug_x,5uM,localhost220513100001,A09,4,30,30,...,-1.418383,-2.170335,-2.142809,-2.207902,-2.006711,255.0,255.0,255.0,255.0,255.0
4,A,9,9,drug_x,5uM,localhost220513100001,A09,5,20,20,...,-1.379144,-2.261329,-2.368629,-1.815138,-2.082494,255.0,255.0,255.0,255.0,255.0


## View info of the dataframes

In [12]:
# Summarize the annotated image features: index range, column count, dtype counts, and memory usage
image_features_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 512 entries, 0 to 511
Columns: 516 entries, Metadata_WellRow to Image_Texture_Variance_Mitochondria_3_03_256
dtypes: float64(500), int64(11), object(5)
memory usage: 2.0+ MB


In [13]:
# Summarize the annotated image quality metrics, listing each column with its non-null count and dtype
image_quality_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 512 entries, 0 to 511
Data columns (total 36 columns):
 #   Column                                              Non-Null Count  Dtype  
---  ------                                              --------------  -----  
 0   Metadata_WellRow                                    512 non-null    object 
 1   Metadata_WellCol                                    512 non-null    int64  
 2   Metadata_heart_number                               512 non-null    int64  
 3   Metadata_treatment                                  512 non-null    object 
 4   Metadata_dose                                       512 non-null    object 
 5   Metadata_Plate                                      512 non-null    object 
 6   Metadata_Well                                       512 non-null    object 
 7   ImageNumber                                         512 non-null    int64  
 8   Image_Count_Cells                                   512 non-null    int64  
 9  