forked from WayScience/Benchmarking_NF1_data
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add documentation and converted notebooks
- Loading branch information
1 parent
6b917a4
commit 347c191
Showing
5 changed files
with
811 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
302 changes: 302 additions & 0 deletions
302
4_processing_features/Plate1/extract_sc_features_cp_plate1.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,302 @@ | ||
#!/usr/bin/env python | ||
# coding: utf-8 | ||
|
||
# # Process single cell morphology features for CellProfiler readouts - Plate 1 | ||
|
||
# ## Import Libraries | ||
|
||
# In[1]: | ||
|
||
|
||
import pathlib | ||
import pandas as pd | ||
|
||
from pycytominer import normalize, feature_select | ||
from pycytominer.cyto_utils import cells, output | ||
|
||
|
||
# ## Set up paths to CellProfiler directory and outputs | ||
|
||
# In[2]: | ||
|
||
|
||
# Set file and directory constants | ||
cp_dir = "../../CellProfiler_pipelines" | ||
output_dir = "../data/Plate1/" | ||
|
||
|
||
# ## Set up paths to sqlite files and outputs | ||
|
||
# ### All CellProfiler Method | ||
|
||
# In[3]: | ||
|
||
|
||
# Set name and path of .sqlite file and path to metadata | ||
sql_file = "NF1_data_allcp_plate1.sqlite" | ||
single_cell_file = f"sqlite:///{cp_dir}/Analysis_Output/Plate1_Output/{sql_file}" | ||
platemap_file = f"{cp_dir}/Metadata/platemap_NF1_CP.csv" | ||
|
||
# Set path with name for outputted data | ||
sc_output_file = pathlib.Path(f"{output_dir}/nf1_sc_cellprofiler.csv.gz") | ||
sc_norm_output_file = pathlib.Path(f"{output_dir}/nf1_sc_norm_cellprofiler.csv.gz") | ||
sc_norm_fs_output_file = pathlib.Path(f"{output_dir}/nf1_sc_norm_fs_cellprofiler.csv.gz") | ||
|
||
|
||
# ### PyBaSiC and CellProfiler Method | ||
|
||
# In[4]: | ||
|
||
|
||
# Set name and path of .sqlite file and path to metadata | ||
sql_file_pbcp = "NF1_data_pybasic_cp_plate1.sqlite" | ||
single_cell_file_pbcp = f"sqlite:///{cp_dir}/Analysis_Output/Plate1_Output/{sql_file}" | ||
platemap_file = f"{cp_dir}/Metadata/platemap_NF1_CP.csv" | ||
|
||
# Set path with name for outputted data | ||
sc_output_file_pbcp = pathlib.Path(f"{output_dir}/nf1_sc_pybasic_cellprofiler.csv.gz") | ||
sc_norm_output_file_pbcp = pathlib.Path(f"{output_dir}/nf1_sc_norm_pybasic_cellprofiler.csv.gz") | ||
sc_norm_fs_output_file_pbcp = pathlib.Path(f"{output_dir}/nf1_sc_norm_fs_pybasic_cellprofiler.csv.gz") | ||
|
||
|
||
# ## Set up names for linking columns between tables in the database file | ||
|
||
# In[5]: | ||
|
||
|
||
# Define custom linking columns between compartments | ||
linking_cols = { | ||
"Per_Cytoplasm": { | ||
"Per_Cells": "Cytoplasm_Parent_Cells", | ||
"Per_Nuclei": "Cytoplasm_Parent_Nuclei", | ||
}, | ||
"Per_Cells": {"Per_Cytoplasm": "Cells_Number_Object_Number"}, | ||
"Per_Nuclei": {"Per_Cytoplasm": "Nuclei_Number_Object_Number"}, | ||
} | ||
|
||
|
||
# ## All CellProfiler Method | ||
|
||
# ### Load and view platemap file | ||
|
||
# In[6]: | ||
|
||
|
||
# Load platemap file | ||
platemap_df = pd.read_csv(platemap_file) | ||
platemap_df | ||
|
||
|
||
# ### Set up `SingleCells` class from Pycytominer | ||
|
||
# In[7]: | ||
|
||
|
||
# Instantiate SingleCells class | ||
sc = cells.SingleCells( | ||
sql_file=single_cell_file, | ||
compartments=["Per_Cells", "Per_Cytoplasm", "Per_Nuclei"], | ||
compartment_linking_cols=linking_cols, | ||
image_table_name="Per_Image", | ||
strata=["Image_Metadata_Well", "Image_Metadata_Plate"], | ||
merge_cols=["ImageNumber"], | ||
image_cols="ImageNumber", | ||
load_image_data=True | ||
) | ||
|
||
|
||
# ### Merge single cells | ||
|
||
# In[8]: | ||
|
||
|
||
# Merge single cells across compartments | ||
anno_kwargs = {"join_on": ["Metadata_well_position", "Image_Metadata_Well"]} | ||
|
||
sc_df = sc.merge_single_cells( | ||
platemap=platemap_df, | ||
**anno_kwargs, | ||
) | ||
|
||
# Save level 2 data as a csv | ||
output(sc_df, sc_output_file) | ||
|
||
print(sc_df.shape) | ||
sc_df.head() | ||
|
||
|
||
# ### Normalize Data | ||
|
||
# In[9]: | ||
|
||
|
||
# Normalize single cell data and write to file | ||
normalize_sc_df = normalize( | ||
sc_df, | ||
method="standardize" | ||
) | ||
|
||
output(normalize_sc_df, sc_norm_output_file) | ||
|
||
print(normalize_sc_df.shape) | ||
normalize_sc_df.head() | ||
|
||
|
||
# ### Feature Selection | ||
|
||
# In[10]: | ||
|
||
|
||
feature_select_ops = [ | ||
"variance_threshold", | ||
"correlation_threshold", | ||
"blocklist", | ||
] | ||
|
||
feature_select_norm_sc_df = feature_select( | ||
normalize_sc_df, | ||
operation=feature_select_ops | ||
) | ||
|
||
output(feature_select_norm_sc_df, sc_norm_fs_output_file) | ||
|
||
print(feature_select_norm_sc_df.shape) | ||
feature_select_norm_sc_df.head() | ||
|
||
|
||
# --- | ||
# | ||
# ### Visualize basic count statistics for All CellProfiler Method | ||
|
||
# In[11]: | ||
|
||
|
||
sc_df.Metadata_genotype.value_counts() | ||
|
||
|
||
# In[12]: | ||
|
||
|
||
pd.crosstab(sc_df.Metadata_genotype, sc_df.Metadata_Well) | ||
|
||
|
||
# --- | ||
# | ||
# ## PyBaSiC and CellProfiler Method | ||
|
||
# In[13]: | ||
|
||
|
||
# Set name and path of .sqlite file and path to metadata | ||
sql_file_pbcp = "NF1_data_pybasic_cp_plate1.sqlite" | ||
single_cell_file_pbcp = f"sqlite:///{cp_dir}/Analysis_Output/Plate1_Output/{sql_file}" | ||
platemap_file = f"{cp_dir}/Metadata/platemap_NF1_CP.csv" | ||
|
||
# Set path with name for outputted data | ||
sc_output_file_pbcp = pathlib.Path(f"{output_dir}/nf1_sc_pybasic_cellprofiler.csv.gz") | ||
sc_norm_output_file_pbcp = pathlib.Path(f"{output_dir}/nf1_sc_norm_pybasic_cellprofiler.csv.gz") | ||
sc_norm_fs_output_file_pbcp = pathlib.Path(f"{output_dir}/nf1_sc_norm_fs_pybasic_cellprofiler.csv.gz") | ||
|
||
|
||
# ### Load and view platemap file | ||
|
||
# In[14]: | ||
|
||
|
||
# Load platemap file | ||
platemap_file = pd.read_csv(platemap_file) | ||
platemap_file.head() | ||
|
||
|
||
# ### Set up `SingleCells` class from Pycytominer | ||
|
||
# In[15]: | ||
|
||
|
||
# Instantiate SingleCells class | ||
sc_pbcp = cells.SingleCells( | ||
sql_file=single_cell_file_pbcp, | ||
compartments=["Per_Cells", "Per_Cytoplasm", "Per_Nuclei"], | ||
compartment_linking_cols=linking_cols, | ||
image_table_name="Per_Image", | ||
strata=["Image_Metadata_Well", "Image_Metadata_Plate"], | ||
merge_cols=["ImageNumber"], | ||
image_cols="ImageNumber", | ||
load_image_data=True | ||
) | ||
|
||
|
||
# ### Merge single cells | ||
|
||
# In[16]: | ||
|
||
|
||
# Merge single cells across compartments | ||
anno_kwargs = {"join_on": ["Metadata_well_position", "Image_Metadata_Well"]} | ||
|
||
sc_df_pbcp = sc_pbcp.merge_single_cells( | ||
platemap=platemap_file, | ||
**anno_kwargs, | ||
) | ||
|
||
# Save level 2 data as a csv | ||
output(sc_df_pbcp, sc_output_file_pbcp) | ||
|
||
print(sc_df_pbcp.shape) | ||
sc_df_pbcp.head() | ||
|
||
|
||
# ### Normalize data | ||
|
||
# In[17]: | ||
|
||
|
||
# Normalize single cell data and write to file | ||
normalize_sc_pbcp = normalize( | ||
sc_df_pbcp, | ||
method="standardize" | ||
) | ||
|
||
output(normalize_sc_pbcp, sc_norm_output_file_pbcp) | ||
|
||
print(normalize_sc_pbcp.shape) | ||
normalize_sc_pbcp.head() | ||
|
||
|
||
# ### Feature selection | ||
|
||
# In[18]: | ||
|
||
|
||
feature_select_ops = [ | ||
"variance_threshold", | ||
"correlation_threshold", | ||
"blocklist", | ||
] | ||
|
||
feature_select_norm_sc_pbcp = feature_select( | ||
normalize_sc_pbcp, | ||
operation=feature_select_ops | ||
) | ||
|
||
output(feature_select_norm_sc_pbcp, sc_norm_fs_output_file_pbcp) | ||
|
||
print(feature_select_norm_sc_pbcp.shape) | ||
feature_select_norm_sc_pbcp.head() | ||
|
||
|
||
# --- | ||
# | ||
# ### Visualize basic count statistics for PyBaSiC and CellProfiler Method | ||
|
||
# In[19]: | ||
|
||
|
||
sc_df_pbcp.Metadata_genotype.value_counts() | ||
|
||
|
||
# In[20]: | ||
|
||
|
||
pd.crosstab(sc_df_pbcp.Metadata_genotype, sc_df_pbcp.Metadata_Well) | ||
|
Oops, something went wrong.