Merge pull request WayScience#35 from jenna-tomkinson/plate2_cellprofiler

run plate 2 through cp and extract sc
d33bs · Jan 23, 2023 · 0ba2392 · 0ba2392
2 parents 263a94c + 310c12f
commit 0ba2392
Show file tree

Hide file tree

Showing 13 changed files with 1,749 additions and 40 deletions.
diff --git a/4_processing_features/data/nf1_sc_cellprofiler.csv.gz b/4_processing_features/data/nf1_sc_cellprofiler.csv.gz
diff --git a/4_processing_features/data/nf1_sc_cellprofiler_plate2.csv.gz b/4_processing_features/data/nf1_sc_cellprofiler_plate2.csv.gz
diff --git a/4_processing_features/data/nf1_sc_norm_cellprofiler.csv.gz b/4_processing_features/data/nf1_sc_norm_cellprofiler.csv.gz
diff --git a/4_processing_features/data/nf1_sc_norm_cellprofiler_plate2.csv.gz b/4_processing_features/data/nf1_sc_norm_cellprofiler_plate2.csv.gz
diff --git a/4_processing_features/data/nf1_sc_norm_fs_cellprofiler.csv.gz b/4_processing_features/data/nf1_sc_norm_fs_cellprofiler.csv.gz
diff --git a/4_processing_features/data/nf1_sc_norm_fs_cellprofiler_plate2.csv.gz b/4_processing_features/data/nf1_sc_norm_fs_cellprofiler_plate2.csv.gz
diff --git a/4_processing_features/extract_sc_features_cp.ipynb b/4_processing_features/extract_sc_features_cp.ipynb
diff --git a/4_processing_features/extract_sc_features_cp.py b/4_processing_features/extract_sc_features_cp.py
@@ -15,7 +15,7 @@
 from pycytominer.cyto_utils import cells, output
 
 
-# ## Set up paths to CellProfiler directory and output
+# ## Set up paths to CellProfiler directory and outputs
 
 # In[2]:
 
@@ -24,9 +24,15 @@
 cp_dir = "../CellProfiler_pipelines"
 output_dir = "data"
 
+
+# ### Plate 1
+
+# In[3]:
+
+
 # Set name and path of .sqlite file and path to metadata
 sql_file = "NF1_data.sqlite"
-single_cell_file = f"sqlite:///{cp_dir}/Analysis_Output/{sql_file}"
+single_cell_file = f"sqlite:///{cp_dir}/Analysis_Output/Plate1_Output/{sql_file}"
 platemap_file = f"{cp_dir}/Metadata/platemap_NF1_CP.csv"
 
 # Set path with name for outputted data
@@ -35,9 +41,25 @@
 sc_norm_fs_output_file = pathlib.Path(f"{output_dir}/nf1_sc_norm_fs_cellprofiler.csv.gz")
 
 
+# ### Plate 2
+
+# In[4]:
+
+
+# Set name and path of .sqlite file and path to metadata
+sql_file2 = "NF1_data_plate2.sqlite"
+single_cell_file2 = f"sqlite:///{cp_dir}/Analysis_Output/Plate2_Output/{sql_file2}"
+platemap_file2 = f"{cp_dir}/Metadata/platemap_NF1_CP_Plate2.csv"
+
+# Set path with name for outputted data
+sc_output_file2 = pathlib.Path(f"{output_dir}/nf1_sc_cellprofiler_plate2.csv.gz")
+sc_norm_output_file2 = pathlib.Path(f"{output_dir}/nf1_sc_norm_cellprofiler_plate2.csv.gz")
+sc_norm_fs_output_file2 = pathlib.Path(f"{output_dir}/nf1_sc_norm_fs_cellprofiler_plate2.csv.gz")
+
+
 # ## Set up names for linking columns between tables in the database file
 
-# In[3]:
+# In[5]:
 
 
 # Define custom linking columns between compartments
@@ -51,19 +73,21 @@
 }
 
 
-# ## Load and view platemap file
+# ## Plate 1
 
-# In[4]:
+# ### Load and view platemap file
+
+# In[6]:
 
 
 # Load platemap file
 platemap_df = pd.read_csv(platemap_file)
 platemap_df
 
 
-# ## Set up `SingleCells` class from Pycytominer
+# ### Set up `SingleCells` class from Pycytominer
 
-# In[5]:
+# In[7]:
 
 
 # Instantiate SingleCells class
@@ -79,9 +103,9 @@
 )
 
 
-# ## Merge single cells 
+# ### Merge single cells 
 
-# In[6]:
+# In[8]:
 
 
 # Merge single cells across compartments
@@ -99,9 +123,9 @@
 sc_df.head()
 
 
-# ## Normalize Data
+# ### Normalize Data
 
-# In[7]:
+# In[9]:
 
 
 # Normalize single cell data and write to file
@@ -116,9 +140,9 @@
 normalize_sc_df.head()
 
 
-# ## Feature Selection
+# ### Feature Selection
 
-# In[8]:
+# In[10]:
 
 
 feature_select_ops = [
@@ -140,16 +164,123 @@
 
 # ---
 # 
-# ### Visualize basic count statistics
+# ### Visualize basic count statistics for Plate 1
 
-# In[9]:
+# In[11]:
 
 
 sc_df.Metadata_genotype.value_counts()
 
 
-# In[10]:
+# In[12]:
 
 
 pd.crosstab(sc_df.Metadata_genotype, sc_df.Metadata_Well)
 
+
+# ---
+# 
+# ## Plate 2
+
+# ### Load and view platemap file
+
+# In[13]:
+
+
+# Load platemap file
+platemap_df2 = pd.read_csv(platemap_file2)
+platemap_df2
+
+
+# ### Set up `SingleCells` class from Pycytominer
+
+# In[14]:
+
+
+# Instantiate SingleCells class
+sc2 = cells.SingleCells(
+    sql_file=single_cell_file2,
+    compartments=["Per_Cells", "Per_Cytoplasm", "Per_Nuclei"],
+    compartment_linking_cols=linking_cols,
+    image_table_name="Per_Image",
+    strata=["Image_Metadata_Well", "Image_Metadata_Plate"],
+    merge_cols=["ImageNumber"],
+    image_cols="ImageNumber",
+    load_image_data=True
+)
+
+
+# ### Merge single cells 
+
+# In[15]:
+
+
+# Merge single cells across compartments
+anno_kwargs = {"join_on": ["Metadata_well_position", "Image_Metadata_Well"]}
+
+sc_df2 = sc2.merge_single_cells(
+    platemap=platemap_df2,
+    **anno_kwargs,
+)
+
+# Save level 2 data as a csv
+output(sc_df2, sc_output_file2)
+
+print(sc_df2.shape)
+sc_df2.head()
+
+
+# ### Normalize data
+
+# In[16]:
+
+
+# Normalize single cell data and write to file
+normalize_sc_df2 = normalize(
+    sc_df2,
+    method="standardize"
+)
+
+output(normalize_sc_df2, sc_norm_output_file2)
+
+print(normalize_sc_df2.shape)
+normalize_sc_df2.head()
+
+
+# ### Feature selection
+
+# In[17]:
+
+
+feature_select_ops = [
+    "variance_threshold",
+    "correlation_threshold",
+    "blocklist",
+]
+
+feature_select_norm_sc_df2 = feature_select(
+    normalize_sc_df2,
+    operation=feature_select_ops
+)
+
+output(feature_select_norm_sc_df2, sc_norm_fs_output_file2)
+
+print(feature_select_norm_sc_df2.shape)
+feature_select_norm_sc_df2.head()
+
+
+# ---
+# 
+# ### Visualize basic count statistics for Plate 2
+
+# In[18]:
+
+
+sc_df2.Metadata_genotype.value_counts()
+
+
+# In[19]:
+
+
+pd.crosstab(sc_df2.Metadata_genotype, sc_df2.Metadata_Well)
+
diff --git a/...pipelines/Analysis_Output/NF1_data.sqlite → ...ysis_Output/Plate1_Output/NF1_data.sqlite b/...pipelines/Analysis_Output/NF1_data.sqlite → ...ysis_Output/Plate1_Output/NF1_data.sqlite
diff --git a/CellProfiler_pipelines/Analysis_Output/Plate2_Output/NF1_data_plate2.sqlite b/CellProfiler_pipelines/Analysis_Output/Plate2_Output/NF1_data_plate2.sqlite
diff --git a/CellProfiler_pipelines/Metadata/barcode_platemap_Plate2.csv b/CellProfiler_pipelines/Metadata/barcode_platemap_Plate2.csv
@@ -0,0 +1,2 @@
+Assay_Plate_Barcode,Plate_Map_Name
+2,platemap_NF1_CP_Plate2
diff --git a/CellProfiler_pipelines/Metadata/platemap_NF1_CP_Plate2.csv b/CellProfiler_pipelines/Metadata/platemap_NF1_CP_Plate2.csv
@@ -0,0 +1,33 @@
+WellRow,WellCol,well_position,gene_name,genotype
+A,1,A1,NF1,WT
+A,6,A6,NF1,WT
+A,7,A7,NF1,Null
+A,12,A12,NF1,Null
+B,1,B1,NF1,WT
+B,6,B6,NF1,WT
+B,7,B7,NF1,Null
+B,12,B12,NF1,Null
+C,1,C1,NF1,WT
+C,6,C6,NF1,WT
+C,7,C7,NF1,Null
+C,12,C12,NF1,Null
+D,1,D1,NF1,WT
+D,6,D6,NF1,WT
+D,7,D7,NF1,Null
+D,12,D12,NF1,Null
+E,1,E1,NF1,WT
+E,6,E6,NF1,WT
+E,7,E7,NF1,Null
+E,12,E12,NF1,Null
+F,1,F1,NF1,WT
+F,6,F6,NF1,WT
+F,7,F7,NF1,Null
+F,12,F12,NF1,Null
+G,1,G1,NF1,WT
+G,6,G6,NF1,WT
+G,7,G7,NF1,Null
+G,12,G12,NF1,Null
+H,1,H1,NF1,WT
+H,6,H6,NF1,WT
+H,7,H7,NF1,Null
+H,12,H12,NF1,Null
diff --git a/CellProfiler_pipelines/Pipelines/NF1_analysis.cpproj b/CellProfiler_pipelines/Pipelines/NF1_analysis.cpproj