Skip to content

Commit

Permalink
Merge pull request WayScience#35 from jenna-tomkinson/plate2_cellprof…
Browse files Browse the repository at this point in the history
…iler

run plate 2 through cp and extract sc
  • Loading branch information
jenna-tomkinson committed Jan 23, 2023
2 parents 263a94c + 310c12f commit 0ba2392
Show file tree
Hide file tree
Showing 13 changed files with 1,749 additions and 40 deletions.
Binary file modified 4_processing_features/data/nf1_sc_cellprofiler.csv.gz
Binary file not shown.
Binary file not shown.
Binary file modified 4_processing_features/data/nf1_sc_norm_cellprofiler.csv.gz
Binary file not shown.
Binary file not shown.
Binary file modified 4_processing_features/data/nf1_sc_norm_fs_cellprofiler.csv.gz
Binary file not shown.
Binary file not shown.
1,591 changes: 1,567 additions & 24 deletions 4_processing_features/extract_sc_features_cp.ipynb

Large diffs are not rendered by default.

163 changes: 147 additions & 16 deletions 4_processing_features/extract_sc_features_cp.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from pycytominer.cyto_utils import cells, output


# ## Set up paths to CellProfiler directory and output
# ## Set up paths to CellProfiler directory and outputs

# In[2]:

Expand All @@ -24,9 +24,15 @@
# Root of the CellProfiler project tree; the Analysis_Output/ and Metadata/
# paths used below are built relative to it
cp_dir = "../CellProfiler_pipelines"
# Directory that receives the single-cell, normalized, and feature-selected CSVs
output_dir = "data"


# ### Plate 1

# In[3]:


# Set name and path of .sqlite file and path to metadata
sql_file = "NF1_data.sqlite"
single_cell_file = f"sqlite:///{cp_dir}/Analysis_Output/{sql_file}"
single_cell_file = f"sqlite:///{cp_dir}/Analysis_Output/Plate1_Output/{sql_file}"
platemap_file = f"{cp_dir}/Metadata/platemap_NF1_CP.csv"

# Set path with name for outputted data
Expand All @@ -35,9 +41,25 @@
sc_norm_fs_output_file = pathlib.Path(f"{output_dir}/nf1_sc_norm_fs_cellprofiler.csv.gz")


# ### Plate 2

# In[4]:


# Plate 2 inputs: the CellProfiler SQLite database (as a connection string)
# and the platemap CSV describing each well
sql_file2 = "NF1_data_plate2.sqlite"
single_cell_file2 = f"sqlite:///{cp_dir}/Analysis_Output/Plate2_Output/{sql_file2}"
platemap_file2 = f"{cp_dir}/Metadata/platemap_NF1_CP_Plate2.csv"

# Plate 2 outputs: merged single cells, normalized, and normalized + feature-selected
sc_output_file2 = pathlib.Path(output_dir, "nf1_sc_cellprofiler_plate2.csv.gz")
sc_norm_output_file2 = pathlib.Path(output_dir, "nf1_sc_norm_cellprofiler_plate2.csv.gz")
sc_norm_fs_output_file2 = pathlib.Path(output_dir, "nf1_sc_norm_fs_cellprofiler_plate2.csv.gz")


# ## Set up names for linking columns between tables in the database file

# In[3]:
# In[5]:


# Define custom linking columns between compartments
Expand All @@ -51,19 +73,21 @@
}


# ## Load and view platemap file
# ## Plate 1

# In[4]:
# ### Load and view platemap file

# In[6]:


# Load platemap file
# Read the Plate 1 platemap CSV into a DataFrame
platemap_df = pd.read_csv(platemap_file)
# Bare expression: shows the table when run as a notebook cell and has no
# effect when this file is executed as a plain script
platemap_df


# ## Set up `SingleCells` class from Pycytominer
# ### Set up `SingleCells` class from Pycytominer

# In[5]:
# In[7]:


# Instantiate SingleCells class
Expand All @@ -79,9 +103,9 @@
)


# ## Merge single cells
# ### Merge single cells

# In[6]:
# In[8]:


# Merge single cells across compartments
Expand All @@ -99,9 +123,9 @@
sc_df.head()


# ## Normalize Data
# ### Normalize data

# In[7]:
# In[9]:


# Normalize single cell data and write to file
Expand All @@ -116,9 +140,9 @@
normalize_sc_df.head()


# ## Feature Selection
# ### Feature selection

# In[8]:
# In[10]:


feature_select_ops = [
Expand All @@ -140,16 +164,123 @@

# ---
#
# ### Visualize basic count statistics
# ### Visualize basic count statistics for Plate 1

# In[9]:
# In[11]:


# Count rows of the Plate 1 single-cell table per genotype (displayed in the notebook)
sc_df.Metadata_genotype.value_counts()


# In[10]:
# In[12]:


# Cross-tabulate Plate 1 row counts: genotype as rows, well as columns
pd.crosstab(index=sc_df.Metadata_genotype, columns=sc_df.Metadata_Well)


# ---
#
# ## Plate 2

# ### Load and view platemap file

# In[13]:


# Load platemap file
# Read the Plate 2 platemap CSV into a DataFrame
platemap_df2 = pd.read_csv(platemap_file2)
# Bare expression: shows the table when run as a notebook cell and has no
# effect when this file is executed as a plain script
platemap_df2


# ### Set up `SingleCells` class from Pycytominer

# In[14]:


# Configure the Plate 2 SingleCells object over the CellProfiler SQLite output.
# Keyword arguments are grouped by what they describe: the database, the image
# table, then the compartment tables and how they are linked and merged.
sc2 = cells.SingleCells(
    sql_file=single_cell_file2,
    image_table_name="Per_Image",
    image_cols="ImageNumber",
    load_image_data=True,
    compartments=["Per_Cells", "Per_Cytoplasm", "Per_Nuclei"],
    compartment_linking_cols=linking_cols,
    merge_cols=["ImageNumber"],
    strata=["Image_Metadata_Well", "Image_Metadata_Plate"],
)


# ### Merge single cells

# In[15]:


# Platemap annotation options forwarded to merge_single_cells.
# NOTE(review): join_on presumably pairs the platemap well column with the
# image-table well column — confirm against pycytominer's annotate docs.
anno_kwargs = {
    "join_on": ["Metadata_well_position", "Image_Metadata_Well"],
}

# Merge the Plate 2 compartments into one table and attach the platemap
sc_df2 = sc2.merge_single_cells(platemap=platemap_df2, **anno_kwargs)

# Write the merged single-cell table to the Plate 2 output path
output(sc_df2, sc_output_file2)

# Report the table dimensions; the trailing expression previews rows in the notebook
print(sc_df2.shape)
sc_df2.head()


# ### Normalize data

# In[16]:


# Apply the "standardize" normalization to the merged Plate 2 single-cell table
normalize_sc_df2 = normalize(sc_df2, method="standardize")

# Write the normalized table to the Plate 2 output path
output(normalize_sc_df2, sc_norm_output_file2)

# Report the table dimensions; the trailing expression previews rows in the notebook
print(normalize_sc_df2.shape)
normalize_sc_df2.head()


# ### Feature selection

# In[17]:


# Feature-selection operations to run on the normalized Plate 2 table
feature_select_ops = ["variance_threshold", "correlation_threshold", "blocklist"]

feature_select_norm_sc_df2 = feature_select(
    normalize_sc_df2, operation=feature_select_ops
)

# Write the feature-selected table to the Plate 2 output path
output(feature_select_norm_sc_df2, sc_norm_fs_output_file2)

# Report the table dimensions; the trailing expression previews rows in the notebook
print(feature_select_norm_sc_df2.shape)
feature_select_norm_sc_df2.head()


# ---
#
# ### Visualize basic count statistics for Plate 2

# In[18]:


# Count rows of the Plate 2 single-cell table per genotype (displayed in the notebook)
sc_df2.Metadata_genotype.value_counts()


# In[19]:


# Cross-tabulate Plate 2 row counts: genotype as rows, well as columns
pd.crosstab(index=sc_df2.Metadata_genotype, columns=sc_df2.Metadata_Well)

Binary file not shown.
2 changes: 2 additions & 0 deletions CellProfiler_pipelines/Metadata/barcode_platemap_Plate2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Assay_Plate_Barcode,Plate_Map_Name
2,platemap_NF1_CP_Plate2
33 changes: 33 additions & 0 deletions CellProfiler_pipelines/Metadata/platemap_NF1_CP_Plate2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
WellRow,WellCol,well_position,gene_name,genotype
A,1,A1,NF1,WT
A,6,A6,NF1,WT
A,7,A7,NF1,Null
A,12,A12,NF1,Null
B,1,B1,NF1,WT
B,6,B6,NF1,WT
B,7,B7,NF1,Null
B,12,B12,NF1,Null
C,1,C1,NF1,WT
C,6,C6,NF1,WT
C,7,C7,NF1,Null
C,12,C12,NF1,Null
D,1,D1,NF1,WT
D,6,D6,NF1,WT
D,7,D7,NF1,Null
D,12,D12,NF1,Null
E,1,E1,NF1,WT
E,6,E6,NF1,WT
E,7,E7,NF1,Null
E,12,E12,NF1,Null
F,1,F1,NF1,WT
F,6,F6,NF1,WT
F,7,F7,NF1,Null
F,12,F12,NF1,Null
G,1,G1,NF1,WT
G,6,G6,NF1,WT
G,7,G7,NF1,Null
G,12,G12,NF1,Null
H,1,H1,NF1,WT
H,6,H6,NF1,WT
H,7,H7,NF1,Null
H,12,H12,NF1,Null
Binary file modified CellProfiler_pipelines/Pipelines/NF1_analysis.cpproj
Binary file not shown.

0 comments on commit 0ba2392

Please sign in to comment.