Skip to content

Commit

Permalink
Merge pull request WayScience#21 from jenna-tomkinson/nf1_statistics
Browse files Browse the repository at this point in the history
NF1 UMAP Stats
  • Loading branch information
jenna-tomkinson committed Jan 11, 2023
2 parents 610854c + 283d170 commit 8da88a4
Show file tree
Hide file tree
Showing 21 changed files with 1,761 additions and 10 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,8 @@ __pycache__/
NF1_test
NF1_Second_Plate_Old

# Ignore test notebooks
# ignore test notebooks
example_code.ipynb

# ignore deprecated notebooks
5_analyze_data/notebooks/UMAP_analysis/deprecated_notebooks
Binary file modified 4_processing_features/data/nf1_sc_cellprofiler.csv.gz
Binary file not shown.
Binary file modified 4_processing_features/data/nf1_sc_norm_cellprofiler.csv.gz
Binary file not shown.
Binary file not shown.
340 changes: 334 additions & 6 deletions 4_processing_features/extract_single_cell_features.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
"import pathlib\n",
"import pandas as pd\n",
"\n",
"from pycytominer import normalize\n",
"from pycytominer import normalize, feature_select\n",
"from pycytominer.cyto_utils import cells, output"
]
},
Expand Down Expand Up @@ -56,7 +56,8 @@
"\n",
"# Set path with name for outputted data\n",
"sc_output_file = pathlib.Path(f\"{output_dir}/nf1_sc_cellprofiler.csv.gz\")\n",
"sc_norm_output_file = pathlib.Path(f\"{output_dir}/nf1_sc_norm_cellprofiler.csv.gz\")"
"sc_norm_output_file = pathlib.Path(f\"{output_dir}/nf1_sc_norm_cellprofiler.csv.gz\")\n",
"sc_norm_fs_output_file = pathlib.Path(f\"{output_dir}/nf1_sc_norm_fs_cellprofiler.csv.gz\")"
]
},
{
Expand Down Expand Up @@ -825,6 +826,333 @@
"normalize_sc_df.head()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "bba06013",
"metadata": {},
"source": [
"## Feature Selection"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "82d1718c",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/jenna/anaconda3/envs/4.process-nf1-features/lib/python3.8/site-packages/numpy/lib/function_base.py:2829: RuntimeWarning: invalid value encountered in true_divide\n",
" c /= stddev[:, None]\n",
"/home/jenna/anaconda3/envs/4.process-nf1-features/lib/python3.8/site-packages/numpy/lib/function_base.py:2830: RuntimeWarning: invalid value encountered in true_divide\n",
" c /= stddev[None, :]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(149, 443)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Metadata_WellRow</th>\n",
" <th>Metadata_WellCol</th>\n",
" <th>Metadata_gene_name</th>\n",
" <th>Metadata_genotype</th>\n",
" <th>Metadata_ImageNumber</th>\n",
" <th>Metadata_Plate</th>\n",
" <th>Metadata_Well</th>\n",
" <th>Metadata_Cytoplasm_Parent_Cells</th>\n",
" <th>Metadata_Cytoplasm_Parent_OrigNuclei</th>\n",
" <th>Metadata_Cells_Number_Object_Number</th>\n",
" <th>...</th>\n",
" <th>Nuclei_Texture_InfoMeas1_RFP_3_03_256</th>\n",
" <th>Nuclei_Texture_InfoMeas2_GFP_3_00_256</th>\n",
" <th>Nuclei_Texture_InfoMeas2_RFP_3_00_256</th>\n",
" <th>Nuclei_Texture_InfoMeas2_RFP_3_01_256</th>\n",
" <th>Nuclei_Texture_InfoMeas2_RFP_3_02_256</th>\n",
" <th>Nuclei_Texture_InfoMeas2_RFP_3_03_256</th>\n",
" <th>Nuclei_Texture_InverseDifferenceMoment_GFP_3_03_256</th>\n",
" <th>Nuclei_Texture_InverseDifferenceMoment_RFP_3_00_256</th>\n",
" <th>Nuclei_Texture_InverseDifferenceMoment_RFP_3_03_256</th>\n",
" <th>Nuclei_Texture_SumVariance_RFP_3_01_256</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>C</td>\n",
" <td>6</td>\n",
" <td>NF1</td>\n",
" <td>WT</td>\n",
" <td>1</td>\n",
" <td>001</td>\n",
" <td>C6</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>-1.197488</td>\n",
" <td>0.289091</td>\n",
" <td>0.969456</td>\n",
" <td>1.131385</td>\n",
" <td>1.303680</td>\n",
" <td>1.416917</td>\n",
" <td>-0.079438</td>\n",
" <td>-2.315521</td>\n",
" <td>-1.693210</td>\n",
" <td>2.881199</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>C</td>\n",
" <td>6</td>\n",
" <td>NF1</td>\n",
" <td>WT</td>\n",
" <td>1</td>\n",
" <td>001</td>\n",
" <td>C6</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>0.188414</td>\n",
" <td>0.611666</td>\n",
" <td>0.481954</td>\n",
" <td>0.748184</td>\n",
" <td>0.750277</td>\n",
" <td>0.511083</td>\n",
" <td>-0.065958</td>\n",
" <td>-1.460076</td>\n",
" <td>-1.427579</td>\n",
" <td>0.304121</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>C</td>\n",
" <td>6</td>\n",
" <td>NF1</td>\n",
" <td>WT</td>\n",
" <td>1</td>\n",
" <td>001</td>\n",
" <td>C6</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" <td>...</td>\n",
" <td>-1.087258</td>\n",
" <td>0.843883</td>\n",
" <td>-0.214887</td>\n",
" <td>0.238299</td>\n",
" <td>0.482832</td>\n",
" <td>1.264950</td>\n",
" <td>-0.069749</td>\n",
" <td>-1.841707</td>\n",
" <td>-0.798368</td>\n",
" <td>0.257284</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>C</td>\n",
" <td>6</td>\n",
" <td>NF1</td>\n",
" <td>WT</td>\n",
" <td>1</td>\n",
" <td>001</td>\n",
" <td>C6</td>\n",
" <td>4</td>\n",
" <td>8</td>\n",
" <td>4</td>\n",
" <td>...</td>\n",
" <td>-1.250742</td>\n",
" <td>0.638684</td>\n",
" <td>1.163023</td>\n",
" <td>1.062039</td>\n",
" <td>1.082605</td>\n",
" <td>1.386850</td>\n",
" <td>-0.272864</td>\n",
" <td>-1.789888</td>\n",
" <td>-1.432404</td>\n",
" <td>1.083761</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>C</td>\n",
" <td>6</td>\n",
" <td>NF1</td>\n",
" <td>WT</td>\n",
" <td>4</td>\n",
" <td>001</td>\n",
" <td>C6</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>-0.258815</td>\n",
" <td>-2.222128</td>\n",
" <td>-0.048779</td>\n",
" <td>0.504843</td>\n",
" <td>1.340830</td>\n",
" <td>0.924382</td>\n",
" <td>0.612704</td>\n",
" <td>-2.158178</td>\n",
" <td>-1.781201</td>\n",
" <td>0.518641</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 443 columns</p>\n",
"</div>"
],
"text/plain": [
" Metadata_WellRow Metadata_WellCol Metadata_gene_name Metadata_genotype \\\n",
"0 C 6 NF1 WT \n",
"1 C 6 NF1 WT \n",
"2 C 6 NF1 WT \n",
"3 C 6 NF1 WT \n",
"4 C 6 NF1 WT \n",
"\n",
" Metadata_ImageNumber Metadata_Plate Metadata_Well \\\n",
"0 1 001 C6 \n",
"1 1 001 C6 \n",
"2 1 001 C6 \n",
"3 1 001 C6 \n",
"4 4 001 C6 \n",
"\n",
" Metadata_Cytoplasm_Parent_Cells Metadata_Cytoplasm_Parent_OrigNuclei \\\n",
"0 1 4 \n",
"1 2 5 \n",
"2 3 7 \n",
"3 4 8 \n",
"4 1 3 \n",
"\n",
" Metadata_Cells_Number_Object_Number ... \\\n",
"0 1 ... \n",
"1 2 ... \n",
"2 3 ... \n",
"3 4 ... \n",
"4 1 ... \n",
"\n",
" Nuclei_Texture_InfoMeas1_RFP_3_03_256 \\\n",
"0 -1.197488 \n",
"1 0.188414 \n",
"2 -1.087258 \n",
"3 -1.250742 \n",
"4 -0.258815 \n",
"\n",
" Nuclei_Texture_InfoMeas2_GFP_3_00_256 \\\n",
"0 0.289091 \n",
"1 0.611666 \n",
"2 0.843883 \n",
"3 0.638684 \n",
"4 -2.222128 \n",
"\n",
" Nuclei_Texture_InfoMeas2_RFP_3_00_256 \\\n",
"0 0.969456 \n",
"1 0.481954 \n",
"2 -0.214887 \n",
"3 1.163023 \n",
"4 -0.048779 \n",
"\n",
" Nuclei_Texture_InfoMeas2_RFP_3_01_256 \\\n",
"0 1.131385 \n",
"1 0.748184 \n",
"2 0.238299 \n",
"3 1.062039 \n",
"4 0.504843 \n",
"\n",
" Nuclei_Texture_InfoMeas2_RFP_3_02_256 \\\n",
"0 1.303680 \n",
"1 0.750277 \n",
"2 0.482832 \n",
"3 1.082605 \n",
"4 1.340830 \n",
"\n",
" Nuclei_Texture_InfoMeas2_RFP_3_03_256 \\\n",
"0 1.416917 \n",
"1 0.511083 \n",
"2 1.264950 \n",
"3 1.386850 \n",
"4 0.924382 \n",
"\n",
" Nuclei_Texture_InverseDifferenceMoment_GFP_3_03_256 \\\n",
"0 -0.079438 \n",
"1 -0.065958 \n",
"2 -0.069749 \n",
"3 -0.272864 \n",
"4 0.612704 \n",
"\n",
" Nuclei_Texture_InverseDifferenceMoment_RFP_3_00_256 \\\n",
"0 -2.315521 \n",
"1 -1.460076 \n",
"2 -1.841707 \n",
"3 -1.789888 \n",
"4 -2.158178 \n",
"\n",
" Nuclei_Texture_InverseDifferenceMoment_RFP_3_03_256 \\\n",
"0 -1.693210 \n",
"1 -1.427579 \n",
"2 -0.798368 \n",
"3 -1.432404 \n",
"4 -1.781201 \n",
"\n",
" Nuclei_Texture_SumVariance_RFP_3_01_256 \n",
"0 2.881199 \n",
"1 0.304121 \n",
"2 0.257284 \n",
"3 1.083761 \n",
"4 0.518641 \n",
"\n",
"[5 rows x 443 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"feature_select_ops = [\n",
" \"variance_threshold\",\n",
" \"correlation_threshold\",\n",
" \"blocklist\",\n",
"]\n",
"\n",
"feature_select_norm_sc_df = feature_select(\n",
" normalize_sc_df,\n",
" operation=feature_select_ops\n",
")\n",
"\n",
"output(feature_select_norm_sc_df, sc_norm_fs_output_file)\n",
"\n",
"print(feature_select_norm_sc_df.shape)\n",
"feature_select_norm_sc_df.head()"
]
},
{
"cell_type": "markdown",
"id": "480448ba-a0fc-4c4f-94e2-311543dce6df",
Expand All @@ -837,7 +1165,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "0468ba4c-f6f2-4597-a930-ca20c20ee1bf",
"metadata": {},
"outputs": [
Expand All @@ -849,7 +1177,7 @@
"Name: Metadata_genotype, dtype: int64"
]
},
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -860,7 +1188,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"id": "dda21ea5-1111-4f75-8562-269ce5540160",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -940,7 +1268,7 @@
"WT 12 0 5 0 9 0 7 0"
]
},
"execution_count": 9,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
Expand Down
Loading

0 comments on commit 8da88a4

Please sign in to comment.