diff --git a/5_analyze_data/notebooks/Heatmap_analysis/nf1_heatmap.ipynb b/5_analyze_data/notebooks/Heatmap_analysis/nf1_heatmap.ipynb
index 6255103..363121e 100644
--- a/5_analyze_data/notebooks/Heatmap_analysis/nf1_heatmap.ipynb
+++ b/5_analyze_data/notebooks/Heatmap_analysis/nf1_heatmap.ipynb
@@ -41,6 +41,13 @@
"execution_count": 2,
"metadata": {},
"outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(149, 444)\n"
+ ]
+ },
{
"data": {
"text/html": [
@@ -206,339 +213,111 @@
"
-1.781201 | \n",
" 0.518641 | \n",
" \n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 144 | \n",
- " F | \n",
- " 7 | \n",
- " 46 | \n",
- " NF1 | \n",
- " Null | \n",
- " 32 | \n",
- " 1 | \n",
- " F7 | \n",
- " 8 | \n",
- " 12 | \n",
- " ... | \n",
- " -0.730713 | \n",
- " -3.721664 | \n",
- " 0.344692 | \n",
- " 0.526792 | \n",
- " 0.444591 | \n",
- " 0.664847 | \n",
- " 1.259085 | \n",
- " 0.745004 | \n",
- " 0.968347 | \n",
- " -0.322389 | \n",
- "
\n",
- " \n",
- " 145 | \n",
- " F | \n",
- " 7 | \n",
- " 46 | \n",
- " NF1 | \n",
- " Null | \n",
- " 32 | \n",
- " 1 | \n",
- " F7 | \n",
- " 9 | \n",
- " 13 | \n",
- " ... | \n",
- " 0.925737 | \n",
- " -1.410478 | \n",
- " 0.047610 | \n",
- " -0.030179 | \n",
- " -0.897448 | \n",
- " -1.032699 | \n",
- " 0.849219 | \n",
- " 0.627234 | \n",
- " 0.096817 | \n",
- " -0.326481 | \n",
- "
\n",
- " \n",
- " 146 | \n",
- " F | \n",
- " 7 | \n",
- " 46 | \n",
- " NF1 | \n",
- " Null | \n",
- " 32 | \n",
- " 1 | \n",
- " F7 | \n",
- " 10 | \n",
- " 14 | \n",
- " ... | \n",
- " 1.010381 | \n",
- " -1.976427 | \n",
- " -1.631117 | \n",
- " -0.338652 | \n",
- " -0.706971 | \n",
- " -1.383757 | \n",
- " 0.484107 | \n",
- " 0.874705 | \n",
- " 0.836486 | \n",
- " -0.367298 | \n",
- "
\n",
- " \n",
- " 147 | \n",
- " F | \n",
- " 7 | \n",
- " 46 | \n",
- " NF1 | \n",
- " Null | \n",
- " 32 | \n",
- " 1 | \n",
- " F7 | \n",
- " 11 | \n",
- " 16 | \n",
- " ... | \n",
- " 0.816456 | \n",
- " -0.562581 | \n",
- " -0.583551 | \n",
- " 0.145784 | \n",
- " 0.008261 | \n",
- " -0.761469 | \n",
- " -0.490888 | \n",
- " 0.058825 | \n",
- " -0.088908 | \n",
- " -0.304325 | \n",
- "
\n",
- " \n",
- " 148 | \n",
- " F | \n",
- " 7 | \n",
- " 46 | \n",
- " NF1 | \n",
- " Null | \n",
- " 32 | \n",
- " 1 | \n",
- " F7 | \n",
- " 12 | \n",
- " 18 | \n",
- " ... | \n",
- " 1.049336 | \n",
- " -0.086971 | \n",
- " -1.339268 | \n",
- " -0.283924 | \n",
- " -1.436801 | \n",
- " -1.305860 | \n",
- " 1.200450 | \n",
- " 0.431040 | \n",
- " 0.486067 | \n",
- " -0.341519 | \n",
- "
\n",
" \n",
"\n",
- "149 rows × 444 columns
\n",
+ "5 rows × 444 columns
\n",
""
],
"text/plain": [
- " Metadata_WellRow Metadata_WellCol Metadata_number_of_singlecells \\\n",
- "0 C 6 12 \n",
- "1 C 6 12 \n",
- "2 C 6 12 \n",
- "3 C 6 12 \n",
- "4 C 6 12 \n",
- ".. ... ... ... \n",
- "144 F 7 46 \n",
- "145 F 7 46 \n",
- "146 F 7 46 \n",
- "147 F 7 46 \n",
- "148 F 7 46 \n",
+ " Metadata_WellRow Metadata_WellCol Metadata_number_of_singlecells \\\n",
+ "0 C 6 12 \n",
+ "1 C 6 12 \n",
+ "2 C 6 12 \n",
+ "3 C 6 12 \n",
+ "4 C 6 12 \n",
"\n",
- " Metadata_gene_name Metadata_genotype Metadata_ImageNumber \\\n",
- "0 NF1 WT 1 \n",
- "1 NF1 WT 1 \n",
- "2 NF1 WT 1 \n",
- "3 NF1 WT 1 \n",
- "4 NF1 WT 4 \n",
- ".. ... ... ... \n",
- "144 NF1 Null 32 \n",
- "145 NF1 Null 32 \n",
- "146 NF1 Null 32 \n",
- "147 NF1 Null 32 \n",
- "148 NF1 Null 32 \n",
+ " Metadata_gene_name Metadata_genotype Metadata_ImageNumber Metadata_Plate \\\n",
+ "0 NF1 WT 1 1 \n",
+ "1 NF1 WT 1 1 \n",
+ "2 NF1 WT 1 1 \n",
+ "3 NF1 WT 1 1 \n",
+ "4 NF1 WT 4 1 \n",
"\n",
- " Metadata_Plate Metadata_Well Metadata_Cytoplasm_Parent_Cells \\\n",
- "0 1 C6 1 \n",
- "1 1 C6 2 \n",
- "2 1 C6 3 \n",
- "3 1 C6 4 \n",
- "4 1 C6 1 \n",
- ".. ... ... ... \n",
- "144 1 F7 8 \n",
- "145 1 F7 9 \n",
- "146 1 F7 10 \n",
- "147 1 F7 11 \n",
- "148 1 F7 12 \n",
+ " Metadata_Well Metadata_Cytoplasm_Parent_Cells \\\n",
+ "0 C6 1 \n",
+ "1 C6 2 \n",
+ "2 C6 3 \n",
+ "3 C6 4 \n",
+ "4 C6 1 \n",
"\n",
- " Metadata_Cytoplasm_Parent_OrigNuclei ... \\\n",
- "0 4 ... \n",
- "1 5 ... \n",
- "2 7 ... \n",
- "3 8 ... \n",
- "4 3 ... \n",
- ".. ... ... \n",
- "144 12 ... \n",
- "145 13 ... \n",
- "146 14 ... \n",
- "147 16 ... \n",
- "148 18 ... \n",
+ " Metadata_Cytoplasm_Parent_OrigNuclei ... \\\n",
+ "0 4 ... \n",
+ "1 5 ... \n",
+ "2 7 ... \n",
+ "3 8 ... \n",
+ "4 3 ... \n",
"\n",
- " Nuclei_Texture_InfoMeas1_RFP_3_03_256 \\\n",
- "0 -1.197488 \n",
- "1 0.188414 \n",
- "2 -1.087258 \n",
- "3 -1.250742 \n",
- "4 -0.258815 \n",
- ".. ... \n",
- "144 -0.730713 \n",
- "145 0.925737 \n",
- "146 1.010381 \n",
- "147 0.816456 \n",
- "148 1.049336 \n",
+ " Nuclei_Texture_InfoMeas1_RFP_3_03_256 \\\n",
+ "0 -1.197488 \n",
+ "1 0.188414 \n",
+ "2 -1.087258 \n",
+ "3 -1.250742 \n",
+ "4 -0.258815 \n",
"\n",
- " Nuclei_Texture_InfoMeas2_GFP_3_00_256 \\\n",
- "0 0.289091 \n",
- "1 0.611666 \n",
- "2 0.843883 \n",
- "3 0.638684 \n",
- "4 -2.222128 \n",
- ".. ... \n",
- "144 -3.721664 \n",
- "145 -1.410478 \n",
- "146 -1.976427 \n",
- "147 -0.562581 \n",
- "148 -0.086971 \n",
+ " Nuclei_Texture_InfoMeas2_GFP_3_00_256 \\\n",
+ "0 0.289091 \n",
+ "1 0.611666 \n",
+ "2 0.843883 \n",
+ "3 0.638684 \n",
+ "4 -2.222128 \n",
"\n",
- " Nuclei_Texture_InfoMeas2_RFP_3_00_256 \\\n",
- "0 0.969456 \n",
- "1 0.481954 \n",
- "2 -0.214887 \n",
- "3 1.163023 \n",
- "4 -0.048779 \n",
- ".. ... \n",
- "144 0.344692 \n",
- "145 0.047610 \n",
- "146 -1.631117 \n",
- "147 -0.583551 \n",
- "148 -1.339268 \n",
+ " Nuclei_Texture_InfoMeas2_RFP_3_00_256 \\\n",
+ "0 0.969456 \n",
+ "1 0.481954 \n",
+ "2 -0.214887 \n",
+ "3 1.163023 \n",
+ "4 -0.048779 \n",
"\n",
- " Nuclei_Texture_InfoMeas2_RFP_3_01_256 \\\n",
- "0 1.131385 \n",
- "1 0.748184 \n",
- "2 0.238299 \n",
- "3 1.062039 \n",
- "4 0.504843 \n",
- ".. ... \n",
- "144 0.526792 \n",
- "145 -0.030179 \n",
- "146 -0.338652 \n",
- "147 0.145784 \n",
- "148 -0.283924 \n",
+ " Nuclei_Texture_InfoMeas2_RFP_3_01_256 \\\n",
+ "0 1.131385 \n",
+ "1 0.748184 \n",
+ "2 0.238299 \n",
+ "3 1.062039 \n",
+ "4 0.504843 \n",
"\n",
- " Nuclei_Texture_InfoMeas2_RFP_3_02_256 \\\n",
- "0 1.303680 \n",
- "1 0.750277 \n",
- "2 0.482832 \n",
- "3 1.082605 \n",
- "4 1.340830 \n",
- ".. ... \n",
- "144 0.444591 \n",
- "145 -0.897448 \n",
- "146 -0.706971 \n",
- "147 0.008261 \n",
- "148 -1.436801 \n",
+ " Nuclei_Texture_InfoMeas2_RFP_3_02_256 \\\n",
+ "0 1.303680 \n",
+ "1 0.750277 \n",
+ "2 0.482832 \n",
+ "3 1.082605 \n",
+ "4 1.340830 \n",
"\n",
- " Nuclei_Texture_InfoMeas2_RFP_3_03_256 \\\n",
- "0 1.416917 \n",
- "1 0.511083 \n",
- "2 1.264950 \n",
- "3 1.386850 \n",
- "4 0.924382 \n",
- ".. ... \n",
- "144 0.664847 \n",
- "145 -1.032699 \n",
- "146 -1.383757 \n",
- "147 -0.761469 \n",
- "148 -1.305860 \n",
+ " Nuclei_Texture_InfoMeas2_RFP_3_03_256 \\\n",
+ "0 1.416917 \n",
+ "1 0.511083 \n",
+ "2 1.264950 \n",
+ "3 1.386850 \n",
+ "4 0.924382 \n",
"\n",
- " Nuclei_Texture_InverseDifferenceMoment_GFP_3_03_256 \\\n",
- "0 -0.079438 \n",
- "1 -0.065958 \n",
- "2 -0.069749 \n",
- "3 -0.272864 \n",
- "4 0.612704 \n",
- ".. ... \n",
- "144 1.259085 \n",
- "145 0.849219 \n",
- "146 0.484107 \n",
- "147 -0.490888 \n",
- "148 1.200450 \n",
+ " Nuclei_Texture_InverseDifferenceMoment_GFP_3_03_256 \\\n",
+ "0 -0.079438 \n",
+ "1 -0.065958 \n",
+ "2 -0.069749 \n",
+ "3 -0.272864 \n",
+ "4 0.612704 \n",
"\n",
- " Nuclei_Texture_InverseDifferenceMoment_RFP_3_00_256 \\\n",
- "0 -2.315521 \n",
- "1 -1.460076 \n",
- "2 -1.841707 \n",
- "3 -1.789888 \n",
- "4 -2.158178 \n",
- ".. ... \n",
- "144 0.745004 \n",
- "145 0.627234 \n",
- "146 0.874705 \n",
- "147 0.058825 \n",
- "148 0.431040 \n",
+ " Nuclei_Texture_InverseDifferenceMoment_RFP_3_00_256 \\\n",
+ "0 -2.315521 \n",
+ "1 -1.460076 \n",
+ "2 -1.841707 \n",
+ "3 -1.789888 \n",
+ "4 -2.158178 \n",
"\n",
- " Nuclei_Texture_InverseDifferenceMoment_RFP_3_03_256 \\\n",
- "0 -1.693210 \n",
- "1 -1.427579 \n",
- "2 -0.798368 \n",
- "3 -1.432404 \n",
- "4 -1.781201 \n",
- ".. ... \n",
- "144 0.968347 \n",
- "145 0.096817 \n",
- "146 0.836486 \n",
- "147 -0.088908 \n",
- "148 0.486067 \n",
+ " Nuclei_Texture_InverseDifferenceMoment_RFP_3_03_256 \\\n",
+ "0 -1.693210 \n",
+ "1 -1.427579 \n",
+ "2 -0.798368 \n",
+ "3 -1.432404 \n",
+ "4 -1.781201 \n",
"\n",
- " Nuclei_Texture_SumVariance_RFP_3_01_256 \n",
- "0 2.881199 \n",
- "1 0.304121 \n",
- "2 0.257284 \n",
- "3 1.083761 \n",
- "4 0.518641 \n",
- ".. ... \n",
- "144 -0.322389 \n",
- "145 -0.326481 \n",
- "146 -0.367298 \n",
- "147 -0.304325 \n",
- "148 -0.341519 \n",
+ " Nuclei_Texture_SumVariance_RFP_3_01_256 \n",
+ "0 2.881199 \n",
+ "1 0.304121 \n",
+ "2 0.257284 \n",
+ "3 1.083761 \n",
+ "4 0.518641 \n",
"\n",
- "[149 rows x 444 columns]"
+ "[5 rows x 444 columns]"
]
},
"execution_count": 2,
@@ -550,7 +329,9 @@
"norm_fs_data = pathlib.Path(\"../../../4_processing_features/data/nf1_sc_norm_fs_cellprofiler.csv.gz\")\n",
"\n",
"data = pd.read_csv(norm_fs_data, index_col=0)\n",
- "data"
+ "\n",
+ "print(data.shape)\n",
+ "data.head()"
]
},
{
@@ -1654,7 +1435,7 @@
{
"data": {
"text/plain": [
- ""
+ ""
]
},
"execution_count": 7,
diff --git a/5_analyze_data/notebooks/Heatmap_analysis/nf1_heatmap.py b/5_analyze_data/notebooks/Heatmap_analysis/nf1_heatmap.py
index 9cbd505..3d90467 100644
--- a/5_analyze_data/notebooks/Heatmap_analysis/nf1_heatmap.py
+++ b/5_analyze_data/notebooks/Heatmap_analysis/nf1_heatmap.py
@@ -22,7 +22,9 @@
norm_fs_data = pathlib.Path("../../../4_processing_features/data/nf1_sc_norm_fs_cellprofiler.csv.gz")
data = pd.read_csv(norm_fs_data, index_col=0)
-data
+
+print(data.shape)
+data.head()
# ## Helper function to split `csv` into metadata and features
diff --git a/5_analyze_data/notebooks/KS-test_analysis/nf1_ks_test.ipynb b/5_analyze_data/notebooks/KS-test_analysis/nf1_ks_test.ipynb
index 45eeef3..6169a4e 100644
--- a/5_analyze_data/notebooks/KS-test_analysis/nf1_ks_test.ipynb
+++ b/5_analyze_data/notebooks/KS-test_analysis/nf1_ks_test.ipynb
@@ -18,7 +18,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -38,7 +38,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -55,13 +55,272 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 10,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(149, 1055)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Metadata_WellRow | \n",
+ " Metadata_WellCol | \n",
+ " Metadata_number_of_singlecells | \n",
+ " Metadata_gene_name | \n",
+ " Metadata_genotype | \n",
+ " Metadata_ImageNumber | \n",
+ " Metadata_Plate | \n",
+ " Metadata_Well | \n",
+ " Metadata_Cytoplasm_Parent_Cells | \n",
+ " Metadata_Cytoplasm_Parent_OrigNuclei | \n",
+ " ... | \n",
+ " Nuclei_Texture_SumVariance_RFP_3_02_256 | \n",
+ " Nuclei_Texture_SumVariance_RFP_3_03_256 | \n",
+ " Nuclei_Texture_Variance_GFP_3_00_256 | \n",
+ " Nuclei_Texture_Variance_GFP_3_01_256 | \n",
+ " Nuclei_Texture_Variance_GFP_3_02_256 | \n",
+ " Nuclei_Texture_Variance_GFP_3_03_256 | \n",
+ " Nuclei_Texture_Variance_RFP_3_00_256 | \n",
+ " Nuclei_Texture_Variance_RFP_3_01_256 | \n",
+ " Nuclei_Texture_Variance_RFP_3_02_256 | \n",
+ " Nuclei_Texture_Variance_RFP_3_03_256 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " C | \n",
+ " 6 | \n",
+ " 12 | \n",
+ " NF1 | \n",
+ " WT | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " C6 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " ... | \n",
+ " 3.141540 | \n",
+ " 3.202273 | \n",
+ " -0.097356 | \n",
+ " -0.096165 | \n",
+ " -0.094202 | \n",
+ " -0.106456 | \n",
+ " 3.337969 | \n",
+ " 3.350528 | \n",
+ " 3.278168 | \n",
+ " 3.310371 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " C | \n",
+ " 6 | \n",
+ " 12 | \n",
+ " NF1 | \n",
+ " WT | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " C6 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " ... | \n",
+ " 0.315924 | \n",
+ " 0.258633 | \n",
+ " -0.087971 | \n",
+ " -0.069493 | \n",
+ " -0.065539 | \n",
+ " -0.095377 | \n",
+ " 0.314776 | \n",
+ " 0.313920 | \n",
+ " 0.348420 | \n",
+ " 0.318693 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " C | \n",
+ " 6 | \n",
+ " 12 | \n",
+ " NF1 | \n",
+ " WT | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " C6 | \n",
+ " 3 | \n",
+ " 7 | \n",
+ " ... | \n",
+ " 0.295233 | \n",
+ " 0.383161 | \n",
+ " 0.065251 | \n",
+ " 0.005550 | \n",
+ " -0.015212 | \n",
+ " -0.029087 | \n",
+ " 0.348492 | \n",
+ " 0.333940 | \n",
+ " 0.341312 | \n",
+ " 0.347999 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " C | \n",
+ " 6 | \n",
+ " 12 | \n",
+ " NF1 | \n",
+ " WT | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " C6 | \n",
+ " 4 | \n",
+ " 8 | \n",
+ " ... | \n",
+ " 1.151725 | \n",
+ " 1.159965 | \n",
+ " 0.023403 | \n",
+ " 0.051931 | \n",
+ " 0.026268 | \n",
+ " -0.002094 | \n",
+ " 1.184695 | \n",
+ " 1.243519 | \n",
+ " 1.263751 | \n",
+ " 1.167156 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " C | \n",
+ " 6 | \n",
+ " 12 | \n",
+ " NF1 | \n",
+ " WT | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " C6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " ... | \n",
+ " 0.699723 | \n",
+ " 0.628294 | \n",
+ " -0.428904 | \n",
+ " -0.416992 | \n",
+ " -0.429383 | \n",
+ " -0.420997 | \n",
+ " 0.690298 | \n",
+ " 0.662006 | \n",
+ " 0.685883 | \n",
+ " 0.701466 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 1055 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Metadata_WellRow Metadata_WellCol Metadata_number_of_singlecells \\\n",
+ "0 C 6 12 \n",
+ "1 C 6 12 \n",
+ "2 C 6 12 \n",
+ "3 C 6 12 \n",
+ "4 C 6 12 \n",
+ "\n",
+ " Metadata_gene_name Metadata_genotype Metadata_ImageNumber Metadata_Plate \\\n",
+ "0 NF1 WT 1 1 \n",
+ "1 NF1 WT 1 1 \n",
+ "2 NF1 WT 1 1 \n",
+ "3 NF1 WT 1 1 \n",
+ "4 NF1 WT 4 1 \n",
+ "\n",
+ " Metadata_Well Metadata_Cytoplasm_Parent_Cells \\\n",
+ "0 C6 1 \n",
+ "1 C6 2 \n",
+ "2 C6 3 \n",
+ "3 C6 4 \n",
+ "4 C6 1 \n",
+ "\n",
+ " Metadata_Cytoplasm_Parent_OrigNuclei ... \\\n",
+ "0 4 ... \n",
+ "1 5 ... \n",
+ "2 7 ... \n",
+ "3 8 ... \n",
+ "4 3 ... \n",
+ "\n",
+ " Nuclei_Texture_SumVariance_RFP_3_02_256 \\\n",
+ "0 3.141540 \n",
+ "1 0.315924 \n",
+ "2 0.295233 \n",
+ "3 1.151725 \n",
+ "4 0.699723 \n",
+ "\n",
+ " Nuclei_Texture_SumVariance_RFP_3_03_256 \\\n",
+ "0 3.202273 \n",
+ "1 0.258633 \n",
+ "2 0.383161 \n",
+ "3 1.159965 \n",
+ "4 0.628294 \n",
+ "\n",
+ " Nuclei_Texture_Variance_GFP_3_00_256 Nuclei_Texture_Variance_GFP_3_01_256 \\\n",
+ "0 -0.097356 -0.096165 \n",
+ "1 -0.087971 -0.069493 \n",
+ "2 0.065251 0.005550 \n",
+ "3 0.023403 0.051931 \n",
+ "4 -0.428904 -0.416992 \n",
+ "\n",
+ " Nuclei_Texture_Variance_GFP_3_02_256 Nuclei_Texture_Variance_GFP_3_03_256 \\\n",
+ "0 -0.094202 -0.106456 \n",
+ "1 -0.065539 -0.095377 \n",
+ "2 -0.015212 -0.029087 \n",
+ "3 0.026268 -0.002094 \n",
+ "4 -0.429383 -0.420997 \n",
+ "\n",
+ " Nuclei_Texture_Variance_RFP_3_00_256 Nuclei_Texture_Variance_RFP_3_01_256 \\\n",
+ "0 3.337969 3.350528 \n",
+ "1 0.314776 0.313920 \n",
+ "2 0.348492 0.333940 \n",
+ "3 1.184695 1.243519 \n",
+ "4 0.690298 0.662006 \n",
+ "\n",
+ " Nuclei_Texture_Variance_RFP_3_02_256 Nuclei_Texture_Variance_RFP_3_03_256 \n",
+ "0 3.278168 3.310371 \n",
+ "1 0.348420 0.318693 \n",
+ "2 0.341312 0.347999 \n",
+ "3 1.263751 1.167156 \n",
+ "4 0.685883 0.701466 \n",
+ "\n",
+ "[5 rows x 1055 columns]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"norm_fs_data = pathlib.Path(\"../../../4_processing_features/data/nf1_sc_norm_cellprofiler.csv.gz\")\n",
"\n",
- "data = pd.read_csv(norm_fs_data, compression=\"gzip\", index_col=0)"
+ "data = pd.read_csv(norm_fs_data, compression=\"gzip\", index_col=0)\n",
+ "\n",
+ "print(data.shape)\n",
+ "data.head()"
]
},
{
@@ -74,7 +333,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
@@ -93,13 +352,19 @@
" \"\"\"\n",
" feature_results = []\n",
"\n",
+ " # divide the NF1 data based on genotype\n",
" null_features = data[(data[\"Metadata_genotype\"] == \"Null\")]\n",
" wt_features = data[(data[\"Metadata_genotype\"] == \"WT\")]\n",
"\n",
+ " # iterate through the columns in the data (both of the genotype dataframes will have the same columns)\n",
" for column in data:\n",
+ " # do not include metadata columns\n",
" if \"Metadata\" not in column:\n",
+ " # convert each individual column (feature) into numpy array\n",
" null_feature = null_features[column].to_numpy()\n",
" wt_feature = wt_features[column].to_numpy()\n",
+ " \n",
+ " # run two-sample ks-test for each feature \n",
" results = ks_2samp(wt_feature, null_feature)\n",
" # have to seperate out namedtuple due to scipy hiding the last two results \n",
" results = tuple([results.statistic, results.pvalue, results.statistic_location, results.statistic_sign])\n",
@@ -109,10 +374,9 @@
"\n",
" return feature_results\n",
"\n",
- "\n",
"def merge_features_kstest(\n",
" feature_results: pd.DataFrame,\n",
- " column_names: list,\n",
+ " feature_names: list,\n",
" save_path: pathlib.Path = None,\n",
"):\n",
" \"\"\"\n",
@@ -133,7 +397,7 @@
" merged dataframe with features and ks-test results\n",
" \"\"\"\n",
" # put dataframes into list of where the columns should go\n",
- " dataframes = [column_names, feature_results]\n",
+ " dataframes = [feature_names, feature_results]\n",
"\n",
" # merge dataframes together\n",
" merged_dataframe = pd.concat(dataframes, axis=1)\n",
@@ -155,7 +419,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
@@ -285,7 +549,7 @@
"[1043 rows x 4 columns]"
]
},
- "execution_count": 5,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -305,7 +569,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
@@ -399,18 +663,23 @@
"[1043 rows x 1 columns]"
]
},
- "execution_count": 6,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "features = data.iloc[:,12:]\n",
- "column_names = features.columns.tolist()\n",
- "column_names = pd.DataFrame(column_names)\n",
- "column_names.columns = [\"Features\"]\n",
+ "# find feature names in the columns from the data\n",
+ "feature_names = [\n",
+ " col_name\n",
+ " for col_name in data.columns.tolist()\n",
+ " if \"Metadata\" not in col_name\n",
+ " ]\n",
+ "\n",
+ "feature_names = pd.DataFrame(feature_names)\n",
+ "feature_names.columns = [\"Features\"]\n",
"\n",
- "column_names"
+ "feature_names"
]
},
{
@@ -423,7 +692,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -578,7 +847,7 @@
"[1043 rows x 5 columns]"
]
},
- "execution_count": 7,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -586,7 +855,7 @@
"source": [
"save_path = pathlib.Path(\"data/nf1_kstest_two_sample_results.csv\")\n",
"\n",
- "merge_features_kstest(feature_results, column_names, save_path)"
+ "merge_features_kstest(feature_results, feature_names, save_path)"
]
}
],
diff --git a/5_analyze_data/notebooks/KS-test_analysis/nf1_ks_test.py b/5_analyze_data/notebooks/KS-test_analysis/nf1_ks_test.py
index 62f748a..e8bac74 100644
--- a/5_analyze_data/notebooks/KS-test_analysis/nf1_ks_test.py
+++ b/5_analyze_data/notebooks/KS-test_analysis/nf1_ks_test.py
@@ -5,7 +5,7 @@
# ## Import libraries
-# In[1]:
+# In[8]:
import numpy as np
@@ -16,7 +16,7 @@
# ## Set seed
-# In[2]:
+# In[9]:
np.random.seed(0)
@@ -24,17 +24,20 @@
# ## Load in NF1 data
-# In[3]:
+# In[10]:
norm_fs_data = pathlib.Path("../../../4_processing_features/data/nf1_sc_norm_cellprofiler.csv.gz")
data = pd.read_csv(norm_fs_data, compression="gzip", index_col=0)
+print(data.shape)
+data.head()
+
# ## Helper functions to perform KS-test and create final `csv` file with results
-# In[4]:
+# In[11]:
def nf1_ks_test_two_sample(data: pd.DataFrame):
@@ -52,13 +55,19 @@ def nf1_ks_test_two_sample(data: pd.DataFrame):
"""
feature_results = []
+ # divide the NF1 data based on genotype
null_features = data[(data["Metadata_genotype"] == "Null")]
wt_features = data[(data["Metadata_genotype"] == "WT")]
+ # iterate through the columns in the data (both of the genotype dataframes will have the same columns)
for column in data:
+ # do not include metadata columns
if "Metadata" not in column:
+ # convert each individual column (feature) into numpy array
null_feature = null_features[column].to_numpy()
wt_feature = wt_features[column].to_numpy()
+
+ # run two-sample ks-test for each feature
results = ks_2samp(wt_feature, null_feature)
# have to separate out namedtuple due to scipy hiding the last two results
results = tuple([results.statistic, results.pvalue, results.statistic_location, results.statistic_sign])
@@ -68,10 +77,9 @@ def nf1_ks_test_two_sample(data: pd.DataFrame):
return feature_results
-
def merge_features_kstest(
feature_results: pd.DataFrame,
- column_names: list,
+ feature_names: list,
save_path: pathlib.Path = None,
):
"""
@@ -92,7 +100,7 @@ def merge_features_kstest(
merged dataframe with features and ks-test results
"""
# put dataframes into list of where the columns should go
- dataframes = [column_names, feature_results]
+ dataframes = [feature_names, feature_results]
# merge dataframes together
merged_dataframe = pd.concat(dataframes, axis=1)
@@ -106,7 +114,7 @@ def merge_features_kstest(
# ## Perform two sample KS-test
-# In[5]:
+# In[12]:
feature_results = nf1_ks_test_two_sample(data)
@@ -115,23 +123,28 @@ def merge_features_kstest(
# ## Take feature columns from data and create a list
-# In[6]:
+# In[13]:
+
+# find feature names in the columns from the data
+feature_names = [
+ col_name
+ for col_name in data.columns.tolist()
+ if "Metadata" not in col_name
+ ]
-features = data.iloc[:,12:]
-column_names = features.columns.tolist()
-column_names = pd.DataFrame(column_names)
-column_names.columns = ["Features"]
+feature_names = pd.DataFrame(feature_names)
+feature_names.columns = ["Features"]
-column_names
+feature_names
# ## Save the final `csv` file with merged features and results
-# In[7]:
+# In[14]:
save_path = pathlib.Path("data/nf1_kstest_two_sample_results.csv")
-merge_features_kstest(feature_results, column_names, save_path)
+merge_features_kstest(feature_results, feature_names, save_path)