Skip to content

Commit

Permalink
make independent t-test not assume equal variance (Welch's t-test, equal_var=False)!
Browse files Browse the repository at this point in the history
  • Loading branch information
gwaybio committed Sep 19, 2019
1 parent 323d363 commit ef98d58
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 115 deletions.
232 changes: 120 additions & 112 deletions 10.gene-expression-signatures/1.separate.ipynb
Expand Up @@ -54,7 +54,7 @@
" b_activation = feature_series[feature_series.index.isin(group_b_ids)]\n",
" \n",
" # Perform t-test on two groups\n",
" t_stat, t_p = ttest_ind(a_activation, b_activation)\n",
" t_stat, t_p = ttest_ind(a_activation, b_activation, equal_var=False)\n",
" \n",
" return([t_stat, t_p, feature_algorithm, feature_num])\n",
"\n",
Expand Down Expand Up @@ -707,6 +707,9 @@
" :]\n",
")\n",
"\n",
"out_file = os.path.join(\"results\", \"balanced_gtex_tissues.tsv\")\n",
"balanced.to_csv(out_file, sep='\\t')\n",
" \n",
"balanced"
]
},
Expand Down Expand Up @@ -753,7 +756,8 @@
"# Perform t-test for all compressed features\n",
"gtex_full_results_df = get_ttest_results(z_matrix_dict=gtex_z_matrix_dict,\n",
" group_a_ids=gtex_males,\n",
" group_b_ids=gtex_females)"
" group_b_ids=gtex_females,\n",
" train_or_test=\"test\")"
]
},
{
Expand Down Expand Up @@ -801,78 +805,78 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>708</th>\n",
" <td>-43.711096</td>\n",
" <td>2.032291e-214</td>\n",
" <td>vae</td>\n",
" <td>108</td>\n",
" <th>511</th>\n",
" <td>44.507291</td>\n",
" <td>7.302909e-176</td>\n",
" <td>nmf</td>\n",
" <td>111</td>\n",
" <td>200</td>\n",
" <td>signal</td>\n",
" <td>486191</td>\n",
" <td>213.692014</td>\n",
" <td>451283</td>\n",
" <td>175.136504</td>\n",
" </tr>\n",
" <tr>\n",
" <th>590</th>\n",
" <td>-37.265821</td>\n",
" <td>3.509756e-177</td>\n",
" <td>vae</td>\n",
" <td>140</td>\n",
" <th>411</th>\n",
" <td>43.596212</td>\n",
" <td>2.655225e-172</td>\n",
" <td>nmf</td>\n",
" <td>111</td>\n",
" <td>150</td>\n",
" <td>signal</td>\n",
" <td>978124</td>\n",
" <td>176.454723</td>\n",
" <td>486191</td>\n",
" <td>171.575899</td>\n",
" </tr>\n",
" <tr>\n",
" <th>511</th>\n",
" <td>34.695955</td>\n",
" <td>8.241419e-162</td>\n",
" <th>361</th>\n",
" <td>43.448300</td>\n",
" <td>1.021953e-171</td>\n",
" <td>nmf</td>\n",
" <td>111</td>\n",
" <td>200</td>\n",
" <td>125</td>\n",
" <td>signal</td>\n",
" <td>451283</td>\n",
" <td>161.083998</td>\n",
" <td>486191</td>\n",
" <td>170.990569</td>\n",
" </tr>\n",
" <tr>\n",
" <th>411</th>\n",
" <td>33.986135</td>\n",
" <td>1.602271e-157</td>\n",
" <td>43.297948</td>\n",
" <td>4.074371e-171</td>\n",
" <td>nmf</td>\n",
" <td>111</td>\n",
" <td>150</td>\n",
" <td>signal</td>\n",
" <td>486191</td>\n",
" <td>156.795264</td>\n",
" <td>165158</td>\n",
" <td>170.389939</td>\n",
" </tr>\n",
" <tr>\n",
" <th>361</th>\n",
" <td>33.870699</td>\n",
" <td>8.012848e-157</td>\n",
" <th>411</th>\n",
" <td>42.894468</td>\n",
" <td>1.614220e-169</td>\n",
" <td>nmf</td>\n",
" <td>111</td>\n",
" <td>125</td>\n",
" <td>150</td>\n",
" <td>signal</td>\n",
" <td>486191</td>\n",
" <td>156.096213</td>\n",
" <td>978124</td>\n",
" <td>168.792037</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" t_stat t_p algorithm feature_num z_dim signal seed \\\n",
"708 -43.711096 2.032291e-214 vae 108 200 signal 486191 \n",
"590 -37.265821 3.509756e-177 vae 140 150 signal 978124 \n",
"511 34.695955 8.241419e-162 nmf 111 200 signal 451283 \n",
"411 33.986135 1.602271e-157 nmf 111 150 signal 486191 \n",
"361 33.870699 8.012848e-157 nmf 111 125 signal 486191 \n",
"511 44.507291 7.302909e-176 nmf 111 200 signal 451283 \n",
"411 43.596212 2.655225e-172 nmf 111 150 signal 486191 \n",
"361 43.448300 1.021953e-171 nmf 111 125 signal 486191 \n",
"411 43.297948 4.074371e-171 nmf 111 150 signal 165158 \n",
"411 42.894468 1.614220e-169 nmf 111 150 signal 978124 \n",
"\n",
" neg_log_p \n",
"708 213.692014 \n",
"590 176.454723 \n",
"511 161.083998 \n",
"411 156.795264 \n",
"361 156.096213 "
"511 175.136504 \n",
"411 171.575899 \n",
"361 170.990569 \n",
"411 170.389939 \n",
"411 168.792037 "
]
},
"execution_count": 10,
Expand Down Expand Up @@ -1568,6 +1572,9 @@
" :]\n",
")\n",
"\n",
"out_file = os.path.join(\"results\", \"balanced_tcga_tissues.tsv\")\n",
"balanced.to_csv(out_file, sep='\\t')\n",
"\n",
"balanced"
]
},
Expand Down Expand Up @@ -1614,7 +1621,8 @@
"# Perform t-test for all compressed features\n",
"tcga_full_results_df = get_ttest_results(z_matrix_dict=tcga_z_matrix_dict,\n",
" group_a_ids=tcga_males,\n",
" group_b_ids=tcga_females)"
" group_b_ids=tcga_females,\n",
" train_or_test=\"test\")"
]
},
{
Expand Down Expand Up @@ -1662,71 +1670,71 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>134</th>\n",
" <td>-4.917736</td>\n",
" <td>0.000001</td>\n",
" <th>143</th>\n",
" <td>4.864065</td>\n",
" <td>0.000002</td>\n",
" <td>ica</td>\n",
" <td>44</td>\n",
" <td>53</td>\n",
" <td>90</td>\n",
" <td>signal</td>\n",
" <td>451283</td>\n",
" <td>5.920055</td>\n",
" <td>165158</td>\n",
" <td>5.798404</td>\n",
" </tr>\n",
" <tr>\n",
" <th>143</th>\n",
" <td>4.893661</td>\n",
" <td>0.000001</td>\n",
" <th>134</th>\n",
" <td>-4.857106</td>\n",
" <td>0.000002</td>\n",
" <td>ica</td>\n",
" <td>53</td>\n",
" <td>44</td>\n",
" <td>90</td>\n",
" <td>signal</td>\n",
" <td>165158</td>\n",
" <td>5.869316</td>\n",
" <td>451283</td>\n",
" <td>5.779352</td>\n",
" </tr>\n",
" <tr>\n",
" <th>105</th>\n",
" <td>-4.848366</td>\n",
" <td>-4.797379</td>\n",
" <td>0.000002</td>\n",
" <td>ica</td>\n",
" <td>15</td>\n",
" <td>90</td>\n",
" <td>signal</td>\n",
" <td>908341</td>\n",
" <td>5.774418</td>\n",
" <td>5.657098</td>\n",
" </tr>\n",
" <tr>\n",
" <th>104</th>\n",
" <td>4.823725</td>\n",
" <td>0.000002</td>\n",
" <td>4.766643</td>\n",
" <td>0.000003</td>\n",
" <td>ica</td>\n",
" <td>14</td>\n",
" <td>90</td>\n",
" <td>signal</td>\n",
" <td>486191</td>\n",
" <td>5.723102</td>\n",
" <td>5.593012</td>\n",
" </tr>\n",
" <tr>\n",
" <th>132</th>\n",
" <td>4.678127</td>\n",
" <td>0.000004</td>\n",
" <td>4.621749</td>\n",
" <td>0.000005</td>\n",
" <td>ica</td>\n",
" <td>42</td>\n",
" <td>90</td>\n",
" <td>signal</td>\n",
" <td>978124</td>\n",
" <td>5.424366</td>\n",
" <td>5.299861</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" t_stat t_p algorithm feature_num z_dim signal seed neg_log_p\n",
"134 -4.917736 0.000001 ica 44 90 signal 451283 5.920055\n",
"143 4.893661 0.000001 ica 53 90 signal 165158 5.869316\n",
"105 -4.848366 0.000002 ica 15 90 signal 908341 5.774418\n",
"104 4.823725 0.000002 ica 14 90 signal 486191 5.723102\n",
"132 4.678127 0.000004 ica 42 90 signal 978124 5.424366"
"143 4.864065 0.000002 ica 53 90 signal 165158 5.798404\n",
"134 -4.857106 0.000002 ica 44 90 signal 451283 5.779352\n",
"105 -4.797379 0.000002 ica 15 90 signal 908341 5.657098\n",
"104 4.766643 0.000003 ica 14 90 signal 486191 5.593012\n",
"132 4.621749 0.000005 ica 42 90 signal 978124 5.299861"
]
},
"execution_count": 17,
Expand Down Expand Up @@ -2324,78 +2332,78 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>711</th>\n",
" <td>17.452038</td>\n",
" <td>2.988269e-37</td>\n",
" <td>vae</td>\n",
" <td>111</td>\n",
" <td>200</td>\n",
" <th>488</th>\n",
" <td>-13.979762</td>\n",
" <td>3.886965e-25</td>\n",
" <td>dae</td>\n",
" <td>88</td>\n",
" <td>100</td>\n",
" <td>signal</td>\n",
" <td>451283</td>\n",
" <td>36.524580</td>\n",
" <td>24.410389</td>\n",
" </tr>\n",
" <tr>\n",
" <th>325</th>\n",
" <td>16.118447</td>\n",
" <td>5.746450e-34</td>\n",
" <th>530</th>\n",
" <td>-12.759128</td>\n",
" <td>4.343456e-25</td>\n",
" <td>vae</td>\n",
" <td>55</td>\n",
" <td>90</td>\n",
" <td>80</td>\n",
" <td>150</td>\n",
" <td>signal</td>\n",
" <td>451283</td>\n",
" <td>33.240600</td>\n",
" <td>165158</td>\n",
" <td>24.362165</td>\n",
" </tr>\n",
" <tr>\n",
" <th>440</th>\n",
" <td>15.863525</td>\n",
" <td>2.493347e-33</td>\n",
" <th>430</th>\n",
" <td>-12.770938</td>\n",
" <td>4.808223e-24</td>\n",
" <td>vae</td>\n",
" <td>65</td>\n",
" <td>55</td>\n",
" <td>125</td>\n",
" <td>signal</td>\n",
" <td>978124</td>\n",
" <td>32.603217</td>\n",
" <td>451283</td>\n",
" <td>23.318015</td>\n",
" </tr>\n",
" <tr>\n",
" <th>516</th>\n",
" <td>15.157871</td>\n",
" <td>1.499645e-31</td>\n",
" <th>598</th>\n",
" <td>-11.964911</td>\n",
" <td>5.395353e-23</td>\n",
" <td>vae</td>\n",
" <td>66</td>\n",
" <td>148</td>\n",
" <td>150</td>\n",
" <td>signal</td>\n",
" <td>165158</td>\n",
" <td>30.824012</td>\n",
" <td>486191</td>\n",
" <td>22.267980</td>\n",
" </tr>\n",
" <tr>\n",
" <th>315</th>\n",
" <td>14.840073</td>\n",
" <td>9.637579e-31</td>\n",
" <td>nmf</td>\n",
" <td>15</td>\n",
" <td>150</td>\n",
" <th>84</th>\n",
" <td>-12.538324</td>\n",
" <td>5.796144e-23</td>\n",
" <td>vae</td>\n",
" <td>9</td>\n",
" <td>25</td>\n",
" <td>signal</td>\n",
" <td>486191</td>\n",
" <td>30.016032</td>\n",
" <td>451283</td>\n",
" <td>22.236861</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" t_stat t_p algorithm feature_num z_dim signal seed \\\n",
"711 17.452038 2.988269e-37 vae 111 200 signal 451283 \n",
"325 16.118447 5.746450e-34 vae 55 90 signal 451283 \n",
"440 15.863525 2.493347e-33 vae 65 125 signal 978124 \n",
"516 15.157871 1.499645e-31 vae 66 150 signal 165158 \n",
"315 14.840073 9.637579e-31 nmf 15 150 signal 486191 \n",
"488 -13.979762 3.886965e-25 dae 88 100 signal 451283 \n",
"530 -12.759128 4.343456e-25 vae 80 150 signal 165158 \n",
"430 -12.770938 4.808223e-24 vae 55 125 signal 451283 \n",
"598 -11.964911 5.395353e-23 vae 148 150 signal 486191 \n",
"84 -12.538324 5.796144e-23 vae 9 25 signal 451283 \n",
"\n",
" neg_log_p \n",
"711 36.524580 \n",
"325 33.240600 \n",
"440 32.603217 \n",
"516 30.824012 \n",
"315 30.016032 "
"488 24.410389 \n",
"530 24.362165 \n",
"430 23.318015 \n",
"598 22.267980 \n",
"84 22.236861 "
]
},
"execution_count": 22,
Expand Down

0 comments on commit ef98d58

Please sign in to comment.