Commit 9ebd810
Make p-value comparison for multiple comparisons more uniform.

Need to:
- Debug printing for adjusted p values
emjun committed Apr 28, 2019
1 parent 59b3d1c commit 9ebd810
Showing 4 changed files with 38 additions and 5 deletions.
2 changes: 1 addition & 1 deletion tea/evaluate.py
@@ -406,7 +406,7 @@ def evaluate(dataset: Dataset, expr: Node, assumptions: Dict[str, str], design:
     # if len(preds > 1):
     # FOR DEBUGGING:
     if len(preds) >= 1:
-        correct_multiple_comparison(res_data)
+        correct_multiple_comparison(res_data, len(preds))
 
     return res_data

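The only change in evaluate.py is threading the prediction count into the correction step. For intuition (not part of the commit), a quick sketch of why the number of comparisons matters: the family-wise error rate grows with each uncorrected test.

```python
# Illustrative only: with m uncorrected tests at alpha = 0.05, the chance
# of at least one false positive grows quickly, which is why evaluate()
# now passes len(preds) to correct_multiple_comparison().
alpha = 0.05
for m in (1, 3, 5, 10):
    fwer = 1 - (1 - alpha) ** m
    print(f"{m} comparisons -> P(>=1 false positive) = {fwer:.3f}")
```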
14 changes: 10 additions & 4 deletions tea/helpers/evaluateHelperMethods.py
@@ -7,6 +7,7 @@
 from tea.runtimeDataStructures.bivariateData import BivariateData
 from tea.runtimeDataStructures.multivariateData import MultivariateData
 from tea.runtimeDataStructures.resultData import ResultData
+from tea.runtimeDataStructures.testResult import TestResult

# Stats
from scipy import stats # Stats library used
@@ -499,7 +500,9 @@ def spearman_corr(dataset: Dataset, predictions, combined_data: CombinedData):
         data.append(var_data)
 
     assert(len(data) == 2)
-    return stats.spearmanr(data[0], data[1])
+    results = stats.spearmanr(data[0], data[1])
+
+    return TestResult('Spearman R Correlation', results[0], results[1])
 
 # https://docs.scipy.org/doc/scipy-0.15.1/reference/generated/scipy.stats.kendalltau.html
 # Parameters: x (array-like) | y (array-like) : Arrays of rankings, of the same shape. If arrays are not 1-D, they will be flattened to 1-D.
@@ -512,8 +515,9 @@ def kendalltau_corr(dataset: Dataset, predictions, combined_data: CombinedData):
         data.append(var_data)
 
     assert(len(data) == 2)
+    results = stats.kendalltau(data[0], data[1])
 
-    return stats.kendalltau(data[0], data[1])
+    return TestResult('Kendall Tau Correlation', results[0], results[1])
 
 def pointbiserial(dataset: Dataset, predictions, combined_data: CombinedData):
     xs = combined_data.get_explanatory_variables()
@@ -839,6 +843,8 @@ def execute_test(dataset, design, predictions, combined_data: CombinedData, test
     return stat_result
 
 # Correct for multiple comparisons
-def correct_multiple_comparison(res_data: ResultData):
+def correct_multiple_comparison(res_data: ResultData, num_comparisons: int):
     # TODO: refactor ResultData first.
-    pass
+    res_data.adjust_p_values(num_comparisons)
+
+    import pdb; pdb.set_trace()
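Both correlation wrappers now return a TestResult instead of a raw scipy tuple. A minimal standalone sketch of the pattern (sample data is made up): scipy's correlation tests return a (statistic, p-value) pair that indexes like a tuple, which the commit repackages under a test name.

```python
from scipy import stats

x = [1, 2, 3, 4, 5]
y = [2, 1, 4, 3, 5]

# Each call returns (statistic, pvalue); the commit wraps this pair in a
# named TestResult so the p-value can be adjusted later.
results = stats.spearmanr(x, y)
print('Spearman R Correlation', results[0], results[1])

results = stats.kendalltau(x, y)
print('Kendall Tau Correlation', results[0], results[1])
```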
13 changes: 13 additions & 0 deletions tea/runtimeDataStructures/resultData.py
@@ -28,6 +28,19 @@ def __init__(self, test_to_results):

         # self.test_to_assumptions[test.name] = test_assumptions
 
+    def get_all_test_results(self):
+        results = [v for k,v in self.test_to_results.items()]
+        return results
+
+    def adjust_p_values(self, correction):
+        test_results = self.get_all_test_results()
+
+        for result in test_results:
+            result.adjust_p_val(correction)
+            import pdb; pdb.set_trace()
+
+        import pdb; pdb.set_trace()
+
     def _pretty_print(self):
         output = "\nResults:\n--------------"
         for test_name, results in self.test_to_results.items():
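A condensed, runnable sketch of the two new ResultData helpers, with a tiny stand-in class instead of the real TestResult and the pdb traces dropped:

```python
class FakeTestResult:  # hypothetical stand-in for tea's TestResult
    def __init__(self, p_value):
        self.p_value = p_value
        self.adjusted_p_val = None

    def adjust_p_val(self, correction):
        self.adjusted_p_val = self.p_value / correction  # the committed rule

test_to_results = {'spearman': FakeTestResult(0.01), 'kendall': FakeTestResult(0.04)}

def get_all_test_results():
    # [v for k, v in d.items()] is equivalent to list(d.values())
    return list(test_to_results.values())

def adjust_p_values(correction):
    for result in get_all_test_results():
        result.adjust_p_val(correction)

adjust_p_values(2)
print([r.adjusted_p_val for r in get_all_test_results()])  # [0.005, 0.02]
```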
14 changes: 14 additions & 0 deletions tea/runtimeDataStructures/testResult.py
@@ -0,0 +1,14 @@
+from .value import Value
+
+import attr
+
+@attr.s(init=True)
+class TestResult(Value):
+    name = attr.ib()
+    test_statistic = attr.ib()
+    p_value = attr.ib()
+
+    def adjust_p_val(self, correction):
+        self.adjusted_p_val = attr.ib()
+        self.adjusted_p_val = self.p_value/correction
+        import pdb; pdb.set_trace()

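Two things stand out in the new class. First, `self.adjusted_p_val = attr.ib()` inside a method does not declare an attrs field: at runtime `attr.ib()` just returns a placeholder object, which the next line immediately overwrites; declaring the field with a default is the idiomatic route. Second, the committed rule divides the p-value by the comparison count, whereas a textbook Bonferroni adjustment multiplies it (capped at 1). A standalone sketch showing both; the Bonferroni swap reflects the standard technique, not what the commit does:

```python
import attr

@attr.s(init=True)
class TestResult:  # Value base class omitted so the sketch runs standalone
    name = attr.ib()
    test_statistic = attr.ib()
    p_value = attr.ib()
    adjusted_p_val = attr.ib(default=None)  # declared as a real attrs field

    def adjust_p_val(self, correction):
        # Bonferroni: inflate the p-value, never past 1.0
        self.adjusted_p_val = min(self.p_value * correction, 1.0)

r = TestResult('Spearman R Correlation', 0.7, 0.04)
r.adjust_p_val(2)
print(r.adjusted_p_val)  # 0.08
```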