Merge pull request #111 from hammerlab/response-labels

Allow custom labels for response/benefit in plots
hammerlab · Aug 10, 2016 · 4a45617 · 4a45617
2 parents bbaf7af + a1a2738
commit 4a45617
Show file tree

Hide file tree

Showing 2 changed files with 55 additions and 14 deletions.
diff --git a/cohorts/load.py b/cohorts/load.py
@@ -455,7 +455,7 @@ def is_lambda(func):
                 col, df = apply_func(on=elem, col=col, df=df)
                 cols.append(col)
 
-        if (rename_cols):
+        if rename_cols:
             rename_dict = _strip_column_names(df.columns, keep_paren_contents=keep_paren_contents)
             df.rename(columns=rename_dict, inplace=True)
             cols = [rename_dict[col] for col in cols]
@@ -1075,34 +1075,55 @@ def plot_roc_curve(self, on, bootstrap_samples=100, col=None, ax=None, **kwargs)
         df.benefit = df.benefit.astype(bool)
         return roc_curve_plot(df, plot_col, "benefit", bootstrap_samples, ax=ax)
 
-    def plot_benefit(self, on, col=None, benefit_col="benefit", ax=None,
+    def plot_benefit(self, on, col=None, benefit_col="benefit", label="Response", ax=None,
                      alternative="two-sided", **kwargs):
         """Plot a comparison of benefit/response in the cohort on a given variable
         """
         return self.plot_boolean(on=on,
                                  boolean_col=benefit_col,
                                  col=col,
                                  alternative=alternative,
+                                 boolean_label=label,
+                                 boolean_value_map={True: "Benefit", False: "No Benefit"},
+                                 order=["No Benefit", "Benefit"],
                                  ax=ax,
                                  **kwargs)
 
-    def plot_boolean(self, on, boolean_col, col=None, ax=None,
-                     alternative="two-sided", **kwargs):
+    def plot_boolean(self, 
+                     on, 
+                     boolean_col, 
+                     boolean_label=None,
+                     boolean_value_map={},
+                     col=None, 
+                     order=None, 
+                     ax=None,
+                     alternative="two-sided", 
+                     **kwargs):
         """Plot a comparison of `boolean_col` in the cohort on a given variable via
         `on` or `col`.
-
-        If the variable (through `on` or `col` is binary) this will compare
+        
+        If the variable (through `on` or `col`) is binary this will compare
         odds-ratios and perform a Fisher's exact test.
-
+        
         If the variable is numeric, this will compare the distributions through
         a Mann-Whitney test and plot the distributions with box-strip plot
-
+        
         Parameters
         ----------
         on : str or function
             See `cohort.load.as_dataframe`
+        boolean_col : str
+            Column name of boolean column to plot or compare against
+        boolean_label : None, optional
+            Label to give boolean column in the plot
+        boolean_value_map : dict, optional
+            Map of conversions for values in the boolean column, e.g. {True: 'High', False: 'Low'}
         col : str, optional
             If specified, store the result of `on`. See `cohort.load.as_dataframe`
+        order : None, optional
+            Order of the labels on the x-axis
+        ax : None, optional
+            Axes to plot on
         alternative : str, optional
             Choose the sidedness of the mannwhitneyu or Fisher's Exact test.
 
@@ -1114,7 +1135,18 @@ def plot_boolean(self, on, boolean_col, col=None, ax=None,
         plot_col, df = self.as_dataframe(on, col, **kwargs)
         df = filter_not_null(df, boolean_col)
         df = filter_not_null(df, plot_col)
-        df[boolean_col] = df[boolean_col].astype(bool)
+
+        if boolean_label:
+            df[boolean_label] = df[boolean_col]
+            boolean_col = boolean_label
+
+        condition_value = None
+        if boolean_value_map:
+            assert set(boolean_value_map.keys()) == set([True, False]), \
+                "Improper mapping of boolean column provided"
+            df[boolean_col] = df[boolean_col].map(lambda v: boolean_value_map[v])
+            condition_value = boolean_value_map[True]
+
         if df[plot_col].dtype == "bool":
             results = fishers_exact_plot(
                 data=df,
@@ -1127,7 +1159,9 @@ def plot_boolean(self, on, boolean_col, col=None, ax=None,
                 data=df,
                 condition=boolean_col,
                 distribution=plot_col,
+                condition_value=condition_value,
                 alternative=alternative,
+                order=order,
                 ax=ax)
         return results
 

diff --git a/cohorts/plot.py b/cohorts/plot.py
@@ -31,7 +31,8 @@ def stripboxplot(x, y, data, ax=None, **kwargs):
         y=y,
         data=data,
         ax=ax,
-        fliersize=0
+        fliersize=0,
+        **kwargs
     )
 
     return sb.stripplot(
@@ -97,9 +98,14 @@ def fishers_exact_plot(data, condition1, condition2, ax=None, alternative="two-s
 
 MannWhitneyResults = namedtuple("MannWhitneyResults", ["U", "pvalue", "sided_str", "with_condition_series", "without_condition_series", "plot"])
 
-def mann_whitney_plot(data, condition, distribution, ax=None,
-                      condition_value=None, alternative="two-sided",
-                      skip_plot=False):
+def mann_whitney_plot(data, 
+                      condition,
+                      distribution, 
+                      ax=None,
+                      condition_value=None, 
+                      alternative="two-sided",
+                      skip_plot=False,
+                      **kwargs):
     """
     Create a box plot comparing a condition and perform a
     Mann Whitney test to compare the distribution in condition A v B
@@ -134,7 +140,8 @@ def mann_whitney_plot(data, condition, distribution, ax=None,
             x=condition,
             y=distribution,
             data=data,
-            ax=ax
+            ax=ax,
+            **kwargs
         )
 
     if condition_value: