Skip to content

Commit

Permalink
Merge pull request #111 from hammerlab/response-labels
Browse files Browse the repository at this point in the history
Allow labeling of response/benefit labels
  • Loading branch information
arahuja authored Aug 10, 2016
2 parents bbaf7af + a1a2738 commit 4a45617
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 14 deletions.
52 changes: 43 additions & 9 deletions cohorts/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ def is_lambda(func):
col, df = apply_func(on=elem, col=col, df=df)
cols.append(col)

if (rename_cols):
if rename_cols:
rename_dict = _strip_column_names(df.columns, keep_paren_contents=keep_paren_contents)
df.rename(columns=rename_dict, inplace=True)
cols = [rename_dict[col] for col in cols]
Expand Down Expand Up @@ -1075,34 +1075,55 @@ def plot_roc_curve(self, on, bootstrap_samples=100, col=None, ax=None, **kwargs)
df.benefit = df.benefit.astype(bool)
return roc_curve_plot(df, plot_col, "benefit", bootstrap_samples, ax=ax)

def plot_benefit(self, on, col=None, benefit_col="benefit", ax=None,
def plot_benefit(self, on, col=None, benefit_col="benefit", label="Response", ax=None,
alternative="two-sided", **kwargs):
"""Plot a comparison of benefit/response in the cohort on a given variable
"""
return self.plot_boolean(on=on,
boolean_col=benefit_col,
col=col,
alternative=alternative,
boolean_label=label,
boolean_value_map={True: "Benefit", False: "No Benefit"},
order=["No Benefit", "Benefit"],
ax=ax,
**kwargs)

def plot_boolean(self, on, boolean_col, col=None, ax=None,
alternative="two-sided", **kwargs):
def plot_boolean(self,
on,
boolean_col,
boolean_label=None,
boolean_value_map={},
col=None,
order=None,
ax=None,
alternative="two-sided",
**kwargs):
"""Plot a comparison of `boolean_col` in the cohort on a given variable via
`on` or `col`.
If the variable (through `on` or `col` is binary) this will compare
If the variable (through `on` or `col`) is binary this will compare
odds-ratios and perform a Fisher's exact test.
If the variable is numeric, this will compare the distributions through
a Mann-Whitney test and plot the distributions with a box-strip plot.
Parameters
----------
on : str or function
See `cohort.load.as_dataframe`
boolean_col : str
Column name of boolean column to plot or compare against
boolean_label : None, optional
Label to give boolean column in the plot
boolean_value_map : dict, optional
Map of conversions for values in the boolean column, e.g. {True: 'High', False: 'Low'}
col : str, optional
If specified, store the result of `on`. See `cohort.load.as_dataframe`
order : None, optional
Order of the labels on the x-axis
ax : None, optional
Axes to plot on
alternative : str, optional
Choose the sidedness of the mannwhitneyu or Fisher's Exact test.
Expand All @@ -1114,7 +1135,18 @@ def plot_boolean(self, on, boolean_col, col=None, ax=None,
plot_col, df = self.as_dataframe(on, col, **kwargs)
df = filter_not_null(df, boolean_col)
df = filter_not_null(df, plot_col)
df[boolean_col] = df[boolean_col].astype(bool)

if boolean_label:
df[boolean_label] = df[boolean_col]
boolean_col = boolean_label

condition_value = None
if boolean_value_map:
assert set(boolean_value_map.keys()) == set([True, False]), \
"Improper mapping of boolean column provided"
df[boolean_col] = df[boolean_col].map(lambda v: boolean_value_map[v])
condition_value = boolean_value_map[True]

if df[plot_col].dtype == "bool":
results = fishers_exact_plot(
data=df,
Expand All @@ -1127,7 +1159,9 @@ def plot_boolean(self, on, boolean_col, col=None, ax=None,
data=df,
condition=boolean_col,
distribution=plot_col,
condition_value=condition_value,
alternative=alternative,
order=order,
ax=ax)
return results

Expand Down
17 changes: 12 additions & 5 deletions cohorts/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def stripboxplot(x, y, data, ax=None, **kwargs):
y=y,
data=data,
ax=ax,
fliersize=0
fliersize=0,
**kwargs
)

return sb.stripplot(
Expand Down Expand Up @@ -97,9 +98,14 @@ def fishers_exact_plot(data, condition1, condition2, ax=None, alternative="two-s

MannWhitneyResults = namedtuple("MannWhitneyResults", ["U", "pvalue", "sided_str", "with_condition_series", "without_condition_series", "plot"])

def mann_whitney_plot(data, condition, distribution, ax=None,
condition_value=None, alternative="two-sided",
skip_plot=False):
def mann_whitney_plot(data,
condition,
distribution,
ax=None,
condition_value=None,
alternative="two-sided",
skip_plot=False,
**kwargs):
"""
Create a box plot comparing a condition and perform a
Mann Whitney test to compare the distribution in condition A v B
Expand Down Expand Up @@ -134,7 +140,8 @@ def mann_whitney_plot(data, condition, distribution, ax=None,
x=condition,
y=distribution,
data=data,
ax=ax
ax=ax,
**kwargs
)

if condition_value:
Expand Down

0 comments on commit 4a45617

Please sign in to comment.