Skip to content

Commit

Permalink
add landscape plot utils
Browse files Browse the repository at this point in the history
  • Loading branch information
arahuja committed May 25, 2016
1 parent ffb7cc1 commit 07f0a07
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 0 deletions.
108 changes: 108 additions & 0 deletions cohorts/landscape_plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import seaborn as sb
import matplotlib.pyplot as plt

def _bar_plot(data,
title=None,
figsize=None,
colormap=None,
ax=None):
plot = data.plot(title=title,
kind='bar',
stacked=True,
figsize=figsize,
ax=ax,
colormap=colormap).axes.get_xaxis().set_visible(False)
return plot

def _binned_bar_plot(data,
                     sample_col,
                     bin_by,
                     title=None,
                     figsize=None,
                     colormap=None,
                     ax=None):
    """Plot, per sample, a stacked bar of row counts split by ``bin_by``.

    Parameters
    ----------
    data : DataFrame-like
        Must contain the ``sample_col`` and ``bin_by`` columns.
    sample_col : str
        Column identifying the sample; one bar is drawn per value.
    bin_by : str
        Column whose values become the stacked segments of each bar.
    title, figsize, colormap, ax : optional
        Passed through to ``_bar_plot``.

    Returns
    -------
    Whatever ``_bar_plot`` returns.
    """
    grouped = data.groupby([sample_col, bin_by])
    counts = grouped[[sample_col]].count()

    # Pivot the bin level into columns: rows are samples, columns are bins.
    counts_by_bin = counts.unstack()

    # unstack() leaves a two-level column index (counted column, bin value);
    # keep only the bin-value level.
    counts_by_bin.columns = counts_by_bin.columns.get_level_values(1)

    return _bar_plot(counts_by_bin,
                     title=title,
                     figsize=figsize,
                     colormap=colormap,
                     ax=ax)

def _indicator_plot(data,
                    sample_col,
                    indicator_col,
                    colormap=None,
                    figsize=None,
                    ax=None):
    """Draw one heatmap row showing ``indicator_col`` for every sample.

    Parameters
    ----------
    data : DataFrame-like
        Must contain the ``sample_col`` and ``indicator_col`` columns.
    sample_col : str
        Column used as the index, so samples run along the x-axis.
    indicator_col : str
        Column whose values fill the heatmap cells.
    colormap : optional
        Passed to ``sb.heatmap`` as ``cmap``.
    figsize : optional
        Accepted but not used by this function.
    ax : optional
        Passed to ``sb.heatmap`` as ``ax``.

    Returns
    -------
    The object returned by ``sb.heatmap``.
    """
    # Index by sample, keep only the indicator column, then transpose so the
    # result is a single row with one cell per sample.
    by_sample = data.set_index([sample_col])
    indicator_row = by_sample[[indicator_col]].T

    heatmap = sb.heatmap(indicator_row,
                         ax=ax,
                         cmap=colormap,
                         linewidths=1,
                         xticklabels=True,
                         cbar=None,
                         square=True)

    # Sample labels vertical, the row label horizontal.
    plt.setp(heatmap.axes.get_xticklabels(), rotation=90)
    plt.setp(heatmap.axes.get_yticklabels(), rotation=0)
    return heatmap

def landscape_plot(cohort,
                   effects_df,
                   sample_col,
                   width=10,
                   bar_height=4,
                   bin_columns=None,
                   indicator_columns=None,
                   value_columns=None):
    """Draw a vertically stacked "landscape" figure for a cohort.

    The figure (matplotlib figure number 0) is a single-column grid holding,
    from top to bottom: one binned bar plot per entry of ``bin_columns``, one
    bar plot per entry of ``value_columns`` and one single-row heatmap per
    entry of ``indicator_columns``.

    Parameters
    ----------
    cohort : object
        Must support ``len()`` and ``as_dataframe(on)``; the latter is
        unpacked as a ``(column_name, dataframe)`` pair.
    effects_df : DataFrame-like
        Data handed to ``_binned_bar_plot`` for each entry of ``bin_columns``.
    sample_col : str
        Name of the column identifying a sample.
    width : number
        Nominal figure width; the figure is created ``width - 1`` wide.
    bar_height : number
        Height given to each bar plot, in the same units as ``width``.
    bin_columns : iterable, optional
        Columns of ``effects_df`` to draw as binned bar plots.
    indicator_columns : iterable, optional
        Values passed to ``cohort.as_dataframe`` and drawn as heatmap rows.
    value_columns : iterable, optional
        Values passed to ``cohort.as_dataframe`` and drawn as bar plots.

    Raises
    ------
    ValueError
        If ``cohort`` is empty or no columns at all were requested; both
        cases previously failed with a ZeroDivisionError.
    """
    # None defaults avoid sharing one mutable list object across calls.
    bin_columns = [] if bin_columns is None else list(bin_columns)
    indicator_columns = (
        [] if indicator_columns is None else list(indicator_columns))
    value_columns = [] if value_columns is None else list(value_columns)

    cohort_size = len(cohort)
    if cohort_size == 0:
        raise ValueError("landscape_plot requires a non-empty cohort")

    num_bar_plots = len(bin_columns) + len(value_columns)
    indicator_column_rows = len(indicator_columns)
    if num_bar_plots + indicator_column_rows == 0:
        raise ValueError(
            "landscape_plot requires at least one of bin_columns, "
            "indicator_columns or value_columns")

    # Each heatmap cell is square, so one grid row is as tall as one cell.
    min_square_size = float(width) / (.9 * cohort_size)
    height = (indicator_column_rows * min_square_size
              + num_bar_plots * bar_height)

    grid_rows = int(float(height) / min_square_size)
    if num_bar_plots:
        # Every bar plot needs at least one grid row, even when bar_height is
        # smaller than a single square.
        bar_rows = max(
            1, int((grid_rows - indicator_column_rows) / num_bar_plots))
    else:
        # Indicator-only figure: nothing to divide the remaining rows among.
        bar_rows = 0
    # Guard against float truncation (or the max(1, ...) above) leaving the
    # grid shorter than the rows that are about to be allocated.
    grid_rows = max(grid_rows,
                    bar_rows * num_bar_plots + indicator_column_rows)
    gridsize = (grid_rows, 1)

    plt.figure(0, figsize=(width - 1, height))

    current_row = 0
    for bin_by_col in bin_columns:
        ax = plt.subplot2grid(gridsize,
                              (current_row, 0),
                              colspan=1,
                              rowspan=bar_rows)
        _binned_bar_plot(effects_df,
                         sample_col,
                         bin_by_col,
                         ax=ax)
        current_row += bar_rows

    for on in value_columns:
        ax = plt.subplot2grid(gridsize,
                              (current_row, 0),
                              colspan=1,
                              rowspan=bar_rows)
        plot_col, df = cohort.as_dataframe(on)
        _bar_plot(df[plot_col],
                  ax=ax)
        current_row += bar_rows

    last_indicator_idx = indicator_column_rows - 1
    for idx, indicator_on in enumerate(indicator_columns):
        ax = plt.subplot2grid(gridsize,
                              (current_row, 0))
        indicator_col, df = cohort.as_dataframe(indicator_on)
        ip = _indicator_plot(df,
                             sample_col,
                             indicator_col,
                             ax=ax)
        current_row += 1
        # Only the bottom-most heatmap row keeps its x-axis visible.
        if idx != last_indicator_idx:
            ip.axes.xaxis.set_visible(False)
13 changes: 13 additions & 0 deletions cohorts/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,19 @@ def _load_single_patient_polyphen(self, patient):
self.save_to_cache(df, cache_name, patient.id, cached_file_name)
return df

def load_effects_dataframe(self, patients=None, only_nonsynonymous=False, variant_type="snv", merge_type="union"):
    """Load effects for the given patients as a single DataFrame.

    All arguments are forwarded unchanged to ``self.load_effects``. Each
    per-patient result is converted with ``to_dataframe()``, tagged with a
    ``patient_id`` column, and the frames are concatenated in the order
    ``load_effects`` yields them (row indices are not reset).

    Returns
    -------
    The concatenated DataFrame.
    """
    def _tagged_frame(patient_id, effects):
        # Convert one patient's effects and record which patient they belong to.
        frame = effects.to_dataframe()
        frame['patient_id'] = patient_id
        return frame

    effects_by_patient = self.load_effects(
        patients=patients,
        only_nonsynonymous=only_nonsynonymous,
        variant_type=variant_type,
        merge_type=merge_type)

    frames = [
        _tagged_frame(patient_id, effects)
        for (patient_id, effects) in effects_by_patient.items()
    ]
    return pd.concat(frames, copy=False)

def load_effects(self, patients=None, only_nonsynonymous=False, variant_type="snv", merge_type="union"):
"""Load a dictionary of patient_id to varcode.EffectCollection
Parameters
Expand Down

0 comments on commit 07f0a07

Please sign in to comment.