From 9b18c9f38956ec58db7ed6a8dd44aa4a8691a44d Mon Sep 17 00:00:00 2001 From: konradjk Date: Mon, 4 Feb 2019 21:56:35 -0500 Subject: [PATCH 01/20] Adding 2d histogram function --- hail/python/hail/plot/plots.py | 137 +++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/hail/python/hail/plot/plots.py b/hail/python/hail/plot/plots.py index 1da4f3af3d1..24e0df829e7 100644 --- a/hail/python/hail/plot/plots.py +++ b/hail/python/hail/plot/plots.py @@ -126,6 +126,143 @@ def cumulative_histogram(data, range=None, bins=50, legend=None, title=None, nor return p +def set_font_size(p, font_size: str = "12pt"): + """Set as many font sizes as possible in a bokeh figure + + Parameters + ---------- + p : :class:`bokeh.plotting.figure.Figure` + Input Figure. + font_size : str + String of font size in points (e.g. "12pt"). + + Returns + ------- + :class:`bokeh.plotting.figure.Figure` + """ + p.title.text_font_size = font_size + p.legend.label_text_font_size = font_size + p.xaxis.axis_label_text_font_size = font_size + p.yaxis.axis_label_text_font_size = font_size + p.xaxis.major_label_text_font_size = font_size + p.yaxis.major_label_text_font_size = font_size + if hasattr(p.xaxis, 'group_text_font_size'): + p.xaxis.group_text_font_size = font_size + return p + + +def histogram_2d(x: NumericExpression, y: NumericExpression, + x_range: Tuple[float, float] = None, y_range: Tuple[float, float] = None, + bins: int = 40, x_bins: int = None, y_bins: int = None, + plot_title: str = '2-D histogram', plot_width: int = 600, plot_height: int = 600, + font_size: str = '7pt', + colors=["#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce", "#ddb7b1", "#cc7878", "#933b41", "#550b1d"]): + """Plot a 2-D histogram of x vs y, which are NumericExpressions from a Table. + + If x_range or y_range are not provided, the function will do a pass through the data to determine min and max + of each variable. + + Parameters + ---------- + x : :class:`.NumericExpression` + Expression for x-axis. + y : :class:`.NumericExpression` + Expression for y-axis. + x_range : Tuple[float] + Tuple of (min, max) bounds for the x-axis. + y_range : Tuple[float] + Tuple of (min, max) bounds for the y-axis. + bins : int + Number of bins in the histogram (default 40). + x_bins : int + Number of bins on x-axis, will override ``bins`` if provided. + y_bins : int + Number of bins on y-axis, will override ``bins`` if provided. + plot_width : int + Plot width (default 600px). + plot_height : int + Plot height (default 600px). + plot_title : str + Title of the plot. + font_size : str + String of font size in points (default "7pt"). + colors : List[str] + List of hex colors from low to high. + + Returns + ------- + :class:`bokeh.plotting.figure.Figure` + """ + source = x._indices.source + y_source = y._indices.source + + if source is None or y_source is None: + raise ValueError("histogram_2d expects two expressions of 'Table', found scalar expression") + if isinstance(source, hail.MatrixTable): + raise ValueError("hisogram_2d requires source to be Table, not MatrixTable") + check_row_indexed('histogram_2d', source.x) + check_row_indexed('histogram_2d', source.y) + if source != y_source: + raise ValueError(f"histogram_2d expects two expressions of 'Table', found {source} and {y_source}") + if x_bins is None: + x_bins = bins + if y_bins is None: + y_bins = bins + if x_range is None or y_range is None: + warnings.warn('At least one range was not defined. Doing two passes...') + ranges = source.aggregate(hail.struct(x_stats=hail.agg.stats(x), + y_stats=hail.agg.stats(y))) + if x_range is None: + x_range = (ranges.x_stats.min, ranges.x_stats.max) + if y_range is None: + y_range = (ranges.y_stats.min, ranges.y_stats.max) + x_range = list(map(float, x_range)) + y_range = list(map(float, y_range)) + x_spacing = (x_range[1] - x_range[0]) / x_bins + y_spacing = (y_range[1] - y_range[0]) / y_bins + + def frange(start, stop, step): + from itertools import count, takewhile + return takewhile(lambda x: x < stop, count(start, step)) + + x_levels = hail.literal(list(frange(x_range[0], x_range[1], x_spacing))[::-1]) + y_levels = hail.literal(list(frange(y_range[0], y_range[1], y_spacing))[::-1]) + + data = source.group_by( + x=hail.str(x_levels.find(lambda w: x >= w)), y=hail.str(y_levels.find(lambda w: y >= w)) + ).aggregate(c=hail.agg.count()).to_pandas() + + mapper = LinearColorMapper(palette=colors, low=data.c.min(), high=data.c.max()) + + x_axis = sorted(set(data.x), key=lambda z: float(z)) + y_axis = sorted(set(data.y), key=lambda z: float(z)) + p = figure(title=plot_title, + x_range=x_axis, y_range=list(reversed(y_axis)), + x_axis_location="above", plot_width=plot_width, plot_height=plot_height, + tools="hover,save,pan,box_zoom,reset,wheel_zoom", toolbar_location='below', + tooltips=[('x', '@x'), ('y', '@y',), ('count', '@c')]) + + p.grid.grid_line_color = None + p.axis.axis_line_color = None + p.axis.major_tick_line_color = None + p.axis.major_label_standoff = 0 + p.axis.major_label_text_font_size = font_size + import math + p.xaxis.major_label_orientation = math.pi / 3 + + p.rect(x="x", y="y", width=1, height=1, + source=data, + fill_color={'field': 'c', 'transform': mapper}, + line_color=None) + + color_bar = ColorBar(color_mapper=mapper, major_label_text_font_size=font_size, + ticker=BasicTicker(desired_num_ticks=6), + label_standoff=6, border_line_color=None, location=(0, 0)) + p.add_layout(color_bar, 'right') + p = set_font_size(p, font_size) + return p + + @typecheck(x=oneof(sequenceof(numeric), expr_float64), y=oneof(sequenceof(numeric), expr_float64), label=oneof(nullable(str), expr_str, sequenceof(str)), title=nullable(str), xlabel=nullable(str), ylabel=nullable(str), size=int, legend=bool, From 641ae334b859ea5c5b3ac0a83c9c30fa840edeb0 Mon Sep 17 00:00:00 2001 From: konradjk Date: Mon, 4 Feb 2019 22:45:37 -0500 Subject: [PATCH 02/20] added typecheck and into index --- hail/python/hail/docs/plot.rst | 2 ++ hail/python/hail/plot/__init__.py | 3 ++- hail/python/hail/plot/plots.py | 6 ++++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/hail/python/hail/docs/plot.rst b/hail/python/hail/docs/plot.rst index 4e6e58c3179..e5a51acc2bd 100644 --- a/hail/python/hail/docs/plot.rst +++ b/hail/python/hail/docs/plot.rst @@ -21,12 +21,14 @@ Plot functions in Hail accept data in the form of either Python objects or :clas histogram cumulative_histogram + histogram_2d scatter qq manhattan .. autofunction:: histogram .. autofunction:: cumulative_histogram +.. autofunction:: histogram_2d .. autofunction:: scatter .. autofunction:: qq .. autofunction:: manhattan diff --git a/hail/python/hail/plot/__init__.py b/hail/python/hail/plot/__init__.py index e354836078b..d4200829edc 100644 --- a/hail/python/hail/plot/__init__.py +++ b/hail/python/hail/plot/__init__.py @@ -1,7 +1,8 @@ -from .plots import histogram, cumulative_histogram, scatter, qq, manhattan +from .plots import histogram, cumulative_histogram, histogram_2d, scatter, qq, manhattan __all__ = ['histogram', 'cumulative_histogram', 'scatter', + 'histogram_2d', 'qq', 'manhattan'] diff --git a/hail/python/hail/plot/plots.py b/hail/python/hail/plot/plots.py index 24e0df829e7..92599f50ada 100644 --- a/hail/python/hail/plot/plots.py +++ b/hail/python/hail/plot/plots.py @@ -151,6 +151,12 @@ def set_font_size(p, font_size: str = "12pt"): return p +@typecheck(x=expr_numeric, y=expr_numeric, + x_range=nullable(sized_tupleof(numeric, numeric)), + y_range=nullable(sized_tupleof(numeric, numeric)), + bins=int, x_bins=nullable(int), y_bins=nullable(int), + plot_title=nullable(str), plot_width=int, plot_height=int, + font_size=str, colors=sequenceof(str)) def histogram_2d(x: NumericExpression, y: NumericExpression, x_range: Tuple[float, float] = None, y_range: Tuple[float, float] = None, bins: int = 40, x_bins: int = None, y_bins: int = None, From 8b75f937a9ea5ca2d21f9c911c584c4fc2d495c6 Mon Sep 17 00:00:00 2001 From: konradjk Date: Mon, 4 Feb 2019 23:18:28 -0500 Subject: [PATCH 03/20] removing typehints --- hail/python/hail/plot/plots.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/hail/python/hail/plot/plots.py b/hail/python/hail/plot/plots.py index 92599f50ada..9b54540d967 100644 --- a/hail/python/hail/plot/plots.py +++ b/hail/python/hail/plot/plots.py @@ -157,11 +157,8 @@ def set_font_size(p, font_size: str = "12pt"): bins=int, x_bins=nullable(int), y_bins=nullable(int), plot_title=nullable(str), plot_width=int, plot_height=int, font_size=str, colors=sequenceof(str)) -def histogram_2d(x: NumericExpression, y: NumericExpression, - x_range: Tuple[float, float] = None, y_range: Tuple[float, float] = None, - bins: int = 40, x_bins: int = None, y_bins: int = None, - plot_title: str = '2-D histogram', plot_width: int = 600, plot_height: int = 600, - font_size: str = '7pt', +def histogram_2d(x, y, x_range=None, y_range=None, bins=40, x_bins=None, y_bins=None, + plot_title='2-D histogram', plot_width=600, plot_height=600, font_size='7pt', colors=["#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce", "#ddb7b1", "#cc7878", "#933b41", "#550b1d"]): """Plot a 2-D histogram of x vs y, which are NumericExpressions from a Table. From 895db58c9e6484131a505a691d64cb0ad7c3754b Mon Sep 17 00:00:00 2001 From: konradjk Date: Tue, 5 Feb 2019 21:11:00 -0500 Subject: [PATCH 04/20] bugfix --- hail/python/hail/plot/plots.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hail/python/hail/plot/plots.py b/hail/python/hail/plot/plots.py index 9b54540d967..0e4e07ef070 100644 --- a/hail/python/hail/plot/plots.py +++ b/hail/python/hail/plot/plots.py @@ -203,8 +203,8 @@ def histogram_2d(x, y, x_range=None, y_range=None, bins=40, x_bins=None, y_bins= raise ValueError("histogram_2d expects two expressions of 'Table', found scalar expression") if isinstance(source, hail.MatrixTable): raise ValueError("hisogram_2d requires source to be Table, not MatrixTable") - check_row_indexed('histogram_2d', source.x) - check_row_indexed('histogram_2d', source.y) + check_row_indexed('histogram_2d', x) + check_row_indexed('histogram_2d', y) if source != y_source: raise ValueError(f"histogram_2d expects two expressions of 'Table', found {source} and {y_source}") if x_bins is None: From e8db66faddce70fcdde89b9bc319cf4051d79050 Mon Sep 17 00:00:00 2001 From: konradjk Date: Tue, 5 Feb 2019 21:17:20 -0500 Subject: [PATCH 05/20] fix for when range is provided and some values are below minimum --- hail/python/hail/plot/plots.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hail/python/hail/plot/plots.py b/hail/python/hail/plot/plots.py index 0e4e07ef070..31b93f61f2b 100644 --- a/hail/python/hail/plot/plots.py +++ b/hail/python/hail/plot/plots.py @@ -231,9 +231,10 @@ def frange(start, stop, step): x_levels = hail.literal(list(frange(x_range[0], x_range[1], x_spacing))[::-1]) y_levels = hail.literal(list(frange(y_range[0], y_range[1], y_spacing))[::-1]) - data = source.group_by( + grouped_ht = source.group_by( x=hail.str(x_levels.find(lambda w: x >= w)), y=hail.str(y_levels.find(lambda w: y >= w)) - ).aggregate(c=hail.agg.count()).to_pandas() + ).aggregate(c=hail.agg.count()) + data = grouped_ht.filter(hail.is_defined(grouped_ht.x) & hail.is_defined(grouped_ht.y)).to_pandas() mapper = LinearColorMapper(palette=colors, low=data.c.min(), high=data.c.max()) From b4a0ac771f08d83260d327d15b0b1f6864498488 Mon Sep 17 00:00:00 2001 From: konradjk Date: Tue, 5 Feb 2019 21:18:54 -0500 Subject: [PATCH 06/20] idk why this was reversed --- hail/python/hail/plot/plots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail/python/hail/plot/plots.py b/hail/python/hail/plot/plots.py index 31b93f61f2b..156621cfc2b 100644 --- a/hail/python/hail/plot/plots.py +++ b/hail/python/hail/plot/plots.py @@ -241,7 +241,7 @@ def frange(start, stop, step): x_axis = sorted(set(data.x), key=lambda z: float(z)) y_axis = sorted(set(data.y), key=lambda z: float(z)) p = figure(title=plot_title, - x_range=x_axis, y_range=list(reversed(y_axis)), + x_range=x_axis, y_range=y_axis, x_axis_location="above", plot_width=plot_width, plot_height=plot_height, tools="hover,save,pan,box_zoom,reset,wheel_zoom", toolbar_location='below', tooltips=[('x', '@x'), ('y', '@y',), ('count', '@c')]) From d0a53c1554a9a749b0d3dbe46ca127dcdc9fa063 Mon Sep 17 00:00:00 2001 From: konradjk Date: Tue, 5 Feb 2019 21:39:09 -0500 Subject: [PATCH 07/20] picked a more color-blind friendly palette (darkening blues) --- hail/python/hail/plot/plots.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hail/python/hail/plot/plots.py b/hail/python/hail/plot/plots.py index 156621cfc2b..f57573ee2b0 100644 --- a/hail/python/hail/plot/plots.py +++ b/hail/python/hail/plot/plots.py @@ -1,6 +1,7 @@ from math import log, isnan, log10 import numpy as np +import bokeh from bokeh.models import * from bokeh.plotting import figure from itertools import cycle @@ -159,7 +160,7 @@ def set_font_size(p, font_size: str = "12pt"): font_size=str, colors=sequenceof(str)) def histogram_2d(x, y, x_range=None, y_range=None, bins=40, x_bins=None, y_bins=None, plot_title='2-D histogram', plot_width=600, plot_height=600, font_size='7pt', - colors=["#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce", "#ddb7b1", "#cc7878", "#933b41", "#550b1d"]): + colors=bokeh.palettes.all_palettes['Blues'][7][::-1]): """Plot a 2-D histogram of x vs y, which are NumericExpressions from a Table. If x_range or y_range are not provided, the function will do a pass through the data to determine min and max From 22d46d3c39e233de3c4a770b8be016fb3199d6a8 Mon Sep 17 00:00:00 2001 From: konradjk Date: Wed, 6 Feb 2019 08:56:37 -0500 Subject: [PATCH 08/20] addressed most comments --- hail/python/hail/plot/plots.py | 70 +++++++++++++++++----------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/hail/python/hail/plot/plots.py b/hail/python/hail/plot/plots.py index f57573ee2b0..e96dc27439f 100644 --- a/hail/python/hail/plot/plots.py +++ b/hail/python/hail/plot/plots.py @@ -127,31 +127,6 @@ def cumulative_histogram(data, range=None, bins=50, legend=None, title=None, nor return p -def set_font_size(p, font_size: str = "12pt"): - """Set as many font sizes as possible in a bokeh figure - - Parameters - ---------- - p : :class:`bokeh.plotting.figure.Figure` - Input Figure. - font_size : str - String of font size in points (e.g. "12pt"). - - Returns - ------- - :class:`bokeh.plotting.figure.Figure` - """ - p.title.text_font_size = font_size - p.legend.label_text_font_size = font_size - p.xaxis.axis_label_text_font_size = font_size - p.yaxis.axis_label_text_font_size = font_size - p.xaxis.major_label_text_font_size = font_size - p.yaxis.major_label_text_font_size = font_size - if hasattr(p.xaxis, 'group_text_font_size'): - p.xaxis.group_text_font_size = font_size - return p - - @typecheck(x=expr_numeric, y=expr_numeric, x_range=nullable(sized_tupleof(numeric, numeric)), y_range=nullable(sized_tupleof(numeric, numeric)), @@ -169,9 +144,9 @@ def histogram_2d(x, y, x_range=None, y_range=None, bins=40, x_bins=None, y_bins= Parameters ---------- x : :class:`.NumericExpression` - Expression for x-axis. + Expression for x-axis (from a Hail table). y : :class:`.NumericExpression` - Expression for y-axis. + Expression for y-axis (from the same Hail table as ``x``). x_range : Tuple[float] Tuple of (min, max) bounds for the x-axis. y_range : Tuple[float] @@ -189,9 +164,11 @@ def histogram_2d(x, y, x_range=None, y_range=None, bins=40, x_bins=None, y_bins= plot_title : str Title of the plot. font_size : str - String of font size in points (default "7pt"). + String of font size in points (default '7pt"). colors : List[str] - List of hex colors from low to high. + List of colors (hex codes, or strings as described + `here `). Effective with one of the many + built-in palettes available `here `. Returns ------- @@ -203,17 +180,17 @@ def histogram_2d(x, y, x_range=None, y_range=None, bins=40, x_bins=None, y_bins= if source is None or y_source is None: raise ValueError("histogram_2d expects two expressions of 'Table', found scalar expression") if isinstance(source, hail.MatrixTable): - raise ValueError("hisogram_2d requires source to be Table, not MatrixTable") + raise ValueError("histogram_2d requires source to be Table, not MatrixTable") + if source != y_source: + raise ValueError(f"histogram_2d expects two expressions from the same 'Table', found {source} and {y_source}") check_row_indexed('histogram_2d', x) check_row_indexed('histogram_2d', y) - if source != y_source: - raise ValueError(f"histogram_2d expects two expressions of 'Table', found {source} and {y_source}") if x_bins is None: x_bins = bins if y_bins is None: y_bins = bins if x_range is None or y_range is None: - warnings.warn('At least one range was not defined. Doing two passes...') + warnings.warn('At least one range was not defined in histogram_2d. Doing two passes...') ranges = source.aggregate(hail.struct(x_stats=hail.agg.stats(x), y_stats=hail.agg.stats(y))) if x_range is None: @@ -255,7 +232,7 @@ def frange(start, stop, step): import math p.xaxis.major_label_orientation = math.pi / 3 - p.rect(x="x", y="y", width=1, height=1, + p.rect(x='x', y='y', width=1, height=1, source=data, fill_color={'field': 'c', 'transform': mapper}, line_color=None) @@ -264,6 +241,31 @@ def frange(start, stop, step): ticker=BasicTicker(desired_num_ticks=6), label_standoff=6, border_line_color=None, location=(0, 0)) p.add_layout(color_bar, 'right') + + def set_font_size(p, font_size: str = '12pt'): + """Set most of the font sizes in a bokeh figure + + Parameters + ---------- + p : :class:`bokeh.plotting.figure.Figure` + Input figure. + font_size : str + String of font size in points (e.g. '12pt'). + + Returns + ------- + :class:`bokeh.plotting.figure.Figure` + """ + p.title.text_font_size = font_size + p.legend.label_text_font_size = font_size + p.xaxis.axis_label_text_font_size = font_size + p.yaxis.axis_label_text_font_size = font_size + p.xaxis.major_label_text_font_size = font_size + p.yaxis.major_label_text_font_size = font_size + if hasattr(p.xaxis, 'group_text_font_size'): + p.xaxis.group_text_font_size = font_size + return p + p = set_font_size(p, font_size) return p From 9e2c5b370f71644f3c8e5413de675e35d4a925e2 Mon Sep 17 00:00:00 2001 From: konradjk Date: Wed, 6 Feb 2019 08:56:52 -0500 Subject: [PATCH 09/20] expressly removing points outside bounds --- hail/python/hail/plot/plots.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/hail/python/hail/plot/plots.py b/hail/python/hail/plot/plots.py index e96dc27439f..2a3fef156d0 100644 --- a/hail/python/hail/plot/plots.py +++ b/hail/python/hail/plot/plots.py @@ -197,6 +197,9 @@ def histogram_2d(x, y, x_range=None, y_range=None, bins=40, x_bins=None, y_bins= x_range = (ranges.x_stats.min, ranges.x_stats.max) if y_range is None: y_range = (ranges.y_stats.min, ranges.y_stats.max) + else: + warnings.warn('If x_range or y_range are specified in histogram_2d, and there are points ' + 'outside of these ranges, they will not be plotted') x_range = list(map(float, x_range)) y_range = list(map(float, y_range)) x_spacing = (x_range[1] - x_range[0]) / x_bins @@ -204,15 +207,17 @@ def histogram_2d(x, y, x_range=None, y_range=None, bins=40, x_bins=None, y_bins= def frange(start, stop, step): from itertools import count, takewhile - return takewhile(lambda x: x < stop, count(start, step)) + return takewhile(lambda x: x <= stop, count(start, step)) x_levels = hail.literal(list(frange(x_range[0], x_range[1], x_spacing))[::-1]) y_levels = hail.literal(list(frange(y_range[0], y_range[1], y_spacing))[::-1]) grouped_ht = source.group_by( - x=hail.str(x_levels.find(lambda w: x >= w)), y=hail.str(y_levels.find(lambda w: y >= w)) + x=hail.str(x_levels.find(lambda w: x >= w)), + y=hail.str(y_levels.find(lambda w: y >= w)) ).aggregate(c=hail.agg.count()) - data = grouped_ht.filter(hail.is_defined(grouped_ht.x) & hail.is_defined(grouped_ht.y)).to_pandas() + data = grouped_ht.filter(hail.is_defined(grouped_ht.x) & (grouped_ht.x != x_range[1]) & + hail.is_defined(grouped_ht.y) & (grouped_ht.y != y_range[1])).to_pandas() mapper = LinearColorMapper(palette=colors, low=data.c.min(), high=data.c.max()) From ffb030fdccf87bef3644f6c4fdb12a40af645f97 Mon Sep 17 00:00:00 2001 From: konradjk Date: Wed, 6 Feb 2019 12:26:54 -0500 Subject: [PATCH 10/20] doc fixes, added example --- hail/python/hail/plot/plots.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/hail/python/hail/plot/plots.py b/hail/python/hail/plot/plots.py index 2a3fef156d0..5659deb7c16 100644 --- a/hail/python/hail/plot/plots.py +++ b/hail/python/hail/plot/plots.py @@ -136,10 +136,14 @@ def cumulative_histogram(data, range=None, bins=50, legend=None, title=None, nor def histogram_2d(x, y, x_range=None, y_range=None, bins=40, x_bins=None, y_bins=None, plot_title='2-D histogram', plot_width=600, plot_height=600, font_size='7pt', colors=bokeh.palettes.all_palettes['Blues'][7][::-1]): - """Plot a 2-D histogram of x vs y, which are NumericExpressions from a Table. + """Plot a 2-D histogram of ``x`` vs ``y``, which are :class:`NumericExpression`s from a :class:`Table`. + + If ``x_range`` or ``y_range`` are not provided, the function will do a pass through the data to determine + min and max of each variable. + + >>> ht = hail.utils.range_table(1000).annotate(x=hail.rand_norm(), y=hail.rand_norm()) + >>> p_hist = hail.plot.histogram_2d(ht.x, ht.y) - If x_range or y_range are not provided, the function will do a pass through the data to determine min and max - of each variable. Parameters ---------- @@ -164,11 +168,11 @@ def histogram_2d(x, y, x_range=None, y_range=None, bins=40, x_bins=None, y_bins= plot_title : str Title of the plot. font_size : str - String of font size in points (default '7pt"). + String of font size in points (default '7pt'). colors : List[str] List of colors (hex codes, or strings as described - `here `). Effective with one of the many - built-in palettes available `here `. + `here `__). Effective with one of the many + built-in palettes available `here `__. Returns ------- From 8db275e6fa2b54e0da6fd078c1bcbc0633b785fc Mon Sep 17 00:00:00 2001 From: konradjk Date: Wed, 6 Feb 2019 12:26:59 -0500 Subject: [PATCH 11/20] bugfix --- hail/python/hail/plot/plots.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hail/python/hail/plot/plots.py b/hail/python/hail/plot/plots.py index 5659deb7c16..bae80f79882 100644 --- a/hail/python/hail/plot/plots.py +++ b/hail/python/hail/plot/plots.py @@ -220,8 +220,8 @@ def frange(start, stop, step): x=hail.str(x_levels.find(lambda w: x >= w)), y=hail.str(y_levels.find(lambda w: y >= w)) ).aggregate(c=hail.agg.count()) - data = grouped_ht.filter(hail.is_defined(grouped_ht.x) & (grouped_ht.x != x_range[1]) & - hail.is_defined(grouped_ht.y) & (grouped_ht.y != y_range[1])).to_pandas() + data = grouped_ht.filter(hail.is_defined(grouped_ht.x) & (grouped_ht.x != str(x_range[1])) & + hail.is_defined(grouped_ht.y) & (grouped_ht.y != str(y_range[1]))).to_pandas() mapper = LinearColorMapper(palette=colors, low=data.c.min(), high=data.c.max()) From 1c4b7ab63420a7a2eef434d365d5cb27d51f4292 Mon Sep 17 00:00:00 2001 From: konradjk Date: Wed, 6 Feb 2019 12:27:11 -0500 Subject: [PATCH 12/20] added example to plotting tutorial --- hail/python/hail/docs/tutorials/plotting.ipynb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hail/python/hail/docs/tutorials/plotting.ipynb b/hail/python/hail/docs/tutorials/plotting.ipynb index d28513e856d..9615e89c3a6 100644 --- a/hail/python/hail/docs/tutorials/plotting.ipynb +++ b/hail/python/hail/docs/tutorials/plotting.ipynb @@ -234,7 +234,10 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "p = hl.plot.histogram_2d(pca_scores.scores[0], pca_scores.scores[1])\n", + "show(p)" + ] } ], "metadata": { From 29849b524b38300954b568505de2906d2d54a170 Mon Sep 17 00:00:00 2001 From: konradjk Date: Wed, 6 Feb 2019 13:15:02 -0500 Subject: [PATCH 13/20] fix for an older version of bokeh than required by hail --- hail/python/hail/plot/plots.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hail/python/hail/plot/plots.py b/hail/python/hail/plot/plots.py index bae80f79882..3b160054144 100644 --- a/hail/python/hail/plot/plots.py +++ b/hail/python/hail/plot/plots.py @@ -230,8 +230,7 @@ def frange(start, stop, step): p = figure(title=plot_title, x_range=x_axis, y_range=y_axis, x_axis_location="above", plot_width=plot_width, plot_height=plot_height, - tools="hover,save,pan,box_zoom,reset,wheel_zoom", toolbar_location='below', - tooltips=[('x', '@x'), ('y', '@y',), ('count', '@c')]) + tools="hover,save,pan,box_zoom,reset,wheel_zoom", toolbar_location='below') p.grid.grid_line_color = None p.axis.axis_line_color = None @@ -275,6 +274,10 @@ def set_font_size(p, font_size: str = '12pt'): p.xaxis.group_text_font_size = font_size return p + tooltips = [('x', '@x'), ('y', '@y',), ('count', '@c')] + p.add_tools(HoverTool( + tooltips=tooltips + )) p = set_font_size(p, font_size) return p From 71c2c1105342ce888f3f669b06aba6db03aa5054 Mon Sep 17 00:00:00 2001 From: konradjk Date: Wed, 6 Feb 2019 18:43:08 -0500 Subject: [PATCH 14/20] fix tooltip, docstring cleanup --- hail/python/hail/plot/plots.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/hail/python/hail/plot/plots.py b/hail/python/hail/plot/plots.py index 3b160054144..4e9051d3752 100644 --- a/hail/python/hail/plot/plots.py +++ b/hail/python/hail/plot/plots.py @@ -136,7 +136,9 @@ def cumulative_histogram(data, range=None, bins=50, legend=None, title=None, nor def histogram_2d(x, y, x_range=None, y_range=None, bins=40, x_bins=None, y_bins=None, plot_title='2-D histogram', plot_width=600, plot_height=600, font_size='7pt', colors=bokeh.palettes.all_palettes['Blues'][7][::-1]): - """Plot a 2-D histogram of ``x`` vs ``y``, which are :class:`NumericExpression`s from a :class:`Table`. + """Plot a 2-D histogram. + + ``x`` and ``y`` must be :class:`NumericExpression`s from the same :class:`Table`. If ``x_range`` or ``y_range`` are not provided, the function will do a pass through the data to determine min and max of each variable. @@ -144,7 +146,6 @@ def histogram_2d(x, y, x_range=None, y_range=None, bins=40, x_bins=None, y_bins= >>> ht = hail.utils.range_table(1000).annotate(x=hail.rand_norm(), y=hail.rand_norm()) >>> p_hist = hail.plot.histogram_2d(ht.x, ht.y) - Parameters ---------- x : :class:`.NumericExpression` @@ -274,10 +275,7 @@ def set_font_size(p, font_size: str = '12pt'): p.xaxis.group_text_font_size = font_size return p - tooltips = [('x', '@x'), ('y', '@y',), ('count', '@c')] - p.add_tools(HoverTool( - tooltips=tooltips - )) + p.select_one(HoverTool).tooltips = [('x', '@x'), ('y', '@y',), ('count', '@c')] p = set_font_size(p, font_size) return p From cda40ac8b71b78eb6cc0979f54dbe9af1da0fe54 Mon Sep 17 00:00:00 2001 From: konradjk Date: Wed, 6 Feb 2019 18:55:26 -0500 Subject: [PATCH 15/20] describing plotting tutorial --- .../python/hail/docs/tutorials/plotting.ipynb | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/hail/python/hail/docs/tutorials/plotting.ipynb b/hail/python/hail/docs/tutorials/plotting.ipynb index 9615e89c3a6..60fa499b4f1 100644 --- a/hail/python/hail/docs/tutorials/plotting.ipynb +++ b/hail/python/hail/docs/tutorials/plotting.ipynb @@ -171,6 +171,25 @@ "show(gridplot([p, p2], ncols=2, plot_width=400, plot_height=400))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2-D histogram\n", + "\n", + "For visualizing relationships between variables in large datasets (where scatter plots may be less informative since they highlight outliers), the `histogram_2d()` function will create a heatmap with the number of observations in each section of a 2-d grid based on two variables." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p = hl.plot.histogram_2d(pca_scores.scores[0], pca_scores.scores[1])\n", + "show(p)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -228,16 +247,6 @@ "p = hl.plot.manhattan(gwas.p_value, hover_fields=hover_fields, collect_all=True)\n", "show(p)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "p = hl.plot.histogram_2d(pca_scores.scores[0], pca_scores.scores[1])\n", - "show(p)" - ] } ], "metadata": { From 8949d0504991e6654ddc3014bb7c6375c25aab17 Mon Sep 17 00:00:00 2001 From: konradjk Date: Wed, 6 Feb 2019 21:30:01 -0500 Subject: [PATCH 16/20] docfix --- hail/python/hail/plot/plots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail/python/hail/plot/plots.py b/hail/python/hail/plot/plots.py index 05b6642617d..12f38acb085 100644 --- a/hail/python/hail/plot/plots.py +++ b/hail/python/hail/plot/plots.py @@ -158,7 +158,7 @@ def histogram_2d(x, y, x_range=None, y_range=None, bins=40, x_bins=None, y_bins= colors=bokeh.palettes.all_palettes['Blues'][7][::-1]): """Plot a 2-D histogram. - ``x`` and ``y`` must be :class:`NumericExpression`s from the same :class:`Table`. + ``x`` and ``y`` must both be a :class:`NumericExpression` from the same :class:`Table`. If ``x_range`` or ``y_range`` are not provided, the function will do a pass through the data to determine min and max of each variable. From 34767ceb6d92c340033aebfefed8f3aae30b5747 Mon Sep 17 00:00:00 2001 From: konradjk Date: Thu, 7 Feb 2019 14:51:26 -0500 Subject: [PATCH 17/20] addressed comments, consolidated range and bins arguments --- hail/python/hail/plot/plots.py | 65 +++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/hail/python/hail/plot/plots.py b/hail/python/hail/plot/plots.py index 12f38acb085..f6bf3778bd3 100644 --- a/hail/python/hail/plot/plots.py +++ b/hail/python/hail/plot/plots.py @@ -147,52 +147,57 @@ def cumulative_histogram(data, range=None, bins=50, legend=None, title=None, nor return p -@typecheck(x=expr_numeric, y=expr_numeric, - x_range=nullable(sized_tupleof(numeric, numeric)), - y_range=nullable(sized_tupleof(numeric, numeric)), - bins=int, x_bins=nullable(int), y_bins=nullable(int), - plot_title=nullable(str), plot_width=int, plot_height=int, +@typecheck(x=expr_numeric, y=expr_numeric, bins=oneof(int, sequenceof(int)), + range=nullable(sized_tupleof(nullable(sized_tupleof(numeric, numeric)), + nullable(sized_tupleof(numeric, numeric)))), + title=nullable(str), width=int, height=int, font_size=str, colors=sequenceof(str)) -def histogram_2d(x, y, x_range=None, y_range=None, bins=40, x_bins=None, y_bins=None, - plot_title='2-D histogram', plot_width=600, plot_height=600, font_size='7pt', +def histogram_2d(x, y, bins=40, range=None, + title=None, width=600, height=600, font_size='7pt', colors=bokeh.palettes.all_palettes['Blues'][7][::-1]): - """Plot a 2-D histogram. + """Plot a two-dimensional histogram. ``x`` and ``y`` must both be a :class:`NumericExpression` from the same :class:`Table`. If ``x_range`` or ``y_range`` are not provided, the function will do a pass through the data to determine min and max of each variable. + Examples + -------- + >>> ht = hail.utils.range_table(1000).annotate(x=hail.rand_norm(), y=hail.rand_norm()) >>> p_hist = hail.plot.histogram_2d(ht.x, ht.y) + >>> ht = hail.utils.range_table(1000).annotate(x=hail.rand_norm(), y=hail.rand_norm()) + >>> p_hist = hail.plot.histogram_2d(ht.x, ht.y, bins=10, range=[[0, 1], None]) + Parameters ---------- x : :class:`.NumericExpression` Expression for x-axis (from a Hail table). y : :class:`.NumericExpression` Expression for y-axis (from the same Hail table as ``x``). - x_range : Tuple[float] - Tuple of (min, max) bounds for the x-axis. - y_range : Tuple[float] - Tuple of (min, max) bounds for the y-axis. - bins : int - Number of bins in the histogram (default 40). - x_bins : int - Number of bins on x-axis, will override ``bins`` if provided. - y_bins : int - Number of bins on y-axis, will override ``bins`` if provided. - plot_width : int + bins : int or [int, int] + The bin specification: + - If int, the number of bins for the two dimensions (nx = ny = bins). + - If [int, int], the number of bins in each dimension (nx, ny = bins). + The default value is 40. + range : None or [[float, float], [float, float]] + The leftmost and rightmost edges of the bins along each dimension: + [[xmin, xmax], [ymin, ymax]]. All values outside of this range will be considered outliers + and not tallied in the histogram. If this value is None, or either of the inner lists is None, + the range will be computed from the data. + width : int Plot width (default 600px). - plot_height : int + height : int Plot height (default 600px). - plot_title : str + title : str Title of the plot. font_size : str String of font size in points (default '7pt'). colors : List[str] List of colors (hex codes, or strings as described - `here `__). Effective with one of the many + `here `__). Compatible with one of the many built-in palettes available `here `__. Returns @@ -210,10 +215,14 @@ def histogram_2d(x, y, x_range=None, y_range=None, bins=40, x_bins=None, y_bins= raise ValueError(f"histogram_2d expects two expressions from the same 'Table', found {source} and {y_source}") check_row_indexed('histogram_2d', x) check_row_indexed('histogram_2d', y) - if x_bins is None: - x_bins = bins - if y_bins is None: - y_bins = bins + if isinstance(bins, int): + x_bins = y_bins = bins + else: + x_bins, y_bins = bins + if range is None: + x_range = y_range = None + else: + x_range, y_range = range if x_range is None or y_range is None: warnings.warn('At least one range was not defined in histogram_2d. Doing two passes...') ranges = source.aggregate(hail.struct(x_stats=hail.agg.stats(x), @@ -248,9 +257,9 @@ def frange(start, stop, step): x_axis = sorted(set(data.x), key=lambda z: float(z)) y_axis = sorted(set(data.y), key=lambda z: float(z)) - p = figure(title=plot_title, + p = figure(title=title, x_range=x_axis, y_range=y_axis, - x_axis_location="above", plot_width=plot_width, plot_height=plot_height, + x_axis_location="above", plot_width=width, plot_height=height, tools="hover,save,pan,box_zoom,reset,wheel_zoom", toolbar_location='below') p.grid.grid_line_color = None From 5605cf63a6ea1ce09a606adefa96098feaba1403 Mon Sep 17 00:00:00 2001 From: konradjk Date: Thu, 7 Feb 2019 14:59:56 -0500 Subject: [PATCH 18/20] removed underscore from name --- hail/python/hail/docs/plot.rst | 4 ++-- hail/python/hail/docs/tutorials/plotting.ipynb | 2 +- hail/python/hail/plot/__init__.py | 4 ++-- hail/python/hail/plot/plots.py | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hail/python/hail/docs/plot.rst b/hail/python/hail/docs/plot.rst index e5a51acc2bd..0bd42563310 100644 --- a/hail/python/hail/docs/plot.rst +++ b/hail/python/hail/docs/plot.rst @@ -21,14 +21,14 @@ Plot functions in Hail accept data in the form of either Python objects or :clas histogram cumulative_histogram - histogram_2d + histogram2d scatter qq manhattan .. autofunction:: histogram .. autofunction:: cumulative_histogram -.. autofunction:: histogram_2d +.. autofunction:: histogram2d .. autofunction:: scatter .. autofunction:: qq .. autofunction:: manhattan diff --git a/hail/python/hail/docs/tutorials/plotting.ipynb b/hail/python/hail/docs/tutorials/plotting.ipynb index 60fa499b4f1..749d037d99d 100644 --- a/hail/python/hail/docs/tutorials/plotting.ipynb +++ b/hail/python/hail/docs/tutorials/plotting.ipynb @@ -186,7 +186,7 @@ "metadata": {}, "outputs": [], "source": [ - "p = hl.plot.histogram_2d(pca_scores.scores[0], pca_scores.scores[1])\n", + "p = hl.plot.histogram2d(pca_scores.scores[0], pca_scores.scores[1])\n", "show(p)" ] }, diff --git a/hail/python/hail/plot/__init__.py b/hail/python/hail/plot/__init__.py index d3a200df57f..9c12289b149 100644 --- a/hail/python/hail/plot/__init__.py +++ b/hail/python/hail/plot/__init__.py @@ -1,10 +1,10 @@ -from .plots import output_notebook, show, histogram, cumulative_histogram, histogram_2d, scatter, qq, manhattan +from .plots import output_notebook, show, histogram, cumulative_histogram, histogram2d, scatter, qq, manhattan __all__ = ['output_notebook', 'show', 'histogram', 'cumulative_histogram', 'scatter', - 'histogram_2d', + 'histogram2d', 'qq', 'manhattan'] diff --git a/hail/python/hail/plot/plots.py b/hail/python/hail/plot/plots.py index f6bf3778bd3..76022a61c6a 100644 --- a/hail/python/hail/plot/plots.py +++ b/hail/python/hail/plot/plots.py @@ -152,7 +152,7 @@ def cumulative_histogram(data, range=None, bins=50, legend=None, title=None, nor nullable(sized_tupleof(numeric, numeric)))), title=nullable(str), width=int, height=int, font_size=str, colors=sequenceof(str)) -def histogram_2d(x, y, bins=40, range=None, +def histogram2d(x, y, bins=40, range=None, title=None, width=600, height=600, font_size='7pt', colors=bokeh.palettes.all_palettes['Blues'][7][::-1]): """Plot a two-dimensional histogram. @@ -166,10 +166,10 @@ def histogram_2d(x, y, bins=40, range=None, -------- >>> ht = hail.utils.range_table(1000).annotate(x=hail.rand_norm(), y=hail.rand_norm()) - >>> p_hist = hail.plot.histogram_2d(ht.x, ht.y) + >>> p_hist = hail.plot.histogram2d(ht.x, ht.y) >>> ht = hail.utils.range_table(1000).annotate(x=hail.rand_norm(), y=hail.rand_norm()) - >>> p_hist = hail.plot.histogram_2d(ht.x, ht.y, bins=10, range=[[0, 1], None]) + >>> p_hist = hail.plot.histogram2d(ht.x, ht.y, bins=10, range=[[0, 1], None]) Parameters ---------- From 567243a79ddb6105e9f50bd93442202be87db48e Mon Sep 17 00:00:00 2001 From: konradjk Date: Fri, 8 Feb 2019 00:50:53 -0500 Subject: [PATCH 19/20] moving title font size out if none exists --- hail/python/hail/plot/plots.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hail/python/hail/plot/plots.py b/hail/python/hail/plot/plots.py index 76022a61c6a..ea600a183b2 100644 --- a/hail/python/hail/plot/plots.py +++ b/hail/python/hail/plot/plots.py @@ -294,12 +294,13 @@ def set_font_size(p, font_size: str = '12pt'): ------- :class:`bokeh.plotting.figure.Figure` """ - p.title.text_font_size = font_size p.legend.label_text_font_size = font_size p.xaxis.axis_label_text_font_size = font_size p.yaxis.axis_label_text_font_size = font_size p.xaxis.major_label_text_font_size = font_size p.yaxis.major_label_text_font_size = font_size + if hasattr(p.title, 'text_font_size'): + p.title.text_font_size = font_size if hasattr(p.xaxis, 'group_text_font_size'): p.xaxis.group_text_font_size = font_size return p From d25eabfca91d43e402e5085fd1ee9ba837d0c2bb Mon Sep 17 00:00:00 2001 From: konradjk Date: Fri, 8 Feb 2019 01:35:08 -0500 Subject: [PATCH 20/20] list to tuple --- hail/python/hail/plot/plots.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hail/python/hail/plot/plots.py b/hail/python/hail/plot/plots.py index ea600a183b2..b7614234818 100644 --- a/hail/python/hail/plot/plots.py +++ b/hail/python/hail/plot/plots.py @@ -169,7 +169,7 @@ def histogram2d(x, y, bins=40, range=None, >>> p_hist = hail.plot.histogram2d(ht.x, ht.y) >>> ht = hail.utils.range_table(1000).annotate(x=hail.rand_norm(), y=hail.rand_norm()) - >>> p_hist = hail.plot.histogram2d(ht.x, ht.y, bins=10, range=[[0, 1], None]) + >>> p_hist = hail.plot.histogram2d(ht.x, ht.y, bins=10, range=((0, 1), None)) Parameters ---------- @@ -182,9 +182,9 @@ def histogram2d(x, y, bins=40, range=None, - If int, the number of bins for the two dimensions (nx = ny = bins). - If [int, int], the number of bins in each dimension (nx, ny = bins). The default value is 40. - range : None or [[float, float], [float, float]] + range : None or ((float, float), (float, float)) The leftmost and rightmost edges of the bins along each dimension: - [[xmin, xmax], [ymin, ymax]]. All values outside of this range will be considered outliers + ((xmin, xmax), (ymin, ymax)). All values outside of this range will be considered outliers and not tallied in the histogram. If this value is None, or either of the inner lists is None, the range will be computed from the data. width : int