Skip to content

Commit

Permalink
Support categorical only input to bar chart, refactor bar chart attri…
Browse files Browse the repository at this point in the history
…butes. Add a setup method to builder for any pre glyph creation setup. Have chart handle builder providing x/y labels. If a column isn't provided, provide indication that the column was computed. Add a column property that extends the array property and is always an instance of a pandas series.
  • Loading branch information
nroth-dealnews committed Aug 25, 2015
1 parent 6d60be3 commit 3eaa24a
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 41 deletions.
9 changes: 7 additions & 2 deletions bokeh/charts/_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
from itertools import cycle
from copy import copy

from bokeh.properties import HasProps, String, List, Instance
from bokeh.properties import HasProps, String, List, Instance, Either
from bokeh.models.sources import ColumnDataSource
from bokeh.charts import DEFAULT_PALETTE
from bokeh.charts._properties import ColumnLabel
from bokeh.charts.utils import marker_types


Expand All @@ -21,7 +22,7 @@ class AttrSpec(HasProps):

data = Instance(ColumnDataSource)
name = String(help='Name of the attribute the spec provides.')
columns = List(String)
columns = Either(ColumnLabel, List(ColumnLabel))

def __init__(self, columns=None, df=None, iterable=None, default=None, **properties):

Expand Down Expand Up @@ -80,6 +81,10 @@ def _create_attr_map(self, df, columns):
iter_map[item] = next(iterable)
return iter_map

def set_columns(self, columns):
    """Replace the column selection of this spec and rebuild dependent state.

    ``columns`` is passed through ``self._ensure_list`` before it is stored
    on ``self.columns``; ``self.setup()`` is then called so that state
    derived from the columns is recomputed.
    """
    normalized = self._ensure_list(columns)
    self.columns = normalized
    self.setup()

def setup(self):
if self.columns is not None and self.data is not None:
self._attr_map = self._create_attr_map(self.data.to_df(), self.columns)
Expand Down
11 changes: 11 additions & 0 deletions bokeh/charts/_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ class Builder(HasProps):
x_range = Instance(Range)
y_range = Instance(Range)

xlabel = String()
ylabel = String()

# Dimensional Modeling
dimensions = List(String, help="""The dimension
labels that drive the position of the glyphs.""")
Expand Down Expand Up @@ -202,6 +205,10 @@ def _setup_attrs(self, data, kws):
# Store updated attributes
self.attributes = attributes

def _setup(self):
    """Perform any initial pre-processing and attribute configuration.

    The base implementation does nothing; ``create`` calls it before
    ``_process_data``.
    """

def _process_data(self):
"""Make any global data manipulations before grouping.
Expand All @@ -228,6 +235,7 @@ def _yield_renderers(self):
raise NotImplementedError('Subclasses of %s must implement _yield_renderers.' % self.__class__.__name__)

def create(self, chart=None):
self._setup()
self._process_data()

renderers = self._yield_renderers()
Expand All @@ -242,6 +250,9 @@ def create(self, chart=None):
# always contribute legends, let Chart sort it out
chart.add_legend(self._legends)

chart.add_labels('x', self.xlabel)
chart.add_labels('y', self.ylabel)

return chart


Expand Down
16 changes: 14 additions & 2 deletions bokeh/charts/_chart.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def __init__(self):
self._builders = []
self._renderer_map = []
self._ranges = defaultdict(list)
self._labels = defaultdict(list)

# Add to document and session if server output is asked
_doc = None
Expand Down Expand Up @@ -127,9 +128,18 @@ def add_builder(self, builder):
def add_ranges(self, dim, range):
    """Append ``range`` to the ranges collected for dimension ``dim``."""
    collected = self._ranges[dim]
    collected.append(range)

def add_labels(self, dim, label):
    """Append ``label`` to the labels collected for dimension ``dim``."""
    collected = self._labels[dim]
    collected.append(label)

def _get_labels(self, dim):
    """Return the axis label to use for dimension ``dim``.

    A truthy label set in the chart options (``self._options.<dim>label``)
    always wins. Otherwise the labels collected in ``self._labels[dim]`` are
    consulted in the order they were added.

    Args:
        dim (str): dimension prefix; it is concatenated with ``'label'`` to
            form the option name looked up on ``self._options``.

    Returns:
        The option value when it is truthy; else the first truthy collected
        label; else the first collected label (which may be ``None`` or
        empty); else the falsy option value when nothing was collected.
    """
    option_label = getattr(self._options, dim + 'label')
    if option_label:
        return option_label

    contributed = self._labels[dim]
    if not contributed:
        return option_label

    # A contributor can register a label that is still unset (None/empty),
    # so the first entry is not necessarily usable. Prefer the first real
    # label and only fall back to the first entry when none is set.
    return next((label for label in contributed if label), contributed[0])

def create_axes(self):
self._xaxis = self.make_axis('x', "below", self._options.xscale, self._options.xlabel)
self._yaxis = self.make_axis('y', "left", self._options.yscale, self._options.ylabel)
self._xaxis = self.make_axis('x', "below", self._options.xscale, self._get_labels('x'))
self._yaxis = self.make_axis('y', "left", self._options.yscale, self._get_labels('y'))

def create_grids(self, xgrid=True, ygrid=True):
if xgrid:
Expand Down Expand Up @@ -213,6 +223,8 @@ def make_axis(self, dim, location, scale, label):
axis = CategoricalAxis(
major_label_orientation=np.pi / 4, axis_label=label
)
else:
axis = LinearAxis(axis_label=label)

self.add_layout(axis, location)
return axis
Expand Down
8 changes: 8 additions & 0 deletions bokeh/charts/_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
blaze = None

DEFAULT_COLUMN_NAMES = 'abcdefghijklmnopqrstuvwxyz'
COMPUTED_COLUMN_NAMES = ['_charts_ones']
ARRAY_TYPES = [tuple, list, np.ndarray, pd.Series]
TABLE_TYPES = [dict, pd.DataFrame]

Expand Down Expand Up @@ -364,3 +365,10 @@ def index(self):
def values(self):
return self._data.values

@staticmethod
def is_computed(column):
    """Return True when ``column`` is listed in COMPUTED_COLUMN_NAMES."""
    return column in COMPUTED_COLUMN_NAMES

47 changes: 42 additions & 5 deletions bokeh/charts/_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,32 @@
Bool, PrimitiveProperty, bokeh_integer_types, Array)


class Column(Array):
    """Array property that also accepts a pandas Series and returns a Series."""

    def _is_seq(self, value):
        """Accept whatever the parent accepts, plus pandas Series."""
        return super(Column, self)._is_seq(value) or isinstance(value, pd.Series)

    def _new_instance(self, value):
        """Wrap ``value`` in a pandas Series."""
        return pd.Series(value)

    def transform(self, value):
        """Run the parent transform on the raw values and return a Series.

        A Series input is unwrapped to its underlying ``.values`` before it
        is handed to the parent transform.

        Raises:
            ValueError: if the transformed data cannot be turned into a
                pandas Series.
        """
        raw = value.values if isinstance(value, pd.Series) else value
        transformed = super(Column, self).transform(raw)

        try:
            return pd.Series(transformed)
        except ValueError:
            raise ValueError("Could not transform %r" % value)


class Logical(Bool):
"""A boolean like data type."""
def validate(self, value):
Expand All @@ -32,19 +58,19 @@ def validate(self, value):
raise ValueError('expected a Bool or array with 2 unique values, got %s' % value)


class Column(Either):
class ColumnLabel(Either):
"""Specify a column by name or index."""

def __init__(self, columns=None, default=None, help=None):
# ToDo: make sure we can select by integer
types = (String,
Int)
self.columns = columns
super(Column, self).__init__(*types, default=default, help=help)
super(ColumnLabel, self).__init__(*types, default=default, help=help)

def validate(self, value):
"""If we are given a column list, make sure that the column provided is valid."""
super(Column, self).validate(value)
super(ColumnLabel, self).validate(value)

if self.columns:
if type(value) in bokeh_integer_types:
Expand Down Expand Up @@ -75,17 +101,18 @@ class Dimension(HasProps):

name = String()
alt_names = Either(String, List(String), default=None)
columns = Either(Column, List(Column), default=None)
columns = Either(ColumnLabel, List(ColumnLabel), default=None)

valid = Either(PrimitiveProperty, List(PrimitiveProperty), default=None)
invalid = Either(PrimitiveProperty, List(PrimitiveProperty), default=None)

selection = Either(Column, List(Column), default=None)
selection = Either(ColumnLabel, List(ColumnLabel), default=None)

def __init__(self, name, **properties):
    """Create the dimension with ``name`` and any extra property values.

    ``name`` is forwarded to the parent initializer together with the other
    properties. The instance starts with an empty DataFrame as its data and
    no chart source.
    """
    super(Dimension, self).__init__(name=name, **properties)
    # Both are populated later by ``set_data``.
    self._data = pd.DataFrame()
    self._chart_source = None

def get_valid_types(self, col_data):
"""Returns all property types that are matched."""
Expand Down Expand Up @@ -123,7 +150,10 @@ def data(self):
def set_data(self, data):
    """Attach the builder-provided data so configuration metadata is available.

    Stores ``data[self.name]`` as the selection, keeps ``data`` itself as the
    chart source and ``data.df`` as the underlying frame. When no columns
    were configured, every column of that frame is used.
    """
    self.selection = data[self.name]
    self._chart_source = data

    frame = data.df
    self._data = frame

    if self.columns is None:
        self.columns = list(frame.columns.values)

@property
def min(self):
Expand All @@ -140,3 +170,10 @@ def max(self):
return self.data.max()
else:
return self.data.max(axis=1).max()

@property
def computed(self):
    """Whether the selected column is a computed one.

    Returns False while no chart source has been set; otherwise returns what
    the chart source's ``is_computed`` reports for the current selection.
    """
    source = self._chart_source
    return False if source is None else source.is_computed(self.selection)
66 changes: 43 additions & 23 deletions bokeh/charts/builder/bar_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
from .._builder import Builder, create_and_build
from ...models import ColumnDataSource, FactorRange, GlyphRenderer, Range1d
from ...models.glyphs import Rect
from ...properties import Any, Bool, Either, List, String, Array, Float, HasProps, Instance
from .._properties import Dimension
from ...properties import Any, Bool, Either, List, String, Float, HasProps, Instance
from .._properties import Dimension, Column
from .._attributes import ColorAttr, NestedAttr

#-----------------------------------------------------------------------------
Expand Down Expand Up @@ -103,24 +103,13 @@ def Bar(data, label=None, values=None, color=None, stack=None, group=None, agg="
return create_and_build(BarBuilder, data, **kw)


class GlyphTransform(object):

def stack(self, *glyphs):
pass

def dodge(self, *glyphs):
pass

def jitter(self, *glyphs):
pass


class BarGlyph(HasProps):
"""Represents a single bar within a bar chart."""

label = String('All')
values = Either(Array(Float), Array(String))
values = Either(Column(Float), Column(String))
agg = String('sum')
width = Float(default=0.8)
source = Instance(ColumnDataSource)

def __init__(self, label, values, agg, **kwargs):
Expand All @@ -136,8 +125,8 @@ def __init__(self, label, values, agg, **kwargs):
self.source = self.aggregate()

def aggregate(self):
width = [0.5]
height = [getattr(np, self.agg)(self.values)]
width = [self.width]
height = [getattr(self.values, self.agg)()]
x = [self.label]
y = [height[0]/2]

Expand All @@ -149,7 +138,6 @@ def renderers(self):
return GlyphRenderer(data_source=self.source, glyph=glyph)



class BarBuilder(Builder):
"""This is the Bar class and it is in charge of plotting
Bar chart (grouped and stacked) in an easy and intuitive way.
Expand Down Expand Up @@ -181,11 +169,39 @@ class BarBuilder(Builder):
['label', 'values']]

attributes = {'color': ColorAttr(),
'stack': NestedAttr()}
'stack': NestedAttr(),
'group': NestedAttr()}

group = Bool(False)
agg = String('sum')

max_height = Float(1.0)
bar_width = Float(default=0.8)

def _setup(self):
    """Fill in grouping, aggregation and axis-label defaults.

    * When neither the 'stack' nor the 'group' attribute has columns, the
      label selection becomes the group columns.
    * When the values dimension is computed (no values column was given),
      the aggregation is set to 'count'.
    * ``xlabel`` and ``ylabel`` are derived only while they are ``None``.
    """
    attrs = self.attributes
    stack_attr = attrs['stack']
    group_attr = attrs['group']

    # label is equivalent to group
    if stack_attr.columns is None and group_attr.columns is None:
        group_attr.set_columns(self.label.selection)

    # ToDo: perform aggregation validation
    # Not given values kw, so using only categorical data
    values_computed = self.values.computed
    if values_computed:
        self.agg = 'count'  # agg must be count

    if self.xlabel is None:
        self.xlabel = str(self.label.selection).title()

    if self.ylabel is not None:
        return

    # With computed values there is no values column to name, so the label
    # column is described instead.
    described = self.label if values_computed else self.values
    self.ylabel = '%s( %s )' % (self.agg.title(), str(described.selection).title())

def _process_data(self):
"""Take the Bar data from the input **value.
Expand All @@ -200,8 +216,10 @@ def _set_ranges(self):
"""Push the Bar data into the ColumnDataSource and calculate
the proper ranges.
"""
x_items = self.attributes['stack']._items
x_items = self.attributes['group']._items
x_labels = []

# Items are identified by tuples. If the tuple has a single value, we unpack it
for item in x_items:
if len(item) == 1:
item = item[0]
Expand All @@ -219,12 +237,14 @@ def _yield_renderers(self):

color = self.attributes['color']
stack = self.attributes['stack']
group = self.attributes['group']

for group in self._data.groupby(color, stack):
for group in self._data.groupby(color, stack, group):

renderer = BarGlyph(label=group.label,
values=group.data[self.values.selection].values,
agg=self.agg).renderers
agg=self.agg,
width=self.bar_width).renderers

# a higher level function of bar chart is to keep track of max height of all bars
self.max_height = max(max(renderer.data_source._data['height']), self.max_height)
Expand Down
23 changes: 14 additions & 9 deletions examples/charts/file/bar.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from bokeh.charts import Bar, output_file, show, vplot, hplot
from bokeh.models import Range1d
from bokeh.sampledata.autompg import autompg as df

width = 700
height = 500
width = 500
height = 400
legend_position = "top_right"

bar_plot = Bar(
df, label='cyl', values='mpg', stack='cyl', agg='mean',
title="label='cyl' values='mpg', agg='mean'",
ylabel="Mean(mpg)", xlabel="Cylinder", width=width, height=height
)
bar_plot = Bar(df, label='cyl', title="label='cyl'")

bar_plot2 = Bar(df, label='cyl', bar_width=0.4, title="label='cyl' bar_width=0.4")

bar_plot3 = Bar(df, label='cyl', values='mpg', agg='mean',
title="label='cyl' values='mpg', agg='mean'")

# np_negative_grouped = Bar(
# random * -1, cat=categories, title="All negative input | Grouped",
Expand All @@ -31,4 +31,9 @@
# collect and display
output_file("bar.html")

show(bar_plot)
show(
vplot(
hplot(bar_plot, bar_plot2),
hplot(bar_plot3)
)
)

0 comments on commit 3eaa24a

Please sign in to comment.