Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add a cumsum transform to cumulatively sum a single column #7961

Merged
merged 6 commits into from Jun 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
30 changes: 29 additions & 1 deletion bokeh/models/expressions.py
Expand Up @@ -24,7 +24,7 @@
from __future__ import absolute_import

from ..core.has_props import abstract
from ..core.properties import Seq, String
from ..core.properties import Bool, Seq, String
from ..model import Model

@abstract
Expand All @@ -46,6 +46,34 @@ class Expression(Model):
'''
pass

class CumSum(Expression):
    ''' An expression for generating arrays by cumulatively summing a single
    column from a ``ColumnDataSource``.

    Two properties configure the expression: ``field`` names the column to
    sum, and ``include_zero`` controls whether the result begins with a zero.

    '''

    # Name of the data source column whose values are accumulated.
    field = String(help="""
    The name of a ColumnDataSource column to cumulatively sum for new values.
    """)

    # Defaults to False. When True the result starts at zero; because the
    # output length always equals the input length, the last column value is
    # then left out of the sums (see the example in the help text).
    include_zero = Bool(default=False, help="""
    Whether to include zero at the start of the result. Note that the length
    of the result is always the same as the input column. Therefore if this
    property is True, then the last value of the column will not be included
    in the sum.

    .. code-block:: python

        source = ColumnDataSource(data=dict(foo=[1, 2, 3, 4]))

        CumSum(field='foo')
        # -> [1, 3, 6, 10]

        CumSum(field='foo', include_zero=True)
        # -> [0, 1, 3, 6]

    """)

class Stack(Expression):
''' An expression for generating arrays by summing different columns from
a ``ColumnDataSource``.
Expand Down
22 changes: 21 additions & 1 deletion bokeh/tests/test_transform.py
Expand Up @@ -22,7 +22,7 @@
# External imports

# Bokeh imports
from bokeh.models import CategoricalColorMapper, Dodge, FactorRange, Jitter, LinearColorMapper, LogColorMapper, Stack
from bokeh.models import CategoricalColorMapper, CumSum, Dodge, FactorRange, Jitter, LinearColorMapper, LogColorMapper, Stack
from bokeh.util.testing import verify_all

# Module under test
Expand All @@ -33,6 +33,7 @@
#-----------------------------------------------------------------------------

ALL = (
'cumsum',
'dodge',
'factor_cmap',
'jitter',
Expand All @@ -48,6 +49,25 @@

Test___all__ = verify_all(bt, ALL)

class Test_cumsum(object):
    ''' Tests for the ``bt.cumsum`` helper. '''

    def test_basic(self):
        # With only a field name, the helper should return a one-key "expr"
        # dict wrapping a CumSum whose include_zero is left at False.
        s = bt.cumsum("foo")
        assert isinstance(s, dict)
        assert list(s.keys()) == ["expr"]
        assert isinstance(s['expr'], CumSum)
        assert s['expr'].field == 'foo'
        assert s['expr'].include_zero is False

    def test_include_zero(self):
        # include_zero=True must be forwarded to the wrapped CumSum model.
        s = bt.cumsum("foo", include_zero=True)
        assert isinstance(s, dict)
        assert list(s.keys()) == ["expr"]
        assert isinstance(s['expr'], CumSum)
        assert s['expr'].field == 'foo'
        assert s['expr'].include_zero is True


class Test_dodge(object):

def test_basic(self):
Expand Down
25 changes: 24 additions & 1 deletion bokeh/transform.py
Expand Up @@ -28,7 +28,7 @@

# Bokeh imports
from .core.properties import expr, field
from .models.expressions import Stack
from .models.expressions import CumSum, Stack
from .models.mappers import CategoricalColorMapper, LinearColorMapper, LogColorMapper
from .models.transforms import Dodge, Jitter

Expand All @@ -37,6 +37,7 @@
#-----------------------------------------------------------------------------

__all__ = (
'cumsum',
'dodge',
'factor_cmap',
'jitter',
Expand All @@ -50,6 +51,27 @@
# General API
#-----------------------------------------------------------------------------

def cumsum(field, include_zero=False):
    ''' Create a ``DataSpec`` dict that generates a ``CumSum`` expression
    for a ``ColumnDataSource``.

    Args:
        field (str) : name of the data source column to cumulatively sum

        include_zero (bool, optional) : passed through to the ``CumSum``
            expression as its ``include_zero`` setting (default: False)

    Returns:
        dict

    Examples:

        .. code-block:: python

            p.wedge(start_angle=cumsum('angle', include_zero=True),
                    end_angle=cumsum('angle'),
                    ...)

        will generate ``CumSum`` expressions that sum the ``"angle"`` column
        of a data source. For the ``start_angle`` value, the cumulative sums
        will start with a zero value. For ``end_angle``, no initial zero will
        be added (i.e. the sums will start with the first angle value, and
        include the last).

    '''
    # Build the expression model first, then wrap it as an expression spec.
    cumulative = CumSum(field=field, include_zero=include_zero)
    return expr(cumulative)

def dodge(field_name, value, range=None):
''' Create a ``DataSpec`` dict to apply a client-side ``Jitter``
transformation to a ``ColumnDataSource`` column.
Expand Down Expand Up @@ -211,6 +233,7 @@ def stack(*fields):
coordinate for a ``VBar``.

'''

return expr(Stack(fields=fields))

def transform(field_name, transform):
Expand Down
45 changes: 45 additions & 0 deletions bokehjs/src/lib/models/expressions/cumsum.ts
@@ -0,0 +1,45 @@
import {ColumnarDataSource} from "../sources/columnar_data_source"
import {Expression} from "./expression"
import {Arrayable} from "core/types"
import * as p from "core/properties"

// Type declarations that accompany the CumSum model class of the same name.
export namespace CumSum {
  // Attributes of a CumSum, in addition to those inherited from Expression.
  export interface Attrs extends Expression.Attrs {
    // Name of the data source column to cumulatively sum.
    field: string
    // Whether the computed result starts with a zero.
    include_zero: boolean
  }

  // Adds nothing beyond Expression.Props.
  export interface Props extends Expression.Props {}
}

// Merges the Attrs members into the CumSum type so they are typed on instances.
export interface CumSum extends CumSum.Attrs {}

// Expression model that produces the running sum of one data source column.
export class CumSum extends Expression {

  properties: CumSum.Props

  constructor(attrs?: Partial<CumSum.Attrs>) {
    super(attrs)
  }

  // Sets the model type name and declares the `field` and `include_zero`
  // properties (`include_zero` defaults to false).
  static initClass(): void {
    this.prototype.type = "CumSum"

    this.define({
      field:        [ p.String         ],
      include_zero: [ p.Boolean, false ],
    })
  }

  // Returns a Float64Array as long as the source, where each entry is the
  // previous entry plus one column value. With `include_zero` the first entry
  // is 0 and the column is consumed one position behind the output index.
  _v_compute(source: ColumnarDataSource): Arrayable<number> {
    const n = source.get_length() || 0
    const sums = new Float64Array(n)
    const values = source.data[this.field]
    const with_zero = this.include_zero

    // Index shift between the output position and the column position.
    const shift = with_zero ? 1 : 0

    if (with_zero) {
      sums[0] = 0
    } else {
      sums[0] = values[0]
    }

    for (let i = 1; i < n; i++) {
      sums[i] = sums[i-1] + values[i-shift]
    }

    return sums
  }
}
CumSum.initClass()
1 change: 1 addition & 0 deletions bokehjs/src/lib/models/expressions/index.ts
@@ -1,2 +1,3 @@
export {Expression} from "./expression"
export {Stack} from "./stack"
export {CumSum} from "./cumsum"
68 changes: 68 additions & 0 deletions bokehjs/test/models/expressions/cumsum.coffee
@@ -0,0 +1,68 @@
{expect} = require "chai"

{ColumnDataSource} = require("models/sources/column_data_source")
{CumSum} = require("models/expressions/cumsum")

# Unit tests for the CumSum expression's v_compute over ColumnDataSource data.
describe "CumSum", ->

  it "should compute for a source", ->
    source = new ColumnDataSource({data: {foo: [1, 2, 3, 4]}})
    s = new CumSum({field: 'foo'})
    ret = s.v_compute(source)
    expect(ret).to.deep.equal new Float64Array([1, 3, 6, 10])

    # With include_zero the result starts at 0 and keeps the same length.
    s = new CumSum({field: 'foo', include_zero: true})
    ret = s.v_compute(source)
    expect(ret).to.deep.equal new Float64Array([0, 1, 3, 6])

  it "should compute for different sources", ->
    source1 = new ColumnDataSource({data: {foo: [1, 2, 3, 4]}})
    source2 = new ColumnDataSource({data: {foo: [10, 20, 30, 40]}})
    s = new CumSum({field: 'foo'})
    ret = s.v_compute(source1)
    expect(ret).to.deep.equal new Float64Array([1, 3, 6, 10])

    s = new CumSum({field: 'foo', include_zero: true})
    ret = s.v_compute(source1)
    expect(ret).to.deep.equal new Float64Array([0, 1, 3, 6])

    s = new CumSum({field: 'foo'})
    ret = s.v_compute(source2)
    expect(ret).to.deep.equal new Float64Array([10, 30, 60, 100])

    s = new CumSum({field: 'foo', include_zero: true})
    ret = s.v_compute(source2)
    expect(ret).to.deep.equal new Float64Array([0, 10, 30, 60])

  it "should re-compute if a source changes", ->
    source = new ColumnDataSource({data: {foo: [1, 2, 3, 4]}})
    s = new CumSum({field: 'foo'})
    ret = s.v_compute(source)
    expect(ret).to.deep.equal new Float64Array([1, 3, 6, 10])

    # Replacing the data wholesale must be reflected in the next compute.
    source.data = {foo: [10, 20, 30, 40]}
    ret = s.v_compute(source)
    expect(ret).to.deep.equal new Float64Array([10, 30, 60, 100])

  it "should re-compute if a source patches", ->
    source = new ColumnDataSource({data: {foo: [1, 2, 3, 4]}})
    s = new CumSum({field: 'foo'})
    ret = s.v_compute(source)
    expect(ret).to.deep.equal new Float64Array([1, 3, 6, 10])

    # Patch index 1 to 12, then index 0 to 1.1, re-computing after each.
    source.patch({"foo": [[1, 12]]})
    ret = s.v_compute(source)
    expect(ret).to.deep.equal new Float64Array([1, 13, 16, 20])

    source.patch({"foo": [[0, 1.1]]})
    ret = s.v_compute(source)
    expect(ret).to.deep.equal new Float64Array([1.1, 13.1, 16.1, 20.1])

  it "should re-compute if a source streams", ->
    source = new ColumnDataSource({data: {foo: [1, 2, 3, 4]}})
    s = new CumSum({field: 'foo'})
    ret = s.v_compute(source)
    expect(ret).to.deep.equal new Float64Array([1, 3, 6, 10])

    # Streaming appends a value, so the result grows by one entry.
    source.stream({foo: [5]})
    ret = s.v_compute(source)
    expect(ret).to.deep.equal new Float64Array([1, 3, 6, 10, 15])
1 change: 1 addition & 0 deletions bokehjs/test/models/expressions/index.ts
@@ -1 +1,2 @@
import "./cumsum"
import "./stack"
3 changes: 2 additions & 1 deletion bokehjs/tslint.json
Expand Up @@ -27,6 +27,7 @@
"no-var-keyword": true,
"no-string-throw": true,
"no-invalid-template-strings": true,
"return-type": true
"return-type": true,
"no-debugger": true
}
}
43 changes: 43 additions & 0 deletions examples/plotting/file/pie.py
@@ -0,0 +1,43 @@
from collections import Counter
from math import pi

import pandas as pd

from bokeh.io import output_file, show
from bokeh.palettes import Category20c
from bokeh.plotting import figure
from bokeh.transform import cumsum

# Write the rendered plot to an HTML file. The target must not be "pie.py":
# that is this script's own name, and running the example from its directory
# would overwrite the source with the generated HTML.
output_file("pie.html")

# Raw values to chart, keyed by country.
x = Counter({
    'United States': 157,
    'United Kingdom': 93,
    'Japan': 89,
    'China': 63,
    'Germany': 44,
    'India': 42,
    'Italy': 40,
    'Australia': 35,
    'Brazil': 32,
    'France': 31,
    'Taiwan': 31,
    'Spain': 29
})

# One row per country, with columns named 'country' and 'value'.
data = pd.DataFrame.from_dict(dict(x), orient='index').reset_index().rename(index=str, columns={0:'value', 'index':'country'})
# Each wedge's angular size is its share of the total, scaled to a full circle.
data['angle'] = data['value']/sum(x.values()) * 2*pi
# One palette color per row.
data['color'] = Category20c[len(x)]

p = figure(plot_height=350, title="Pie Chart", toolbar_location=None,
           tools="hover", tooltips=[("Country", "@country"),("Value", "@value")])

# Start angles use the zero-prefixed cumulative sum and end angles the plain
# cumulative sum, so each wedge spans exactly its own 'angle' value.
p.wedge(x=0, y=1, radius=0.4,
        start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
        line_color="white", fill_color='color', legend='country', source=data)

# Hide axes and grid lines, which are not meaningful for a pie chart.
p.axis.axis_label=None
p.axis.visible=False
p.grid.grid_line_color = None

show(p)
Binary file added sphinx/source/_images/gallery/pie_chart.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions sphinx/source/docs/gallery.json
Expand Up @@ -6,6 +6,7 @@
{ "path": "examples/plotting/file/hex_tile.py", "name": "hex_tile" },
{ "path": "examples/plotting/file/bar_colormapped.py", "name": "bar_colormapped" },
{ "path": "examples/plotting/file/bar_intervals.py", "name": "bar_intervals" },
{ "path": "examples/plotting/file/pie.py", "name": "pie_chart" },
{ "path": "examples/plotting/file/bar_mixed.py", "name": "bar_mixed" },
{ "path": "examples/plotting/file/bar_nested_colormapped.py", "name": "bar_nested_colormapped" },
{ "path": "examples/plotting/file/categorical.py", "name": "categorical" },
Expand Down
2 changes: 2 additions & 0 deletions sphinx/source/docs/releases/0.13.0.rst
Expand Up @@ -5,6 +5,8 @@ Bokeh Version ``0.13.0`` is an incremental update that adds a few
new features and fixes several bugs. Some of the highlights include:

* Improved hover tool fields for common stacked bar plot cases
* New ``CumSum`` transform to generate values from cumulative sums
of CDS columns on the client side.

And several other bug fixes and docs additions. For full details see the
:bokeh-tree:`CHANGELOG`.
Expand Down