In [12]:
from blaze.expr import *

In [3]:
import blaze

In [6]:
print(dir(blaze.expr))

['Add', 'And', 'Apply', 'Arithmetic', 'BinOp', 'BooleanMath', 'Broadcast', 'By', 'DataShape', 'Date', 'DateTime', 'DateTimeTruncate', 'Day', 'Distinct', 'Div', 'ElemWise', 'Eq', 'Expr', 'Field', 'FloorDiv', 'Ge', 'Gt', 'Head', 'Hour', 'IntegerMath', 'Join', 'Label', 'Le', 'Like', 'Lt', 'Map', 'Math', 'Merge', 'Microsecond', 'Millisecond', 'Mod', 'Month', 'Mult', 'Ne', 'Node', 'Not', 'Or', 'Pow', 'Projection', 'ReLabel', 'RealMath', 'Record', 'Reduction', 'Relational', 'Second', 'Selection', 'Slice', 'Sort', 'Sub', 'Summary', 'Symbol', 'TableSymbol', 'TensorDot', 'Time', 'Transpose', 'USub', 'UTCFromTimestamp', 'UnaryOp', 'Year', '__builtins__', '__doc__', '__file__', '__name__', '__package__', '__path__', 'abs', 'absolute_import', 'acos', 'acosh', 'all', 'any', 'arithmetic', 'arrays', 'asin', 'asinh', 'atan', 'atanh', 'broadcast', 'by', 'ceil', 'collections', 'common_subexpression', 'core', 'cos', 'cosh', 'count', 'count_values', 'ct', 'datashape', 'date', 'datetime', 'day', 'degrees',

In [7]:
t = Symbol('t', 'var * {id: int, name: string, amount: int}')
amounts = Field(t, 'amount')

In [10]:
amounts, amounts._inputs, amounts._args

(t.amount, (t,), (t, 'amount'))

In [22]:
deadbeats = t[t.amount < 0].name
deadbeats, type(deadbeats)

(t[t.amount < 0].name, blaze.expr.expressions.Field)

In [23]:
 data = [[1, 'Alice', 100],
         [2, 'Bob', -200],
         [3, 'Charlie', 300]]

namespace maps the symbol to the data. 

In [24]:
namespace = {t: data}

In [28]:
from blaze import compute
list(compute(deadbeats, namespace))

['Bob']

Internally `compute` traverses the expression from the leaves (`t`) on up while transforming data. 

In [29]:
 selection_t = t[t.amount < 0]

In [30]:
predicate = lambda amt: amt < 0
data = filter(predicate, data)

In [31]:
data

[]

# Computational Pipeline 

In [19]:
import numpy as np
from blaze.compute import compute_up, compute

In [20]:
xdata = np.array([ 1,  2,  3,  4,  5])
ydata = np.array([10, 20, 30, 40, 50])

In [21]:
x = Symbol('x', '5 * int')
y = Symbol('y', '5 * int')
expr = sum(x**2 + y)

In [25]:
expr._leaves()

[x, y]

In [27]:
expr._args

((x ** 2) + y, (0,), False)

In [22]:
ns = {x: xdata, y: ydata}

In [23]:
compute(expr, ns)

TypeError: ufunc '<lambda>' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

The full pipeline looks like this

    1. Pre-compute all leaves of data
    2. Optimize the expression
    3. Try calling compute_down on the entire expression tree
    4. Otherwise, traverse up the tree from the leaves, calling compute_up. Repeat this until the data significantly changes type (e.g. list to int after a sum operation)
    5. Reevaluate optimize on the expression and pre_compute on all of the data elements.
    6. Go to step 3
    7. Call post_compute on the result

## Practicing with leaves

The `leaves` of an expression tree are all the nodes without inputs. They are returned from left to right. 

In [9]:
from blaze.expr import symbol, join, by, Symbol

t = symbol('t', 'var * {id: int32, name: string}')
t._leaves()

[t]

In [3]:
v = symbol('v', 'var * {id: int32, city: string}')
join(t,v)._leaves()

[t, v]

In [4]:
type(t)

blaze.expr.expressions.Symbol