In [None]:
import io

import numpy as np
import pandas as pd
import grblas

In [None]:
import dimsum as ds
from dimsum import Dimension, CalendarDimension, Schema, NULL, Flat, Pivot
from dimsum import alignment

In [None]:
# Categorical dimensions: a name plus the list of codes it may take.
size = Dimension('size', ['S', 'M', 'L', 'XL'])
shape = Dimension('shape', ['Circle', 'Square', 'Hexagon', 'Triangle', 'Pentagon', 'Octagon'])
color = Dimension('color', ['Red', 'Blue', 'Yellow', 'Green'])
quality = Dimension('quality', ['Bad', 'Okay', 'Good'])

# Calendar dimensions: each is built from a pandas period range at one frequency.
quarters = CalendarDimension('Q', pd.period_range('2020-Q1', '2021-Q4', freq='Q'), format='%Y-Q%q')
months = CalendarDimension('M', pd.period_range('2020-01', '2021-12', freq='M'))
# 'Y' replaces the annual alias 'A', which is deprecated for periods since pandas 2.2;
# both spell the same year-end (December) frequency.
years = CalendarDimension('Y', pd.period_range('2020', periods=5, freq='Y'))
days = CalendarDimension('D', pd.period_range('2020-01-01', '2020-03-03', freq='D'))

In [None]:
# Collect every dimension (categorical and calendar) into one Schema; all later
# schema.load(...) calls in this notebook go through this object.
schema = Schema([size, shape, color, quality, years, quarters, months, days])

In [None]:
# Materialize schema.calendar as a list to see what it contains; entries are looked
# up elsewhere in this notebook with keys such as 'Q->Y' and 'M->Q'.
list(schema.calendar)

In [None]:
# Look up the calendar entry stored under the key 'Q->Y'.
# NOTE(review): presumably the quarter-to-year mapping — confirm in the dimsum docs.
schema.calendar['Q->Y']

### A: loaded from a pandas Series

In [None]:
# Five observations, each identified by a (size, color, shape) combination.
codes = [
    dict(size='S', color='Green', shape='Circle'),
    dict(size='M', color='Green', shape='Triangle'),
    dict(size='L', color='Red', shape='Triangle'),
    dict(size='M', color='Blue', shape='Circle'),
    dict(size='S', color='Yellow', shape='Circle'),
]
vals = [1, 2, 3, 4, 5]
df = pd.DataFrame(codes).assign(value=vals)
# Load from Series: the three dimension columns become the index levels and
# 'value' is the data being loaded.
s = df.set_index(['size', 'color', 'shape'])['value']
a = schema.load(s)
a

### A_alt: loaded from a DataFrame with an explicit value column

In [None]:
vals = [10, 20, 30, 40, 50]
# Same three dimensions as `a`, but a different set of combinations.
codes = [
    dict(zip(('size', 'color', 'shape'), combo))
    for combo in [
        ('S', 'Green', 'Hexagon'),
        ('M', 'Green', 'Triangle'),
        ('XL', 'Yellow', 'Triangle'),
        ('M', 'Blue', 'Circle'),
        ('S', 'Yellow', 'Circle'),
    ]
]
# Fold each value into its record so the frame is built in one step.
df = pd.DataFrame([{**code, 'value': val} for code, val in zip(codes, vals)])
# Load from DataFrame, naming both the dimension columns and the value column.
a_alt = schema.load(df, dims=['size', 'color', 'shape'], value_column='value')
a_alt

### A_sub: loaded from a DataFrame (size and color only)

In [None]:
# Only two dimensions this time: size and color.
codes = [
    {'size': 'S', 'color': 'Green'}, {'size': 'M', 'color': 'Green'},
    {'size': 'L', 'color': 'Red'}, {'size': 'M', 'color': 'Blue'},
    {'size': 'S', 'color': 'Yellow'},
]
vals = [1, 2, 3, 4, 5]
# Build the frame column by column; 'value' ends up as the last column.
df = pd.DataFrame({
    'size': [code['size'] for code in codes],
    'color': [code['color'] for code in codes],
    'value': vals,
})
# Load from DataFrame, passing the dimension columns positionally.
a_sub = schema.load(df, ['size', 'color'])
a_sub

### B_sub: loaded from a dict keyed by (size, shape)

In [None]:
# Mapping from (size, shape) pairs to values; only four pairs are present.
d = dict([
    (('S', 'Circle'), 10),
    (('M', 'Circle'), 20),
    (('L', 'Triangle'), 30),
    (('XL', 'Triangle'), 40),
])
# Load from dict: each key is a tuple ordered like the dimension list given here.
b_sub = schema.load(d, ['size', 'shape'])
b_sub

### B: loaded from a list of lists

In [None]:
# One row per observation: size, shape, quality, then the value in the last slot.
data = [
    list(row)
    for row in (
        ('S', 'Circle', 'Bad', 5),
        ('S', 'Circle', 'Okay', 10),
        ('M', 'Circle', 'Okay', 20),
        ('L', 'Triangle', 'Good', 30),
        ('L', 'Triangle', 'Bad', 35),
        ('XL', 'Triangle', 'Bad', 40),
    )
]
# Load from list of lists; the names line up with the first three entries of each row.
b = schema.load(data, ['size', 'shape', 'quality'])
b

### C: loaded from a dict over a single dimension

In [None]:
# One value for each of the six codes declared on the 'shape' dimension.
data = dict(Circle=5, Square=10, Hexagon=20, Triangle=30, Pentagon=35, Octagon=40)
# Load from dict with a single dimension: a bare dimension name rather than a list.
c = schema.load(data, 'shape')
c

### D: loaded from a DataFrame with NULL and None entries

In [None]:
# Column-oriented input containing missing codes: dimsum's NULL appears in both
# 'size' and 'quality', and a plain None appears in 'quality'.
df = pd.DataFrame(dict(
    size=['S', 'S', 'M', 'L', 'L', 'XL', NULL],
    quality=['Bad', 'Okay', 'Okay', 'Good', 'Bad', NULL, None],
    vals=[1.1, 2.2, 3.3, 5.5, 4.4, 6.6, 7.7],
))
# No value column is named here.
# NOTE(review): presumably load() picks the one remaining column ('vals') — confirm.
d = schema.load(df, ['quality', 'size'])
d

### Already aligned

In [None]:
# a and a_alt were both loaded over (size, color, shape). Align them explicitly and
# keep both results so the following cells can display them.
# NOTE(review): .X[...] appears to attach a fill spec to an operand (another object
# on the left, the scalar -1 on the right) — confirm exact semantics in the dimsum docs.
x, y = ds.align(a.X[a_alt.X], a_alt.X[-1])

In [None]:
a

In [None]:
a_alt

In [None]:
x

In [None]:
y

In [None]:
# Addition using the same operand wrapping as the explicit align call, except the
# scalar given to a_alt.X is -100 instead of -1.
a.X[a_alt.X] + a_alt.X[-100]

### Subset alignment

In [None]:
# a was loaded over (size, color, shape) and b_sub over (size, shape), so every
# dimension of b_sub also exists in a. x and y receive the two aligned results.
x, y = ds.align(a, b_sub)

In [None]:
a

In [None]:
b_sub

In [None]:
x

In [None]:
y

In [None]:
# Same operand pair as the explicit ds.align call, combined directly with +.
a + b_sub

In [None]:
# Same sum with the left operand wrapped as a.X[-100].
# NOTE(review): -100 looks like a fill value for positions missing from a — confirm.
a.X[-100] + b_sub

In [None]:
# Same sum with the right operand wrapped as b_sub.X[-100].
# NOTE(review): -100 looks like a fill value for positions missing from b_sub — confirm.
a + b_sub.X[-100]

### Partial disjoint alignment

In [None]:
# a is over (size, color, shape) and b over (size, shape, quality): they share size
# and shape, while color and quality each appear on one side only.
x, y = ds.align(a, b)

In [None]:
# Pivot a with the dimensions it shares with b (size, shape) on the left; the next
# cell applies the same layout to b.
a.pivot(left={'size', 'shape'})

In [None]:
# Pivot b with the dimensions it shares with a (size, shape) on the left, matching
# the layout used for a in the previous cell.
b.pivot(left={'size', 'shape'})

In [None]:
x

In [None]:
y

In [None]:
# Same operand pair as the explicit ds.align(a, b) call, combined directly with +.
a + b

In [None]:
# Same sum with the left operand wrapped as a.X[-100].
# NOTE(review): -100 looks like a fill value for positions missing from a — confirm.
a.X[-100] + b

In [None]:
# Same sum with the right operand wrapped as b.X[-100].
# NOTE(review): -100 looks like a fill value for positions missing from b — confirm.
a + b.X[-100]

### Fully disjoint alignment

In [None]:
# d was loaded over (quality, size) and c over (shape): the two share no dimension.
x, y = ds.align(d, c)

In [None]:
c

In [None]:
d

In [None]:
x

In [None]:
y

In [None]:
# Addition of the same two objects, which have no dimension in common.
d + c

### Pushdown

In [None]:
a

In [None]:
# One weight per quality code, loaded over the single 'quality' dimension.
qual_pushdown = schema.load({'Bad': 0.05, 'Okay': 0.75, 'Good': 0.2}, 'quality')
qual_pushdown

In [None]:
# Verify that pushdown values sum to 1.0 (0.05 + 0.75 + 0.2).
# This is a visual check only: the reduction is displayed, nothing is asserted.
# NOTE(review): assumes reduce() with no arguments sums the values — confirm.
qual_pushdown.reduce()

In [None]:
# a has no 'quality' dimension and qual_pushdown has only 'quality'.
# NOTE(review): the product presumably spreads each value of a across the quality
# codes using the weights above — confirm against the displayed output.
pushdown = a * qual_pushdown
pushdown

### Time Aggregation

In [None]:
# Monthly values for 2020-01 through 2020-07, one row per (size, color, month).
# The month column is named 'M', the same name given to the monthly
# CalendarDimension in the schema.
csv = """
size,color,M,value
S,Red,2020-01,100.5
S,Blue,2020-01,174.2
M,Red,2020-01,77.5
M,Blue,2020-01,28.6
M,Green,2020-01,198.0
S,Red,2020-02,100.5
S,Blue,2020-02,174.2
M,Red,2020-02,71.5
M,Blue,2020-02,29.7
M,Green,2020-02,201.0
S,Red,2020-03,99.3
S,Blue,2020-03,177.1
M,Red,2020-03,62.1
M,Blue,2020-03,21.9
M,Green,2020-03,205.1
S,Red,2020-04,88.8
S,Blue,2020-04,173.3
M,Red,2020-04,72.5
M,Blue,2020-04,19.8
M,Green,2020-04,222.0
S,Red,2020-05,67.7
S,Blue,2020-05,171.1
M,Red,2020-05,77.7
M,Blue,2020-05,31.8
M,Green,2020-05,222.2
S,Red,2020-06,55.3
S,Blue,2020-06,164.6
M,Red,2020-06,79.3
M,Blue,2020-06,23.3
M,Green,2020-06,192.7
S,Red,2020-07,111.9
S,Blue,2020-07,177.1
M,Red,2020-07,66.6
M,Blue,2020-07,29.2
M,Green,2020-07,199.9
"""
# `io` is imported in the notebook's first import cell rather than mid-notebook,
# so the dependency is visible up front.
df = pd.read_csv(io.StringIO(csv))
# Index by the three dimension columns so that 'value' is loaded as a Series.
data = schema.load(df.set_index(['size', 'color', 'M'])['value'])
data.pivot(top='M')

In [None]:
# Calendar entry stored under the key 'M->Q'.
# NOTE(review): the variable is named q2m although the key reads month -> quarter;
# the name is used by the following cell, so it is left unchanged here.
q2m = schema.calendar['M->Q']
q2m

In [None]:
# Multiply the monthly data by the 'M->Q' calendar entry; the next cell reduces the
# product and pivots it by 'Q'.
# Note: this rebinds x, which earlier held a ds.align result.
x = data * q2m
x

In [None]:
# Reduce x with reduce_columns(), then display the result with 'Q' across the top.
# NOTE(review): presumably this yields per-quarter totals of the monthly values —
# confirm against the displayed output.
xm = x.reduce_columns()
xm.pivot(top='Q')

In [None]:
data.pivot(top='M')

# Random examples

In [None]:
a.pivot(top='color')

In [None]:
b

In [None]:
a + 2

In [None]:
# Subtract from a a reduced form of b: b is pivoted with 'quality' on top and then
# passed through reduce_rows().
# NOTE(review): which dimensions survive the reduction is not visible here — confirm.
a - b.pivot(top='quality').reduce_rows()

In [None]:
# Multiply with both operands wrapped as .X[1].
# NOTE(review): 1 is presumably the fill for missing positions (the multiplicative
# identity, so the other operand's value passes through) — confirm.
a.X[1] * b.X[1]

In [None]:
a.X[b]

In [None]:
b.X[a]

In [None]:
# NumPy function applied directly to a pivoted dimsum object (size on the left).
np.sin(a.pivot(left='size'))

In [None]:
# Two-argument NumPy function; the second operand is b with a bare .X (no value given).
np.arctan2(a, b.X)

In [None]:
# Same call as the previous cell, but b is wrapped as .X[5.5].
# NOTE(review): 5.5 is presumably the fill used where b has no entry — confirm.
np.arctan2(a, b.X[5.5])

In [None]:
np.sin(a)

In [None]:
a == 5

In [None]:
a << 4

In [None]:
a > 2

In [None]:
# Boolean flags for three (size, shape) pairs; the following cells use cond as a mask.
cond = schema.load(
    pd.DataFrame({
        'size': ['L', 'S', 'M'],
        'shape': ['Circle', 'Triangle', 'Circle'],
        'value': [True, False, False],
    }),
    ['size', 'shape'],
    'value',
)
cond

In [None]:
# Filter a by cond, with cond wrapped as .X[True].
# NOTE(review): True is presumably the fill for positions where cond has no entry,
# which would keep those entries of a — confirm.
a.filter(cond.X[True])

In [None]:
# Three-argument where: the condition, then the scalar 5, then b.
# NOTE(review): presumably follows numpy.where ordering (value where the condition
# is true, then value where it is false) — confirm.
ds.where(cond.X[False], 5, b)

In [None]:
# Here the .X[99] wrapper is on the object being filtered (a), not on the condition.
a.X[99].filter(cond)

In [None]:
# Filter b using the ~ (invert) operator applied to cond.
b.filter(~cond)

In [None]:
# Values over the 'size' dimension only, including one entry keyed by NULL.
# Note: this rebinds x, which previously held the product data * q2m.
x = schema.load({'S': 5, 'M': 0, 'L': 12.9, NULL: 44.4}, 'size')
x

In [None]:
y = schema.load({'XL': 14, 'L': 8}, 'size')
y

In [None]:
# Shift amounts over 'size': entries exist only for 'M' and for NULL.
shifter = schema.load({'M': 1, NULL: 1}, 'size')
shifter

In [None]:
# Shift x along 'size' by the negated shifter.
# NOTE(review): the shift direction and the handling of codes that have no shifter
# entry are not visible here — confirm in the dimsum docs.
x.shift('size', -shifter)

In [None]:
shifter

In [None]:
# Redefine shifter with a single entry for 'M' (the NULL entry is no longer present).
shifter = schema.load({'M': 1}, 'size')
shifter

In [None]:
# Shift x along 'size' with shifter wrapped as .X[0].
# NOTE(review): 0 presumably means "no shift" for codes missing from shifter — confirm.
x.shift('size', shifter.X[0])

In [None]:
a

In [None]:
a.X+b

In [None]:
a.X[22].align(b)

In [None]:
a.X[b]

In [None]:
b.pivot(left={'size', 'shape'})

In [None]:
b.X[a]

In [None]:
a.pivot(left={'size', 'shape'})

In [None]:
c

In [None]:
a

In [None]:
c.cross_align(a)

In [None]:
a.cross_align(c)

In [None]:
a.align(c)

In [None]:
shifter = schema.load({'M': 1}, 'size')
shifter

In [None]:
# Shift the three-dimensional a along 'size', passing shifter with a bare .X
# (no value given).
a.shift('size', shifter.X)

In [None]:
a.codes('shape')

In [None]:
schema.dimension_enums('shape')

In [None]:
a.pivot(left='shape').reduce_rows()

In [None]:
# match(...) is given a set of accepted codes per dimension; its result is then used
# as the filter condition on a itself.
a.filter(a.match(size={'M', 'S'}, color={'Yellow', 'Blue'}))

In [None]:
# Encode the code 'M' of the 'size' dimension; the next cell compares
# a.codes('size') against an encoded value obtained the same way.
schema.encode(size='M')

In [None]:
# Keep the entries of a whose 'size' code compares greater than the encoding of 'S'.
# NOTE(review): presumably encodings follow the order in which the codes were declared
# on the dimension (S, M, L, XL) — confirm.
a.filter(a.codes('size') > schema.encode(size='S'))

In [None]:
# where(condition, -99, a), with the condition built by matching single codes.
# NOTE(review): presumably replaces the (S, Yellow) entries with -99 and keeps a's own
# values elsewhere — confirm against the displayed output.
ds.where(a.match(size='S', color='Yellow'), -99, a)