In [1]:
import numpy as np
import pandas as pd
import altair as alt

In [2]:
# user exposed function
def attendance(p, guests, simulations=5000):
    """Simulate how many invited guests actually show up.

    Every guest is modelled as one independent Bernoulli draw with success
    probability ``p``; each simulated party adds up the draws of all
    ``guests`` invitees.

    Parameters
    ----------
    p : float
        Probability that a single guest attends.
    guests : int
        Number of invited guests.
    simulations : int, default 5000
        Number of simulated parties (rows in the result).

    Returns
    -------
    pandas.DataFrame
        One row per simulation with a single ``simulated_guests`` column
        holding the simulated head count.
    """
    trial = 1
    draws = np.random.binomial(trial, p, [guests, simulations])
    # Reduce over the guest axis inside numpy. The builtin sum() walks the
    # rows in a Python loop and collapses to the scalar 0 when guests == 0,
    # which pandas then refuses to turn into a DataFrame.
    p_sim = draws.sum(axis=0)
    return pd.DataFrame(data=p_sim, columns=["simulated_guests"])



In [3]:
# Simulate a party of 100 invited guests, each attending with probability 0.75.
p_sim_df = attendance(.75, 100)
p_sim_df
# Only the last expression of the cell is rendered: the simulated_guests column.
p_sim_df.simulated_guests

0       69
1       76
2       69
3       78
4       76
        ..
4995    70
4996    81
4997    74
4998    66
4999    72
Name: simulated_guests, Length: 5000, dtype: int64

In [6]:
# internal helper function
def generate_percentile_ci(df):
    """Return the 2.5th and 97.5th percentiles of ``df`` along axis 0.

    Parameters
    ----------
    df : pandas.DataFrame, pandas.Series or array-like
        Numeric data. For 2-D input the percentiles are taken per column;
        1-D input is treated as a single column.

    Returns
    -------
    pandas.DataFrame
        Columns ``lower_ci`` (2.5th percentile) and ``upper_ci`` (97.5th
        percentile), one row per input column.
    """
    # One call yields both bounds; the leading axis of the result indexes the
    # requested percentiles.
    lower_ci, upper_ci = np.percentile(df, [2.5, 97.5], axis=0)
    # 1-D input produces scalar bounds, and pandas cannot build a frame from
    # scalars alone without an index, so promote them to length-1 arrays.
    return pd.DataFrame({
        'lower_ci': np.atleast_1d(lower_ci),
        'upper_ci': np.atleast_1d(upper_ci)
    })

In [7]:
# user exposed function
def viz_possible_guests(df):
    """Histogram of simulated guest counts with percentile bounds overlaid.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a numeric ``simulated_guests`` column.

    Returns
    -------
    altair.LayerChart
        Layer 0 is the binned bar chart of ``simulated_guests``; the next two
        layers are vertical rules at the 2.5th and 97.5th percentiles, each
        truncated to an integer.
    """
    # Only the plotted column feeds the interval, so extra columns in df
    # cannot break the scalar conversion below.
    cis = generate_percentile_ci(df[["simulated_guests"]])
    # Pull the scalar out explicitly: calling int() directly on a one-element
    # Series is deprecated in recent pandas releases.
    lower_bound = int(cis["lower_ci"].iloc[0])
    upper_bound = int(cis["upper_ci"].iloc[0])

    chart = alt.Chart(df).mark_bar().encode(
        alt.X("simulated_guests", bin=alt.Bin(maxbins=30)),
        y='count()',
    )

    lower_ci_line = alt.Chart(pd.DataFrame({'x': [lower_bound]})).mark_rule().encode(x='x')
    upper_ci_line = alt.Chart(pd.DataFrame({'x': [upper_bound]})).mark_rule().encode(x='x')

    return chart + lower_ci_line + upper_ci_line

In [12]:
# Histogram of the simulated guest counts with the percentile rules overlaid.
viz_possible_guests(p_sim_df)

In [8]:
# Tiny hand-written fixture: three distinct guest counts, each appearing twice.
small_party = pd.DataFrame(
    {"simulated_guests": np.repeat([25, 50, 100], 2)}
)
small_party

Unnamed: 0,simulated_guests
0,25
1,25
2,50
3,50
4,100
5,100


In [9]:
# Build the chart from the small fixture and keep it so its spec can be inspected.
small_hist = viz_possible_guests(small_party)
small_hist

In [14]:
# Layer 0 is the bar chart; show which column its x encoding is bound to.
small_hist.layer[0].encoding.x.field

'simulated_guests'

In [38]:
# The histogram layer must bin the simulated_guests column on its x axis.
assert small_hist.layer[0].encoding.x.field == 'simulated_guests'

In [42]:
def scatter(df, x_axis, y_axis):
    """Plot ``y_axis`` against ``x_axis`` from ``df`` as an Altair line chart.

    Note that, despite the function name, the mark used is a line.

    Parameters
    ----------
    df : pandas.DataFrame
        Data handed to ``alt.Chart``.
    x_axis : str
        Column for the x channel; encoded as quantitative (``:Q``), with a
        scale that is not forced to include zero and a minimum tick step of 1.
    y_axis : str
        Column (or Altair shorthand) for the y channel.

    Returns
    -------
    altair.Chart
    """
    x_encoding = alt.X(
        x_axis + ':Q',
        scale=alt.Scale(zero=False),
        axis=alt.Axis(tickMinStep=1),
    )
    return alt.Chart(df).mark_line().encode(x_encoding, y=y_axis)

In [43]:
# Five consecutive years with a hand-picked measure, used as a fixture for scatter().
small_data = pd.DataFrame(
    {
        "year": np.arange(1901, 1906),
        "measure": np.array([25, 25, 50, 50, 100]),
    }
)
small_data

Unnamed: 0,year,measure
0,1901,25
1,1902,25
2,1903,50
3,1904,50
4,1905,100


In [48]:
# Chart of measure against year; kept in a variable because test_scatter reads it.
small_scatter = scatter(small_data, 'year', 'measure')
small_scatter

In [59]:
# Inspect the minimum tick step configured on the x axis.
small_scatter.encoding.x.axis.tickMinStep

1

In [60]:
def test_scatter():
    """Check the spec of the notebook-level ``small_scatter`` chart.

    The chart is read from the global ``small_scatter`` rather than built
    here, so the cell that creates it has to run before this test is called.
    """
    # Channel bindings.
    assert small_scatter.encoding.x.field == 'year', 'x_axis should be mapped to the x axis'
    assert small_scatter.encoding.y.field == 'measure', 'y_axis should be mapped to the y axis'
    # Mark type.
    assert small_scatter.mark == 'line', 'mark should be a line'
    # x-axis scale and tick configuration.
    assert small_scatter.encoding.x.scale.zero == False, "x-axis should not start at 0"
    assert small_scatter.encoding.x.axis.tickMinStep == 1, "x-axis small tick step should be 1"