In [14]:
from IPython.display import HTML

def add_toggle_button(desc, *inputs):
    """Build an HTML/JavaScript snippet with a button that toggles code cells.

    The generated script collects every ``div.input`` element on the page and
    shows or hides the ones at the positions listed in ``inputs``. The toggle
    function is also registered with ``$(document).ready``, so it runs once
    when the document is ready; because the state flag starts as "shown",
    that first run hides the listed inputs.

    Parameters
    ----------
    desc : str
        Name inserted into the button caption. It is HTML-escaped and
        substituted last, so text such as ``%n`` inside it is kept verbatim.
    *inputs : int
        Positions (within the list of ``div.input`` elements) to toggle.
        The first one is also appended to the JavaScript identifiers so that
        several buttons can coexist on one page.

    Returns
    -------
    str
        The markup, ready to be wrapped in ``IPython.display.HTML``.

    Raises
    ------
    IndexError
        If no positions are given.
    """
    # Local import keeps this block self-contained.
    from html import escape

    x = ', '.join([str(x) for x in inputs])
    n = str(inputs[0])
    # NOTE(review): the script relies on a global jQuery ``$`` being available
    # in the page that renders the output - confirm for the target front end.
    code = '''
        <script>
        var others%n = [%x];
        var code_shown%n = true;
        function code_toggle%n () {
            var selector = "div.input";
            var inputs = $(selector).toArray();
            if (code_shown%n) {
                for (var i in others%n) {
                    var x = others%n[i];
                    $(inputs[x]).hide();
                }
            }
            else {
                for (var i in others%n) {
                    var x = others%n[i];
                    $(inputs[x]).show();
                }
            }

            code_shown%n = !code_shown%n;
        }
        $( document ).ready(code_toggle%n);
        </script>
        <form action="javascript:code_toggle%n()">
        <input type="submit" value="Click to toggle on/off the display of the %d code.">
        </form>'''

    # Fill the identifier placeholders first and the free-text description
    # last; otherwise a description containing "%n" or "%x" would itself be
    # rewritten by the later substitutions.
    return code.replace('%n', n).replace('%x', x).replace('%d', escape(desc))

# Render a button that toggles the "div.input" elements at positions 0, 1, 2, 3
# and 7; the generated script also runs once on document ready, which hides them.
HTML(add_toggle_button('setup', 0, 1, 2, 3, 7))

In [2]:
%%html
<!-- Override any width set on widget labels (presumably so the long slider descriptions used in this notebook are not cut off - confirm in the rendered page). -->
<style>
.widget-label { width: unset !important; }
</style>

In [3]:
import numpy
from bqplot import LinearScale, Axis, Lines, Figure, Hist, Bars, Scatter
from ipywidgets import HBox, VBox, SelectionSlider

In [4]:
class Harvester(object):
    """Base class that simulates sampling decisions over a day of harvests.

    ``prep_data`` spreads hourly sample counts over ``harvests_per_hour``
    harvests per hour, ``simulate`` replays every harvest, and subclasses
    decide per sample whether to keep it by implementing
    ``adaptively_sample``.

    Parameters
    ----------
    seed : int
        Seed for the private ``numpy.random.RandomState``.
    target_samples_per_harvest : int
        Desired number of kept samples per harvest; a harvest stops asking
        the sampler once it has kept twice this many.
    harvests_per_hour : int
        Number of harvests each hour is divided into.
    """

    def __init__(self, seed, target_samples_per_harvest, harvests_per_hour):
        self.num_sampled = []           # kept-sample count for each harvest
        self.timesteps_per_sample = []  # per harvest: time increments between samples
        self.samples_per_harvest = []   # incoming-sample count for each harvest
        self.xdata = []                 # time of each harvest, in fractional hours
        self.xdata_ds = []              # down-sampled xdata (at most 240 points)
        self.sph_ds = []                # down-sampled samples_per_harvest

        self.seed = seed
        self.target_samples_per_harvest = target_samples_per_harvest
        self.harvests_per_hour = harvests_per_hour

        self.current_timestamp = 0
        self.rand = numpy.random.RandomState(self.seed)

    def prep_data(self, samples_per_hour):
        """Generate per-harvest sample counts and per-sample time increments.

        Parameters
        ----------
        samples_per_hour : sequence of numbers
            Sample totals per hour; entries 0..23 are read.
        """
        for h in range(0, 24):
            sh = samples_per_hour[h]

            # An hour without samples yields all-zero harvests. Normalising a
            # zero draw would compute 0/0 and produce NaN counts that later
            # fail in int(), so that case skips the draw entirely.
            sph = numpy.zeros(self.harvests_per_hour)
            if sh != 0:
                raw_sph = self.rand.exponential(sh/self.harvests_per_hour, self.harvests_per_hour)
                raw_total = sum(raw_sph)
                if raw_total > 0:
                    # Rescale the draws so they sum to the hourly total, then
                    # round to whole samples.
                    sph = numpy.rint(raw_sph*(sh/raw_total))
            self.samples_per_harvest = numpy.append(self.samples_per_harvest, sph)

            for v in range(0, self.harvests_per_hour):
                x = h + float(v)/float(self.harvests_per_hour)
                self.xdata.append(x)
                shh = sph[v]
                if shh == 0:
                    self.timesteps_per_sample.append([])
                else:
                    raw_sps = self.rand.exponential(self.harvests_per_hour/shh, int(shh))
                    # NOTE(review): this multiplies by sum/count, whereas the
                    # per-hour step above multiplies by total/sum; the intended
                    # normalisation is unclear, so it is left unchanged.
                    sps = raw_sps*(sum(raw_sps)/shh)
                    self.timesteps_per_sample.append(sps)

        # Down-sample to at most 240 evenly strided points. The stride is
        # clamped to 1 so short series are kept whole instead of repeating
        # the first point, and the lists are rebuilt so repeated calls do not
        # accumulate stale points.
        total = len(self.samples_per_harvest)
        ds_fac = max(1, total // 240)
        points = min(240, total)
        self.xdata_ds = [self.xdata[ds_fac*i] for i in range(points)]
        self.sph_ds = [self.samples_per_harvest[ds_fac*i] for i in range(points)]

    def simulate(self):
        """Run ``harvest`` once for every prepared harvest, in order."""
        for i in range(len(self.samples_per_harvest)):
            self.harvest(i)

    def harvest(self, i):
        """Replay harvest ``i`` and append its kept-sample count to ``num_sampled``.

        The previous harvest's incoming count (``-1`` for the first harvest)
        is handed to ``adaptively_sample`` for every sample.
        """
        samples = self.timesteps_per_sample[i]

        last_harvest_count = -1
        if i > 0:
            last_harvest_count = int(self.samples_per_harvest[i-1])

        sampled_count = 0
        for timestep in samples:
            # The clock advances for every sample, kept or not.
            self.current_timestamp += timestep

            # Stop consulting the sampler once twice the target has been kept.
            if sampled_count < 2*self.target_samples_per_harvest:
                sampled_count += int(self.adaptively_sample(self.current_timestamp, last_harvest_count))

        self.num_sampled.append(sampled_count)

    def adaptively_sample(self, timestamp, last_harvest_count):
        """Decide whether to keep one sample; subclasses must override.

        Parameters
        ----------
        timestamp : float
            Running timestamp after this sample's time increment.
        last_harvest_count : int
            Incoming-sample count of the previous harvest, or ``-1`` if none.

        Returns
        -------
        bool
            Truthy to keep the sample (the result is passed through ``int``).

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError('IMPLEMENT THIS, YA NUMBSKULL!')

In [5]:
# Incoming sample totals, one entry per hour of the day; Harvester.prep_data
# reads entries 0..23 of this list.
samples_per_hour = [
    294, 254, 325, 516, 886, 3022,
    10109, 16507, 22262, 24752, 26362, 25034,
    27172, 27208, 26751, 20724, 14936, 10160,
    5808, 3053, 1397, 571, 152, 351
]

# Seed used for the initial simulation and as the seed slider's starting value.
initial_seed = 1234567
# Key into harvester_classes used for the initial simulation.
initial_harvester = 'Per-Spec'
# Target samples per harvest at 60 harvests per hour; update_plots scales it
# by 60/harvests_per_hour for other rates.
baseline_sph = 10
# Passed as the harvests_per_hour argument (despite the "hpm" name) and used
# as the starting value of the harvests-per-hour slider.
initial_hpm = 60
# Height of the red target marker drawn on the histogram at 60 harvests per hour.
red_height = 500.0

In [6]:
class StupidHarvester(Harvester):
    """Sampler that keeps each sample on a fair coin flip."""

    def adaptively_sample(self, timestamp, last_harvest_count):
        """Return True for roughly half of the samples.

        Nothing is kept during the first harvest (``last_harvest_count == -1``).
        ``timestamp`` is not used.
        """
        if last_harvest_count == -1:
            return False

        # uniform() takes (low, high). The previous call uniform(1) set low=1
        # with the default high=1.0 and therefore always produced 1.0, which
        # made this sampler keep every sample.
        return bool(self.rand.uniform(0.0, 1.0) > 0.5)

In [7]:
class SpecHarvester(Harvester):
    """Sampler whose keep-probability shrinks as the previous harvest grows."""

    def adaptively_sample(self, timestamp, last_harvest_count):
        """Keep a sample when a draw from [0, last_harvest_count) is below the target.

        Nothing is kept during the first harvest (``last_harvest_count == -1``).
        When the previous harvest was no larger than the target, every sample
        is kept. ``timestamp`` is not used.
        """
        if last_harvest_count == -1:
            return False

        # uniform() takes (low, high). The previous call passed the count as
        # ``low`` with the default high=1.0; NumPy documents low > high as
        # undefined, and in practice it drew from (1, count] instead of
        # [0, count).
        draw = self.rand.uniform(0, last_harvest_count)
        return bool(draw < self.target_samples_per_harvest)

In [8]:
# Maps the algorithm names offered in the UI to the Harvester subclass that
# implements each one; test_adaptive_sampling looks classes up here by name.
harvester_classes = {
    'Per-Spec': SpecHarvester,
    'Random': StupidHarvester
}

tas_cache = {}
def test_adaptive_sampling(class_name, seed, target_samples_per_harvest, harvests_per_hour):
    cls = harvester_classes[class_name]
    tag = "%s/%s/%s" % (cls.__name__, seed, harvests_per_hour)
    if tag in tas_cache:
        harvester = tas_cache[tag]
    else:
        harvester = cls(seed, target_samples_per_harvest, harvests_per_hour)
        harvester.prep_data(samples_per_hour)
        harvester.simulate()
        tas_cache[tag] = harvester
    
    return harvester

In [15]:
# Simulation shown before any slider is touched.
initial_data = test_adaptive_sampling(initial_harvester, initial_seed, baseline_sph, initial_hpm)

# Scales and axes for the per-harvest scatter plot.
bx_sc = LinearScale()
by_sc = LinearScale()
bax_x = Axis(label='x', scale=bx_sc, grid_lines='solid')
bax_y = Axis(label='y', scale=by_sc, orientation='vertical', side='left', grid_lines='solid')

# Scales and axes for the histogram of kept samples.
hx_sc = LinearScale()
hy_sc = LinearScale()
hax_x = Axis(label='x', scale=hx_sc, grid_lines='solid')
hax_y = Axis(label='y', scale=hy_sc, orientation='vertical', side='left', grid_lines='solid')

# Every slider starts at the value used for initial_data, so the controls
# and the first plots agree even if the initial_* settings are changed.
ss = SelectionSlider(
    options=['Random', 'Per-Spec'],
    value=initial_harvester,
    description='Sampling Algorithm',
)

rss = SelectionSlider(
    options=[1234567, 23523465, 57433462],
    value=initial_seed,
    description='Random Seed',
)

hphs = SelectionSlider(
    options=[30, 60, 120, 240],
    value=initial_hpm,
    description='Harvests Per Hour',
)

# Down-sampled incoming samples per harvest over the day.
samples_bars = Scatter(
    x=initial_data.xdata_ds,
    y=initial_data.sph_ds,
    scales={'x': bx_sc, 'y': by_sc},
    visible=True)

# Filled red rectangle, one unit wide, centred on the target sample count.
red_line = Lines(
    x=[baseline_sph-0.5, baseline_sph-0.5, baseline_sph+0.5, baseline_sph+0.5],
    y=[0, red_height, red_height, 0],
    colors=['red'],
    fill='inside',
    scales={'x': hx_sc, 'y': hy_sc})

# Distribution of kept samples per harvest.
samples_hist = Hist(
    sample=initial_data.num_sampled,
    scales={'sample': hx_sc, 'count': hy_sc},
    bins=20)

# Button that toggles the "div.input" elements at positions 8, 9 and 10.
HTML(add_toggle_button('plotting', 8, 9, 10))

In [10]:
def update_plots(slider):
    """Redraw both figures from the current slider selections.

    Meant to be registered as a widget observer; the ``slider`` argument
    supplied by the widget is not used, the three sliders are read directly.
    """
    hph = hphs.value

    # The per-harvest target scales with harvest length: it equals
    # baseline_sph at 60 harvests per hour.
    scale = 60.0/hph
    target = int(baseline_sph*scale)

    result = test_adaptive_sampling(ss.value, rss.value, target, hph)

    # Scatter of incoming samples per harvest.
    samples_bars.x = result.xdata_ds
    samples_bars.y = result.sph_ds

    # Histogram of kept samples: one bin per possible count up to the cap.
    bin_count = 2*target
    hax_x.tick_values = range(0, bin_count+1)
    samples_hist.bins = bin_count
    samples_hist.sample = result.num_sampled

    # Red marker: a unit-wide rectangle centred on the target.
    left, right = target - 0.5, target + 0.5
    marker_top = red_height/scale
    red_line.x = [left, left, right, right]
    red_line.y = [0, marker_top, marker_top, 0]
    
# Re-run update_plots whenever the value of any of the three sliders changes.
ss.observe(update_plots, names='value')
rss.observe(update_plots, names='value')
hphs.observe(update_plots, names='value')

In [11]:
# Left figure: scatter of incoming samples per harvest.
fig_bar = Figure(marks=[samples_bars], axes=[bax_x, bax_y], title='Requests Per Harvest')
# Right figure: histogram of kept samples with the red target marker on top.
fig_hist = Figure(marks=[samples_hist, red_line], axes=[hax_x, hax_y], title='Super-Samples per Harvest')

# Layout: algorithm slider, then the two numeric sliders, then both figures
# side by side. This must stay the last expression so the cell displays it.
VBox([HBox([ss]), HBox([hphs, rss]), HBox([fig_bar, fig_hist])])

VBox(children=(HBox(children=(SelectionSlider(description='Sampling Algorithm', index=1, options=('Random', 'P…