In [1]:
import time
import numpy as np
import pandas as pd
import ipywidgets as widgets
from vega.widget import VegaWidget

# Vega-Lite bar-chart spec with no inline rows: it only declares a named
# dataset ("data") that is filled later through the widget.
spec_no_data = {
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "data": {"name": "data"},
    "mark": "bar",
}
# Channels: summed yield on x, one bar per variety on y, coloured by site.
spec_no_data["encoding"] = {
    "x": {"aggregate": "sum", "field": "yield"},
    "y": {"field": "variety"},
    "color": {"field": "site"},
}


In [2]:
URL = "https://forge.scilab.org/index.php/p/rdataset/source/file/368b19abcb4292c56e4f21079f750eb76b325907/csv/lattice/barley.csv"

# Download the CSV once. The previous version fetched the same URL twice
# (first to discover the category levels, then again to encode them).
raw_df = pd.read_csv(URL, usecols=['yield', 'variety', 'year', 'site'])

# Encode each categorical column as the position of its value in the sorted
# list of distinct values. `pd.factorize(sort=True)` returns both the integer
# codes and the sorted levels, which matches the former
# `sorted_levels.index(value)` converters.
# NOTE(review): a missing value would now be encoded as -1 instead of raising;
# the columns are assumed to be fully populated -- confirm.
level_codes = {}
level_names = {}
for column in ('variety', 'year', 'site'):
    codes, uniques = pd.factorize(raw_df[column], sort=True)
    level_codes[column] = codes.astype('int64')
    level_names[column] = uniques.tolist()

# Sorted level lists, kept under their original names.
varieties = level_names['variety']
years = level_names['year']
sites = level_names['site']

# Same columns and column order as the raw frame, with the three categorical
# columns replaced by their integer codes.
df = raw_df.assign(**level_codes)

# Build the benchmark payload: the encoded frame repeated 1000 times, both as
# a DataFrame and as a list of per-row dicts.
df_list = [df.copy() for _ in range(1000)]
big_df = pd.concat(df_list)
big_dict = big_df.to_dict(orient='records')

In [3]:
# --- Shared benchmark state (read and written by the cells below) ---
start = 0          # millisecond timestamp taken just before an update is sent
times = []         # one [transf, transf+conv, total] row per completed update
callback_ = None   # function re-invoked after each measurement until `steps`
steps = 20         # number of measurements collected per case
cases = ['vega', 'itw', 'lz4', 'zlib']

progress = widgets.IntProgress(value=0, min=0,
                               max=steps,
                               description='Progress',
                               orientation='horizontal')

# One read-only text box per case for the mean and the standard deviation.
# The keyword is `disabled`; the former `disables=True` is not a Text trait,
# so the boxes were left editable.
mean_d = {k: widgets.Text(value="", description='', disabled=True)
          for k in cases}

std_d = {k: widgets.Text(value="", description='', disabled=True)
         for k in cases}

_lab = widgets.Label

# Grid rows: header, mean times, std times. Five cells per row, matching the
# five columns declared in `grid_template_columns`.
items = ([_lab('Format'), _lab('Vega'), _lab('Table'), _lab('Table+lz4'), _lab('Table+zlib')] +
         [_lab('Mean time [transf / transf+conv / total]')] + list(mean_d.values()) +
         [_lab('Std time [transf / transf+conv / total]')] + list(std_d.values()))

grid = widgets.GridBox(items, layout=widgets.Layout(grid_template_columns="250px repeat(4, 150px)"))

# Text boxes of the case currently being measured; assigned by `set_col`.
mean = None
std = None

def clear_():
    """Reset the measurements, the progress bar and the active result boxes.

    Works on the module-level `times`, `progress`, `mean` and `std`; the
    `times` list is emptied in place so other references to it stay valid.
    """
    del times[:]
    progress.value = 0
    for box in (mean, std):
        box.value = ''

def set_col(k):
    """Select the result column for case `k` and reset the benchmark state.

    Points the module-level `mean` and `std` at the text boxes stored under
    key `k` in `mean_d` / `std_d`, then calls `clear_()` unless `progress`
    is None.
    """
    global mean, std
    mean = mean_d[k]
    std = std_d[k]
    if progress is None:
        return
    clear_()
    
def on_value_change(change):
    """Record one timing sample and either trigger the next run or report.

    Parameters
    ----------
    change : dict
        Change notification whose ``'new'`` entry unpacks into three
        timestamps ``(transf, tr_conv, total)``. They are presumably
        milliseconds on the same clock as the module-level ``start`` --
        confirm against the widget's ``rec_time`` trait.

    Each sample is stored in ``times`` as offsets from ``start`` and the
    progress bar is set to the number of samples. While fewer than ``steps``
    samples exist and ``callback_`` is set, ``callback_`` is invoked to launch
    the next run. Otherwise the per-column mean and standard deviation are
    written to the active ``mean`` / ``std`` text boxes.
    """
    transf, tr_conv, total = change['new']
    times.append([transf - start, tr_conv - start, total - start])
    progress.value = len(times)

    if callback_ is not None and len(times) < steps:
        callback_()
        return

    # No case has been selected with set_col() yet: nothing to write to.
    if mean is None or std is None:
        return

    # Compute in floating point and round once at the end. Asking numpy for
    # dtype=int makes it accumulate in integers, which can under-report the
    # standard deviation.
    samples = np.asarray(times, dtype=float)
    mean_tr, mean_tc, mean_tt = (int(round(float(v))) for v in samples.mean(axis=0))
    std_tr, std_tc, std_tt = (int(round(float(v))) for v in samples.std(axis=0))
    mean.value = f"{mean_tr} / {mean_tc} / {mean_tt} ms"
    std.value = f"{std_tr} / {std_tc} / {std_tt} ms"
        
# Chart widget built from the data-less spec; its dataset is filled by the
# benchmark cells further down.
widget = VegaWidget(spec=spec_no_data)

# Every change of the widget's `rec_time` trait is routed to on_value_change.
widget.observe(on_value_change, names='rec_time')

# Show the chart, the progress bar and the results grid stacked vertically.
display(widgets.VBox(children=[widget, progress, grid]))

VBox(children=(VegaWidget(), IntProgress(value=0, description='Progress', max=20), GridBox(children=(Label(val…

In [4]:
# Preview the first five rows of the replicated, integer-encoded frame.
big_df.head(n=5)

Unnamed: 0,yield,variety,year,site
0,27.0,1,0,4
1,48.86667,1,0,5
2,27.43334,1,0,3
3,39.93333,1,0,0
4,32.96667,1,0,2


In [5]:
def vega_fun():
    """Send one 'vega' benchmark update using the list-of-dicts payload.

    Stores the current time in milliseconds in the module-level `start`,
    pushes `big_dict` into the widget's 'data' dataset with `remove="true"`
    (presumably clearing the rows already there -- confirm), then sleeps for
    one second.
    """
    global start
    start = int(1000 * time.time())
    widget.update('data', remove="true", insert=big_dict)
    time.sleep(1)


# Select the 'vega' result column, register the runner so on_value_change can
# repeat it, then launch the first run.
set_col('vega')
callback_ = vega_fun
vega_fun()

In [6]:
def itw_fun():
    """Send one 'itw' benchmark update using the DataFrame payload.

    Stores the current time in milliseconds in the module-level `start`,
    pushes `big_df` into the widget's 'data' dataset with `remove="true"`
    (presumably clearing the rows already there -- confirm), then sleeps for
    one second.
    """
    global start
    start = int(1000 * time.time())
    widget.update('data', remove="true", insert=big_df)
    time.sleep(1)


# Select the 'itw' result column and switch compression off before measuring.
set_col('itw')
widget.compression = None

# Register the runner so on_value_change can repeat it, then launch run one.
callback_ = itw_fun
itw_fun()

In [7]:
set_col('lz4')
widget.compression = "lz4"

def lz4_fun():
    """Send one 'lz4' benchmark update using the DataFrame payload.

    Stores the current time in milliseconds in the module-level `start`,
    pushes `big_df` into the widget's 'data' dataset, then sleeps for one
    second like the other benchmark runners.
    """
    global start
    start = int(time.time()*1000)
    widget.update('data', insert=big_df, remove="true")
    time.sleep(1)

# Register this case's own runner. Previously the cell left `callback_`
# untouched, so every repetition after the first ran whatever function an
# earlier cell had installed, and the result depended on cell execution order.
callback_ = lz4_fun
lz4_fun()

In [8]:
set_col('zlib')
widget.compression = "zlib"

def zlib_fun():
    """Send one 'zlib' benchmark update using the DataFrame payload.

    Stores the current time in milliseconds in the module-level `start`,
    pushes `big_df` into the widget's 'data' dataset, then sleeps for one
    second like the other benchmark runners.
    """
    global start
    start = int(time.time()*1000)
    widget.update('data', insert=big_df, remove="true")
    time.sleep(1)

# Register this case's own runner. Previously the cell left `callback_`
# untouched, so every repetition after the first ran whatever function an
# earlier cell had installed, and the result depended on cell execution order.
callback_ = zlib_fun
zlib_fun()