In [1]:
import pandas as pd
import math
import asyncio
import vega
import ipywidgets as widgets
from ipywidgets import VBox, HBox, Label, Button, interact, FloatProgress
from vega.widget import VegaWidget
import time
import numpy as np

In [2]:
# Read the sample CSV and keep only the two coordinate columns used for binning.
randomdf = pd.read_csv("./data/random100k.csv").loc[:, ["x", "y"]]
randomdf

Unnamed: 0,x,y
0,79,24
1,85,27
2,73,47
3,55,85
4,52,3
...,...,...
99995,25,5
99996,74,54
99997,18,74
99998,95,86


In [3]:
# Spot-check a single cell: the "x" value of the row labelled 1.
randomdf.at[1, "x"]

85

In [4]:
# Parameters.
# The grid is configured by bin *count* per axis rather than by bin span
# (original note: "count is better for progressive").
width, height = 300, 300          # chart size in pixels
xRectCount, yRectCount = 5, 5     # number of bins along each axis

# Requested data ranges; only the upper bound feeds the step computation below.
xDomain, yDomain = [0, 100], [0, 100]

# Bin edge length in data units, rounded up to a whole number
# (integer ceiling division: -(-a // b) == ceil(a / b) for ints).
xStep = -(-xDomain[1] // xRectCount)
yStep = -(-yDomain[1] // yRectCount)

# Widen each domain so that it is covered by exactly `count` whole bins.
xDomain = [0, xStep * xRectCount]
yDomain = [0, yStep * yRectCount]

In [5]:
# Self-contained variant of the heatmap spec: same layout as the streaming
# spec, but with four hard-coded sample bins embedded under "data.values".
spec_with_data = {
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
  "width": width,
  "height": height,
  # Exposed as Vega-Lite params; the expr-based alternatives that would read
  # them are kept below as comments, the active values are baked in from Python.
  "params": [
    {"name": "xRectCount", "value": xRectCount},
    {"name": "yRectCount", "value": yRectCount},
    {"name": "xDomain", "value": xDomain},
    {"name": "yDomain", "value": yDomain}
  ],
  "data": {
    "values": [
      {"x": 25, "y": 25, "count": 1},
      {"x": 25, "y": 75, "count": 2},
      {"x": 75, "y": 25, "count": 3},
      {"x": 75, "y": 75, "count": 4}
    ]
  },
  "mark": {
    "type": "rect",
    "cornerRadius": 0,
    # Alternative (param-driven): "width": {"expr": "width/xRectCount"},
    # Alternative (param-driven): "height": {"expr": "height/yRectCount"},
    # One rectangle per bin: chart size divided by the number of bins per axis.
    "width": width/xRectCount,
    "height": height/yRectCount
  },
  "encoding": {
    "x": {
      "field": "x",
      "type": "quantitative",
      # Alternative (param-driven): "scale": {"type": "linear", "domain": {"expr": "xDomain"}}
      "scale": {"type": "linear", "domain": xDomain}
    },
    "y": {
      "field": "y",
      "type": "quantitative",
      # Alternative (param-driven): "scale": {"type": "linear", "domain": {"expr": "yDomain"}}
      # The y axis must use yDomain (it previously reused xDomain by mistake).
      "scale": {"type": "linear", "domain": yDomain}
    },
    # Bin colour encodes the number of points that fell into the bin.
    "fill": {
      "field": "count",
      "type": "quantitative"
    }
  }
}

In [6]:
# Streaming variant of the heatmap spec: it declares a named, initially empty
# dataset ("randomNumbers") instead of embedding values.
spec_no_data = {
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "x-y-bin",
    "width": width,
    "height": height,
    # Mirror the Python-side grid settings as Vega-Lite params.
    "params": [
        {"name": paramName, "value": paramValue}
        for paramName, paramValue in (
            ("xRectCount", xRectCount),
            ("yRectCount", yRectCount),
            ("xDomain", xDomain),
            ("yDomain", yDomain),
        )
    ],
    "data": {"name": "randomNumbers"},
    # One rectangle per bin: chart size divided by the number of bins per axis.
    "mark": {
        "type": "rect",
        "cornerRadius": 0,
        "width": width / xRectCount,
        "height": height / yRectCount,
    },
    "encoding": {
        "x": {
            "field": "x",
            "type": "quantitative",
            "scale": {"type": "linear", "domain": xDomain},
        },
        "y": {
            "field": "y",
            "type": "quantitative",
            "scale": {"type": "linear", "domain": yDomain},
        },
        # Bin colour encodes the per-bin point count.
        "fill": {"field": "count", "type": "quantitative"},
    },
}

In [7]:
# Hand-written 2x2 summary fixture: bin centres paired with counts 1..4.
testSummary = [
    {"x": centerX, "y": centerY, "count": rank}
    for rank, (centerX, centerY) in enumerate(
        [(25, 25), (25, 75), (75, 25), (75, 75)], start=1
    )
]

In [8]:

def initSummary(xDomain, yDomain, xStep, yStep):
    """
    Build an empty grid summary: one record per bin, all counts at zero.

    Bins are enumerated x-major (outer loop over x, inner loop over y), and
    each record stores the bin *centre* as "x"/"y" plus a "count" of 0.

    xDomain, yDomain: [lower, upper] integer bounds handed to range().
    xStep, yStep: integer bin edge lengths along each axis.
    Returns a list of {"x", "y", "count"} dicts.
    """
    xOffset, yOffset = xStep / 2, yStep / 2
    return [
        {"x": left + xOffset, "y": bottom + yOffset, "count": 0}
        for left in range(xDomain[0], xDomain[1], xStep)
        for bottom in range(yDomain[0], yDomain[1], yStep)
    ]

# Quick check: a step of 50 on both axes should produce a 2x2 grid of zero-count bins.
initSummary(xDomain, yDomain, 50, 50)

[{'x': 25.0, 'y': 25.0, 'count': 0},
 {'x': 25.0, 'y': 75.0, 'count': 0},
 {'x': 75.0, 'y': 25.0, 'count': 0},
 {'x': 75.0, 'y': 75.0, 'count': 0}]

In [9]:
def accSummary(summary, df, xStep, yStep, start=0):
    """
    Accumulate the points of `df` into the bin counts of `summary`.

    Each row's ("x", "y") pair is compared against every bin (centre
    +/- half a step, both edges inclusive); the first matching bin in
    `summary` order gets its "count" incremented. A point that lies exactly
    on a shared edge is therefore counted once, in the earlier bin. Points
    matching no bin are reported via print and otherwise ignored.

    summary: list of {"x", "y", "count"} dicts; mutated in place.
    df: DataFrame with "x" and "y" columns; any index is accepted.
    xStep, yStep: bin edge lengths along each axis.
    start: retained for backward compatibility only. Rows are now read by
        position, so the segment's index labels no longer need to begin at
        `start` (the former `df.loc[start + i]` lookup raised KeyError
        otherwise).
    Returns the same `summary` list that was passed in.
    """
    halfXStep = xStep / 2
    halfYStep = yStep / 2
    # Iterate the column values directly instead of doing two label lookups per row.
    for x, y in zip(df["x"], df["y"]):
        for cell in summary:
            if (cell["x"] - halfXStep <= x <= cell["x"] + halfXStep
                    and cell["y"] - halfYStep <= y <= cell["y"] + halfYStep):
                cell["count"] += 1
                break
        else:
            # No bin contained the point (loop finished without `break`).
            print("error occured: ", "x:", x, "y:", y)
    return summary

# display(accSummary(initSummary(xDomain, yDomain, 50, 50), randomdf, xStep, yStep, 0))

In [10]:
# Chart widget rendering the streaming spec (its named dataset is filled later).
widget = VegaWidget(spec=spec_no_data)  # optional: opt={"height": "300"}

# Progress indicator on a 0..1 scale.
progressBar = FloatProgress(min=0, max=1.0)

# Control buttons; their click handlers are attached in later cells.
stopButton = Button(
    tooltip='click to stop calculation and view udpate',
    button_style='',
    disabled=False,
    description='Stop/Continue',
)
restartButton = Button(
    tooltip='click to restart calculation and view update',
    button_style='',
    disabled=False,
    description='Restart',
)

In [11]:
"""shared data"""
# Handle of the currently scheduled progressive-summary task (None until first start).
progressiveSummaryTask = None
# Pause flag: the worker idles while this is True; toggled by the Stop/Continue button.
stop = False

In [17]:
async def progressiveSummary(df):
    """
    Split data into segments, calculate the summary and update widgets progressively.

    The frame is processed in `itrNum` consecutive row segments. After each
    segment the accumulated bin counts replace the chart's "randomNumbers"
    dataset and the progress bar advances. While the global `stop` flag is
    set, the coroutine idles before starting the next segment.

    df: DataFrame with "x" and "y" columns. Row labels are assumed to equal
        row positions, because the segment offset is forwarded to
        accSummary as `start`.
    """
    global stop  # a boolean flag to control stop/continue
    summary = initSummary(xDomain, yDomain, xStep, yStep)

    itrNum = 10
    rowCount = len(df)
    for itr in range(itrNum):

        # pause here for as long as the user has stopped the calculation
        while stop:
            await asyncio.sleep(0.01)

        # 1. get a data segment
        # Integer boundaries make consecutive segments tile the frame exactly.
        # The previous float batch size truncated both ends independently and
        # silently skipped rows when len(df) was not a multiple of itrNum.
        segStart = itr * rowCount // itrNum
        segEnd = (itr + 1) * rowCount // itrNum
        dfSegment = df[segStart:segEnd]

        # 2. calculate new summary with previous summary and current data segment
        summary = accSummary(summary, dfSegment, xStep, yStep, segStart)

        # 3. update widgets: drop every existing datum, then insert the new counts
        widget.update("randomNumbers", remove="true", insert=summary)
        progressBar.value = (itr + 1) / itrNum

        # 4. yield to the event loop so button clicks can be handled
        await asyncio.sleep(1)

In [18]:
def stopButtonListener(b):
    """
    Click handler for the Stop/Continue button: flip the global `stop` flag
    and print which state was entered.

    b: the clicked button (unused).
    """
    global stop
    stop = not stop
    if stop:
        print("************* stop **************")
    else:
        print("************* continue **************")

# Attach the toggle handler: each click flips the global `stop` flag.
stopButton.on_click(stopButtonListener)

In [19]:
def restartButtonListener(b):
    """
    Click handler for the Restart button: cancel any previously scheduled
    run, clear the pause flag and the progress bar, then schedule a fresh
    progressive summary over `randomdf` on the event loop.

    b: the clicked button (unused; the cell below also calls this with None).
    """
    global stop, progressiveSummaryTask

    # Request cancellation of the earlier task, if one was ever created.
    if progressiveSummaryTask:
        progressiveSummaryTask.cancel()

    # Reset the UI-visible state before the new run begins.
    stop = False
    progressBar.value = 0

    progressiveSummaryTask = asyncio.get_event_loop().create_task(
        progressiveSummary(randomdf)
    )
    print("************* restart **************")

# Attach the restart handler: a click cancels any earlier task and schedules a new run.
restartButton.on_click(restartButtonListener)

In [20]:
# Lay out the UI: progress bar on top, chart below it, then the two buttons side by side.
VBox(children=[progressBar, widget, HBox(children=[stopButton, restartButton])])

VBox(children=(FloatProgress(value=1.0, max=1.0), VegaWidget(), HBox(children=(Button(description='Stop/Contin…

In [21]:
# Start the first run programmatically by invoking the Restart handler directly.
restartButtonListener(None)
# global syncActiveTask
# global progressiveSummaryTask
# global updateWidgetAsyncTask
# loop = asyncio.get_event_loop();
# syncActiveTask = loop.create_task(syncActiveLock())
# progressiveSummaryTask = loop.create_task(progressiveSummary());
# updateWidgetAsyncTask = loop.create_task(updateWidgetAsync());

************* restart **************
