#  Vaex - 2D demo
The purpose of this notebook is to demonstrate
some of the 2D capabilities of Vaex, with the data plotted using bqplot.

Jupyter widgets are enabled so the user can interactively play with the Vaex settings. See the GUI near the bottom of this notebook.

In [1]:
import numpy as np
import ipywidgets as widgets
import bqplot as bq
import vaex as vx



ModuleNotFoundError: No module named 'vaex'

### Create random data to plot

In [None]:

# Settings shared by the two synthetic signals.
size = 10000       # number of samples per signal
scale = 1.         # multiplier applied to each random-walk step
scaleLocal = 20    # multiplier of the noise added to the first walk
scaleLocal2 = 3    # multiplier of the noise added to the second walk


def _noisy_walk(seed, noise_scale):
    """Return a seeded random walk of `size` steps with extra noise added.

    The walk steps are drawn first and the additive noise second, so the
    result for a given seed is reproducible.
    """
    np.random.seed(seed)
    walk = np.cumsum(np.random.randn(size) * scale)
    return walk + np.random.randn(size) * noise_scale


x_data = np.arange(size)
y_data = _noisy_walk(0, scaleLocal)
y_data2 = _noisy_walk(1, scaleLocal2)


### Create initial figure and lines

In [None]:
# One shared x scale, a y scale for the primary axis and a separate y scale
# for the secondary (right-hand) axis.
x_sc = bq.LinearScale()
y_sc = bq.LinearScale()
y_sc2 = bq.LinearScale()

ax_x = bq.Axis(label='X', scale=x_sc, grid_lines='solid')
ax_y = bq.Axis(
    label='Y',
    scale=y_sc,
    orientation='vertical',
    grid_lines='solid',
)
# The secondary axis starts hidden and is not added to the figure here.
ax_y2 = bq.Axis(
    label='2nd Y',
    scale=y_sc2,
    orientation='vertical',
    side='right',
    visible=False,
    grid_lines='none',
)

# The two raw signals; the built-in legend is switched off on both marks.
line1 = bq.Lines(
    x=x_data,
    y=y_data,
    scales={'x': x_sc, 'y': y_sc},
    colors=['blue'],
    display_legend=False,
    labels=['y1'],
    stroke_width=1.0,
)
line2 = bq.Lines(
    x=x_data,
    y=y_data2,
    scales={'x': x_sc, 'y': y_sc},
    colors=['darkgreen'],
    display_legend=False,
    labels=['y2'],
    stroke_width=1.0,
)

margins = dict(top=50, bottom=40, left=50, right=50)
fig = bq.Figure(
    marks=[line1, line2],
    axes=[ax_x, ax_y],
    fig_margin=margins,
    animation_duration=1000,
    title='Vaex demo',
)
fig.layout.width = '98%'
fig.layout.height = '400px'

# NOTE(review): figBinNum is created again in the "Create Jupyter widgets"
# cell, which replaces this instance.
figBinNum = widgets.IntText(value=500, description='Nº of bins', disabled=False)
figBinNum.layout.width = '180px'


### Create Vaex object

In [None]:
# Build the Vaex object from the NumPy arrays; they are exposed under the
# names x, y1 and y2, which the expressions in this notebook refer to.
dataVX = vx.from_arrays( x=x_data, y1=y_data, y2 = y_data2)

In [None]:
# Name of the column to bin along, and of the column whose statistic is
# plotted for the initial binned line.
binbyChannelx = 'x'
binbyChannely = 'y1'

### Calculate initial binned line to plot

In [None]:
# x range and number of bins used for the initial binned line.
start = 500
end = 8000
samplePoints = (50,)

# Two expressions for the same x window, registered as named selections:
# a distance-from-centre test and an explicit pair of bounds.
tExpression = "abs({} - {}) < {}".format(
    (start + end)/2, binbyChannelx, (end - start)/2
)
tExpressionAnd = "({0} > {1}) & ({0} < {2})".format(binbyChannelx, start, end)
dataVX.select(tExpression, name='xpos')
dataVX.select(tExpressionAnd, name='xposAnd')

# Per-bin mean of x and of y1, restricted to the 'xpos' selection.
x_Binned = dataVX.mean(
    binbyChannelx,
    binby=binbyChannelx,
    selection='xpos',
    shape=samplePoints,
    limits=[start, end],
)
y_Binned = dataVX.mean(
    binbyChannely,
    binby=binbyChannelx,
    selection='xpos',
    shape=samplePoints,
    limits=[start, end],
)

# The same binned data as two marks: one on the primary y scale and one on
# the secondary y scale. Only the primary one is added to the figure here.
lineBinned = bq.Lines(
    x=x_Binned,
    y=y_Binned,
    scales={'x': x_sc, 'y': y_sc},
    display_legend=False,
    labels=['y1 -- mean'],
)
lineBinned2ndY = bq.Lines(
    x=x_Binned,
    y=y_Binned,
    scales={'x': x_sc, 'y': y_sc2},
    display_legend=False,
    labels=['y1 -- mean'],
)
fig.marks = [line1, line2, lineBinned]


### Create Jupyter widgets

In [None]:
# -------------------------------
# First equation box: its text is used by updatePlot() as an expression
# to bin.
UserEquation = widgets.Text(
    value='y1',
    placeholder='Type equation: e.g.  y1 * y2,  or  where((y1<-50)&(y2>80) , y1, 0)',
    description='Equation1:',
    disabled=False
)

# Validity indicator displayed next to the first equation box.
# NOTE(review): nothing in the visible code ever changes its value.
valid = widgets.Valid(
    value=True,
    description='',)
valid.layout.width = '20px'
equationHbox = widgets.HBox([UserEquation,valid])

# Second equation box; leave it empty to plot a single expression.
UserEquation2 = widgets.Text(
    value='y2',
    placeholder='Type equation: e.g.  y1 * y2,  or  where((y1<-50)&(y2>80) , y1, 0)',
    description='Equation2:',
    disabled=False
)

# Validity indicator displayed next to the second equation box.
valid2 = widgets.Valid(
    value=True,
    description='',)
valid2.layout.width = '20px'

equationHbox2 = widgets.HBox([UserEquation2,valid2])



# -------------------------------
# Optional boolean selection: when non-empty, updatePlot() combines it
# (mode 'and') with the x-range selection.


Userselect = widgets.Text(
    value='',
    placeholder='Type selection boolean equation',
    description='Selection:',
    disabled=False
)

# Validity indicator displayed next to the selection box.
validselect = widgets.Valid(
    value=True,
    description='',)
validselect.layout.width = '20px'

selectHbox = widgets.HBox([Userselect,validselect])

# Number of bins along x; read by updatePlot() as the binning shape.
figBinNum = widgets.IntText(
    value=500,
    description='Nº of bins:',
    disabled=False
)
figBinNum.layout.width = '180px'

# Lower bound of the x range used for the selection and the bin limits.
startx = widgets.FloatText(
    value=0,
    description='Start x:',
    disabled=False
)
startx.layout.width = '180px'

# Upper bound of the x range used for the selection and the bin limits.
endx = widgets.FloatText(
    value=8000,
    description='End x:',
    disabled=False
)
endx.layout.width = '180px'

# Statistics to compute per bin; several can be selected at once. The option
# strings are matched literally inside updatePlot().
calcTypeWidget = widgets.SelectMultiple(
    options=['Mean', 'Max', 'Min', 'sum', 'Std. dev', '90%ile','10%ile','75%ile','25%ile', 'median', 'variance', 'correlation' , 'covariance cov[x,y]'],
    value=['Mean'],
    #rows=10,
    description='Calc type:',
    disabled=False
)

# Button that triggers a recalculation of the binned lines.
buttonPlot = widgets.Button(description='Plot',
           layout=widgets.Layout(width='180px', height='25px'))

# Progress bar advanced by updatePlot() while it works through the
# requested statistics; max is overwritten on every update.
updatePlots_progressBar = widgets.IntProgress(
    value=0,
    min=0,
    max=10,
    step=1,
    description='',
    bar_style='', # 'success', 'info', 'warning', 'danger' or ''
    orientation='horizontal'
)
# Keep the bar exactly as wide as the button it sits under.
updatePlots_progressBar.layout.width = buttonPlot.layout.width
updatePlots_progressBar.layout.height = '15px'


# Short usage hints shown beside the controls.
helpLabelWidget1 = widgets.HTML(value="<b>The lines y1 and y2 are fixed.</b>")
helpLabelWidget = widgets.HTML(value="<b>Edit the settings and hit plot....</b>")


def on_buttonPlot_clicked(buttonPlot):       
    """Click handler for the Plot button: recompute and redraw the plot.

    buttonPlot: the argument passed to the handler; it is not used here.
    """
    updatePlot()

# Run the handler on every click of the Plot button.
buttonPlot.on_click(on_buttonPlot_clicked)

# Stack the progress bar directly underneath the button.
buttonPlotBox = widgets.VBox([buttonPlot, updatePlots_progressBar])  
    
    
    

In [None]:
# Text panel that updatePlot() fills with the Vaex calls it executed.
pythonCode = widgets.Textarea(
    value='',
    placeholder='Python code is exported to this panel....',
    description='',
    disabled=False
)
pythonCode.layout.width = '80%'
pythonCode.layout.height = '200px'

## Define interactive legend class

In [None]:
class legendWidget(object):
    """A legend widget drawn as a horizontal bqplot bar chart.

    One bar is created per legend label found on the given marks, coloured
    like the corresponding line.

    marks: line marks from a bqplot figure. Each mark must carry legend
        labels (turn off the built-in legend on the mark with
        ``display_legend = False``).

    e.g. >>> legend = legendWidget(fig.marks)

    The bqplot module must be imported as bq (``import bqplot as bq``).

    Attributes:
        fig: the bqplot Figure holding the legend bars.
        bar: the Bars mark; one bar per legend label.
        colours: list of bar colours, in label order.
        markLineNums: number of labels (lines) contributed by each mark,
            in mark order.
    """

    def __init__(self, marks):
        """Build the legend figure for `marks`."""
        legendLabels, colours, markLineNums = self._collect_entries(marks)

        y_ord = bq.OrdinalScale()
        x_sc = bq.LinearScale()

        bar = bq.Bars(
            y=[1] * len(legendLabels),  # all bars have an amplitude of 1
            x=legendLabels,
            scales={'y': x_sc, 'x': y_ord},
            colors=colours,
            padding=0.6,
            orientation='horizontal',
            stroke='white'  # remove the black border around the bar
            )

        ax_y = bq.Axis(scale=y_ord, orientation="vertical")
        ax_x = bq.Axis(scale=x_sc)
        ax_x.visible = False
        margin = dict(top=40, bottom=0, left=110, right=5)
        barFig = bq.Figure(marks=[bar], axes=[ax_y, ax_x], fig_margin=margin)

        # Variable height depending on number of bars in legend
        barFig.layout.height = str(45 + 20 * len(legendLabels)) + 'px'
        barFig.layout.width = '170px'

        # Effectively remove the aspect ratio constraint.
        barFig.min_aspect_ratio = 0.000000000001
        barFig.max_aspect_ratio = 999999999999999
        barFig.background_style = {'fill': 'White'}

        self.fig = barFig
        self.bar = bar
        self.colours = colours
        self.markLineNums = markLineNums

    @staticmethod
    def _collect_entries(marks):
        """Gather legend labels, colours and per-mark line counts.

        A mark may carry more labels than colours. In that case the mark's
        colour list is cycled so that every label still gets a colour and
        the colours of later marks stay aligned with their labels.
        A mark with an empty colour list contributes no colours.

        Returns a tuple ``(labels, colours, line_counts)`` of three lists.
        """
        labels = []
        colours = []
        line_counts = []  # record number of lines per mark
        for mark in marks:
            mark_labels = list(mark.labels)
            palette = list(mark.colors)
            labels.extend(mark_labels)
            if palette:
                colours.extend(
                    palette[i % len(palette)] for i in range(len(mark_labels))
                )
            line_counts.append(len(mark_labels))
        return labels, colours, line_counts
        

 
    
        

## Create interactive legend

In [None]:
# Legend for the marks currently on the main figure. changeOpacity() and
# updatePlot() refer to this object through the global name `legend`.
legend = legendWidget(fig.marks)        

def matchLegendOpacities2FigMarks(legendFig,bar,lineFig):
    """Copy the legend bar opacities onto the marks of the line chart.

    legendFig: object whose markLineNums lists how many lines each mark has.
    bar: object whose opacities holds one entry per legend bar.
    lineFig: figure whose marks receive the opacities.

    A mark can hold more than one line, so each mark is given the slice of
    bar opacities that belongs to its lines.
    """
    offset = 0
    for index, count in enumerate(legendFig.markLineNums):
        stop = offset + count
        lineFig.marks[index].opacities = bar.opacities[offset:stop]
        offset = stop

def changeOpacity(self, target):
    """Click handler that toggles the opacity of one legend entry.

    Use in conjunction with class legendWidget(object).
    Clicking a legend bar dims it (and the matching line in the chart);
    clicking a dimmed bar restores it.

    self: the Bars mark of the legend that was clicked.
    target: click event; target['data']['index'] is the clicked bar index.
    """
    # The handler is only given the mark and the event, so the line chart
    # and the legend widget are taken from the notebook-level names.
    lineFig = fig
    legendFig = legend

    dimmed = 0.1  # opacity applied to a de-selected entry
    clicked = target['data']['index']
    bar = self

    # An empty list means no opacity was set yet: start fully opaque.
    if bar.opacities == []:
        bar.opacities = [1.0] * len(bar.x)

    # Build a new list and assign it; per the original author, changing an
    # element of bar.opacities in place did not work.
    updated = list(bar.opacities)
    updated[clicked] = dimmed if updated[clicked] == 1.0 else 1
    bar.opacities = updated

    matchLegendOpacities2FigMarks(legendFig, bar, lineFig)

   
# Make the legend interactive: clicking a bar calls changeOpacity().
legend.fig.marks[0].on_element_click(changeOpacity)
# legend.fig

### Update plot function

In [None]:
binbyChannelx = 'x'

# Statistics computed separately for each equation.
# GUI label -> (Vaex method name, percentage argument or None,
#               True when the result is drawn on the secondary y axis)
_CHANNEL_STATS = {
    'Mean': ('mean', None, False),
    'Max': ('max', None, False),
    'Min': ('min', None, False),
    'sum': ('sum', None, True),
    'Std. dev': ('std', None, True),
    'variance': ('var', None, True),
    '90%ile': ('percentile_approx', 90.0, False),
    '10%ile': ('percentile_approx', 10.0, False),
    '75%ile': ('percentile_approx', 75.0, False),
    '25%ile': ('percentile_approx', 25.0, False),
    'median': ('median_approx', 50.0, False),
}

# Statistics that combine exactly two equations: GUI label -> Vaex method.
# Their results are always drawn on the secondary y axis.
_PAIRED_STATS = {
    'correlation': 'correlation',
    'covariance cov[x,y]': 'covar',
}


def updatePlot():
    """Recompute the binned lines from the GUI settings and redraw them.

    Reads the notebook-level widgets (equations, selection, x range, number
    of bins, calc types), evaluates every requested statistic on dataVX
    binned along binbyChannelx, and then:

    * updates lineBinned / lineBinned2ndY and the figure's marks and axes
      (the secondary axis is shown only when it has data),
    * writes the executed Vaex calls to the pythonCode panel,
    * rebuilds the contents of the global legend.

    The progress bar is reset when the function ends, including when a user
    expression makes a Vaex call raise; the exception itself still
    propagates.
    """
    binbyChannelys = [
        box.value for box in (UserEquation, UserEquation2) if box.value != ''
    ]
    calcTypes = calcTypeWidget.value
    updatePlots_progressBar.max = len(calcTypes) * len(binbyChannelys)

    start = startx.value
    end = endx.value
    samplePoints = figBinNum.value

    def binned(method, *expressions, **extra):
        """Call dataVX.<method> with the shared selection, shape and limits."""
        return getattr(dataVX, method)(
            *expressions,
            selection=True, shape=(samplePoints,), limits=[start, end],
            **extra
        )

    # Tail shared by every exported call.
    binningText = ', selection = True, shape = ({},) , limits=[{}, {}])\n'.format(
        samplePoints, start, end
    )

    tExpressionAnd = "(" + binbyChannelx + " > " + str(start) + ") & (" + binbyChannelx + " < " + str(end) + ")"

    y_Binned = []
    y_Binned2 = []  # for channels to be put on the 2nd y axis
    labels = []
    labels2 = []  # for channels to be put on the 2nd y axis
    codeLines = []  # exported code, in the order the calls are executed

    try:
        # The expressions are exported with repr() so the emitted lines are
        # valid Python even if the text contains quote characters.
        dataVX.select(tExpressionAnd)
        codeLines.append('dataVX.select({!r})\n'.format(tExpressionAnd))

        # x positions are computed from the x-range selection only, before
        # the optional user selection is combined in.
        x_Binned = binned('mean', binbyChannelx, binby=binbyChannelx)
        codeLines.append(
            'x_Binned = dataVX.mean("{0}", binby="{0}"'.format(binbyChannelx)
            + binningText
        )

        if Userselect.value != '':
            dataVX.select(Userselect.value, mode='and')
            codeLines.append(
                "dataVX.select({!r}, mode = 'and')\n".format(Userselect.value)
            )

        for calcType in calcTypes:
            if calcType in _CHANNEL_STATS:
                method, percentage, onSecondAxis = _CHANNEL_STATS[calcType]
                if percentage is None:
                    extra = {}
                    percentageText = ''
                else:
                    extra = {'percentage': percentage}
                    percentageText = ' percentage={} ,'.format(percentage)

                for binbyChannely in binbyChannelys:
                    updatePlots_progressBar.value += 1
                    values = binned(
                        method, binbyChannely, binby=binbyChannelx, **extra
                    )
                    label = binbyChannely + '--' + calcType
                    if onSecondAxis:
                        y_Binned2.append(values)
                        labels2.append(label)
                    else:
                        y_Binned.append(values)
                        labels.append(label)
                    codeLines.append(
                        'dataVX.{}("{}",{} binby="{}"'.format(
                            method, binbyChannely, percentageText, binbyChannelx
                        )
                        + binningText
                    )

            elif calcType in _PAIRED_STATS:
                # Use up the progress steps reserved for this calc type.
                updatePlots_progressBar.value += len(binbyChannelys)
                if len(binbyChannelys) == 2:
                    method = _PAIRED_STATS[calcType]
                    first, second = binbyChannelys
                    y_Binned2.append(
                        binned(method, first, second, binby=[binbyChannelx])
                    )
                    labels2.append(first + ',' + second + '--' + calcType)
                    codeLines.append(
                        'dataVX.{}("{}","{}", binby=["{}"]'.format(
                            method, first, second, binbyChannelx
                        )
                        + binningText
                    )
                else:
                    codeLines.append(
                        'Please create 2 equations to compute ' + calcType + '\n'
                    )

        lineBinned.x = x_Binned
        lineBinned.y = y_Binned
        lineBinned.labels = labels
        if y_Binned2:
            lineBinned2ndY.x = x_Binned
            lineBinned2ndY.y = y_Binned2
            lineBinned2ndY.labels = labels2
            ax_y2.visible = True
            fig.marks = [line1, line2, lineBinned, lineBinned2ndY]
            fig.axes = [ax_x, ax_y, ax_y2]
        else:
            ax_y2.visible = False
            fig.marks = [line1, line2, lineBinned]
            fig.axes = [ax_x, ax_y]

        pythonCode.value = ''.join(codeLines)

        # Rebuild the legend for the new marks, but keep the existing legend
        # figure object so the one already displayed is updated in place.
        legendNew = legendWidget(fig.marks)
        legendFig = legendNew.fig
        legend.fig.marks = legendFig.marks
        legend.markLineNums = legendNew.markLineNums
        legend.fig.axes = legendFig.axes
        legend.fig.layout.height = str(40 + 20 * sum(legendNew.markLineNums)) + 'px'
        legend.fig.marks[0].on_element_click(changeOpacity)
    finally:
        updatePlots_progressBar.value = 0



In [None]:

# Top row: main figure with the interactive legend to its right.
fig_legend =  widgets.HBox([fig,legend.fig])
# Bottom-left column: help text and the Plot button with its progress bar.
lhs = widgets.VBox([helpLabelWidget1,helpLabelWidget,buttonPlotBox])
# Bottom-right column: all binning, equation, selection and calc controls.
rhs = widgets.VBox([figBinNum,widgets.HBox([startx,endx]), equationHbox,equationHbox2,selectHbox,calcTypeWidget])
bottom = widgets.HBox([lhs,rhs])

# Full GUI: chart row, controls row, then the exported-code panel.
vbox = widgets.VBox([fig_legend,bottom,pythonCode])
vbox

In [None]:
# Only include rows in the calculation where y2 is less than 50
Userselect.value = 'y2 < 50'
updatePlot()

In [None]:
# Set the first equation to an expression combining both columns
UserEquation.value = 'y1*y2 / 10'
updatePlot()

In [None]:
# Using the numexpr where function, cap the minimum to -1000
# (values below -1000 are replaced by -1000)
UserEquation.value = 'where(y1*y2 / 10 < -1000 , -1000 ,y1*y2 / 10) '
updatePlot()

In [None]:
# Clear the selection and set the first equation back to y1
Userselect.value = ''
UserEquation.value = 'y1'
updatePlot()