# Activity #1: Heat maps
* we'll start with building up a heat map based on some small, randomly generate data
* we'll use this methodology to make our plot interactive & then move on to using "real" data

In [1]:
# lets import our usual stuff
import pandas as pd
import bqplot
import numpy as np
import traitlets
import ipywidgets
%matplotlib inline

In [2]:
# lets start thinking about heatmaps with some random data
data = np.random.random((10, 10))
data
# so we just have a 10 x 10 array here

array([[4.11109800e-02, 8.48602097e-01, 7.14441826e-01, 4.28300948e-01,
        5.38800524e-02, 5.81968914e-01, 5.79406449e-01, 2.97311462e-01,
        7.97252024e-01, 1.27177858e-01],
       [8.22505230e-01, 8.33193530e-01, 2.65880129e-01, 4.30717540e-01,
        3.39352499e-01, 7.88057561e-01, 7.37673631e-01, 7.04656745e-01,
        4.48373554e-01, 2.92987842e-01],
       [6.29376592e-01, 4.45410800e-01, 8.91704560e-01, 3.77322575e-01,
        9.57605570e-01, 4.49876661e-01, 5.38777668e-01, 7.95003341e-01,
        5.25957943e-01, 8.12966161e-01],
       [9.48006854e-01, 8.42739296e-01, 8.79975935e-01, 9.07582548e-01,
        7.25729291e-01, 7.47312313e-01, 4.18879571e-01, 8.39142426e-01,
        7.28010408e-02, 6.69304296e-01],
       [5.33876693e-01, 4.46790522e-01, 7.21457222e-01, 4.78535268e-01,
        7.03146588e-02, 8.77138127e-01, 9.29309607e-01, 8.31410384e-01,
        9.46714069e-01, 1.53874105e-01],
       [5.07668733e-01, 5.50433766e-01, 9.03165038e-01, 6.78059013e-01,
   

In [3]:
# lets start by generating a quick heat map

# (1)
# create our first scale of our plot: just a color scale
col_sc = bqplot.ColorScale() 
# now we'll use bqplot's gridheatmap function
#  with our randomly generated data & our scales to 
#  make a heatmap like so:
heat_map = bqplot.GridHeatMap(color = data, 
                              scales = {'color': col_sc})
# put our marks into our figure and lets go!
fig = bqplot.Figure(marks = [heat_map])

fig

Figure(fig_margin={'top': 60, 'bottom': 60, 'left': 60, 'right': 60}, layout=Layout(min_width='125px'), marks=…

In [4]:
# (2) ok, this is fine and all, but lets add some reference for our 
#  color scheme with a colorbar & also lets choose a different 
#  color scheme
col_sc = bqplot.ColorScale(scheme = "Reds")
# lets plot some axes on our plot as well, in this case
#  our axis will be a color bar, vertically on the right
#  of our heatmap
c_ax = bqplot.ColorAxis(scale = col_sc, 
                        orientation = 'vertical', 
                        side = 'right')
# put it all together and lets take a look!
heat_map = bqplot.GridHeatMap(color = data, 
                              scales = {'color': col_sc})
# generate fig!
fig = bqplot.Figure(marks = [heat_map], axes = [c_ax])

fig

Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale(scheme='Reds'), side='right')], fig_margin={'t…

In [5]:
# (3) finally, lets add some axes labels on the x & y axis,
#  we need to add their scales first
# this scale will just count up the boxes in the vertical 
#   & horizontal direction
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()
# add our axes objects
c_ax = bqplot.ColorAxis(scale = col_sc, 
                        orientation = 'vertical', 
                        side = 'right')
x_ax = bqplot.Axis(scale = x_sc)
y_ax = bqplot.Axis(scale = y_sc, 
                   orientation = 'vertical')

heat_map = bqplot.GridHeatMap(color = data, 
                              scales = {'color': col_sc, 
                                        'row': y_sc,
                                        'column':x_sc})
fig = bqplot.Figure(marks = [heat_map], 
                    axes = [c_ax, y_ax, x_ax])

fig

Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale(scheme='Reds'), side='right'), Axis(orientatio…

In [6]:
# so, while this indeed a lovely heatmap, it isn't interactive in any way!
#  boo to that!
# Lets start adding in some interactivity

# keep data from last time

# now add scales - colors, x & y
col_sc = bqplot.ColorScale(scheme = "Reds")
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()

# create axis - for colors, x & y
c_ax = bqplot.ColorAxis(scale = col_sc, 
                        orientation = 'vertical', 
                        side = 'right')
x_ax = bqplot.Axis(scale = x_sc)
y_ax = bqplot.Axis(scale = y_sc, 
                   orientation = 'vertical')

# lets now re-do our heat map & add in some interactivity:
# NEW STUFF HERE
heat_map = bqplot.GridHeatMap(color = data,
                              scales = {'color': col_sc,
                                        'row': y_sc,
                                        'column': x_sc},
                              interactions = {'click': 'select'},
                              anchor_style = {'fill':'blue'}, # to make our selection blue
                              selected_style = {'opacity': 1.0},
                              unselected_style = {'opacity': 0.8})

# stir and combine into 1 figure
fig = bqplot.Figure(marks = [heat_map], 
                    axes = [c_ax, y_ax, x_ax])

fig


Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale(scheme='Reds'), side='right'), Axis(orientatio…

In [7]:
#  Ok fine, but our selection isn't linked to anything!
#  lets check out what heat_map selected is
heat_map.selected
#  note if I select a different box & re-run this cell,
#  I get out different values

[]

In [8]:
# so now, lets write a little function that links the data value
#  to the "selected" & lets print this in a little ipywidgets label
mySelectedLabel = ipywidgets.Label()

# (1) 
# lets write our linking function
# there are a few ways to link this,
#  here is a simple way first
def get_data_value(change):
    i,j = heat_map.selected[0]
    v = data[i,j] # grab data value
    mySelectedLabel.value = str(v) # set our label

# regenerate our heatmap to use in our fig canvas
heat_map = bqplot.GridHeatMap(color = data,
                              scales = {'color': col_sc,
                                        'row': y_sc,
                                        'column': x_sc},
                              interactions = {'click': 'select'},
                              anchor_style = {'fill':'blue'}, 
                              selected_style = {'opacity': 1.0},
                              unselected_style = {'opacity': 0.8})
    
# make sure we check out     
heat_map.observe(get_data_value, 'selected')

fig = bqplot.Figure(marks = [heat_map], 
                    axes = [c_ax, y_ax, x_ax])

ipywidgets.VBox([mySelectedLabel, fig])

VBox(children=(Label(value=''), Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale(scheme='Reds')…

In [9]:
mySelectedLabel = ipywidgets.Label()

# (2) this is maybe in-elegant as we are 
#  explicitly calling our origininal heat map!
#  so, lets instead remind ourselves what "change" is here
def get_data_value(change):
    print(change)
    i,j = heat_map.selected[0]
    v = data[i,j] # grab data value
    mySelectedLabel.value = str(v) # set our label
# now we see when we click we get back a whole
#  dictionary of information - if we recall, 
#  "owner" here is our heat_map which "owns" 
# this change.


# regenerate our heatmap to use in our fig canvas
heat_map = bqplot.GridHeatMap(color = data,
                              scales = {'color': col_sc,
                                        'row': y_sc,
                                        'column': x_sc},
                              interactions = {'click': 'select'},
                              anchor_style = {'fill':'blue'}, 
                              selected_style = {'opacity': 1.0},
                              unselected_style = {'opacity': 0.8})
    
# make sure we check out     
heat_map.observe(get_data_value, 'selected')

fig = bqplot.Figure(marks = [heat_map], 
                    axes = [c_ax, y_ax, x_ax])

ipywidgets.VBox([mySelectedLabel, fig])

VBox(children=(Label(value=''), Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale(scheme='Reds')…

In [10]:
mySelectedLabel = ipywidgets.Label()

#  If we want to be able to apply our function to 
#  this or any other heatmap figure we generate,
#  we can re-write the above function as follows:

# (3)
#def get_data_value(change,mylab):
def get_data_value(change):
    #print(change['owner'].selected)
    i,j = change['owner'].selected[0]
    v = data[i,j] # grab data value
    mySelectedLabel.value = str(v) # set our label
# so, this now is applied to any map that we choose to input
    
# regenerate our heatmap to use in our fig canvas
heat_map = bqplot.GridHeatMap(color = data,
                              scales = {'color': col_sc,
                                        'row': y_sc,
                                        'column': x_sc},
                              interactions = {'click': 'select'},
                              anchor_style = {'fill':'blue'}, 
                              selected_style = {'opacity': 1.0},
                              unselected_style = {'opacity': 0.8})
    
# make sure we check out     
heat_map.observe(get_data_value, 'selected')

fig = bqplot.Figure(marks = [heat_map], 
                    axes = [c_ax, y_ax, x_ax])

ipywidgets.VBox([mySelectedLabel, fig])

VBox(children=(Label(value=''), Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale(scheme='Reds')…

# Activity #2: Preliminary dashboarding
* we'll use a random dataset to explore how to make dashboard-like plots that change when things are updated

In [11]:
# now lets move on to making a preliminary 
#dashboard for multi-dimensional datasets
#  lets first start with some randomly generated data again

In [12]:
data = np.random.random((10, 10,20)) # array of 10 x 10 x 20 random values
data

array([[[0.20531986, 0.77495287, 0.94598141, ..., 0.37353053,
         0.7928494 , 0.65766041],
        [0.07306385, 0.38022517, 0.53249456, ..., 0.9420061 ,
         0.42608832, 0.10511975],
        [0.61707924, 0.99220247, 0.52939556, ..., 0.70622859,
         0.18442682, 0.36304572],
        ...,
        [0.35367132, 0.60128003, 0.42862019, ..., 0.32986062,
         0.41739413, 0.50583701],
        [0.50659565, 0.29078955, 0.86775637, ..., 0.2640357 ,
         0.82074105, 0.22398891],
        [0.43209945, 0.35157439, 0.10521239, ..., 0.27034301,
         0.97491378, 0.86110825]],

       [[0.44719176, 0.40342738, 0.39957105, ..., 0.24744614,
         0.36096421, 0.48636139],
        [0.64542307, 0.76039307, 0.53440705, ..., 0.79637066,
         0.41637898, 0.76840601],
        [0.15084438, 0.76066607, 0.54183849, ..., 0.75368621,
         0.00956701, 0.97107023],
        ...,
        [0.54776897, 0.4515079 , 0.51548748, ..., 0.40273712,
         0.76671444, 0.7402135 ],
        [0.3

In [13]:
data.shape

(10, 10, 20)

In [14]:
data[0,0,:]
# we can see that no instead of 1 value, each "i,j" component
#  has an array of 20 random values

array([0.20531986, 0.77495287, 0.94598141, 0.27664348, 0.75288846,
       0.6210499 , 0.54595996, 0.37022217, 0.27414527, 0.82912757,
       0.81450464, 0.29704438, 0.08564098, 0.44380016, 0.53554768,
       0.76729808, 0.29995115, 0.37353053, 0.7928494 , 0.65766041])

In [15]:
# lets start building up linked plots
#  first, lets re-do our plot above with our label printing
#  out the sum along this 5-d array

# now add scales - colors, x & y
col_sc = bqplot.ColorScale(scheme = "Reds")
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()

# create axis - for colors, x & y
c_ax = bqplot.ColorAxis(scale = col_sc, 
                        orientation = 'vertical', 
                        side = 'right')
x_ax = bqplot.Axis(scale = x_sc)
y_ax = bqplot.Axis(scale = y_sc, 
                   orientation = 'vertical')

# create label again
mySelectedLabel = ipywidgets.Label()

def get_data_value(change):
    i,j = change['owner'].selected[0]
    
    # if we run with this, our label is the 20 elements
    #v = data[i,j] # grab data value
    
    # but,lets sum instead
    v = data[i,j].sum() # grab data value
    mySelectedLabel.value = str(v) # set our label
# so, this now is applied to any map that we choose to input
    
    
    
# regenerate our heatmap to use in our fig canvas
# now, we want to plot the sum along our 3rd axis as well, 
#  so, lets do this with "np.sum" along our 3rd axis
heat_map = bqplot.GridHeatMap(color = np.sum(data,axis=2),
                              scales = {'color': col_sc,
                                        'row': y_sc,
                                        'column': x_sc},
                              interactions = {'click': 'select'},
                              anchor_style = {'fill':'blue'}, 
                              selected_style = {'opacity': 1.0},
                              unselected_style = {'opacity': 0.8})
    
# make sure we check out     
heat_map.observe(get_data_value, 'selected')
#heat_map.observe(self, mySelectedLabel)
fig = bqplot.Figure(marks = [heat_map], axes = [c_ax, y_ax, x_ax])

ipywidgets.VBox([mySelectedLabel, fig])

VBox(children=(Label(value=''), Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale(scheme='Reds')…

In [16]:
# now, lets generate another figure that just plots the histogram of values in our 3rd axis
x_sch = bqplot.LinearScale()
y_sch = bqplot.LinearScale()
x_axh = bqplot.Axis(scale = x_sch, label = 'Sum of 3rd axis')
y_axh = bqplot.Axis(scale = y_sch, 
                    orientation = 'vertical', 
                    label='Frequency')

hist = bqplot.Hist(sample = data[0,0,:],
                    opacity = 0.1, 
                   normalized = False, # normalized=False means we get counts in each bin
                    scales = {'sample': x_sch, 'count': y_sch},
                  bins = 5)
figh = bqplot.Figure(marks = [hist], axes = [x_axh, y_axh])

ipywidgets.VBox([mySelectedLabel, ipywidgets.HBox([fig,figh])] )


# ok, so side by side plots, but nothing updates!

VBox(children=(Label(value=''), HBox(children=(Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale…

In [17]:
#(3) so, we have to update what our heatmap has access to as 
#  far as being able to update both the label *AND* the 
# histogram's data
def get_data_value2(change):
    i,j = change['owner'].selected[0]
    v = data[i,j].sum() # grab data value
    mySelectedLabel.value = str(v) # set our label
    hist.sample = data[i,j] # UPDATE HISTOGRAM!
heat_map.observe(get_data_value2, 'selected')
# note here now the heat_map is in a sense "driving" our
# changes.
# *** DO EXAMPLE OF BACK AND FORTH ***

ipywidgets.VBox([mySelectedLabel, ipywidgets.HBox([fig,figh])] )

VBox(children=(Label(value=''), HBox(children=(Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale…

# Activity #3: Dashboarding with "real" data
* now we'll move onto the UFO dataset and start messing around with creating a dashboard for this dataset

In [18]:
# lets start by loading the UFO dataset
ufos = pd.read_csv("/Users/achrist3/Downloads/ufo-scrubbed-geocoded-time-standardized-00.csv",
                  names = ["date", "city", "state", "country",
                          "shape", "duration_seconds", "duration",
                          "comment", "report_date", 
                           "latitude", "longitude"],
                  parse_dates = ["date", "report_date"])

## Aside: downsampling
* some folks reported having a tough time with interactivity of scatter plots with the UFO dataset
* here we'll quickly go over some methods of downsampling that can be applied to decrease the size of our dataset

In [19]:
# you'll see the above takes a good long time to load on my computer
# the length of the dataset is quite large:
len(ufos)

80332

In [20]:
# 80,000!  So, to speed up our interactivity, we can 
#  randomly sample this dataset for plotting purposes
# lets down sample to 1000 samples:
nsamples = 1000

downSampleMask = np.random.randint(0,len(ufos)-1,nsamples)
downSampleMask
# so, downsample mask is now a list of random indicies for 
#  the UFO dataset

array([ 8272, 65597, 23439, 25407, 12298, 55221, 78637,  4966, 35740,
       15150, 61626, 43292, 47253, 76826,  8014, 35596, 54668, 62656,
       43408,  2095,  7449, 48757, 59728, 72186,  9984, 28366, 25954,
       24982, 58649, 72521, 66244, 51792, 30162, 44229, 64701, 66157,
       19950, 78842, 60111, 74082, 41168, 20869, 29568, 35688, 26184,
       60866, 47946,  1222, 64689, 10222, 19881,  7295, 26606, 13074,
       74867, 61769, 37592, 46985, 48047,  6242, 22957, 77680, 13574,
       68365, 48765, 58494,  6450, 21990, 29374, 70141, 48982, 15195,
       35884,  3400, 18209, 33334, 71565, 42455, 70931, 22806, 62000,
       14491, 41479,  5957,  7393, 30888, 41466, 28192, 22961,  6014,
       70932, 51738, 76181, 55832, 47761, 47599, 33970, 67825, 78040,
       16543, 22839, 79397, 67105, 63731, 29795, 46939, 23075,  9334,
       42180, 57022,  9233, 39049, 60234, 63717, 14042, 69858, 13654,
         468,  3193,  9357, 29761, 79410, 31944, 62618, 62353, 55523,
       51801, 11400,

In [21]:
# the above doesn't disclude repeats, but we can take 
#  care of this with a different call:
downSampleMask = np.random.choice(range(len(ufos)-1), nsamples, replace=False)

In [22]:
# lets update:
ufosDS = ufos.loc[downSampleMask]
len(ufosDS)
# so much shorter

1000

In [23]:
# we can also see that this is saved as a dataframe:
ufosDS

Unnamed: 0,date,city,state,country,shape,duration_seconds,duration,comment,report_date,latitude,longitude
10034,2012-11-16 18:14:00,greenville,pa,us,unknown,300.0,<5 minutes,Red/orange lights seen for about 4 minutes in ...,2012-11-19,41.404444,-80.391389
62332,2011-07-04 22:00:00,bradenton,fl,us,oval,300.0,5mins,These were not fireworks nor planes of which ...,2011-07-17,27.498611,-82.575000
20572,2012-12-25 01:30:00,west palm beach,fl,us,light,600.0,5-10 minutes,Red lights over west palm beach 12/25/12,2013-02-04,26.705278,-80.036667
37003,2006-04-15 16:00:00,columbus,oh,us,formation,300.0,5 minutes,Formation of 6 to 7 objects in Columbus&#44 Ohio,2007-04-27,39.961111,-82.998889
48026,1996-06-12 06:00:00,vancouver (canada),bc,ca,disk,300.0,2-5 mins,i was leaving work and was sitting in the fron...,2008-03-31,49.250000,-123.133333
72376,2004-08-08 11:25:00,redmond,wa,us,circle,60.0,1 minute,11:25am - East of Redmond&#44 WA. - white circ...,2004-08-11,47.674167,-122.120278
14360,2002-01-14 20:30:00,coupeville,wa,us,unknown,1200.0,15 - 20 min,strange object appearing to hover turns from b...,2002-01-29,48.220000,-122.685000
42824,1988-05-17 19:45:00,chicago,il,us,chevron,900.0,15 minutes,V shaped tubular object .,2004-04-09,41.850000,-87.650000
69177,1954-08-24 05:00:00,parent (canada),qc,ca,other,240.0,3-4 minutes,Capsule shaped copper coloured object hung mot...,2010-11-21,47.916667,-74.616667
34882,2012-03-05 22:00:00,mccaysville,ga,us,light,900.0,15 minutes,Super-bright red&#44 orange sparkling light ho...,2012-03-13,34.986111,-84.371389


In [24]:
# lets make a scatter plot to remind ourselves what this looks like:
x_sc = bqplot.LinearScale()
y_sc = bqplot.LinearScale()

x_ax = bqplot.Axis(scale = x_sc, label='Longitude')
y_ax = bqplot.Axis(scale = y_sc, 
                   orientation = 'vertical', 
                   label='Latitude')

#(1)
scatters = bqplot.Scatter(x = ufosDS['longitude'],
                          y = ufosDS['latitude'],
                          scales = {'x': x_sc, 'y': y_sc})

fig = bqplot.Figure(marks = [scatters], axes = [x_ax, y_ax, c_ax]) 
fig

Figure(axes=[Axis(label='Longitude', scale=LinearScale()), Axis(label='Latitude', orientation='vertical', scal…

In [25]:
# (2) recall we can also color by things like duration
c_sc = bqplot.ColorScale()
c_ax = bqplot.ColorAxis(scale = c_sc, label='Duration in sec', orientation = 'vertical', side = 'right')
scatters = bqplot.Scatter(x = ufosDS['longitude'],
                          y = ufosDS['latitude'],
                          color=ufosDS['duration_seconds'],
                          scales = {'x': x_sc, 'y': y_sc, 'color':c_sc})

fig = bqplot.Figure(marks = [scatters], axes = [x_ax, y_ax, c_ax]) 
fig

Figure(axes=[Axis(label='Longitude', scale=LinearScale()), Axis(label='Latitude', orientation='vertical', scal…

In [27]:
# (3) again, we recall that there is a large range in durations, so 
#  it makes sense that we have a muted color pattern - we want 
# to use a log colorscale
# with bqplot we can do this with:
c_sc = bqplot.ColorScale()

c_ax = bqplot.ColorAxis(scale = c_sc, label='log(sec)', orientation = 'vertical', side = 'right')
scatters = bqplot.Scatter(x = ufosDS['longitude'],
                          y = ufosDS['latitude'],
                          color=np.log10(ufosDS['duration_seconds']),
                          scales = {'x': x_sc, 'y': y_sc, 'color':c_sc})

fig = bqplot.Figure(marks = [scatters], axes = [x_ax, y_ax, c_ax]) 
fig

Figure(axes=[Axis(label='Longitude', scale=LinearScale(), side='bottom'), Axis(label='Latitude', orientation='…

In [28]:
# now we are going to use our heatmap idea to plot this data again
#  note this will shmear out a lot of the nice map stuff we see above
#  don't worry!  We'll talk about making maps in the next class or so

# what should we color by?  lets do by duration

# to get this to work with our heatmap, we're going 
#  to have to do some rebinning
#  right now, our data is all in 1 long list
# we need to rebin things in a 2d histogram where 
# the x axis is longitude & y is latitude

nlong = 20 
nlat = 20

#(1)
hist2d, long_edges, lat_edges = np.histogram2d(ufos['longitude'], 
                                               ufos['latitude'], 
                                               weights=ufos['duration_seconds'], 
                                               #normed=True,  ## this would do the average duration
                                              bins=[nlong,nlat])
# this returns the TOTAL duration of ufo events in each bin
hist2d

array([[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        1.80000000e+03, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 4.70640000e+04, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 1.20300000e+03, 7.37100000e+03, 3.70000000e+03],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        1.50000000e+01, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 6.75425400e+06, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 9.63978000e+05, 5.10500000e+03],
       [2.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 8.78160000e+04, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 4.7449

In [29]:
# note that the sizes of the edges & the hist are different:
hist2d.shape, long_edges.shape, lat_edges.shape

((20, 20), (21,), (21,))

In [31]:
# this is becuase the edges are bin edges, not centers
# to get bin centers we can do:
# lets do some fancy in-line forloops

# this will return a list of averaged n and n+1 edge positions
long_centers = [(long_edges[i]+long_edges[i+1])*0.5 for i in range(len(long_edges)-1)]
lat_centers = [(lat_edges[i]+lat_edges[i+1])*0.5 for i in range(len(lat_edges)-1)]
long_centers, lat_centers

([-167.78055670999998,
  -150.02555893,
  -132.27056115,
  -114.51556336999998,
  -96.76056558999998,
  -79.00556780999997,
  -61.25057002999998,
  -43.49557224999998,
  -25.740574469999984,
  -7.985576689999988,
  9.769421090000009,
  27.52441887000002,
  45.27941665000003,
  63.03441443000003,
  80.78941221000002,
  98.54440999000002,
  116.29940777000002,
  134.05440555,
  151.80940333,
  169.56440111],
 [-78.97368320000001,
  -71.1955456,
  -63.417408,
  -55.6392704,
  -47.86113280000001,
  -40.082995200000006,
  -32.304857600000005,
  -24.526720000000008,
  -16.74858240000001,
  -8.97044480000001,
  -1.192307200000009,
  6.585830399999985,
  14.363967999999986,
  22.142105599999987,
  29.920243199999987,
  37.69838079999999,
  45.47651839999999,
  53.25465599999998,
  61.03279359999998,
  68.8109312])

In [36]:
# (4) note: we might want to control where our bins are, we can do this by 
#  specifying bin edges ourselves
long_bins = np.linspace(-150, 150, nlong+1)
lat_bins = np.linspace(-40, 70, nlat+1)
long_bins

array([-150., -135., -120., -105.,  -90.,  -75.,  -60.,  -45.,  -30.,
        -15.,    0.,   15.,   30.,   45.,   60.,   75.,   90.,  105.,
        120.,  135.,  150.])

In [37]:
long_bins.shape

(21,)

In [38]:
lat_bins

array([-40. , -34.5, -29. , -23.5, -18. , -12.5,  -7. ,  -1.5,   4. ,
         9.5,  15. ,  20.5,  26. ,  31.5,  37. ,  42.5,  48. ,  53.5,
        59. ,  64.5,  70. ])

In [39]:
lat_bins.shape

(21,)

In [40]:
hist2d, long_edges, lat_edges = np.histogram2d(ufos['longitude'], 
                                               ufos['latitude'], 
                                               weights=ufos['duration_seconds'],
                                              bins = [long_bins,lat_bins])

# this is becuase the edges are bin edges, not centers
long_centers = [(long_edges[i]+long_edges[i+1])*0.5 for i in range(len(long_edges)-1)]
lat_centers = [(lat_edges[i]+lat_edges[i+1])*0.5 for i in range(len(lat_edges)-1)]

In [41]:
# (5)
# again, we want to take the log scale of things
#  we're going to do this by taking the log of hist2d
#  but there are some zero values in this hsitogram
# if we just take the log we get -inf
np.log10(hist2d)
# this can mess up our color scheme mapping

  


array([[      -inf,       -inf,       -inf,       -inf, 1.17609126,
              -inf,       -inf,       -inf,       -inf,       -inf,
              -inf,       -inf,       -inf,       -inf,       -inf,
              -inf,       -inf, 2.8573325 , 5.4029162 , 5.82882675],
       [      -inf,       -inf,       -inf,       -inf,       -inf,
        4.94357365,       -inf,       -inf,       -inf,       -inf,
              -inf,       -inf,       -inf, 5.6459806 , 7.02773172,
        6.72644508, 6.51420351, 5.75806821, 4.31264272, 4.3544926 ],
       [      -inf,       -inf, 0.        ,       -inf,       -inf,
              -inf,       -inf,       -inf,       -inf,       -inf,
              -inf, 5.82237014, 4.98358795, 7.67779189, 6.83534037,
        7.73800979, 6.04979947, 5.57698368, 3.61140464,       -inf],
       [      -inf,       -inf,       -inf,       -inf,       -inf,
              -inf,       -inf, 1.47712125,       -inf, 3.54157924,
        4.80700706, 5.46428234, 7.07945235, 7

In [42]:
# (6) so we are going to "trick" our color scheme like so
hist2d[hist2d <= 0] = np.nan # set zeros to NaNs
# then take log
hist2d = np.log10(hist2d)
hist2d

array([[       nan,        nan,        nan,        nan, 1.17609126,
               nan,        nan,        nan,        nan,        nan,
               nan,        nan,        nan,        nan,        nan,
               nan,        nan, 2.8573325 , 5.4029162 , 5.82882675],
       [       nan,        nan,        nan,        nan,        nan,
        4.94357365,        nan,        nan,        nan,        nan,
               nan,        nan,        nan, 5.6459806 , 7.02773172,
        6.72644508, 6.51420351, 5.75806821, 4.31264272, 4.3544926 ],
       [       nan,        nan, 0.        ,        nan,        nan,
               nan,        nan,        nan,        nan,        nan,
               nan, 5.82237014, 4.98358795, 7.67779189, 6.83534037,
        7.73800979, 6.04979947, 5.57698368, 3.61140464,        nan],
       [       nan,        nan,        nan,        nan,        nan,
               nan,        nan, 1.47712125,        nan, 3.54157924,
        4.80700706, 5.46428234, 7.07945235, 7

In [43]:
# (7) finally, our histogram is actually
#  transposed - this is just how numpy outputs it,
# lets put the world right side up with:
hist2d = hist2d.T

In [48]:
# now that we have all that fancy binning out of the way, 
#  lets proceed as normal:

# add scales - colors, x & y
col_sc = bqplot.ColorScale(scheme="RdPu", 
                           min=np.nanmin(hist2d), 
                           max=np.nanmax(hist2d))
x_sc = bqplot.LinearScale()
y_sc = bqplot.LinearScale()

# create axis - for colors, x & y
c_ax = bqplot.ColorAxis(scale = col_sc, 
                        orientation = 'vertical', 
                        side = 'right')
x_ax = bqplot.Axis(scale = x_sc, label='Longitude')
y_ax = bqplot.Axis(scale = y_sc, 
                   orientation = 'vertical', 
                   label = 'Latitude')

heat_map = bqplot.GridHeatMap(color = hist2d,
                              row = lat_centers, 
                              column = long_centers,
                              scales = {'color': col_sc,
                                        'row': y_sc,
                                        'column': x_sc},
                              interactions = {'click': 'select'},
                              anchor_style = {'fill':'blue'}, 
                              selected_style = {'opacity': 1.0},
                              unselected_style = {'opacity': 1.0})

#***GO BACK AND PLAY WITH BIN SIZES***
        
fig = bqplot.Figure(marks = [heat_map], axes = [c_ax, y_ax, x_ax])

#(1)
fig

Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale(max=8.09710747658943, min=0.0, scheme='RdPu'),…

In [52]:
# lets add a label again to print duration
# create label again
mySelectedLabel = ipywidgets.Label()
def get_data_value(change):
    i,j = change['owner'].selected[0]
    v = hist2d[i,j] # grab data value
    mySelectedLabel.value = 'Total duration in log(sec) = ' + str(v) # set our label

# add scales - colors, x & y
col_sc = bqplot.ColorScale(scheme="RdPu", 
                           min=np.nanmin(hist2d), 
                           max=np.nanmax(hist2d))
x_sc = bqplot.LinearScale()
y_sc = bqplot.LinearScale()

# create axis - for colors, x & y
c_ax = bqplot.ColorAxis(scale = col_sc, 
                        orientation = 'vertical', 
                        side = 'right',
                        label='log(sec)')
x_ax = bqplot.Axis(scale = x_sc, label='Longitude')
y_ax = bqplot.Axis(scale = y_sc, 
                   orientation = 'vertical', 
                   label = 'Latitude')

heat_map = bqplot.GridHeatMap(color = hist2d,
                              row = lat_centers, 
                              column = long_centers,
                              scales = {'color': col_sc,
                                        'row': y_sc,
                                        'column': x_sc},
                              interactions = {'click': 'select'},
                              anchor_style = {'fill':'blue'}, 
                              selected_style = {'opacity': 1.0},
                              unselected_style = {'opacity': 1.0}) 


# make sure we check out     
heat_map.observe(get_data_value, 'selected')


fig = bqplot.Figure(marks = [heat_map], axes = [c_ax, y_ax, x_ax])

#(2)
ipywidgets.VBox([mySelectedLabel,fig])

VBox(children=(Label(value=''), Figure(axes=[ColorAxis(label='log(sec)', orientation='vertical', scale=ColorSc…

In [54]:
# ok, now lets build up our dashboard 
# again to also show how the duration of UFO sitings in each 
# selected region changes with year

# we'll do this with the same methodology we applied before
#  **copy paste above***

# (1)

# (I) For the heatmap
# add scales - colors, x & y
col_sc = bqplot.ColorScale(scheme="RdPu", 
                           min=np.nanmin(hist2d), 
                           max=np.nanmax(hist2d))
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()

# create axis - for colors, x & y
c_ax = bqplot.ColorAxis(scale = col_sc, 
                        orientation = 'vertical', 
                        side = 'right')
x_ax = bqplot.Axis(scale = x_sc, label='Longitude')
y_ax = bqplot.Axis(scale = y_sc, 
                   orientation = 'vertical', 
                   label = 'Latitude')

heat_map = bqplot.GridHeatMap(color = hist2d,
                              row = lat_centers, 
                              column = long_centers,
                              scales = {'color': col_sc,
                                        'row': y_sc,
                                        'column': x_sc},
                              interactions = {'click': 'select'},
                              anchor_style = {'fill':'blue'}, 
                              selected_style = {'opacity': 1.0},
                              unselected_style = {'opacity': 1.0})

fig = bqplot.Figure(marks = [heat_map], axes = [c_ax, y_ax, x_ax])

# (II) Scatter plot
# scales & ax in usual way
import datetime as dt
x_scl = bqplot.DateScale(min=dt.datetime(1950,1,1),max=dt.datetime(2020,1,1)) # note: for dates on x-axis
y_scl = bqplot.LogScale()
ax_xcl = bqplot.Axis(label='Date', scale=x_scl)
ax_ycl = bqplot.Axis(label='Duration in Sec', scale=y_scl, 
                    orientation='vertical', side='left')
# for the lineplot of duration in a region as a function of year
# lets start with a default region & year
i,j = 0,0
longs = [long_edges[i], long_edges[i+1]]
lats = [lat_edges[j],lat_edges[j+1]]
region_mask = ( (ufos['latitude'] >= lats[0]) & (ufos['latitude']<=lats[1]) &\
                (ufos['longitude'] >= longs[0]) & (ufos['longitude']<=longs[1]) )

# we can see this selects for the upper right point of our heatmap
lats, longs, ufos['latitude'][region_mask]

# lets plot the durations as a function of year there
duration_scatt = bqplot.Scatter(x = ufos['date'][region_mask],
                               y = ufos['duration_seconds'][region_mask], 
                              scales={'x':x_scl, 'y':y_scl})

fig_dur = bqplot.Figure(marks = [duration_scatt], axes = [ax_xcl, ax_ycl])

# create label again
mySelectedLabel = ipywidgets.Label()

# now again, we want our scatter plot to react to changes 
#  to what we've selected so:
def get_data_value2(change):
    i,j = change['owner'].selected[0]
    v = hist2d[i,j] # grab data value
    mySelectedLabel.value = 'Total duration in log(sec) = ' + str(v) # set our label
    # note!! i & j are swapped here to machup with hist & selection 
    longs = [long_edges[j], long_edges[j+1]]
    lats = [lat_edges[i],lat_edges[i+1]]
    region_mask = ( (ufos['latitude'] >= lats[0]) & (ufos['latitude']<=lats[1]) &\
                (ufos['longitude'] >= longs[0]) & (ufos['longitude']<=longs[1]) )
    duration_scatt.x = ufos['date'][region_mask]
    duration_scatt.y = ufos['duration_seconds'][region_mask]
    
# make sure we connect to heatmap     
heat_map.observe(get_data_value2, 'selected')


ipywidgets.VBox([mySelectedLabel, ipywidgets.HBox([fig,fig_dur])])
# note that when I select a deep purple place, my scatter plot is 
#  very laggy, this makes me think we should do this with a 
#  histogram/bar type plot


VBox(children=(Label(value=''), HBox(children=(Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale…

In [55]:
# (I) For the heatmap
# add scales - colors, x & y
col_sc = bqplot.ColorScale(scheme="RdPu", 
                           min=np.nanmin(hist2d), 
                           max=np.nanmax(hist2d))
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()

# create axis - for colors, x & y
c_ax = bqplot.ColorAxis(scale = col_sc, 
                        orientation = 'vertical', 
                        side = 'right')
x_ax = bqplot.Axis(scale = x_sc, label='Longitude')
y_ax = bqplot.Axis(scale = y_sc, 
                   orientation = 'vertical', 
                   label = 'Latitude')

heat_map = bqplot.GridHeatMap(color = hist2d,
                              row = lat_centers, 
                              column = long_centers,
                              scales = {'color': col_sc,
                                        'row': y_sc,
                                        'column': x_sc},
                              interactions = {'click': 'select'},
                              anchor_style = {'fill':'blue'}, 
                              selected_style = {'opacity': 1.0},
                              unselected_style = {'opacity': 1.0})

fig = bqplot.Figure(marks = [heat_map], axes = [c_ax, y_ax, x_ax])

# (II) Bar plot
# scales & ax in usual way
x_scl = bqplot.LinearScale() # note we are back to linears
y_scl = bqplot.LinearScale()
ax_xcl = bqplot.Axis(label='Date', scale=x_scl)
ax_ycl = bqplot.Axis(label='Total duration in Sec', scale=y_scl, 
                    orientation='vertical', side='left')
# for the lineplot of duration in a region as a function of year
# lets start with a default region & year
i,j = 0,0
longs = [long_edges[i], long_edges[i+1]]
lats = [lat_edges[j],lat_edges[j+1]]
region_mask = ( (ufos['latitude'] >= lats[0]) & (ufos['latitude']<=lats[1]) &\
                (ufos['longitude'] >= longs[0]) & (ufos['longitude']<=longs[1]) )

# we can see this selects for the upper right point of our heatmap
lats, longs, ufos['latitude'][region_mask]

# lets plot the durations as a function of year there
ufos['year'] = ufos['date'].dt.year
dur, dur_edges = np.histogram(ufos['year'][region_mask],
                              weights=ufos['duration_seconds'][region_mask],
                              bins=10)
# like before with our histograms
dur_centers = [(dur_edges[i]+dur_edges[i+1])*0.5 for i in range(len(dur_edges)-1)]
# make histogram by hand, weighting by duration
duration_hist = bqplot.Bars(x=dur_centers, y=dur, 
                          scales={'x':x_scl, 'y':y_scl})
fig_dur = bqplot.Figure(marks = [duration_hist], axes = [ax_xcl, ax_ycl])


#  to what we've selected so:
def get_data_value(change):
    i,j = change['owner'].selected[0]
    v = hist2d[i,j] # grab data value
    mySelectedLabel.value = 'Total duration in log(sec) = ' + str(v) # set our label
    # note!! i & j are swapped here to machup with hist & selection 
    longs = [long_edges[j], long_edges[j+1]]
    lats = [lat_edges[i],lat_edges[i+1]]
    region_mask = ( (ufos['latitude'] >= lats[0]) & (ufos['latitude']<=lats[1]) &\
                (ufos['longitude'] >= longs[0]) & (ufos['longitude']<=longs[1]) )
    dur, dur_edges = np.histogram(ufos['year'][region_mask],
                                  weights=ufos['duration_seconds'][region_mask],
                                  bins=10)
    dur_centers = [(dur_edges[i]+dur_edges[i+1])*0.5 for i in range(len(dur_edges)-1)]
    duration_hist.x = dur_centers
    duration_hist.y = dur
# make sure we connect to heatmap     
heat_map.observe(get_data_value, 'selected')


ipywidgets.VBox([mySelectedLabel, ipywidgets.HBox([fig,fig_dur])])

VBox(children=(Label(value=''), HBox(children=(Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale…

## If you're interested in a more complex dashboard...

In [57]:
col_sc = bqplot.ColorScale(scheme="RdPu", 
                           min=np.nanmin(hist2d), 
                           max=np.nanmax(hist2d))
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()

# create axis - for colors, x & y
c_ax = bqplot.ColorAxis(scale = col_sc, 
                        orientation = 'vertical', 
                        side = 'right')
x_ax = bqplot.Axis(scale = x_sc, label='Longitude')
y_ax = bqplot.Axis(scale = y_sc, 
                   orientation = 'vertical', 
                   label = 'Latitude')

heat_map = bqplot.GridHeatMap(color = hist2d,
                              row = lat_centers, 
                              column = long_centers,
                              scales = {'color': col_sc,
                                        'row': y_sc,
                                        'column': x_sc},
                              interactions = {'click': 'select'},
                              anchor_style = {'fill':'blue'}, 
                              selected_style = {'opacity': 1.0},
                              unselected_style = {'opacity': 1.0})

fig = bqplot.Figure(marks = [heat_map], axes = [c_ax, y_ax, x_ax])

# (II) Bar plot for durations thorugh the years
# scales & ax in usual way
x_scl = bqplot.LinearScale() # note we are back to linears
y_scl = bqplot.LinearScale()
ax_xcl = bqplot.Axis(label='Date', scale=x_scl)
ax_ycl = bqplot.Axis(label='Total duration in Sec', scale=y_scl, 
                    orientation='vertical', side='left')
# for the lineplot of duration in a region as a function of year
# lets start with a default region & year
i,j = 0,0
longs = [long_edges[i], long_edges[i+1]]
lats = [lat_edges[j],lat_edges[j+1]]
region_mask = ( (ufos['latitude'] >= lats[0]) & (ufos['latitude']<=lats[1]) &\
                (ufos['longitude'] >= longs[0]) & (ufos['longitude']<=longs[1]) )

# we can see this selects for the upper right point of our heatmap
lats, longs, ufos['latitude'][region_mask]

# lets plot the durations as a function of year there
ufos['year'] = ufos['date'].dt.year
dur, dur_edges = np.histogram(ufos['year'][region_mask],
                              weights=ufos['duration_seconds'][region_mask],
                              bins=10)
# like before with our histograms
dur_centers = [(dur_edges[i]+dur_edges[i+1])*0.5 for i in range(len(dur_edges)-1)]
# make histogram by hand, weighting by duration
duration_hist = bqplot.Bars(x=dur_centers, y=dur, 
                          scales={'x':x_scl, 'y':y_scl})
fig_dur = bqplot.Figure(marks = [duration_hist], axes = [ax_xcl, ax_ycl])

# (III) histogram for shape
x_ord = bqplot.OrdinalScale()
y_ord = bqplot.LinearScale()
ax_xord = bqplot.Axis(label='Shape', scale=x_ord)
ax_yord = bqplot.Axis(label='Freq', scale=y_ord,
                     orientation='vertical',
                     side='left')

# histogram using pandas
hist_ord = bqplot.Bars(x=ufos['shape'][region_mask].unique(),
                       y=ufos['shape'][region_mask].value_counts(),
                     scales={'x':x_ord, 'y':y_ord})
fig_shape = bqplot.Figure(marks=[hist_ord], axes=[ax_xord,ax_yord])

#  to what we've selected so:
def get_data_value(change):
    i,j = change['owner'].selected[0]
    v = hist2d[i,j] # grab data value
    mySelectedLabel.value = 'Total duration in log(sec) = ' + str(v) # set our label
    # note!! i & j are swapped here to machup with hist & selection 
    longs = [long_edges[j], long_edges[j+1]]
    lats = [lat_edges[i],lat_edges[i+1]]
    region_mask = ( (ufos['latitude'] >= lats[0]) & (ufos['latitude']<=lats[1]) &\
                (ufos['longitude'] >= longs[0]) & (ufos['longitude']<=longs[1]) )
    dur, dur_edges = np.histogram(ufos['year'][region_mask],
                                  weights=ufos['duration_seconds'][region_mask],
                                  bins=10)
    dur_centers = [(dur_edges[i]+dur_edges[i+1])*0.5 for i in range(len(dur_edges)-1)]
    duration_hist.x = dur_centers
    duration_hist.y = dur
    # also update shapes
    #print(ufos['shape'][region_mask])
    hist_ord.x = ufos['shape'][region_mask].unique()
    hist_ord.y = ufos['shape'][region_mask].value_counts()
# make sure we connect to heatmap     
heat_map.observe(get_data_value, 'selected')

# lets make all the sizes look nice
fig_dur.layout.max_width = '400px'
fig_dur.layout.max_height= '300px'
fig_shape.layout.max_width = '400px'
fig_shape.layout.max_height= '300px'
fig.layout.min_width = '800px' # add to both

# dhange layout
#ipywidgets.VBox([mySelectedLabel, ipywidgets.HBox([fig,ipywidgets.VBox([fig_shape,fig_dur])])])
ipywidgets.VBox([mySelectedLabel,
                ipywidgets.HBox([fig_shape,fig_dur]),
                fig])

VBox(children=(Label(value='Total duration in log(sec) = 5.464282336262708'), HBox(children=(Figure(axes=[Axis…