# Activity #1: Heat maps
* we'll start by building up a heat map based on some small, randomly generated data
* we'll use this methodology to make our plot interactive & then move on to using "real" data

In [117]:
# lets import our usual stuff
import datetime as dt
import os

import bqplot
import ipywidgets
import numpy as np
import pandas as pd
import traitlets
%matplotlib inline

In [118]:
# begin exploring heat maps with a small grid of random numbers
data = np.random.random(size=(10, 10))
# the result is simply a 10 x 10 array of floats
data

array([[0.32090106, 0.24711992, 0.16546322, 0.55668118, 0.57514743,
        0.15202762, 0.07997279, 0.3686391 , 0.47563345, 0.94886266],
       [0.85909533, 0.98616421, 0.58997239, 0.5236106 , 0.53208179,
        0.92590643, 0.9813583 , 0.51742894, 0.86154571, 0.21652105],
       [0.0105955 , 0.95680983, 0.71678049, 0.66009592, 0.20723169,
        0.68468413, 0.25342522, 0.55750008, 0.61639414, 0.38973426],
       [0.18652982, 0.43075309, 0.49292574, 0.70701588, 0.35350343,
        0.14926668, 0.38156295, 0.13625618, 0.26287455, 0.45571758],
       [0.00724826, 0.40075965, 0.28868554, 0.50325659, 0.03409594,
        0.78742207, 0.87038529, 0.067327  , 0.97901661, 0.67389506],
       [0.02372724, 0.3544299 , 0.21220456, 0.8114051 , 0.69165234,
        0.58123297, 0.13999681, 0.5478342 , 0.38028971, 0.40431559],
       [0.43706986, 0.62513944, 0.55030735, 0.32942926, 0.23824   ,
        0.96306382, 0.14951026, 0.45925526, 0.93041505, 0.5013106 ],
       [0.1619171 , 0.12990876, 0.1228127

In [119]:
# build a first heat map in three passes, each pass adding one ingredient

# (1) the bare minimum: a color scale, plus the GridHeatMap mark that
#  uses it to color each cell of our randomly generated data
col_sc = bqplot.ColorScale()
heat_map = bqplot.GridHeatMap(color=data, scales={'color': col_sc})
# marks go into a figure and off we go!
fig = bqplot.Figure(marks=[heat_map])

# (2) fine, but a reference for the colors would help, so this pass
#  picks a different color scheme and adds a color bar
col_sc = bqplot.ColorScale(scheme="Reds")
# the color bar is an axis tied to the color scale; here it is drawn
#  vertically, on the right of the heat map
c_ax = bqplot.ColorAxis(scale=col_sc, side='right', orientation='vertical')
heat_map = bqplot.GridHeatMap(color=data, scales={'color': col_sc})
fig = bqplot.Figure(marks=[heat_map], axes=[c_ax])

# (3) last pass: axes along x & y as well
# their scales come first - these just count up the boxes in the
#  horizontal & vertical direction
x_sc, y_sc = bqplot.OrdinalScale(), bqplot.OrdinalScale()
x_ax = bqplot.Axis(scale=x_sc)
y_ax = bqplot.Axis(scale=y_sc, orientation='vertical')
# rows use the y scale, columns use the x scale
grid_scales = {'color': col_sc, 'row': y_sc, 'column': x_sc}
heat_map = bqplot.GridHeatMap(color=data, scales=grid_scales)
fig = bqplot.Figure(marks=[heat_map], axes=[c_ax, y_ax, x_ax])

fig

RmlndXJlKGF4ZXM9W0NvbG9yQXhpcyhvcmllbnRhdGlvbj0ndmVydGljYWwnLCBzY2FsZT1Db2xvclNjYWxlKHNjaGVtZT11J1JlZHMnKSwgc2lkZT0ncmlnaHQnKSwgQXhpcyhvcmllbnRhdGnigKY=


In [120]:
# the heat map above is lovely, but it isn't interactive in any way - boo!
# time to start adding in some interactivity

# (same `data` as last time)

# scales - color, x & y
col_sc = bqplot.ColorScale(scheme="Reds")
x_sc, y_sc = bqplot.OrdinalScale(), bqplot.OrdinalScale()

# one axis per scale - color, x & y
c_ax = bqplot.ColorAxis(scale=col_sc, side='right', orientation='vertical')
x_ax = bqplot.Axis(scale=x_sc)
y_ax = bqplot.Axis(scale=y_sc, orientation='vertical')

# re-do the heat map, this time with a click interaction attached
grid_scales = {'color': col_sc, 'row': y_sc, 'column': x_sc}
heat_map = bqplot.GridHeatMap(
    color=data,
    scales=grid_scales,
    interactions={'click': 'select'},
    anchor_style={'fill': 'blue'},  # to make our selection blue
    selected_style={'opacity': 1.0},
    unselected_style={'opacity': 0.8},
)

# stir and combine into 1 figure
fig = bqplot.Figure(marks=[heat_map], axes=[c_ax, y_ax, x_ax])

fig


RmlndXJlKGF4ZXM9W0NvbG9yQXhpcyhvcmllbnRhdGlvbj0ndmVydGljYWwnLCBzY2FsZT1Db2xvclNjYWxlKHNjaGVtZT11J1JlZHMnKSwgc2lkZT0ncmlnaHQnKSwgQXhpcyhvcmllbnRhdGnigKY=


In [121]:
#  Ok fine, but our selection isn't linked to anything yet!
#  let's check what the heat map's `selected` attribute holds
heat_map.selected
#  note: if I select a different box & re-run this cell,
#  I get out different values


[]

In [122]:
# so now, let's write a little function that links the data value
#  to the selected cell & let's show it in a little ipywidgets label
mySelectedLabel = ipywidgets.Label()

# (1) 
# lets write our linking function
# there are a few ways to link this,
#  here is a simple way first
#  the simplest version reads the selection straight off our heat map
#  inside the callback:
#      i,j = heat_map.selected[0]
#
# (2) that is maybe inelegant, as it explicitly names our original
#  heat map.  Putting a `print(change)` into the callback shows that
#  each click hands us a whole dictionary of information - if we
#  recall, "owner" there is the heat_map which "owns" this change.
#
# (3) so, to be able to apply our function to this or any other
#  heat map we generate, we read the selection from change['owner'].
#  The function is defined only once: every further
#  `def get_data_value` would just replace the one before it.
def get_data_value(change):
    """Show the data value of the selected heat map cell in the label.

    Parameters
    ----------
    change : dict
        Notification passed in by `observe`; only
        change['owner'].selected is read.

    Writes str(data[i, j]) for the first selected (i, j) pair into
    mySelectedLabel.  Returns without touching the label when nothing
    is selected.
    """
    selected = change['owner'].selected
    # `selected` can be empty (it is `[]` before anything is clicked),
    #  in which case there is no [0] entry to unpack
    if selected is None or len(selected) == 0:
        return
    i, j = selected[0]
    v = data[i, j]  # grab data value
    mySelectedLabel.value = str(v)  # set our label
# so, this now is applied to any map that we choose to input
    
# rebuild the heat map that goes onto our figure canvas
grid_scales = {'color': col_sc, 'row': y_sc, 'column': x_sc}
heat_map = bqplot.GridHeatMap(
    color=data,
    scales=grid_scales,
    interactions={'click': 'select'},
    anchor_style={'fill': 'blue'},
    selected_style={'opacity': 1.0},
    unselected_style={'opacity': 0.8},
)

# call get_data_value whenever the heat map's `selected` trait changes
heat_map.observe(get_data_value, 'selected')
fig = bqplot.Figure(marks=[heat_map], axes=[c_ax, y_ax, x_ax])

# label stacked on top of the figure
ipywidgets.VBox([mySelectedLabel, fig])

VkJveChjaGlsZHJlbj0oTGFiZWwodmFsdWU9dScnKSwgRmlndXJlKGF4ZXM9W0NvbG9yQXhpcyhvcmllbnRhdGlvbj0ndmVydGljYWwnLCBzY2FsZT1Db2xvclNjYWxlKHNjaGVtZT11J1JlZHPigKY=


# Activity #2: Preliminary dashboarding
* we'll use a random dataset to explore how to make dashboard-like plots that change when things are updated

In [123]:
# now lets move on to making a preliminary dashboard for multi-dimensional datasets
#  lets first start with some randomly generated data again

In [124]:
# random data again, but now with a third axis of length 20
data = np.random.random((10, 10,20))
data

data.shape

data[0,0,:]
# we can see that now, instead of 1 value, each "i,j" component
#  has an array of 20 values

# let's start building up linked plots
#  first, let's re-do our plot above with our label printing
#  out the sum along the 3rd axis of this 3-d array

# scales first - color, x & y
col_sc = bqplot.ColorScale(scheme="Reds")
x_sc, y_sc = bqplot.OrdinalScale(), bqplot.OrdinalScale()

# then one axis per scale - color, x & y
c_ax = bqplot.ColorAxis(scale=col_sc, side='right', orientation='vertical')
x_ax = bqplot.Axis(scale=x_sc)
y_ax = bqplot.Axis(orientation='vertical', scale=y_sc)

# and a new, empty label
mySelectedLabel = ipywidgets.Label()

def get_data_value(change):
    """Show the sum of the selected cell's values in the label.

    Parameters
    ----------
    change : dict
        Notification passed in by `observe`; only
        change['owner'].selected is read.

    Writes str(data[i, j].sum()) for the first selected (i, j) pair
    into mySelectedLabel.  Returns without touching the label when
    nothing is selected.
    """
    selected = change['owner'].selected
    # an empty selection has no [0] entry to unpack
    if selected is None or len(selected) == 0:
        return
    i, j = selected[0]
    # data[i, j] on its own would put all 20 elements into the label,
    #  so let's sum instead
    v = data[i, j].sum()  # grab data value
    mySelectedLabel.value = str(v)  # set our label
# so, this now is applied to any map that we choose to input
    
    
    
# rebuild the heat map for our figure canvas
# this time each cell is colored by the sum along the 3rd axis of `data`
cell_totals = data.sum(axis=2)
heat_map = bqplot.GridHeatMap(
    color=cell_totals,
    scales={'color': col_sc, 'row': y_sc, 'column': x_sc},
    interactions={'click': 'select'},
    anchor_style={'fill': 'blue'},
    selected_style={'opacity': 1.0},
    unselected_style={'opacity': 0.8},
)

# call get_data_value whenever the heat map's `selected` trait changes
heat_map.observe(get_data_value, 'selected')
fig = bqplot.Figure(marks=[heat_map], axes=[c_ax, y_ax, x_ax])

#(1)
#ipywidgets.VBox([mySelectedLabel, fig])

# (2)
# now, let's generate another figure that plots the histogram of the
#  values along our 3rd axis for a single cell
x_sch = bqplot.LinearScale()
y_sch = bqplot.LinearScale()
# the histogram bins the individual values (not their sum), so the
#  x axis is labelled accordingly
x_axh = bqplot.Axis(scale = x_sch, label = 'Values along 3rd axis')
y_axh = bqplot.Axis(scale = y_sch, orientation = 'vertical', label='Frequency')

# start out with the values of the (0, 0) cell
hist = bqplot.Hist(sample = data[0,0,:],
                   opacity = 0.1, normalized = False, # normalized=False means we get counts in each bin
                   scales = {'sample': x_sch, 'count': y_sch},
                   bins = 5)
figh = bqplot.Figure(marks = [hist], axes = [x_axh, y_axh])
# ok, so side by side plots, but nothing updates!

#(3) so, we have to update what our heatmap has access to as 
#  far as being able to update both the label *AND* the 
# histogram's data
def get_data_value2(change):
    """Update the label AND the histogram for the selected cell.

    Parameters
    ----------
    change : dict
        Notification passed in by `observe`; only
        change['owner'].selected is read.

    For the first selected (i, j) pair, writes str(data[i, j].sum())
    into mySelectedLabel and hands data[i, j] to `hist` as its new
    sample.  Returns without changing anything when nothing is selected.
    """
    selected = change['owner'].selected
    # an empty selection has no [0] entry to unpack
    if selected is None or len(selected) == 0:
        return
    i, j = selected[0]
    cell_values = data[i, j]
    # the label shows the sum of the cell's values ...
    mySelectedLabel.value = str(cell_values.sum())
    # ... while the histogram gets the values themselves
    hist.sample = cell_values
# get_data_value is still attached to this heat map from earlier in
#  the cell; get_data_value2 sets the same label, so detach the old
#  callback first - otherwise both of them run on every click
heat_map.unobserve(get_data_value, 'selected')
heat_map.observe(get_data_value2, 'selected')
# note here now the heat_map is in a sense "driving" our
# changes.
# *** DO EXAMPLE OF BACK AND FORTH ***


ipywidgets.VBox([mySelectedLabel, ipywidgets.HBox([fig,figh])] )


VkJveChjaGlsZHJlbj0oTGFiZWwodmFsdWU9dScnKSwgSEJveChjaGlsZHJlbj0oRmlndXJlKGF4ZXM9W0NvbG9yQXhpcyhvcmllbnRhdGlvbj0ndmVydGljYWwnLCBzY2FsZT1Db2xvclNjYWzigKY=


# Activity #3: Dashboarding with "real" data
* now we'll move onto the UFO dataset and start messing around with creating a dashboard for this dataset

In [125]:
# let's start by loading the UFO dataset
# the file location can be overridden by setting the UFO_CSV_PATH
#  environment variable; without it we fall back to the original
#  download location
ufo_csv_path = os.environ.get(
    "UFO_CSV_PATH",
    "/Users/jillnaiman1/Downloads/ufo-scrubbed-geocoded-time-standardized-00.csv")
# column names are supplied here via `names`, and the two date
#  columns are parsed into datetimes on load
ufos = pd.read_csv(ufo_csv_path,
                  names = ["date", "city", "state", "country",
                          "shape", "duration_seconds", "duration",
                          "comment", "report_date", "latitude", "longitude"],
                  parse_dates = ["date", "report_date"])

## Aside: downsampling
* some folks reported having a tough time with interactivity of scatter plots with the UFO dataset
* here we'll quickly go over some methods of downsampling that can be applied to decrease the size of our dataset

In [126]:
# you'll see the above takes a good long time to load on my computer
# the dataset is quite large - its number of rows is:
len(ufos)

80332

In [231]:
# 80,000!  So, to speed up our interactivity, we can 
#  randomly sample this dataset for plotting purposes
# let's down sample to 1000 samples:
nsamples = 1000
#nsamples = 5000
# np.random.randint never returns its upper bound, so the bound has to
#  be len(ufos) (not len(ufos)-1) for the last row to be drawable too
downSampleMask = np.random.randint(0, len(ufos), nsamples)
downSampleMask
# so, downSampleMask is now an array of random row positions for 
#  the UFO dataset

array([79092, 73015, 51388, 78185, 77507, 59924, 35666, 30750, 55595,
       76285, 50799, 16544, 17031, 21838, 62949, 26538, 23457, 23185,
        9553, 68692, 45343, 14393, 26721,  5599, 15697, 58183, 10164,
       30890, 75774, 58222,  8693, 26267, 11445, 25875, 57285, 59536,
       43846, 29716, 59215,    42, 35667, 54376, 55059, 22948, 52673,
       67360, 59743, 60266, 40674, 30601, 29782, 69182, 48010, 17536,
        9142, 72235, 65525, 36329, 63097, 62620, 28462,  8860, 28455,
        3162, 53534, 59572, 52811, 10808,  4128, 52572, 23789, 23414,
       14780,  2001, 27549, 77375, 44361, 55373,  6804, 80051,  8674,
       45078, 70076, 12847, 14844, 15638, 10268, 64740,   629, 38975,
       24335, 44211, 34167, 50043, 48274, 33531,   270, 69745, 74141,
       25885, 37249, 68892, 55241, 15555, 35555, 30951, 66304,  9201,
       36634, 52285, 27254, 53355, 12973, 28652, 60707,  9189,   779,
       76618, 27415, 66762, 44202, 74726, 57822, 46556, 20626, 66189,
        9699, 20109,

In [232]:
# the above doesn't exclude repeats, but we can take 
#  care of this with a different call:
# (an integer first argument means "draw from 0 .. len(ufos)-1", so
#  every row - the last one included - can be picked, at most once)
downSampleMask = np.random.choice(len(ufos), nsamples, replace=False)

In [233]:
# let's update:
# downSampleMask holds row positions, so we select by position with
#  .iloc (.loc would look the numbers up as index labels instead)
ufosDS = ufos.iloc[downSampleMask]
len(ufosDS)
# so much shorter

1000

In [234]:
# we can also see that the down-sampled data is still a dataframe:
ufosDS

Unnamed: 0,date,city,state,country,shape,duration_seconds,duration,comment,report_date,latitude,longitude
66635,2004-08-15 03:00:00,savannah,ga,us,triangle,60.0,1 minute,Moon surface ufo sighting,2005-06-20,32.083333,-81.100000
22423,2012-01-24 22:30:00,mount vernon,wa,us,flash,1.0,split-second,Ultra-bright flash seen in night sky over sout...,2012-02-03,48.421389,-122.332778
29859,2000-02-08 09:10:00,harbor city / san pedro,ca,,circle,180.0,3 minutes,My girfriend &amp; myself looked up northeast ...,2000-02-23,33.740047,-118.287004
54468,2003-06-09 18:15:00,santa rosa area (above),ca,,sphere,4.0,3-4 seconds,Metallic sphere north of San Francisco below o...,2003-07-16,38.455421,-122.723023
17390,2003-12-01 05:00:00,dickinson,nd,us,triangle,10.0,10 seconds,Triangular UFO with lights on the exterior,2003-12-09,46.879167,-102.789167
4584,2011-10-25 18:20:00,matteson,il,us,triangle,1200.0,15-20 min,Triangle Formation of Bright lights in SKY,2011-12-12,41.503889,-87.713056
55989,2004-07-13 22:00:00,marysville,wa,us,light,360.0,6 min,what was this strange light in the sky,2004-07-25,48.051944,-122.175833
45925,2006-05-06 22:20:00,newburgh,ny,us,light,30.0,30 seconds,On 5/6/06 at 10:20 PM we spotted a big round b...,2006-05-15,41.503333,-74.010833
44614,2007-05-27 22:00:00,corona,ca,us,circle,180.0,3 min,bright orange light in sky kinda circular movi...,2007-06-12,33.875278,-117.565556
16668,1999-01-20 19:30:00,butte,mt,us,diamond,60.0,1 min. app.,We were driving &amp; just cleared the top of ...,1999-02-16,46.003889,-112.533889


In [235]:
# a super quick scatter plot to remind ourselves what this data looks like:
x_sc, y_sc = bqplot.LinearScale(), bqplot.LinearScale()

x_ax = bqplot.Axis(label='Longitude', scale=x_sc)
y_ax = bqplot.Axis(label='Latitude', scale=y_sc, orientation='vertical')

#(1) positions only
#scatters = bqplot.Scatter(x = ufosDS['longitude'],
#                          y = ufosDS['latitude'],
#                          scales = {'x': x_sc, 'y': y_sc})

# (2) recall we can also color by things like duration
c_sc = bqplot.ColorScale()
#c_ax = bqplot.ColorAxis(scale = c_sc, label='Duration in sec', orientation = 'vertical', side = 'right')
#scatters = bqplot.Scatter(x = ufosDS['longitude'],
#                          y = ufosDS['latitude'],
#                          color=ufosDS['duration_seconds'],
#                          scales = {'x': x_sc, 'y': y_sc, 'color':c_sc})

# (3) the durations cover a large range, which is why the colors in (2)
#  come out so muted - coloring by the log of the duration works better
# with bqplot we take the log of the values we hand over as `color`:
c_ax = bqplot.ColorAxis(scale=c_sc, label='log(sec)',
                        side='right', orientation='vertical')
log_duration = np.log10(ufosDS['duration_seconds'])
scatters = bqplot.Scatter(
    x=ufosDS['longitude'],
    y=ufosDS['latitude'],
    color=log_duration,
    scales={'x': x_sc, 'y': y_sc, 'color': c_sc},
)

fig = bqplot.Figure(marks=[scatters], axes=[x_ax, y_ax, c_ax])
fig

RmlndXJlKGF4ZXM9W0F4aXMobGFiZWw9dSdMb25naXR1ZGUnLCBzY2FsZT1MaW5lYXJTY2FsZSgpKSwgQXhpcyhsYWJlbD11J0xhdGl0dWRlJywgb3JpZW50YXRpb249J3ZlcnRpY2FsJywgc2PigKY=


In [264]:
# now we are going to use our heatmap idea to plot this data again
#  note this will smear out a lot of the nice map stuff we see above
#  don't worry!  We'll talk about making maps in the next class or so

# what should we color by?  let's do by duration

# to get this to work with our heatmap, we're going 
#  to have to do some rebinning
#  right now, our data is all in 1 long list
# we need to rebin things in a 2d histogram where 
# the x axis is long & y is lat

# ***START WITH 10 EACH**
nlong = 20 
nlat = 20

#(1)
hist2d, long_edges, lat_edges = np.histogram2d(ufos['longitude'], 
                                               ufos['latitude'], 
                                               weights=ufos['duration_seconds'], 
                                              bins=[nlong,nlat])
# this returns the TOTAL duration of ufo events in each bin
hist2d

# (2)
# we can also ask numpy to normalize the histogram for us
#  NOTE: this gives a (duration-weighted) density, which is not the
#  same thing as the average duration in each bin
#  the keyword for this is `density`; its older spelling, `normed`,
#  is deprecated and no longer accepted by newer numpy releases
hist2d, long_edges, lat_edges = np.histogram2d(ufos['longitude'], 
                                               ufos['latitude'], 
                                               weights=ufos['duration_seconds'],
                                              density=True, 
                                              bins = [nlong,nlat])
hist2d

# (3) ok, let's go back to total duration
#  (the weights are the plain durations again - weighting by
#   log10(duration) would sum logs, which is not a total duration)
hist2d, long_edges, lat_edges = np.histogram2d(ufos['longitude'], 
                                               ufos['latitude'], 
                                               weights=ufos['duration_seconds'],
                                              bins = [nlong,nlat])

# note that the sizes of the edges & the hist are different:
hist2d.shape, long_edges.shape, lat_edges.shape

# this is because the edges are bin edges, not centers
# to get bin centers we can do:
# let's do some fancy in-line for loops
long_centers = [(long_edges[i]+long_edges[i+1])*0.5 for i in range(len(long_edges)-1)]
lat_centers = [(lat_edges[i]+lat_edges[i+1])*0.5 for i in range(len(lat_edges)-1)]
long_centers, lat_centers

# (4) note: we might want to control where our bins are, we can do this by 
#  specifying bin edges ourselves
long_bins = np.linspace(-150, 150, nlong+1)
lat_bins = np.linspace(-40, 70, nlat+1)
long_bins, long_bins.shape
lat_bins, lat_bins.shape


hist2d, long_edges, lat_edges = np.histogram2d(ufos['longitude'], 
                                               ufos['latitude'], 
                                               weights=ufos['duration_seconds'],
                                              bins = [long_bins,lat_bins])

# again, the edges are bin edges, so recompute the bin centers
long_centers = [(long_edges[i]+long_edges[i+1])*0.5 for i in range(len(long_edges)-1)]
lat_centers = [(lat_edges[i]+lat_edges[i+1])*0.5 for i in range(len(lat_edges)-1)]

# (5)
# again, we want to take the log scale of things
#  we're going to do this by taking the log of hist2d
#  but there are some zero values in this histogram
# if we just take the log we get -inf
np.log10(hist2d)
# this can mess up our color scheme mapping

# (6) so we are going to "trick" our color scheme like so
hist2d[hist2d <= 0] = np.nan # set zeros to NaNs
# then take log
hist2d = np.log10(hist2d)
hist2d

# (7) finally, our histogram is actually
#  transposed - this is just how numpy outputs it
#  (first index runs along longitude, second along latitude),
# let's put the world right side up with:
hist2d = hist2d.T



In [265]:
# now that we have all that fancy binning out of the way, 
#  lets proceed as normal:

# scales - color, x & y
# the color range is pinned to the smallest & largest non-NaN value of hist2d
log_dur_min = np.nanmin(hist2d)
log_dur_max = np.nanmax(hist2d)
col_sc = bqplot.ColorScale(scheme="RdPu", min=log_dur_min, max=log_dur_max)
x_sc, y_sc = bqplot.LinearScale(), bqplot.LinearScale()

# axes - color, x & y
# (a label='log(sec)' could be added to the color axis as well)
c_ax = bqplot.ColorAxis(scale=col_sc, side='right', orientation='vertical')
x_ax = bqplot.Axis(label='Longitude', scale=x_sc)
y_ax = bqplot.Axis(label='Latitude', scale=y_sc, orientation='vertical')

# rows are placed at the latitude bin centers, columns at the longitude ones
heat_map = bqplot.GridHeatMap(
    color=hist2d,
    row=lat_centers,
    column=long_centers,
    scales={'color': col_sc, 'row': y_sc, 'column': x_sc},
    interactions={'click': 'select'},
    anchor_style={'fill': 'blue'},
    selected_style={'opacity': 1.0},
    unselected_style={'opacity': 1.0},
)

#***GO BACK AND PLAY WITH BIN SIZES***

# (2) let's add a label again to print the duration
# create label again
mySelectedLabel = ipywidgets.Label()
def get_data_value(change):
    """Show the hist2d value of the selected heat map cell in the label.

    Parameters
    ----------
    change : dict
        Notification passed in by `observe`; only
        change['owner'].selected is read.

    Writes 'Total duration in log(sec) = <hist2d[i, j]>' for the first
    selected (i, j) pair into mySelectedLabel.  Returns without
    touching the label when nothing is selected.
    """
    selected = change['owner'].selected
    # an empty selection has no [0] entry to unpack
    if selected is None or len(selected) == 0:
        return
    i, j = selected[0]
    v = hist2d[i, j]  # grab data value
    mySelectedLabel.value = 'Total duration in log(sec) = ' + str(v)  # set our label
        
        
        
        
# call get_data_value whenever the heat map's `selected` trait changes
heat_map.observe(get_data_value, 'selected')


fig = bqplot.Figure(marks = [heat_map], axes = [c_ax, y_ax, x_ax])

#(1) the figure on its own
#fig

#(2) the label stacked on top of the figure
ipywidgets.VBox([mySelectedLabel,fig])

VkJveChjaGlsZHJlbj0oTGFiZWwodmFsdWU9dScnKSwgRmlndXJlKGF4ZXM9W0NvbG9yQXhpcyhvcmllbnRhdGlvbj0ndmVydGljYWwnLCBzY2FsZT1Db2xvclNjYWxlKG1heD04LjA5NzEwNzTigKY=


In [267]:
# ok, now lets build up our dashboard 
# again to also show how the duration of UFO sitings in each 
# selected region changes with year

# we'll do this with the same methodology we applied before
#  **copy paste above***

# (1)

# (I) For the heatmap
# scales - color, x & y
# the color range is pinned to the smallest & largest non-NaN value of hist2d
log_dur_min = np.nanmin(hist2d)
log_dur_max = np.nanmax(hist2d)
col_sc = bqplot.ColorScale(scheme="RdPu", min=log_dur_min, max=log_dur_max)
x_sc, y_sc = bqplot.OrdinalScale(), bqplot.OrdinalScale()

# axes - color, x & y
c_ax = bqplot.ColorAxis(scale=col_sc, side='right', orientation='vertical')
x_ax = bqplot.Axis(label='Longitude', scale=x_sc)
y_ax = bqplot.Axis(label='Latitude', scale=y_sc, orientation='vertical')

# rows are labelled by the latitude bin centers, columns by the longitude ones
heat_map = bqplot.GridHeatMap(
    color=hist2d,
    row=lat_centers,
    column=long_centers,
    scales={'color': col_sc, 'row': y_sc, 'column': x_sc},
    interactions={'click': 'select'},
    anchor_style={'fill': 'blue'},
    selected_style={'opacity': 1.0},
    unselected_style={'opacity': 1.0},
)

fig = bqplot.Figure(marks=[heat_map], axes=[c_ax, y_ax, x_ax])

# (II) Scatter plot
# scales & ax in usual way
# (`dt` is the datetime module, imported together with our usual stuff)
x_scl = bqplot.DateScale(min=dt.datetime(1950,1,1),max=dt.datetime(2020,1,1)) # note: for dates on x-axis
y_scl = bqplot.LogScale()
ax_xcl = bqplot.Axis(label='Date', scale=x_scl)
ax_ycl = bqplot.Axis(label='Duration in Sec', scale=y_scl, 
                    orientation='vertical', side='left')
# for the scatter plot of duration in a region as a function of date
# let's start with a default region: the first longitude & latitude bin
i,j = 0,0
longs = [long_edges[i], long_edges[i+1]]
lats = [lat_edges[j],lat_edges[j+1]]
# True for every sighting that falls inside that bin (edges included)
region_mask = ( (ufos['latitude'] >= lats[0]) & (ufos['latitude']<=lats[1]) &\
                (ufos['longitude'] >= longs[0]) & (ufos['longitude']<=longs[1]) )

# take a look at the bin's edges & the latitudes that fall inside it
lats, longs, ufos['latitude'][region_mask]

# let's plot the durations as a function of date there
duration_scatt = bqplot.Scatter(x = ufos['date'][region_mask],
                               y = ufos['duration_seconds'][region_mask], 
                              scales={'x':x_scl, 'y':y_scl})

fig_dur = bqplot.Figure(marks = [duration_scatt], axes = [ax_xcl, ax_ycl])

# create label again
mySelectedLabel = ipywidgets.Label()
def get_data_value(change):
    """Show the hist2d value of the selected heat map cell in the label.

    `change` is the notification passed in by `observe`; only
    change['owner'].selected is read.  Nothing happens when the
    selection is empty.
    """
    selected = change['owner'].selected
    # an empty selection has no [0] entry to unpack
    if selected is None or len(selected) == 0:
        return
    i, j = selected[0]
    v = hist2d[i, j]  # grab data value
    mySelectedLabel.value = 'Total duration in log(sec) = ' + str(v)  # set our label

# this label-only callback is left unconnected here; un-comment the
#  next line to try it on its own
#heat_map.observe(get_data_value, 'selected')

# (2) now again, we want our scatter plot to react to changes 
#  to what we've selected so:
def get_data_value2(change):
    """Update the label and the duration scatter plot for the selected cell.

    Parameters
    ----------
    change : dict
        Notification passed in by `observe`; only
        change['owner'].selected is read.

    For the first selected (i, j) pair, writes hist2d[i, j] into
    mySelectedLabel and replaces the x & y data of duration_scatt with
    the dates & durations of the sightings inside that bin.  Returns
    without changing anything when nothing is selected.
    """
    selected = change['owner'].selected
    # an empty selection has no [0] entry to unpack
    if selected is None or len(selected) == 0:
        return
    i, j = selected[0]
    v = hist2d[i, j]  # grab data value
    mySelectedLabel.value = 'Total duration in log(sec) = ' + str(v)  # set our label
    # note!! the heat map rows are latitudes and its columns are
    #  longitudes, so row index i picks the lat bin & column index j
    #  picks the long bin
    longs = [long_edges[j], long_edges[j+1]]
    lats = [lat_edges[i], lat_edges[i+1]]
    region_mask = ( (ufos['latitude'] >= lats[0]) & (ufos['latitude']<=lats[1]) &\
                (ufos['longitude'] >= longs[0]) & (ufos['longitude']<=longs[1]) )
    # send x & y to the front end together, so the plot never sees
    #  a new x paired with an old y of a different length
    with duration_scatt.hold_sync():
        duration_scatt.x = ufos['date'][region_mask]
        duration_scatt.y = ufos['duration_seconds'][region_mask]
# make sure we connect our callback to the heatmap
heat_map.observe(get_data_value2, 'selected')


ipywidgets.VBox([mySelectedLabel, ipywidgets.HBox([fig,fig_dur])])
# note that when I select a deep purple place, my scatter plot is 
#  very laggy; this makes me think we should do this with a 
#  histogram/bar type plot instead


VkJveChjaGlsZHJlbj0oTGFiZWwodmFsdWU9dScnKSwgSEJveChjaGlsZHJlbj0oRmlndXJlKGF4ZXM9W0NvbG9yQXhpcyhvcmllbnRhdGlvbj0ndmVydGljYWwnLCBzY2FsZT1Db2xvclNjYWzigKY=


In [276]:
# (I) For the heatmap
# scales - color, x & y
# the color range is pinned to the smallest & largest non-NaN value of hist2d
log_dur_min = np.nanmin(hist2d)
log_dur_max = np.nanmax(hist2d)
col_sc = bqplot.ColorScale(scheme="RdPu", min=log_dur_min, max=log_dur_max)
x_sc, y_sc = bqplot.OrdinalScale(), bqplot.OrdinalScale()

# axes - color, x & y
c_ax = bqplot.ColorAxis(scale=col_sc, side='right', orientation='vertical')
x_ax = bqplot.Axis(label='Longitude', scale=x_sc)
y_ax = bqplot.Axis(label='Latitude', scale=y_sc, orientation='vertical')

# rows are labelled by the latitude bin centers, columns by the longitude ones
heat_map = bqplot.GridHeatMap(
    color=hist2d,
    row=lat_centers,
    column=long_centers,
    scales={'color': col_sc, 'row': y_sc, 'column': x_sc},
    interactions={'click': 'select'},
    anchor_style={'fill': 'blue'},
    selected_style={'opacity': 1.0},
    unselected_style={'opacity': 1.0},
)

fig = bqplot.Figure(marks=[heat_map], axes=[c_ax, y_ax, x_ax])

# (II) Bar plot
# scales & axes in the usual way - note we are back to linear scales
x_scl, y_scl = bqplot.LinearScale(), bqplot.LinearScale()
ax_xcl = bqplot.Axis(label='Date', scale=x_scl)
ax_ycl = bqplot.Axis(label='Total duration in Sec', scale=y_scl,
                     side='left', orientation='vertical')

# the bars show the total duration per year-bin inside one region;
#  start with a default region: the first longitude & latitude bin
i, j = 0, 0
longs = [long_edges[i], long_edges[i+1]]
lats = [lat_edges[j], lat_edges[j+1]]
# sightings inside the bin, both edges included
in_lat_band = ufos['latitude'].between(lats[0], lats[1])
in_long_band = ufos['longitude'].between(longs[0], longs[1])
region_mask = in_lat_band & in_long_band

# a look at the bin's edges & the latitudes that fall inside it
lats, longs, ufos['latitude'][region_mask]

# bin the sightings of the region by year, weighting each by its duration
ufos['year'] = ufos['date'].dt.year
dur, dur_edges = np.histogram(ufos['year'][region_mask],
                              bins=10,
                              weights=ufos['duration_seconds'][region_mask])
# bin centers from the bin edges, like before with our histograms
dur_centers = [(lo + hi) * 0.5 for lo, hi in zip(dur_edges[:-1], dur_edges[1:])]
# the "histogram" is drawn by hand as bars at the bin centers
duration_hist = bqplot.Bars(x=dur_centers, y=dur,
                            scales={'x': x_scl, 'y': y_scl})
fig_dur = bqplot.Figure(marks=[duration_hist], axes=[ax_xcl, ax_ycl])


#  to what we've selected so:
def get_data_value(change):
    """Update the label and the duration bar plot for the selected cell.

    Parameters
    ----------
    change : dict
        Notification passed in by `observe`; only
        change['owner'].selected is read.

    For the first selected (i, j) pair, writes hist2d[i, j] into
    mySelectedLabel, re-bins the sightings inside that bin by year
    (weighted by duration, 10 bins) and hands the result to
    duration_hist.  Returns without changing anything when nothing is
    selected.
    """
    selected = change['owner'].selected
    # an empty selection has no [0] entry to unpack
    if selected is None or len(selected) == 0:
        return
    i, j = selected[0]
    v = hist2d[i, j]  # grab data value
    mySelectedLabel.value = 'Total duration in log(sec) = ' + str(v)  # set our label
    # note!! the heat map rows are latitudes and its columns are
    #  longitudes, so row index i picks the lat bin & column index j
    #  picks the long bin
    longs = [long_edges[j], long_edges[j+1]]
    lats = [lat_edges[i], lat_edges[i+1]]
    region_mask = ( (ufos['latitude'] >= lats[0]) & (ufos['latitude']<=lats[1]) &\
                (ufos['longitude'] >= longs[0]) & (ufos['longitude']<=longs[1]) )
    dur, dur_edges = np.histogram(ufos['year'][region_mask],
                                  weights=ufos['duration_seconds'][region_mask],
                                  bins=10)
    # bin centers; the loop variable is `k`, not `i`, so the row index
    #  above is never overwritten (on Python 2 a list comprehension's
    #  loop variable leaks into the enclosing scope)
    dur_centers = [(dur_edges[k]+dur_edges[k+1])*0.5 for k in range(len(dur_edges)-1)]
    # send x & y to the front end together
    with duration_hist.hold_sync():
        duration_hist.x = dur_centers
        duration_hist.y = dur
# give this dashboard a label of its own: re-using the label object of
#  the previous dashboard would show that dashboard's last value here
#  and update both outputs on every click
mySelectedLabel = ipywidgets.Label()
# make sure we connect to heatmap
heat_map.observe(get_data_value, 'selected')


ipywidgets.VBox([mySelectedLabel, ipywidgets.HBox([fig,fig_dur])])

VkJveChjaGlsZHJlbj0oTGFiZWwodmFsdWU9dSdUb3RhbCBkdXJhdGlvbiBpbiBsb2coc2VjKSA9IDMuNzU5MjkwMDMzMDI0MzA0JyksIEhCb3goY2hpbGRyZW49KEZpZ3VyZShheGVzPVtDb2zigKY=


## Might not get to this...

In [299]:
# (I) heat map, built as before
# the color range is pinned to the smallest & largest non-NaN value of hist2d
log_dur_min = np.nanmin(hist2d)
log_dur_max = np.nanmax(hist2d)
col_sc = bqplot.ColorScale(scheme="RdPu", min=log_dur_min, max=log_dur_max)
x_sc, y_sc = bqplot.OrdinalScale(), bqplot.OrdinalScale()

# axes - color, x & y
c_ax = bqplot.ColorAxis(scale=col_sc, side='right', orientation='vertical')
x_ax = bqplot.Axis(label='Longitude', scale=x_sc)
y_ax = bqplot.Axis(label='Latitude', scale=y_sc, orientation='vertical')

# rows are labelled by the latitude bin centers, columns by the longitude ones
heat_map = bqplot.GridHeatMap(
    color=hist2d,
    row=lat_centers,
    column=long_centers,
    scales={'color': col_sc, 'row': y_sc, 'column': x_sc},
    interactions={'click': 'select'},
    anchor_style={'fill': 'blue'},
    selected_style={'opacity': 1.0},
    unselected_style={'opacity': 1.0},
)

fig = bqplot.Figure(marks=[heat_map], axes=[c_ax, y_ax, x_ax])

# (II) Bar plot for durations through the years
# scales & axes in the usual way - note we are back to linear scales
x_scl, y_scl = bqplot.LinearScale(), bqplot.LinearScale()
ax_xcl = bqplot.Axis(label='Date', scale=x_scl)
ax_ycl = bqplot.Axis(label='Total duration in Sec', scale=y_scl,
                     side='left', orientation='vertical')

# the bars show the total duration per year-bin inside one region;
#  start with a default region: the first longitude & latitude bin
i, j = 0, 0
longs = [long_edges[i], long_edges[i+1]]
lats = [lat_edges[j], lat_edges[j+1]]
# sightings inside the bin, both edges included
in_lat_band = ufos['latitude'].between(lats[0], lats[1])
in_long_band = ufos['longitude'].between(longs[0], longs[1])
region_mask = in_lat_band & in_long_band

# a look at the bin's edges & the latitudes that fall inside it
lats, longs, ufos['latitude'][region_mask]

# bin the sightings of the region by year, weighting each by its duration
ufos['year'] = ufos['date'].dt.year
dur, dur_edges = np.histogram(ufos['year'][region_mask],
                              bins=10,
                              weights=ufos['duration_seconds'][region_mask])
# bin centers from the bin edges, like before with our histograms
dur_centers = [(lo + hi) * 0.5 for lo, hi in zip(dur_edges[:-1], dur_edges[1:])]
# the "histogram" is drawn by hand as bars at the bin centers
duration_hist = bqplot.Bars(x=dur_centers, y=dur,
                            scales={'x': x_scl, 'y': y_scl})
fig_dur = bqplot.Figure(marks=[duration_hist], axes=[ax_xcl, ax_ycl])

# (III) histogram for shape
x_ord = bqplot.OrdinalScale()
y_ord = bqplot.LinearScale()
ax_xord = bqplot.Axis(label='Shape', scale=x_ord)
ax_yord = bqplot.Axis(label='Freq', scale=y_ord,
                     orientation='vertical',
                     side='left')

# histogram using pandas
# value_counts() returns every shape (as its index) together with its
#  count, so bar labels & bar heights are taken from that one result.
#  Pairing unique() with value_counts() would mismatch them: unique()
#  lists shapes in order of appearance and keeps NaN, value_counts()
#  sorts by count and drops NaN
shape_counts = ufos['shape'][region_mask].value_counts()
hist_ord = bqplot.Bars(x=shape_counts.index.values,
                       y=shape_counts.values,
                     scales={'x':x_ord, 'y':y_ord})
fig_shape = bqplot.Figure(marks=[hist_ord], axes=[ax_xord,ax_yord])

#  to what we've selected so:
def get_data_value(change):
    """Update the label, the duration bars and the shape bars for the selected cell.

    Parameters
    ----------
    change : dict
        Notification passed in by `observe`; only
        change['owner'].selected is read.

    For the first selected (i, j) pair, writes hist2d[i, j] into
    mySelectedLabel, re-bins the sightings inside that bin by year
    (weighted by duration, 10 bins) for duration_hist, and recounts the
    reported shapes for hist_ord.  Returns without changing anything
    when nothing is selected.
    """
    selected = change['owner'].selected
    # an empty selection has no [0] entry to unpack
    if selected is None or len(selected) == 0:
        return
    i, j = selected[0]
    v = hist2d[i, j]  # grab data value
    mySelectedLabel.value = 'Total duration in log(sec) = ' + str(v)  # set our label
    # note!! the heat map rows are latitudes and its columns are
    #  longitudes, so row index i picks the lat bin & column index j
    #  picks the long bin
    longs = [long_edges[j], long_edges[j+1]]
    lats = [lat_edges[i], lat_edges[i+1]]
    region_mask = ( (ufos['latitude'] >= lats[0]) & (ufos['latitude']<=lats[1]) &\
                (ufos['longitude'] >= longs[0]) & (ufos['longitude']<=longs[1]) )
    dur, dur_edges = np.histogram(ufos['year'][region_mask],
                                  weights=ufos['duration_seconds'][region_mask],
                                  bins=10)
    # bin centers; the loop variable is `k`, not `i`, so the row index
    #  above is never overwritten (on Python 2 a list comprehension's
    #  loop variable leaks into the enclosing scope)
    dur_centers = [(dur_edges[k]+dur_edges[k+1])*0.5 for k in range(len(dur_edges)-1)]
    # send x & y to the front end together
    with duration_hist.hold_sync():
        duration_hist.x = dur_centers
        duration_hist.y = dur
    # also update shapes
    # labels & heights both come from value_counts(), which keeps each
    #  shape next to its own count (unique() uses a different order
    #  and also keeps NaN, so it cannot be paired with the counts)
    shape_counts = ufos['shape'][region_mask].value_counts()
    with hist_ord.hold_sync():
        hist_ord.x = shape_counts.index.values
        hist_ord.y = shape_counts.values
# give this dashboard a label of its own: re-using the label object of
#  an earlier dashboard would show that dashboard's last value here
#  and update both outputs on every click
mySelectedLabel = ipywidgets.Label()
# make sure we connect to heatmap
heat_map.observe(get_data_value, 'selected')

# let's make all the sizes look nice
fig_dur.layout.max_width = '400px'
fig_dur.layout.max_height= '300px'
fig_shape.layout.max_width = '400px'
fig_shape.layout.max_height= '300px'
fig.layout.min_width = '800px' # add to both

# change layout: label on top, the two bar plots side by side, heat map below
#ipywidgets.VBox([mySelectedLabel, ipywidgets.HBox([fig,ipywidgets.VBox([fig_shape,fig_dur])])])
ipywidgets.VBox([mySelectedLabel,
                ipywidgets.HBox([fig_shape,fig_dur]),
                fig])

VkJveChjaGlsZHJlbj0oTGFiZWwodmFsdWU9dSdUb3RhbCBkdXJhdGlvbiBpbiBsb2coc2VjKSA9IDcuOTQ1MDE3OTUyMDk2NzknKSwgSEJveChjaGlsZHJlbj0oRmlndXJlKGF4ZXM9W0F4aXPigKY=
