In [1]:
import pandas as pd
import numpy as np
import bqplot
import traitlets
import ipywidgets

# Dataset

[UFO dataset](https://github.com/planetsig/ufo-reports)

In [2]:
# Uncomment and run once to download the dataset next to this notebook:
#!wget https://github.com/planetsig/ufo-reports/raw/master/csv-data/ufo-scrubbed-geocoded-time-standardized.csv

In [3]:
# Load the raw sightings. Column names are supplied explicitly via `names`.
# NOTE(review): the recorded output shows a warning raised by this cell --
# presumably pandas' mixed-dtype warning triggered by the dirty rows dropped
# below; confirm before silencing it.
ufo = pd.read_csv('ufo-scrubbed-geocoded-time-standardized.csv',
                  names=['date_sighted', 'city', 'state', 'country',
                         'shape', 'duration',
                         'duration_txt', 'note', 'date_reported',
                         'latitude', 'longitude'],
                  parse_dates=['date_sighted', 'date_reported'])

# Keep the original row position as an explicit identifier column.
ufo = ufo.reset_index().rename(columns={'index': 'ufo_id'})
print(ufo.shape)

# Ignore dirty data for now: these rows are excluded by index label.
DIRTY_ROW_IDS = [27822, 35692, 58591, 43782]
ufo = ufo.loc[~ufo.index.isin(DIRTY_ROW_IDS)]

# Work on a reproducible 1000-row sample.
df = ufo.sample(n=1000, random_state=5).reset_index(drop=True)

# `date_sighted` is still text at this point (the `.str` accessor relies on
# that). Some rows use the hour '24:00', which cannot be parsed directly.
# '24:00' denotes the end of the day, i.e. 00:00 of the *following* day, so
# rewrite the hour and then roll those rows forward by one day instead of
# silently moving them 24 hours into the past.
ends_at_24h = df['date_sighted'].str.contains('24:00', regex=False, na=False)
df['date_sighted'] = pd.to_datetime(
    df['date_sighted'].str.replace('24:00', '00:00', regex=False))
df.loc[ends_at_24h, 'date_sighted'] = (
    df.loc[ends_at_24h, 'date_sighted'] + pd.Timedelta(days=1))

# These columns are cast explicitly to float before plotting.
df['duration'] = df['duration'].astype(float)
df['latitude'] = df['latitude'].astype(float)

# `DataFrame.info()` prints its report itself and returns None, so it must
# not be wrapped in print() (that emitted a stray "None" line).
df.info()

df.head(2)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


(80332, 12)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   ufo_id         1000 non-null   int64         
 1   date_sighted   1000 non-null   datetime64[ns]
 2   city           1000 non-null   object        
 3   state          922 non-null    object        
 4   country        866 non-null    object        
 5   shape          974 non-null    object        
 6   duration       1000 non-null   float64       
 7   duration_txt   1000 non-null   object        
 8   note           1000 non-null   object        
 9   date_reported  1000 non-null   datetime64[ns]
 10  latitude       1000 non-null   float64       
 11  longitude      1000 non-null   float64       
dtypes: datetime64[ns](2), float64(3), int64(1), object(6)
memory usage: 93.9+ KB
None


Unnamed: 0,ufo_id,date_sighted,city,state,country,shape,duration,duration_txt,note,date_reported,latitude,longitude
0,60887,2010-07-30 23:05:00,uk/scotland,,,light,180.0,3 mins,two bright lights travelling at speed across a...,2010-08-24,56.490671,-4.202646
1,5167,2011-10-28 16:39:00,scottsdale,az,us,unknown,300.0,5min or less,Three to Five comet-like objects seen descendi...,2011-12-12,33.509167,-111.898333


# Last week: BrushIntervalSelector
 - Select an area by X or by Y


In [4]:
# Vertical interval brush on the "year sighted vs. year reported" scatter

# Scales: linear x/y plus a colour scale spanning log10(duration).
# NOTE(review): a duration of 0 would become -inf under log10 and drag the
# colour range with it -- confirm the sample contains no zero durations.
x_sc = bqplot.LinearScale()
y_sc = bqplot.LinearScale()

clr = np.log10(df['duration'])
c_sc = bqplot.ColorScale(
    scheme='Oranges',
    min=np.nanmin(clr),
    max=np.nanmax(clr),
)

# Axes: one per scale; the colour axis is placed on the right-hand side
x_ax = bqplot.Axis(label='Year Sighted', scale=x_sc)
y_ax = bqplot.Axis(label='Year Reported', scale=y_sc, orientation='vertical')
c_ax = bqplot.ColorAxis(side='right', scale=c_sc)

# Mark: one point per sighting, coloured by log10(duration)
scatter = bqplot.Scatter(
    x=df['date_sighted'].dt.year,
    y=df['date_reported'].dt.year,
    color=clr,
    scales={'x': x_sc, 'y': y_sc, 'color': c_sc},
)

# Interaction: interval brush along the y scale, acting on the scatter mark
my_brush = bqplot.interacts.BrushIntervalSelector(
    scale=y_sc,
    orientation='vertical',
    marks=[scatter],
)

# Text read-outs that the selection callback fills in
selected_range = ipywidgets.HTML()
num_of_points_selected = ipywidgets.HTML()
def select_func(change):
    """Refresh the two HTML read-outs from the current brush selection.

    Registered as an observer on ``my_brush``. ``change`` is the traitlets
    change notification; it is not inspected because the state is read
    directly from ``my_brush`` and ``scatter``.

    ``scatter.selected`` may be ``None`` (nothing brushed yet, or the brush
    was cleared); that case is reported as zero selected points instead of
    raising ``TypeError`` from ``len(None)``.
    """
    picked = scatter.selected
    n_picked = 0 if picked is None else len(picked)

    selected_range.value = 'selected range: {}'.format(my_brush.selected)
    num_of_points_selected.value = 'number of selected points: {}'.format(n_picked)


# Run the callback whenever the brush's `selected` trait changes
my_brush.observe(select_func, names='selected')

# Figure with the brush attached as its interaction
scatter_fig = bqplot.Figure(
    marks=[scatter],
    axes=[x_ax, y_ax, c_ax],
    interaction=my_brush,
)

# Stack the two read-outs above the figure and display the result
scatter_fig_annotated = ipywidgets.VBox(
    children=[selected_range, num_of_points_selected, scatter_fig]
)
scatter_fig_annotated


VBox(children=(HTML(value=''), HTML(value=''), Figure(axes=[Axis(label='Year Sighted', scale=LinearScale()), A…

# BrushSelector: Select by X and Y

In [5]:
# Two-dimensional brush on the "year sighted vs. year reported" scatter

# Scales: linear x/y plus a colour scale spanning log10(duration)
x_sc = bqplot.LinearScale()
y_sc = bqplot.LinearScale()

clr = np.log10(df['duration'])
c_sc = bqplot.ColorScale(
    scheme='Oranges',
    min=np.nanmin(clr),
    max=np.nanmax(clr),
)

# Axes: one per scale; the colour axis is placed on the right-hand side
x_ax = bqplot.Axis(label='Year Sighted', scale=x_sc)
y_ax = bqplot.Axis(label='Year Reported', scale=y_sc, orientation='vertical')
c_ax = bqplot.ColorAxis(side='right', scale=c_sc)

# Mark: one point per sighting, coloured by log10(duration)
scatter = bqplot.Scatter(
    x=df['date_sighted'].dt.year,
    y=df['date_reported'].dt.year,
    color=clr,
    scales={'x': x_sc, 'y': y_sc, 'color': c_sc},
)

# Interaction: rectangular brush over both scales, acting on the scatter mark
my_brush = bqplot.interacts.BrushSelector(
    x_scale=x_sc,
    y_scale=y_sc,
    marks=[scatter],
)

# Text read-outs for the brushed x and y extents
selected_range_x = ipywidgets.HTML()
selected_range_y = ipywidgets.HTML()
def select_func(change):
    """Show the brush's current x and y extents in the two HTML read-outs.

    Registered as an observer on ``my_brush``. ``change`` is the traitlets
    change notification; it is not inspected because the extents are read
    directly from the brush. The extents are formatted as-is, so an absent
    selection is displayed as ``None``.
    """
    selected_range_x.value = 'Selected X range: {}'.format(my_brush.selected_x)
    selected_range_y.value = 'Selected Y range: {}'.format(my_brush.selected_y)


# Run the callback whenever the brush's `selected` trait changes
my_brush.observe(select_func, names='selected')

# Figure with the 2D brush attached as its interaction
scatter_fig = bqplot.Figure(
    marks=[scatter],
    axes=[x_ax, y_ax, c_ax],
    interaction=my_brush,
)

# Stack the x/y read-outs above the figure and display the result
scatter_fig_annotated = ipywidgets.VBox(
    children=[selected_range_x, selected_range_y, scatter_fig]
)
scatter_fig_annotated

VBox(children=(HTML(value=''), HTML(value=''), Figure(axes=[Axis(label='Year Sighted', scale=LinearScale()), A…

# Linking & Brushing
- Left Scatter: year scatter plot
- Right Scatter: longitude and latitude

In [6]:
# ---- Left plot: year sighted vs. year reported ----

# Scales: linear x/y plus a colour scale spanning log10(duration)
x_sc = bqplot.LinearScale()
y_sc = bqplot.LinearScale()

clr = np.log10(df['duration'])
c_sc = bqplot.ColorScale(
    scheme='Oranges',
    min=np.nanmin(clr),
    max=np.nanmax(clr),
)

# Axes: one per scale; the colour axis is placed on the right-hand side
x_ax = bqplot.Axis(label='Year Sighted', scale=x_sc)
y_ax = bqplot.Axis(label='Year Reported', scale=y_sc, orientation='vertical')
c_ax = bqplot.ColorAxis(side='right', scale=c_sc)

# Mark: one point per sighting, coloured by log10(duration)
scatter = bqplot.Scatter(
    x=df['date_sighted'].dt.year,
    y=df['date_reported'].dt.year,
    color=clr,
    scales={'x': x_sc, 'y': y_sc, 'color': c_sc},
)


# ---- Right plot: longitude vs. latitude ----

# Scales (independent of the left plot's scales)
x_sc_r = bqplot.LinearScale()
y_sc_r = bqplot.LinearScale()

# Axes
x_ax_r = bqplot.Axis(label='Longitude', scale=x_sc_r)
y_ax_r = bqplot.Axis(label='Latitude', scale=y_sc_r, orientation='vertical')

# Mark: starts out showing every sampled sighting
loc_scatter = bqplot.Scatter(
    x=df['longitude'],
    y=df['latitude'],
    scales={'x': x_sc_r, 'y': y_sc_r},
)


# ---- Interaction: rectangular brush on the left plot ----
my_brush = bqplot.interacts.BrushSelector(
    x_scale=x_sc,
    y_scale=y_sc,
    marks=[scatter],
)

# Text read-outs for the brushed x and y extents
selected_range_x = ipywidgets.HTML()
selected_range_y = ipywidgets.HTML()
def select_func(change):
    """Link the year brush (left plot) to the location scatter (right plot).

    Registered as an observer on ``my_brush``. ``change`` is the traitlets
    change notification; it is not inspected because the brush extents are
    read directly from ``my_brush``.

    With an active selection, the read-outs show the brushed x/y extents and
    ``loc_scatter`` is restricted to the rows of ``df`` whose sighting year
    lies within the x extent and whose report year lies within the y extent
    (both bounds inclusive). Without a selection (extent ``None`` or not a
    two-element range), the read-outs are cleared and ``loc_scatter`` shows
    every row again.
    """
    x_range = my_brush.selected_x
    y_range = my_brush.selected_y

    # A cleared brush may surface as None or as an empty array; only a
    # two-element range can be unpacked into (from, to).
    has_selection = (
        x_range is not None and y_range is not None
        and len(x_range) == 2 and len(y_range) == 2
    )

    if has_selection:
        selected_range_x.value = 'Selected X range: {}'.format(x_range)
        selected_range_y.value = 'Selected Y range: {}'.format(y_range)

        x_from, x_to = x_range
        y_from, y_to = y_range

        # Series.between is inclusive on both ends, matching >= / <=.
        in_x = df['date_sighted'].dt.year.between(x_from, x_to)
        in_y = df['date_reported'].dt.year.between(y_from, y_to)
        df_selected = df.loc[in_x & in_y]
    else:
        # Do not leave the previous extents on screen once the brush is gone.
        selected_range_x.value = ''
        selected_range_y.value = ''
        df_selected = df

    # Send x and y together so the mark never holds arrays of different
    # lengths between the two assignments.
    with loc_scatter.hold_sync():
        loc_scatter.x = df_selected['longitude']
        loc_scatter.y = df_selected['latitude']


# Run the callback whenever the brush's `selected` trait changes
my_brush.observe(select_func, names='selected')

# Left panel: brushed year scatter with its two read-outs stacked on top
scatter_fig = bqplot.Figure(
    marks=[scatter],
    axes=[x_ax, y_ax, c_ax],
    interaction=my_brush,
)
scatter_fig_annotated = ipywidgets.VBox(
    children=[selected_range_x, selected_range_y, scatter_fig]
)

# Right panel: location scatter driven by the brush
loc_scatter_fig = bqplot.Figure(
    marks=[loc_scatter],
    axes=[x_ax_r, y_ax_r],
)

# Give both panels the same width
scatter_fig_annotated.layout.width = '500px'
loc_scatter_fig.layout.width = '500px'

# Dashboard: the two panels side by side
my_dashboard = ipywidgets.HBox(children=[scatter_fig_annotated, loc_scatter_fig])
my_dashboard

HBox(children=(VBox(children=(HTML(value=''), HTML(value=''), Figure(axes=[Axis(label='Year Sighted', scale=Li…

# DateScale

In [7]:
# UFOs reported in 2000 and their durations in seconds

# Keep only the rows whose report date falls in the year 2000
df_selected = df[df['date_reported'].dt.year == 2000]

# Scales: dates on x, plain numbers on y
x_sc = bqplot.DateScale()
y_sc = bqplot.LinearScale()

# Axes
x_ax = bqplot.Axis(label='Date reported', scale=x_sc)
y_ax = bqplot.Axis(
    label='Duration in seconds',
    scale=y_sc,
    orientation='vertical',
)

# Mark: one point per report, positioned by report date and duration
scatter = bqplot.Scatter(
    x=df_selected['date_reported'],
    y=df_selected['duration'],
    scales={'x': x_sc, 'y': y_sc},
)

# Figure
scatter_fig = bqplot.Figure(marks=[scatter], axes=[x_ax, y_ax])
scatter_fig

Figure(axes=[Axis(label='Date reported', scale=DateScale()), Axis(label='Duration in seconds', orientation='ve…

In [8]:
# Peek at the first two rows of the current `df_selected` subset
df_selected.head(2)

Unnamed: 0,ufo_id,date_sighted,city,state,country,shape,duration,duration_txt,note,date_reported,latitude,longitude
40,43516,1998-05-21 13:00:00,ridgway,pa,us,disk,120.0,2 minutes,It was a helicopter following case.,2000-12-02,41.420278,-78.728889
86,74774,2000-09-15 20:37:00,ruidsoso,nm,,light,900.0,15 minutes,2 points of light&#44 diplaying bizzare behavi...,2000-09-17,33.331749,-105.673041


# Heatmap and click select

In [9]:
# Heatmap input: number of sightings per report year (rows) and country
# (columns). Year/country pairs with no sightings come out as NaN.
heatmap_data = (
    df.groupby([df['date_reported'].dt.year, 'country'])[['ufo_id']]
      .count()
      .reset_index()
      .pivot(index='date_reported', columns='country', values='ufo_id')
)
heatmap_data

country,au,ca,de,gb,us
date_reported,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1998,,,,,9.0
1999,,,,,50.0
2000,1.0,3.0,,,25.0
2001,,2.0,,2.0,31.0
2002,,5.0,,5.0,39.0
2003,1.0,2.0,,3.0,47.0
2004,3.0,3.0,,,47.0
2005,,,,4.0,55.0
2006,,2.0,1.0,,30.0
2007,1.0,,,2.0,48.0


In [10]:
# Static heatmap (no interaction yet)

# Colour each cell by log10 of its count
clr = np.log10(heatmap_data)

# Scales: ordinal for both country and year, plus a colour scale whose
# limits ignore the NaN cells
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()
c_sc = bqplot.ColorScale(
    scheme='BuPu',
    min=np.nanmin(clr),
    max=np.nanmax(clr),
)

# Axes: countries along x, report years along y, colour bar on the right
x_ax = bqplot.Axis(label='Country', scale=x_sc)
y_ax = bqplot.Axis(label='Year reported', scale=y_sc, orientation='vertical')
c_ax = bqplot.ColorAxis(side='right', scale=c_sc)

# Mark: rows are report years, columns are countries
heatmap = bqplot.GridHeatMap(
    row=heatmap_data.index,
    column=heatmap_data.columns,
    color=clr,
    scales={'row': y_sc, 'column': x_sc, 'color': c_sc},
)

# Figure
heatmap_fig = bqplot.Figure(marks=[heatmap], axes=[x_ax, y_ax, c_ax])
heatmap_fig


Figure(axes=[Axis(label='Country', scale=OrdinalScale()), Axis(label='Year reported', orientation='vertical', …