In [1]:
# In terminal:
# pip install bqplot==0.12.2
# jupyter labextension list
# jupyter labextension install @jupyter-widgets/jupyterlab-manager 
# jupyter labextension install bqplot

In [2]:
import pandas as pd
import numpy as np
import bqplot
import traitlets
import ipywidgets

# Dataset

[UFO dataset](https://github.com/planetsig/ufo-reports)

In [3]:
# !wget https://github.com/planetsig/ufo-reports/raw/master/csv-data/ufo-scrubbed-geocoded-time-standardized.csv

In [4]:
# Load the raw UFO reports; the column names are supplied explicitly via `names`.
ufo = pd.read_csv('ufo-scrubbed-geocoded-time-standardized.csv',
                  names=['date_sighted', 'city', 'state', 'country',
                         'shape', 'duration',
                         'duration_txt', 'note', 'date_reported',
                         'latitude', 'longitude'],
                  # 'date_sighted' is deliberately left as text here: it
                  # contains '24:00' timestamps that cannot be parsed as-is,
                  # so it is cleaned and converted in a later cell.
                  parse_dates=['date_reported'],
                  # Infer each column's dtype from the whole file at once
                  # rather than chunk by chunk, so columns holding a few
                  # malformed values do not end up with mixed value types.
                  low_memory=False)

# Keep the original row number as an explicit identifier column
ufo = ufo.reset_index().rename(columns={'index': 'ufo_id'})

# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would only append a stray "None" to the output.
ufo.info()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 80332 entries, 0 to 80331
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   ufo_id         80332 non-null  int64         
 1   date_sighted   80332 non-null  object        
 2   city           80332 non-null  object        
 3   state          74535 non-null  object        
 4   country        70662 non-null  object        
 5   shape          78400 non-null  object        
 6   duration       80332 non-null  object        
 7   duration_txt   80332 non-null  object        
 8   note           80317 non-null  object        
 9   date_reported  80332 non-null  datetime64[ns]
 10  latitude       80332 non-null  object        
 11  longitude      80332 non-null  float64       
dtypes: datetime64[ns](1), float64(1), int64(1), object(9)
memory usage: 7.4+ MB
None


In [5]:
def find_dirty_data(col):
    """Print every entry of ``col`` that cannot be converted to ``float``.

    Parameters
    ----------
    col : iterable
        Values to check, e.g. a DataFrame column.

    Returns
    -------
    None
        Findings are reported on stdout, one line per offending value.
        The reported row number is the 0-based position of the value
        within ``col`` (from ``enumerate``), not its index label.
    """
    for i, val in enumerate(col):
        try:
            float(val)
        except (TypeError, ValueError):
            # Only genuine conversion failures count as dirty data; anything
            # else (e.g. KeyboardInterrupt) must propagate instead of being
            # reported as a bad value.
            print('Row {} has dirty data: {}'.format(i, [val]))

In [6]:
# List the entries of the raw 'duration' column that are not valid numbers
find_dirty_data(ufo['duration'])

Row 27822 has dirty data: ['2`']
Row 35692 has dirty data: ['8`']
Row 58591 has dirty data: ['0.5`']


In [7]:
# List the entries of the raw 'latitude' column that are not valid numbers
find_dirty_data(ufo['latitude'])

Row 43782 has dirty data: ['33q.200088']


In [8]:
# Drop the rows reported above as holding malformed duration/latitude values
dirty_rows = [27822, 35692, 58591, 43782]
is_clean = ~ufo.index.isin(dirty_rows)
ufo = ufo.loc[is_clean]

# Work with a reproducible (fixed seed) sample of 1000 reports
df = ufo.sample(n=1000, random_state=5)

In [9]:
# Parse the sighting timestamps and convert the numeric text columns.
#
# Some sightings are stamped '24:00', an hour value pd.to_datetime does not
# accept, so it is rewritten to '00:00' before parsing. '24:00' conventionally
# means midnight at the END of the stated day, so those rows are then moved
# forward by one day instead of silently landing 24 hours in the past.
# The dtype guard lets this cell be re-run once the column is already parsed.
if not pd.api.types.is_datetime64_any_dtype(df['date_sighted']):
    sighted_txt = df['date_sighted']
    ends_at_midnight = sighted_txt.str.contains('24:00', regex=False)
    sighted = pd.to_datetime(sighted_txt.str.replace('24:00', '00:00', regex=False))
    df['date_sighted'] = sighted.where(~ends_at_midnight,
                                       sighted + pd.Timedelta(days=1))

df['duration'] = df['duration'].astype(float)
df['latitude'] = df['latitude'].astype(float)
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1000 entries, 60887 to 11929
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   ufo_id         1000 non-null   int64         
 1   date_sighted   1000 non-null   datetime64[ns]
 2   city           1000 non-null   object        
 3   state          922 non-null    object        
 4   country        866 non-null    object        
 5   shape          974 non-null    object        
 6   duration       1000 non-null   float64       
 7   duration_txt   1000 non-null   object        
 8   note           1000 non-null   object        
 9   date_reported  1000 non-null   datetime64[ns]
 10  latitude       1000 non-null   float64       
 11  longitude      1000 non-null   float64       
dtypes: datetime64[ns](2), float64(3), int64(1), object(6)
memory usage: 101.6+ KB


# Basic plot types

# Lines: Number of UFOs sighted in each year

In [10]:
# Peek at the first two rows of the cleaned sample
df.head(2)

Unnamed: 0,ufo_id,date_sighted,city,state,country,shape,duration,duration_txt,note,date_reported,latitude,longitude
60887,60887,2010-07-30 23:05:00,uk/scotland,,,light,180.0,3 mins,two bright lights travelling at speed across a...,2010-08-24,56.490671,-4.202646
5167,5167,2011-10-28 16:39:00,scottsdale,az,us,unknown,300.0,5min or less,Three to Five comet-like objects seen descendi...,2011-12-12,33.509167,-111.898333


In [11]:
# Prep data: count the sampled reports per sighting year
year_sighted = df['date_sighted'].dt.year
line_data = df[['ufo_id']].groupby(year_sighted).count()
line_data

Unnamed: 0_level_0,ufo_id
date_sighted,Unnamed: 1_level_1
1955,1
1957,2
1958,1
1959,1
1961,2
1962,1
1965,2
1966,2
1968,3
1969,3


In [12]:
# Basic line chart: number of sightings per year

# Scales: one linear scale per dimension
x_sc, y_sc = bqplot.LinearScale(), bqplot.LinearScale()

# Axes: each axis is drawn from the scale it is attached to
x_ax = bqplot.Axis(label='Year Sighted', scale=x_sc)
y_ax = bqplot.Axis(label='UFO counts', scale=y_sc, orientation='vertical')

# Mark: a single line through the yearly counts
lines = bqplot.Lines(
    x=line_data.index,
    y=line_data['ufo_id'],
    scales={'x': x_sc, 'y': y_sc},
)

# Figure: combines the mark and the axes into one widget
line_fig = bqplot.Figure(axes=[x_ax, y_ax], marks=[lines])
line_fig

Figure(axes=[Axis(label='Year Sighted', scale=LinearScale()), Axis(label='UFO counts', orientation='vertical',…

In [13]:
# See what can be controlled in Marks: traits() returns a dict mapping each
# trait name of the Lines mark to its trait descriptor
lines.traits()

{'_display_callbacks': <traitlets.traitlets.Instance at 0x7fc835a24f70>,
 '_model_module': <traitlets.traitlets.Unicode at 0x7fc835981400>,
 '_model_module_version': <traitlets.traitlets.Unicode at 0x7fc835981490>,
 '_model_name': <traitlets.traitlets.Unicode at 0x7fc835981cd0>,
 '_msg_callbacks': <traitlets.traitlets.Instance at 0x7fc835a24fa0>,
 '_property_lock': <traitlets.traitlets.Dict at 0x7fc835a24f10>,
 '_states_to_send': <traitlets.traitlets.Set at 0x7fc835a24f40>,
 '_view_count': <traitlets.traitlets.Int at 0x7fc835a24df0>,
 '_view_module': <traitlets.traitlets.Unicode at 0x7fc835981430>,
 '_view_module_version': <traitlets.traitlets.Unicode at 0x7fc835981460>,
 '_view_name': <traitlets.traitlets.Unicode at 0x7fc835981ca0>,
 'apply_clip': <traitlets.traitlets.Bool at 0x7fc8359810d0>,
 'close_path': <traitlets.traitlets.Bool at 0x7fc835981b50>,
 'color': <traittypes.traittypes.Array at 0x7fc8359817f0>,
 'colors': <traitlets.traitlets.List at 0x7fc835981940>,
 'comm': <traitlet

In [14]:
# See what can be controlled in Axis: traits() returns a dict mapping each
# trait name of the axis to its trait descriptor
x_ax.traits()

{'_display_callbacks': <traitlets.traitlets.Instance at 0x7fc835a24f70>,
 '_model_module': <traitlets.traitlets.Unicode at 0x7fc83566f9d0>,
 '_model_module_version': <traitlets.traitlets.Unicode at 0x7fc83566fa90>,
 '_model_name': <traitlets.traitlets.Unicode at 0x7fc83566ff40>,
 '_msg_callbacks': <traitlets.traitlets.Instance at 0x7fc835a24fa0>,
 '_property_lock': <traitlets.traitlets.Dict at 0x7fc835a24f10>,
 '_states_to_send': <traitlets.traitlets.Set at 0x7fc835a24f40>,
 '_view_count': <traitlets.traitlets.Int at 0x7fc835a24df0>,
 '_view_module': <traitlets.traitlets.Unicode at 0x7fc83566f100>,
 '_view_module_version': <traitlets.traitlets.Unicode at 0x7fc83566fa60>,
 '_view_name': <traitlets.traitlets.Unicode at 0x7fc83566ff10>,
 'color': <ipywidgets.widgets.trait_types.Color at 0x7fc83566fe20>,
 'comm': <traitlets.traitlets.Instance at 0x7fc835a24e20>,
 'grid_color': <ipywidgets.widgets.trait_types.Color at 0x7fc83566fdf0>,
 'grid_lines': <traitlets.traitlets.Enum at 0x7fc83566fb

In [15]:
# Restyle the line plot by assigning to traits of the existing objects.

# Mark: draw the line in red, with a circle at every data point
lines.colors = ['red']
lines.marker = 'circle'

# X axis: rotate the tick labels, anchor their text at its start, and set
# the offset of the axis label
x_ax.tick_style = {'text-anchor': 'start'}
x_ax.tick_rotate = 45
x_ax.label_offset = '50'

# Figure: add a title
line_fig.title = 'My Line Plot'

line_fig

Figure(axes=[Axis(label='Year Sighted', label_offset='50', scale=LinearScale(), tick_rotate=45, tick_style={'t…

In [16]:
# The styling options can also be passed directly to the constructors

# Scales
x_sc, y_sc = bqplot.LinearScale(), bqplot.LinearScale()

# Axes
x_ax = bqplot.Axis(label='Year Sighted', scale=x_sc, tick_rotate=45)
y_ax = bqplot.Axis(label='UFO counts', scale=y_sc, orientation='vertical')

# Mark: blue line with a circle at every data point
lines = bqplot.Lines(
    x=line_data.index,
    y=line_data['ufo_id'],
    scales={'x': x_sc, 'y': y_sc},
    marker='circle',
    colors=['blue'],
)

# Figure
line_fig = bqplot.Figure(axes=[x_ax, y_ax], marks=[lines])
line_fig

Figure(axes=[Axis(label='Year Sighted', scale=LinearScale(), tick_rotate=45), Axis(label='UFO counts', orienta…

# Bars: Shapes of UFOs

In [17]:
# Prep data: count the sampled reports per UFO shape
bar_data = df[['shape', 'ufo_id']].groupby('shape').count()
bar_data

Unnamed: 0_level_0,ufo_id
shape,Unnamed: 1_level_1
changing,20
chevron,14
cigar,22
circle,88
cone,2
cross,1
cylinder,12
diamond,11
disk,65
egg,9


In [18]:
# Bar chart: number of sightings per shape

# Scales: the shapes are categories, so x uses an ordinal scale
x_sc, y_sc = bqplot.OrdinalScale(), bqplot.LinearScale()

# Axes
x_ax = bqplot.Axis(label='UFO shapes', scale=x_sc)
y_ax = bqplot.Axis(label='UFO counts', scale=y_sc, orientation='vertical')

# Mark: one bar per shape
bars = bqplot.Bars(
    x=bar_data.index,
    y=bar_data['ufo_id'],
    scales={'x': x_sc, 'y': y_sc},
)

# Figure
bar_fig = bqplot.Figure(axes=[x_ax, y_ax], marks=[bars])
bar_fig

Figure(axes=[Axis(label='UFO shapes', scale=OrdinalScale()), Axis(label='UFO counts', orientation='vertical', …

# Histogram: Duration in seconds

In [19]:
# Histogram of the sighting durations

# Scales
x_sc, y_sc = bqplot.LinearScale(), bqplot.LinearScale()

# Axes
x_ax = bqplot.Axis(label='Duration in seconds', scale=x_sc)
y_ax = bqplot.Axis(label='count', scale=y_sc, orientation='vertical')

# Mark: a Hist mark names its scales 'sample' and 'count' instead of x / y
hist = bqplot.Hist(
    sample=df['duration'],
    bins=50,
    scales={'sample': x_sc, 'count': y_sc},
)

# Figure
hist_fig = bqplot.Figure(axes=[x_ax, y_ax], marks=[hist])
hist_fig

Figure(axes=[Axis(label='Duration in seconds', scale=LinearScale()), Axis(label='count', orientation='vertical…

# Scatter: Year sighted and Year reported

In [20]:
# Basic scatter plot: year sighted against year reported

# Scales
x_sc, y_sc = bqplot.LinearScale(), bqplot.LinearScale()

# Axes
x_ax = bqplot.Axis(label='Year Sighted', scale=x_sc)
y_ax = bqplot.Axis(label='Year Reported', scale=y_sc, orientation='vertical')

# Mark: one point per report
scatter = bqplot.Scatter(
    x=df['date_sighted'].dt.year,
    y=df['date_reported'].dt.year,
    scales={'x': x_sc, 'y': y_sc},
)

# Figure
scatter_fig = bqplot.Figure(axes=[x_ax, y_ax], marks=[scatter])
scatter_fig

Figure(axes=[Axis(label='Year Sighted', scale=LinearScale()), Axis(label='Year Reported', orientation='vertica…

In [21]:
# A scatter plot colored by duration in seconds (on a log10 scale)

# Scale
x_sc = bqplot.LinearScale()
y_sc = bqplot.LinearScale()

# log10 is only defined for positive values. Zero or negative durations are
# masked to NaN first: log10(0) would give -inf, which np.nanmin does not
# skip, and the colour scale would then start at -inf.
clr = np.log10(df['duration'].where(df['duration'] > 0))
c_sc = bqplot.ColorScale(scheme='Oranges',
                         min=np.nanmin(clr),
                         max=np.nanmax(clr))

# Axis
x_ax = bqplot.Axis(scale=x_sc, label='Year Sighted')
y_ax = bqplot.Axis(scale=y_sc, label='Year Reported',
                   orientation='vertical')

# Colour bar, drawn on the right-hand side of the figure
c_ax = bqplot.ColorAxis(scale=c_sc, side='right')

# Mark: each point is coloured by its log10(duration) value
scatter = bqplot.Scatter(x=df['date_sighted'].dt.year,
                         y=df['date_reported'].dt.year,
                         scales={'x': x_sc, 'y': y_sc,
                                 'color': c_sc},
                         color=clr)

# fig
scatter_fig = bqplot.Figure(marks=[scatter], axes=[x_ax, y_ax, c_ax])
scatter_fig

Figure(axes=[Axis(label='Year Sighted', scale=LinearScale()), Axis(label='Year Reported', orientation='vertica…

In [22]:
# log10 of the longest duration in the sample, i.e. the upper end of the
# colour scale used for the scatter plot
np.log10(df['duration'].max())

5.040602340114073

# Interaction

# Pan-zoom

In [23]:
# Line plot with pan-zoom interaction

# Scales
x_sc, y_sc = bqplot.LinearScale(), bqplot.LinearScale()

# Axes
x_ax = bqplot.Axis(label='Year Sighted', scale=x_sc, tick_rotate=45)
y_ax = bqplot.Axis(label='UFO counts', scale=y_sc, orientation='vertical')

# Mark
lines = bqplot.Lines(
    x=line_data.index,
    y=line_data['ufo_id'],
    scales={'x': x_sc, 'y': y_sc},
    marker='circle',
    colors=['blue'],
)

# Interaction: PanZoom takes, per direction, the list of scales it controls
panzoom = bqplot.interacts.PanZoom(scales={'x': [x_sc], 'y': [y_sc]})

# Figure: the interaction is attached to the figure itself
line_fig = bqplot.Figure(axes=[x_ax, y_ax], marks=[lines],
                         interaction=panzoom)
line_fig

Figure(axes=[Axis(label='Year Sighted', scale=LinearScale(), tick_rotate=45), Axis(label='UFO counts', orienta…

# Tooltip

In [24]:
# See what can be controlled in lines: traits() returns a dict mapping each
# trait name of the Lines mark to its trait descriptor
lines.traits()

{'_display_callbacks': <traitlets.traitlets.Instance at 0x7fc835a24f70>,
 '_model_module': <traitlets.traitlets.Unicode at 0x7fc835981400>,
 '_model_module_version': <traitlets.traitlets.Unicode at 0x7fc835981490>,
 '_model_name': <traitlets.traitlets.Unicode at 0x7fc835981cd0>,
 '_msg_callbacks': <traitlets.traitlets.Instance at 0x7fc835a24fa0>,
 '_property_lock': <traitlets.traitlets.Dict at 0x7fc835a24f10>,
 '_states_to_send': <traitlets.traitlets.Set at 0x7fc835a24f40>,
 '_view_count': <traitlets.traitlets.Int at 0x7fc835a24df0>,
 '_view_module': <traitlets.traitlets.Unicode at 0x7fc835981430>,
 '_view_module_version': <traitlets.traitlets.Unicode at 0x7fc835981460>,
 '_view_name': <traitlets.traitlets.Unicode at 0x7fc835981ca0>,
 'apply_clip': <traitlets.traitlets.Bool at 0x7fc8359810d0>,
 'close_path': <traitlets.traitlets.Bool at 0x7fc835981b50>,
 'color': <traittypes.traittypes.Array at 0x7fc8359817f0>,
 'colors': <traitlets.traitlets.List at 0x7fc835981940>,
 'comm': <traitlet

In [25]:
# Add tooltip to line plot

# Scale
x_sc = bqplot.LinearScale()
y_sc = bqplot.LinearScale()

# Axis
x_ax = bqplot.Axis(scale=x_sc, label = 'Year Sighted', 
                   tick_rotate = 45)
y_ax = bqplot.Axis(scale=y_sc, label = 'UFO counts', 
                   orientation='vertical')

# Marks
lines = bqplot.Lines(x=line_data.index, y=line_data['ufo_id'], 
                     scales={'x': x_sc, 'y':y_sc}, 
                     colors = ['blue'], 
                     marker= 'circle')

# Interaction: tooltip
# A single HTML widget is created once and reused as the tooltip. Building a
# new widget inside the callback would allocate one more widget on every
# hover event.
my_tooltip = ipywidgets.HTML()
lines.tooltip = my_tooltip

def add_tooltip(chart, d):
    """Hover callback: show the hovered point's y value in the tooltip.

    Parameters
    ----------
    chart
        The mark that triggered the hover event (unused).
    d : dict
        Event payload; the hovered point is read from ``d['data']``.
        Payloads without a ``'y'`` entry leave the tooltip text unchanged.
    """
    data = d.get('data') or {}
    if 'y' in data:
        my_tooltip.value = 'Number of UFOs: {}'.format(data['y'])

lines.on_hover(add_tooltip)

# Fig
line_fig = bqplot.Figure(marks=[lines], axes=[x_ax, y_ax])
line_fig

Figure(axes=[Axis(label='Year Sighted', scale=LinearScale(), tick_rotate=45), Axis(label='UFO counts', orienta…

# Regenerating hist: Take user input as the number of bins

In [26]:
# Interactively changing number of bins

# See what can be controlled in hist: traits() returns a dict mapping each
# trait name of the Hist mark to its trait descriptor (note the 'bins' trait)
hist.traits()

{'_display_callbacks': <traitlets.traitlets.Instance at 0x7fc835a24f70>,
 '_model_module': <traitlets.traitlets.Unicode at 0x7fc835981400>,
 '_model_module_version': <traitlets.traitlets.Unicode at 0x7fc835981490>,
 '_model_name': <traitlets.traitlets.Unicode at 0x7fc83599b430>,
 '_msg_callbacks': <traitlets.traitlets.Instance at 0x7fc835a24fa0>,
 '_property_lock': <traitlets.traitlets.Dict at 0x7fc835a24f10>,
 '_states_to_send': <traitlets.traitlets.Set at 0x7fc835a24f40>,
 '_view_count': <traitlets.traitlets.Int at 0x7fc835a24df0>,
 '_view_module': <traitlets.traitlets.Unicode at 0x7fc835981430>,
 '_view_module_version': <traitlets.traitlets.Unicode at 0x7fc835981460>,
 '_view_name': <traitlets.traitlets.Unicode at 0x7fc83599b400>,
 'apply_clip': <traitlets.traitlets.Bool at 0x7fc8359810d0>,
 'bins': <traitlets.traitlets.Int at 0x7fc83599b250>,
 'colors': <traitlets.traitlets.List at 0x7fc83599b310>,
 'comm': <traitlets.traitlets.Instance at 0x7fc835a24e20>,
 'count': <traittypes.tra

In [27]:
# Change number of bins by assigning to the 'bins' trait of the existing
# Hist mark, then display the figure that contains it
hist.bins = 3
hist_fig

Figure(axes=[Axis(label='Duration in seconds', scale=LinearScale()), Axis(label='count', orientation='vertical…

In [28]:
# Two widgets are used to take user input:
# ipywidgets.Text
# ipywidgets.Button

# A text box; whatever the user types is available as text_area.value
text_area = ipywidgets.Text()
text_area

Text(value='')

In [29]:
# Current content of the text box, as a string
text_area.value

''

In [30]:
# A clickable button labelled 'Regenerate!'
my_button = ipywidgets.Button(description='Regenerate!')
my_button

Button(description='Regenerate!', style=ButtonStyle())

In [31]:
# Change number of bins by user input

# Scale
x_sc = bqplot.LinearScale()
y_sc = bqplot.LinearScale()

# Axis
x_ax = bqplot.Axis(scale=x_sc, label='Duration in seconds')
y_ax = bqplot.Axis(scale=y_sc, label='count', orientation='vertical')

# Mark
hist = bqplot.Hist(sample=df['duration'], 
                   scales={'sample':x_sc, 'count':y_sc}, 
                   bins=50)

# Interaction: User input

text_area = ipywidgets.Text()
my_button = ipywidgets.Button(description='Regenerate!')

def regenerate_func(button):
    """Button callback: rebin the histogram using the text box content.

    Parameters
    ----------
    button
        The button that was clicked (unused).

    The text is read from ``text_area`` and applied to ``hist.bins``. Input
    that is not a whole number, or is smaller than 1, is ignored so that a
    typo or a click on an empty box leaves the current histogram untouched
    instead of raising inside the callback.
    """
    user_input = text_area.value.strip()
    try:
        n_bins = int(user_input)
    except ValueError:
        return
    if n_bins < 1:
        return
    hist.bins = n_bins

my_button.on_click(regenerate_func)

# Fig
hist_fig = bqplot.Figure(marks=[hist], axes=[x_ax, y_ax])
# Stack the input controls above the figure
hist_fig_regen = ipywidgets.VBox([text_area, my_button, hist_fig])
hist_fig_regen

VBox(children=(Text(value=''), Button(description='Regenerate!', style=ButtonStyle()), Figure(axes=[Axis(label…

# Brush selection: Select an area in a scatter plot

In [32]:
# Brush, along x

# Scale
x_sc = bqplot.LinearScale()
y_sc = bqplot.LinearScale()

# log10 is only defined for positive values. Zero or negative durations are
# masked to NaN first: log10(0) would give -inf, which np.nanmin does not
# skip, and the colour scale would then start at -inf.
clr = np.log10(df['duration'].where(df['duration'] > 0))
c_sc = bqplot.ColorScale(scheme='Oranges',
                         min=np.nanmin(clr),
                         max=np.nanmax(clr))

# Axis
x_ax = bqplot.Axis(scale=x_sc, label='Year Sighted')
y_ax = bqplot.Axis(scale=y_sc, label='Year Reported',
                   orientation='vertical')

c_ax = bqplot.ColorAxis(scale=c_sc, side='right')

# Mark
scatter = bqplot.Scatter(x=df['date_sighted'].dt.year,
                         y=df['date_reported'].dt.year,
                         scales={'x': x_sc, 'y': y_sc,
                                 'color': c_sc},
                         color=clr)

# Interaction: Brush (selects an interval on the x scale)
my_brush = bqplot.interacts.BrushIntervalSelector(scale=x_sc,
                                                  marks=[scatter])

# Labels that report the current selection above the figure
selected_range = ipywidgets.HTML()
num_of_points_selected = ipywidgets.HTML()

def select_func(change):
    """Observer callback: refresh the two labels from the current selection.

    Parameters
    ----------
    change
        The traitlets change notification (unused; the current values are
        read directly from ``my_brush`` and ``scatter``).
    """
    my_brush_selected = my_brush.selected
    scatter_selected = scatter.selected

    # scatter.selected may be None (nothing selected), and len(None) would
    # raise a TypeError inside the callback.
    n_selected = 0 if scatter_selected is None else len(scatter_selected)

    selected_range.value = 'selected range: {}'.format(my_brush_selected)
    num_of_points_selected.value = 'number of selected points: {}'.format(n_selected)


# The brush interval and the mark's selected points are separate traits, so
# both are observed to keep the point count from going stale.
my_brush.observe(select_func, 'selected')
scatter.observe(select_func, 'selected')

# fig
scatter_fig = bqplot.Figure(marks=[scatter], axes=[x_ax, y_ax, c_ax],
                            interaction=my_brush)
scatter_fig_annotated = ipywidgets.VBox([selected_range,
                                         num_of_points_selected,
                                         scatter_fig])
scatter_fig_annotated

VBox(children=(HTML(value=''), HTML(value=''), Figure(axes=[Axis(label='Year Sighted', scale=LinearScale()), A…

In [33]:
# Brush, along y

# Scale
x_sc = bqplot.LinearScale()
y_sc = bqplot.LinearScale()

# log10 is only defined for positive values. Zero or negative durations are
# masked to NaN first: log10(0) would give -inf, which np.nanmin does not
# skip, and the colour scale would then start at -inf.
clr = np.log10(df['duration'].where(df['duration'] > 0))
c_sc = bqplot.ColorScale(scheme='Oranges',
                         min=np.nanmin(clr),
                         max=np.nanmax(clr))

# Axis
x_ax = bqplot.Axis(scale=x_sc, label='Year Sighted')
y_ax = bqplot.Axis(scale=y_sc, label='Year Reported',
                   orientation='vertical')

c_ax = bqplot.ColorAxis(scale=c_sc, side='right')

# Mark
scatter = bqplot.Scatter(x=df['date_sighted'].dt.year,
                         y=df['date_reported'].dt.year,
                         scales={'x': x_sc, 'y': y_sc,
                                 'color': c_sc},
                         color=clr)

# Interaction: Brush (selects an interval on the y scale, hence the
# vertical orientation)
my_brush = bqplot.interacts.BrushIntervalSelector(scale=y_sc,
                                                  marks=[scatter],
                                                  orientation='vertical')

# Labels that report the current selection above the figure
selected_range = ipywidgets.HTML()
num_of_points_selected = ipywidgets.HTML()

def select_func(change):
    """Observer callback: refresh the two labels from the current selection.

    Parameters
    ----------
    change
        The traitlets change notification (unused; the current values are
        read directly from ``my_brush`` and ``scatter``).
    """
    my_brush_selected = my_brush.selected
    scatter_selected = scatter.selected

    # scatter.selected may be None (nothing selected), and len(None) would
    # raise a TypeError inside the callback.
    n_selected = 0 if scatter_selected is None else len(scatter_selected)

    selected_range.value = 'selected range: {}'.format(my_brush_selected)
    num_of_points_selected.value = 'number of selected points: {}'.format(n_selected)


# The brush interval and the mark's selected points are separate traits, so
# both are observed to keep the point count from going stale.
my_brush.observe(select_func, 'selected')
scatter.observe(select_func, 'selected')

# fig
scatter_fig = bqplot.Figure(marks=[scatter], axes=[x_ax, y_ax, c_ax],
                            interaction=my_brush)
scatter_fig_annotated = ipywidgets.VBox([selected_range,
                                         num_of_points_selected,
                                         scatter_fig])
scatter_fig_annotated

VBox(children=(HTML(value=''), HTML(value=''), Figure(axes=[Axis(label='Year Sighted', scale=LinearScale()), A…