In [1]:
from bokeh.plotting import figure
from bokeh.io import output_notebook,show
import pandas as pd
from bokeh.models import ColumnDataSource, Div
from bokeh.io import curdoc
import os

# Configure Bokeh to render its output inline in the notebook
output_notebook()

<b>Dataset location:</b> 
https://www.kaggle.com/fernandol/countries-of-the-world

os.getcwd() gets the current working directory

In [2]:
# Build the dataset path relative to the current working directory.
dataset_path = os.path.join(os.getcwd(), '../datasets/countries_of_the_world.csv')

# decimal="," makes pandas read ',' as the decimal mark when parsing numbers.
countries = pd.read_csv(dataset_path, decimal=",")

# Preview the first few rows.
countries.head()

Unnamed: 0,Country,Region,Population,Area (sq. mi.),Pop. Density (per sq. mi.),Coastline (coast/area ratio),Net migration,Infant mortality (per 1000 births),GDP ($ per capita),Literacy (%),Phones (per 1000),Arable (%),Crops (%),Other (%),Climate,Birthrate,Deathrate,Agriculture,Industry,Service
0,Afghanistan,ASIA (EX. NEAR EAST),31056997,647500,48.0,0.0,23.06,163.07,700.0,36.0,3.2,12.13,0.22,87.65,1.0,46.6,20.34,0.38,0.24,0.38
1,Albania,EASTERN EUROPE,3581655,28748,124.6,1.26,-4.93,21.52,4500.0,86.5,71.2,21.09,4.42,74.49,3.0,15.11,5.22,0.232,0.188,0.579
2,Algeria,NORTHERN AFRICA,32930091,2381740,13.8,0.04,-0.39,31.0,6000.0,70.0,78.1,3.22,0.25,96.53,1.0,17.14,4.61,0.101,0.6,0.298
3,American Samoa,OCEANIA,57794,199,290.4,58.29,-20.71,9.27,8000.0,97.0,259.5,10.0,15.0,75.0,2.0,22.46,3.27,,,
4,Andorra,WESTERN EUROPE,71201,468,152.1,0.0,6.6,4.05,19000.0,100.0,497.2,2.22,0.0,97.78,3.0,8.71,6.25,,,


In [3]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns
countries.describe()

Unnamed: 0,Population,Area (sq. mi.),Pop. Density (per sq. mi.),Coastline (coast/area ratio),Net migration,Infant mortality (per 1000 births),GDP ($ per capita),Literacy (%),Phones (per 1000),Arable (%),Crops (%),Other (%),Climate,Birthrate,Deathrate,Agriculture,Industry,Service
count,227.0,227.0,227.0,227.0,224.0,224.0,226.0,209.0,223.0,225.0,225.0,225.0,205.0,224.0,223.0,212.0,211.0,212.0
mean,28740280.0,598227.0,379.047137,21.16533,0.038125,35.506964,9689.823009,82.838278,236.061435,13.797111,4.564222,81.638311,2.139024,22.114732,9.241345,0.150844,0.282711,0.565283
std,117891300.0,1790282.0,1660.185825,72.286863,4.889269,35.389899,10049.138513,19.722173,227.991829,13.040402,8.36147,16.140835,0.699397,11.176716,4.990026,0.146798,0.138272,0.165841
min,7026.0,2.0,0.0,0.0,-20.99,2.29,500.0,17.6,0.2,0.0,0.0,33.33,1.0,7.29,2.29,0.0,0.02,0.062
25%,437624.0,4647.5,29.15,0.1,-0.9275,8.15,1900.0,70.6,37.8,3.22,0.19,71.65,2.0,12.6725,5.91,0.03775,0.193,0.42925
50%,4786994.0,86600.0,78.8,0.73,0.0,21.0,5550.0,92.5,176.2,10.42,1.03,85.7,2.0,18.79,7.84,0.099,0.272,0.571
75%,17497770.0,441811.0,190.15,10.345,0.9975,55.705,15700.0,98.0,389.65,20.0,4.44,95.44,3.0,29.82,10.605,0.221,0.341,0.6785
max,1313974000.0,17075200.0,16271.5,870.66,23.06,191.19,55100.0,100.0,1035.6,62.11,50.68,100.0,4.0,50.73,29.74,0.769,0.906,0.954


#### Strip the white space from the Country and Region columns
We will be filtering on the Region column which is simplified when we trim out the white space

In [4]:
# Trim leading/trailing whitespace from both text columns so that equality
# filters on them match cleanly.
for text_column in ('Country', 'Region'):
    countries[text_column] = countries[text_column].str.strip()

#### Create a list of regions
This will be used to filter our plot to specific regions

In [5]:
# Distinct regions found in the data, followed by an 'ALL' entry that stands
# for "no region filter".
regions = [*countries['Region'].unique(), 'ALL']
regions

['ASIA (EX. NEAR EAST)',
 'EASTERN EUROPE',
 'NORTHERN AFRICA',
 'OCEANIA',
 'WESTERN EUROPE',
 'SUB-SAHARAN AFRICA',
 'LATIN AMER. & CARIB',
 'C.W. OF IND. STATES',
 'NEAR EAST',
 'NORTHERN AMERICA',
 'BALTICS',
 'ALL']

#### Create a ColumnDataSource to store the relevant data
This will be populated later on

In [6]:
# Every column starts out empty; update() replaces the data later on.
source = ColumnDataSource(
    data={column: [] for column in ('x', 'y', 'country', 'gdp', 'birthrate')}
)

#### Create a tool tips list
This list of data will be used to display information about each point in the plot. The data displayed includes:
* The country name
* Its GDP per capita
* The birthrate (number of children born per 1000 people per year)

In [7]:
# Display labels and the '@'-prefixed data source columns they show,
# paired up position by position.
tooltip_labels = ('Country', 'GDP per capita', 'Birthrate')
tooltip_fields = ('@country', '@gdp', '@birthrate')
tooltips = list(zip(tooltip_labels, tooltip_fields))

#### Define the figure
We will be plotting a graph of birthrate vs per capita GDP, so we set the X and Y ranges accordingly. We disable the Bokeh toolbar. We add our tooltips to the plot. 

In [8]:
# GDP per capita goes on the X axis and birthrate on the Y axis, both with
# fixed ranges. The toolbar is hidden and the hover tooltips are attached.
# NOTE(review): x_range stops at 40000 while the describe() output above shows
# a maximum GDP per capita of 55100, so the richest countries start outside
# the visible range - confirm this is intended.
figure_options = dict(
    plot_height=300,
    plot_width=800,
    x_range=(0, 40000),
    y_range=(0, 55),
    title='Birthrate vs GDP per capita',
    toolbar_location=None,
    tooltips=tooltips,
)
p = figure(**figure_options)

#### Define the markers for the plot
We will use circles to denote each country in the plot

In [9]:
# Draw one borderless circle per row of the data source, positioned by its
# 'x' and 'y' columns.
p.circle('x', 'y', source=source, size=7, line_color=None)

#### Function to filter the data
This will filter the data according to the inputs specified by the user. The details include:
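* filtering the plot according to the region specified
* keeping only the countries whose GDP per capita is at least the minimum value chosen with the slider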

In [10]:
def select_countries(region=None, min_gdp=None, data=None):
    """Return the countries that match the region and minimum-GDP filters.

    When called without arguments (as update() does), the filter values are
    read from the ``region_selector`` and ``gdp_slider`` widgets and applied
    to the module-level ``countries`` frame. Passing explicit arguments lets
    the same filter be reused or checked without any widgets.

    Parameters
    ----------
    region : str, optional
        Region to keep, or 'ALL' to keep every region. Defaults to the
        current value of ``region_selector``.
    min_gdp : float, optional
        Lowest 'GDP ($ per capita)' a country may have and still be kept.
        Defaults to the current value of ``gdp_slider``.
    data : pandas.DataFrame, optional
        Frame to filter; it needs 'Region' and 'GDP ($ per capita)' columns.
        Defaults to ``countries``.

    Returns
    -------
    pandas.DataFrame
        The matching rows. The input frame is not modified.
    """
    # Fall back to the widget state / global frame only for omitted arguments.
    if region is None:
        region = region_selector.value
    if min_gdp is None:
        min_gdp = gdp_slider.value
    selected = countries if data is None else data

    if region != 'ALL':
        selected = selected[selected['Region'] == region]

    # Rows with a missing GDP compare False here, so they are dropped as well.
    selected = selected[selected['GDP ($ per capita)'] >= min_gdp]

    print('Region = ', region)
    print('Min GDP = ', min_gdp)

    return selected

#### Define an update function
This will update a plot when the user has supplied/changed some input. This will call the select_countries function in order to get a filtered dataset and will populate the source data (the ColumnDataSource declared earlier) with the values from the filtered data. 

In [11]:
def update():
    """Refresh the plot's data source from the currently selected countries.

    Fetches the filtered frame from select_countries(), prints the names of
    the countries it contains, and replaces ``source.data`` with the columns
    used by the glyphs (x, y) and by the tooltips (country, gdp, birthrate).
    """
    filtered = select_countries()
    print(list(filtered['Country']))

    # GDP and birthrate each feed both an axis column and a tooltip column.
    gdp_per_capita = filtered['GDP ($ per capita)']
    birth_rate = filtered['Birthrate']

    source.data = dict(
        x=gdp_per_capita,
        y=birth_rate,
        country=filtered["Country"],
        gdp=gdp_per_capita,
        birthrate=birth_rate,
    )


#### Define the user input controls for our plot
The plot will contain a slider (to control the GDP per capita) and a Select (drop-down list) widget to pick a region for which data is displayed

In [12]:
from bokeh.models.widgets import Slider, Select

#### Load the HTML home page
Our app displays an HTML page with our interactive plot embedded within. Outside of the plot, we can define some HTML elements which will form part of our page by using the Div object. This will take in a string in HTML format and add it as a div to our app's home page. 

Here, we have defined all the contents outside of our plot in the homepage.html file. This will become a div in the homepage for our app. 

In [13]:
# Read the page text inside a `with` block so the file handle is closed as
# soon as the content has been read, rather than being left open.
with open(os.path.join(os.getcwd(), 'homepage.html')) as homepage_file:
    homepage = Div(text=homepage_file.read(), width=800)

#### Define the widget to select a region
This is a drop-down list containing all the regions in our data. Selecting a region should update the plot to only display data of the countries within the selected region. 

The Select widget has the following properties:
* <b>title</b> sets the label for the control
* the <b>options</b> represent a list of all the values which a user can select from the drop-down
* <b>value</b> is the initial value which is set

In [14]:
# Drop-down listing every region in alphabetical order, starting on 'ALL'.
region_options = sorted(regions)
region_selector = Select(title='Region', options=region_options, value='ALL')

#### Set the behaviour for when the widget is updated
There is an on_change method for this widget (some have an on_click method as well). Here we specify that when a user selects a value from the drop-down, the new selection is stored in the property 'value' (which we have initialized to 'ALL' in the cell above).

We then specify a callback function for our widget. We need to pass 3 arguments to this callback function - attr, old and new. These arguments need not be used in our example, so we only perform a call to the update() function in order to update the data source with the values we wish to represent in our plot

In [15]:
def _on_region_change(attr, old, new):
    """Refresh the plot when the selected region changes; the arguments are unused."""
    update()


region_selector.on_change('value', _on_region_change)

In [16]:
# Slider that sets the minimum GDP per capita a country needs in order to be
# included in the plot.
gdp_slider = Slider(title='Minimum GDP per capita',
                    start=0, end=56000,
                    value=1, step=0.1)

In [17]:
def _on_min_gdp_change(attr, old, new):
    """Refresh the plot when the slider value changes; the arguments are unused."""
    update()


gdp_slider.on_change('value', _on_min_gdp_change)

#### Import the layout and widgetbox functions
These will be used to compile the components we have created into the layout of our page

In [18]:
from bokeh.layouts import layout, widgetbox

#### Define a widgetbox to include all the user inputs

In [19]:
# Group the two input widgets into a single box so they are laid out together.
input_widgets = (region_selector, gdp_slider)
inputs = widgetbox(*input_widgets)

#### Define the layout of our page
The homepage text is in the first row, the widgets are in the second row and the plot is in the third row

In [20]:
# Stack the page as three single-item rows: intro text, input widgets, plot.
plot_layout = layout([[component] for component in (homepage, inputs, p)])

#### Initialize the page by calling update
This will populate the column data source with the details for all countries

In [21]:
# Fill the data source using the widgets' initial values
update()

Region =  ALL
Min GDP =  1
['Afghanistan', 'Albania', 'Algeria', 'American Samoa', 'Andorra', 'Angola', 'Anguilla', 'Antigua & Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas, The', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan', 'Bolivia', 'Bosnia & Herzegovina', 'Botswana', 'Brazil', 'British Virgin Is.', 'Brunei', 'Bulgaria', 'Burkina Faso', 'Burma', 'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde', 'Cayman Islands', 'Central African Rep.', 'Chad', 'Chile', 'China', 'Colombia', 'Comoros', 'Congo, Dem. Rep.', 'Congo, Repub. of the', 'Cook Islands', 'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czech Republic', 'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'East Timor', 'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia', 'Ethiopia', 'Faroe Islands', 'Fiji', 'Finland', 'France', 'French Guiana', 'French Polynesia', 'Gabon', 'Gambia, T

#### Display the plot within the notebook
This will throw a warning as this app needs to be served with Bokeh Server. Alternatively, we can write our callbacks in JavaScript rather than Python. 

However, the plot will be displayed with the initial data here. It is the callbacks for our widgets which will not be triggered, as these need to be written in JavaScript for them to work in a Notebook.

In [22]:
# Render the layout inline as standalone output; the Python widget callbacks will not run here
show(plot_layout)

You are generating standalone HTML/JS output, but trying to use real Python
callbacks (i.e. with on_change or on_event). This combination cannot work.

Only JavaScript callbacks may be used with standalone output. For more
information on JavaScript callbacks with Bokeh, see:

    http://bokeh.pydata.org/en/latest/docs/user_guide/interaction/callbacks.html

Alternatively, to use real Python callbacks, a Bokeh server application may
be used. For more information on building and running Bokeh applications, see:

    http://bokeh.pydata.org/en/latest/docs/user_guide/server.html



#### Add the layout to the current document
A Bokeh plot comprises multiple objects (glyphs, data sources etc.) which are called Bokeh models. A collection of models makes up a Bokeh Document. The current working document can be accessed by a call to bokeh.io.curdoc().

This is required to display the plot on our web page.

In [23]:
# Register the layout as a root of the current document and set the page title.
document = curdoc()
document.add_root(plot_layout)
document.title = 'Birthrate vs Per Capita GDP'

#### NOTE 
os.getcwd() returns the current working directory and is used twice in this piece of code. Its value depends on where the Bokeh server is started from, so we need to ensure the server is run from the same directory as the main.py Python file so that the dataset and homepage.html are properly picked up