In [1]:
from bokeh.plotting import figure, show, output_notebook, output_file
from bokeh.models import HoverTool, ColumnDataSource, NumeralTickFormatter

# Configure Bokeh so that subsequent show() calls render inside the notebook.
output_notebook()

In [2]:
import pandas as pd 
import logging
# Install the default root-logger handler so library log messages are shown.
logging.basicConfig()

In [3]:
import numpy as np

In [4]:
# Per-country metadata table shipped alongside the fertility-rate dataset.
# Its 'Region' column is what later cells use to group and colour countries.
COUNTRY_METADATA_CSV = './BR/Metadata_Country_API_SP.DYN.TFRT.IN_DS2_en_csv_v2_10224767.csv'
country_info = pd.read_csv(COUNTRY_METADATA_CSV)

In [5]:
# Replace every missing value with the literal label 'Region'.
# Note that this is applied to ALL columns of the metadata table, not only to
# the 'Region' column; rows without a region therefore end up in a group that
# is itself called 'Region'.
# NOTE(review): presumably those rows are aggregate entries rather than
# individual countries - confirm against the metadata file.
country_info = country_info.replace(to_replace=np.nan, value='Region')

In [6]:
# Load the two indicator tables; later cells read their 'Country Code',
# 'Country Name' and '2016' columns.
# NOTE(review): only the GDP file is read with skiprows=4. If the birth-rate
# CSV still carries the same leading preamble lines it needs skiprows=4 as
# well - confirm against the file on disk before changing either call.
BIRTH_RATE_CSV = './BR/API_SP.DYN.TFRT.IN_DS2_en_csv_v2_10224767.csv'
GDP_PER_CAPITA_CSV = './GDP/API_NY.GDP.PCAP.PP.CD_DS2_en_csv_v2_10224953.csv'

birth_rates = pd.read_csv(BIRTH_RATE_CSV)
gdp_per_cap = pd.read_csv(GDP_PER_CAPITA_CSV, skiprows=4)

In [7]:
# Drop the row whose country code is 'INX' from both indicator tables.
# NOTE(review): presumably 'INX' is an entry with no counterpart in the
# country metadata table - verify; the reason is not visible in this notebook.
birth_rates = birth_rates.loc[birth_rates['Country Code'].ne('INX')]
gdp_per_cap = gdp_per_cap.loc[gdp_per_cap['Country Code'].ne('INX')]

In [8]:
# Build the column dictionary that feeds the Bokeh ColumnDataSource.
#
# The three tables are joined on 'Country Code' rather than being stitched
# together by row position: they are loaded from separate files (and only two
# of them are filtered), so positional alignment would silently attach the
# wrong region / birth rate to a country if their row order or row count ever
# differed. An inner join keeps only countries present in all three tables and
# preserves the row order of the GDP table.
# NOTE(review): assumes the metadata table exposes a 'Country Code' column
# like the two indicator tables do - confirm.
_gdp_2016 = gdp_per_cap[['Country Code', 'Country Name', '2016']].rename(
    columns={'2016': 'GDP per capita'}
)
_birth_2016 = birth_rates[['Country Code', '2016']].rename(
    columns={'2016': 'Birth Rate'}
)
_regions = country_info[['Country Code', 'Region']]

# validate='one_to_one' makes a duplicated country code fail loudly instead of
# silently multiplying rows.
_merged = (
    _gdp_2016
    .merge(_birth_2016, on='Country Code', how='inner', validate='one_to_one')
    .merge(_regions, on='Country Code', how='inner', validate='one_to_one')
)

data_source = {
    'Country Name': _merged['Country Name'].values,
    'GDP per capita': _merged['GDP per capita'].values,
    'Birth Rate': _merged['Birth Rate'].values,
    # Kept as a pandas Series, as before, so downstream cells see the same type.
    'Region': _merged['Region'],
}

In [9]:
from bokeh.palettes import Accent8 

# region_uni   : sorted array of the distinct region labels.
# region_index : for every row, the position of its region within region_uni.
# np.unique computes both in one pass; this replaces rebuilding
# list(region_uni) and linearly searching it once per row.
# .tolist() keeps region_index a plain list of Python ints.
region_uni, _region_inverse = np.unique(data_source['Region'], return_inverse=True)
region_index = _region_inverse.tolist()

In [11]:
# Translate each row's region index into a colour from the Accent8 palette.
# NOTE(review): indexing Accent8 directly assumes there are no more distinct
# regions than palette entries; an extra region would raise IndexError.
data_source['region_color'] = [
    Accent8[palette_idx] for palette_idx in region_index
]

In [12]:
# Scatter plot of GDP per capita (x) against birth rate (y), one circle per
# row of data_source, coloured and legend-grouped by region.
TITLE = 'GDP per Capita v.s. Birth Rate'
TOOLS = "hover,pan,wheel_zoom,box_zoom,reset,save"

# Also write the plot to a standalone HTML file.
# NOTE(review): output_notebook() was called earlier, so show() presumably
# renders inline as well as writing this file - confirm that both are wanted.
output_file("toolbar.html")

# Hover tooltip rows; "@{...}" references a data_source column by name
# (braces are needed because the column names contain spaces).
TOOLTIPS = [
    ("Name", "@{Country Name}"),
    ("Region", "@{Region}"),
    ("Birth Rate", "@{Birth Rate}"),
    ("GDP per capita", "@{GDP per capita}"),
]

# TOOLS was previously defined but never handed to figure(), so the toolbar
# silently fell back to Bokeh's defaults; pass it explicitly.
# NOTE(review): `plot_width` (and `legend=` below) are deprecated in newer
# Bokeh releases in favour of `width` and `legend_field`/`legend_group`; they
# are left untouched because the installed Bokeh version is not visible here.
p = figure(
    tools=TOOLS,
    toolbar_location="above",
    plot_width=1200,
    title=TITLE,
    tooltips=TOOLTIPS,
)

source = ColumnDataSource(data_source)

# `color` sets both fill and outline from the per-row 'region_color' column;
# `line_color` then overrides the outline with black.
p.circle(
    "GDP per capita",
    "Birth Rate",
    size=12,
    source=source,
    color='region_color',
    line_color="black",
    fill_alpha=0.8,
    legend='Region',
)

p.yaxis.axis_label = 'Birth Rate'
p.xaxis.axis_label = 'GDP per Capita'
p.xaxis[0].formatter = NumeralTickFormatter(format="$0.00")

# The original assigned "top_right" to a bare variable, which had no effect on
# the plot. Keep the name for any later cell that reads it, and actually apply
# it to the legend.
location = "top_right"
p.legend.location = location

show(p)