In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Preparing the data: download the CO2 table (co2.csv) from the course repository.

url_co2 = 'https://raw.githubusercontent.com/TrainingbyPackt/Interactive-Data-Visualization-with-Python/master/datasets/co2.csv'
co2 = pd.read_csv(filepath_or_buffer=url_co2)

In [3]:
# Download the gapminder table (gapminder.csv) from the same repository.
url_gm = 'https://raw.githubusercontent.com/TrainingbyPackt/Interactive-Data-Visualization-with-Python/master/datasets/gapminder.csv'
gm = pd.read_csv(filepath_or_buffer=url_gm)

In [4]:
# One row per distinct (Country, region) pair; used below to attach a region to each country.
df_gm = gm.loc[:, ['Country', 'region']].drop_duplicates()

In [5]:
# Attach the region to every CO2 row; the inner join keeps only countries present in both tables.
df_w_regions = co2.merge(right=df_gm, how='inner', left_on='country', right_on='Country')

In [6]:
# The join key from the gapminder side duplicates 'country', so remove it.
df_w_regions = df_w_regions.drop(columns='Country')

In [7]:
# Reshape from wide to long: every column other than country/region becomes
# a (variable, value) pair, then the four resulting columns are relabelled.
columns = ['country', 'region', 'year', 'co2']
new_co2 = (
    df_w_regions
    .melt(id_vars=['country', 'region'])
    .set_axis(columns, axis='columns')
)

In [8]:
# Keep only observations after 1963, ordered by country and then year.
# The 'year' labels coming out of the melt are cast to int64 once, up front,
# so the filter, the sort and the stored column all use the same numeric
# values, and no column is assigned on a filtered frame afterwards
# (which is what can trigger pandas' chained-assignment warning).
df_co2 = (
    new_co2
    .assign(year=lambda frame: frame['year'].astype('int64'))
    .loc[lambda frame: frame['year'] > 1963]
    .sort_values(by=['country', 'year'])
)
df_co2.head()

Unnamed: 0,country,region,year,co2
28372,Afghanistan,South Asia,1964,0.0863
28545,Afghanistan,South Asia,1965,0.101
28718,Afghanistan,South Asia,1966,0.108
28891,Afghanistan,South Asia,1967,0.124
29064,Afghanistan,South Asia,1968,0.116


In [9]:
# GDP per country and year, with the key columns renamed to match df_co2.
df_gdp = gm[['Country', 'Year', 'gdp']].rename(
    columns={'Country': 'country', 'Year': 'year'}
)
df_gdp.head()

Unnamed: 0,country,year,gdp
0,Afghanistan,1964,1182.0
1,Afghanistan,1965,1182.0
2,Afghanistan,1966,1168.0
3,Afghanistan,1967,1173.0
4,Afghanistan,1968,1187.0


In [10]:
# Left-join GDP onto the CO2 rows by (country, year), then discard any row
# that has a missing value in any column.
data = (
    df_co2
    .merge(df_gdp, how='left', on=['country', 'year'])
    .dropna()
)
data.head()

Unnamed: 0,country,region,year,co2,gdp
0,Afghanistan,South Asia,1964,0.0863,1182.0
1,Afghanistan,South Asia,1965,0.101,1182.0
2,Afghanistan,South Asia,1966,0.108,1168.0
3,Afghanistan,South Asia,1967,0.124,1173.0
4,Afghanistan,South Asia,1968,0.116,1187.0


In [11]:
# Sanity check before plotting: correlation matrix between CO2 and GDP.
# A strong off-diagonal value suggests the relationship is worth visualizing.

np_co2 = data['co2'].to_numpy(copy=True)
np_gdp = data['gdp'].to_numpy(copy=True)

np.corrcoef(np_co2, np_gdp)

array([[1.        , 0.78219731],
       [0.78219731, 1.        ]])

In [12]:
# Creating a base static plot for an interactive visualization
# Importing bokeh

from bokeh.io import curdoc, output_notebook
from bokeh.layouts import column, row, widgetbox
from bokeh.models import CategoricalColorMapper, ColumnDataSource, HoverTool, Slider
from bokeh.palettes import Spectral6
from bokeh.plotting import figure, show

In [13]:
# Configure Bokeh to render subsequent show() output inline in the notebook.
output_notebook()

In [14]:
# Colour-code the points by region: distinct region names (in order of first
# appearance) are the categorical factors, coloured from the Spectral6 palette.

regions_list = data['region'].drop_duplicates().tolist()

color_mapper = CategoricalColorMapper(palette=Spectral6, factors=regions_list)

In [18]:
# Data source backing the glyphs, initially holding only the 1964 rows.
# Keys are the field names used by the glyphs/tooltips; values are the
# matching columns of `data` restricted to that year.

source = ColumnDataSource(data={
    field: data.loc[data['year'] == 1964, column_name]
    for field, column_name in (
        ('x', 'gdp'),
        ('y', 'co2'),
        ('country', 'country'),
        ('region', 'region'),
    )
})
                         

In [44]:
# Overall GDP range across all years, computed with pandas' vectorized
# reductions instead of iterating the Series through the Python builtins.
xmin, xmax = data['gdp'].min(), data['gdp'].max()

In [45]:
# Overall CO2 range across all years, computed with pandas' vectorized
# reductions instead of iterating the Series through the Python builtins.
ymin, ymax = data['co2'].min(), data['co2'].max()

In [97]:
# Create empty figure
# The canvas size is given with `width`/`height`: the older
# `plot_width`/`plot_height` spellings are legacy options that newer Bokeh
# releases no longer accept. The y axis is logarithmic and the x range is
# fixed, so the axes do not depend on which year's data is shown.

plot = figure(
    title='CO2 Emissions vs GDP in 1964',
    title_location='above',
    x_axis_label='Income Per Person',
    y_axis_label='CO2 Emissions (tons per person)',
    height=600,
    width=1000,
    y_axis_type='log',
    x_range=(-5000, 25000),
)

In [98]:
# Draw one circle per row of `source`, coloured through the region colour
# mapper, with one legend entry per distinct region value.

plot.circle(
    x='x',
    y='y',
    source=source,
    size=7,
    fill_alpha=0.8,
    color={'field': 'region', 'transform': color_mapper},
    legend_group='region',
)

In [99]:
# Anchor the region legend in the bottom-right corner of the plot.
plot.legend.location = 'bottom_right'
# Disabled experiment with interactive legend clicks.
# NOTE(review): 'show' does not look like a valid click_policy value
# (Bokeh documents 'none', 'hide' and 'mute') - confirm before re-enabling.
#plot.legend.click_policy = 'show'

In [100]:
# Render the figure as configured so far (the 1964 snapshot held in `source`).
show(plot)

In [70]:
# Toy example (independent of the CO2 data) used to sanity-check basic Bokeh usage.

# sample data: one shared x axis and three y series
x = [1, 2, 3, 4, 5]
y1, y2, y3 = [6, 7, 2, 4, 5], [2, 3, 4, 5, 6], [4, 5, 5, 7, 2]

# blank figure with a title and labelled axes
p = figure(
    x_axis_label="x",
    y_axis_label="y",
    title="Multiple glyphs example",
)

# two line renderers and one scatter renderer, each with its own legend entry
p.line(x, y1, line_width=3, color="#004488", legend_label="Temp.")
p.line(x, y2, line_width=3, color="#906c18", legend_label="Rate")
p.scatter(x, y3, size=16, color="#bb5566", legend_label="Objects")

# display the toy figure
show(p)

In [101]:
# Year slider spanning every year present in `data`, starting at the earliest
# year and moving in one-year steps.

slider = Slider(
    title="Year",
    start=int(data['year'].min()),
    end=int(data['year'].max()),
    value=int(data['year'].min()),
    step=1,
)

In [102]:
def update_plot(attr, old, new):
    """Refresh the plot for the year currently selected on the slider.

    The (attr, old, new) parameters are the arguments Bokeh passes to an
    ``on_change`` callback; none of them is read here, the year is taken
    from ``slider.value`` instead.

    Side effects: replaces ``source.data`` with the rows of ``data`` for the
    selected year and rewrites the plot title to mention that year.
    """
    selected_year = slider.value
    # Filter the frame once and pull each column from the filtered rows.
    year_rows = data[data['year'] == selected_year]
    source.data = {
        'x': year_rows.gdp,
        'y': year_rows.co2,
        'country': year_rows.country,
        'region': year_rows.region,
    }
    plot.title.text = 'CO2 Emissions vs GDP in %d' % selected_year

In [103]:
# Call update_plot whenever the slider's 'value' property changes.
# NOTE(review): Python-side on_change callbacks are executed by a running
# Bokeh server; a plain show(plot) in the notebook will presumably not
# trigger update_plot - confirm how this notebook is meant to be served.
slider.on_change('value', update_plot)

In [104]:
# Place the slider to the left of the plot. `column` is used to hold the
# widget because `widgetbox` is a deprecated helper that newer Bokeh
# releases no longer provide.
layout = row(column(slider), plot)



In [105]:
# Register the slider+plot layout as a root of the current Bokeh document.
# NOTE(review): presumably intended for running under `bokeh serve`, where the
# current document is what gets served - confirm.
curdoc().add_root(layout)

In [106]:
# Hover tool: each tooltip row pairs a label with a field of the data source
# ('@country', '@x', '@y' refer to the keys of `source`).

hover = HoverTool(
    tooltips=[
        ('Country', '@country'),
        ('GDP', '@x'),
        ('CO2 Emission', '@y'),
    ],
)

In [107]:
# Attach the hover tool to the figure.
# NOTE(review): in cell order this runs after show(plot), so the output
# rendered earlier presumably lacks the hover tool until the plot is shown again.
plot.add_tools(hover)