In [96]:
import numpy as np
from pandas import DataFrame as df

def process_data_bubble():
    """Load and align the CO2, GDP, population and region tables for the bubble chart.

    Reads ``gdp1.csv``, ``co2_person.csv``, ``population1.csv`` and
    ``gapminder_regions.csv`` from the current working directory, using the
    first column of every file as the row index.

    The CO2 table decides which columns are kept: the header labels at
    positions 20-59.  Those same labels are then selected from the GDP and
    population tables.  For each of the three tables the rows kept run from its
    first index label through the label at position 231 (label slice, both
    ends included).  Kept column labels are converted to ``int``.

    Returns
    -------
    tuple
        ``(co2, gdp, population_size, regions, years, regions_list)`` where

        * ``co2`` is the windowed CO2 table with integer column labels,
        * ``gdp`` is the windowed GDP table divided by 800,
        * ``population_size`` is ``sqrt(population / pi) / 100`` with every
          value below 3 (and every missing value) replaced by 3,
        * ``regions`` is the regions table,
        * ``years`` is the list of kept column labels as ``int``,
        * ``regions_list`` is the list of unique values of ``regions.Group``.

    Raises
    ------
    ValueError
        If one of the kept column labels cannot be converted to ``int``.
    IndexError
        If one of the three data tables has fewer than 232 rows.
    """
    # Local import: the defining cell only imports the ``DataFrame`` alias.
    import pandas as pd

    first_col, last_col = 20, 60  # positional window into the CO2 header
    last_row = 231                # position of the last row label kept
    gdp_divisor = 800             # rescales GDP for the x axis
    scale_factor = 100            # shrinks bubble radii
    min_size = 3                  # smallest bubble radius drawn

    def _load(path):
        # ``DataFrame.from_csv`` no longer exists in pandas >= 1.0.  The row
        # labels are looked up by name elsewhere in the notebook, so the
        # first column is read as a plain (non-date) index.
        return pd.read_csv(path, index_col=0)

    gdp = _load('gdp1.csv')
    co2 = _load('co2_person.csv')
    population = _load('population1.csv')
    regions = _load('gapminder_regions.csv')

    # The window is defined on the CO2 header and reused for the other tables.
    columns_new = list(co2.columns)[first_col:last_col]
    years = [int(label) for label in columns_new]
    rename_dict = dict(zip(columns_new, years))

    def _window(frame):
        # Label-based slice on the frame's own index, then integer year labels.
        labels = list(frame.index)
        return frame.loc[labels[0]:labels[last_row], columns_new].rename(columns=rename_dict)

    co2 = _window(co2)
    gdp = _window(gdp) / gdp_divisor
    population = _window(population)

    # Only has an effect if the regions table carries any of the kept labels
    # as column names; kept so the returned frame matches earlier behaviour.
    regions = regions.rename(columns=rename_dict)
    regions_list = list(regions.Group.unique())

    # Turn population into bubble sizes. Use min_size and scale_factor to tweak.
    population_size = np.sqrt(population / np.pi) / scale_factor
    population_size = population_size.where(population_size >= min_size).fillna(min_size)

    return co2, gdp, population_size, regions, years, regions_list

In [97]:
# -*- coding: utf-8 -*-
import pandas as pd

from bokeh.core.properties import field
from ipywidgets import interact
from bokeh.io import push_notebook, output_notebook
from bokeh.layouts import layout
from bokeh.models import (
    ColumnDataSource, HoverTool, SingleIntervalTicker, Slider, Button, Label,
    CategoricalColorMapper, CustomJS,
)
from bokeh.palettes import Spectral6
from bokeh.plotting import figure, show
from bokeh.layouts import column, widgetbox
output_notebook()

In [103]:
co2_df, gdp_df, population_size, regions_df, years, regions_list = process_data_bubble()

# NOTE(review): pd.Panel was removed in pandas 0.25, so this cell needs an
# older pandas to run.
p = pd.Panel({'co2': co2_df, 'gdp': gdp_df, 'population': population_size})

# One column-oriented table per year, keyed by year, ready for the data source.
data = {}

# rename() returns a new Series, leaving the regions table itself untouched.
region_name = regions_df.Group.rename('region')

for year in years:
    # Deliberately not called `df`: that global is the DataFrame alias the
    # loader functions use, and rebinding it here would clobber it.
    year_frame = pd.concat([p.loc[:, :, year], region_name], axis=1).reset_index()
    data[year] = year_frame.to_dict('series')

source = ColumnDataSource(data=data[years[0]])

plot = figure(x_range=(-0.2, 40), y_range=(0, 30), title='CO2 per capita vs. GDP per person by PPP', plot_height=600, plot_width= 750)
plot.xaxis.ticker = SingleIntervalTicker(interval=1)
plot.xaxis.axis_label = "GDP per person by PPP*"
plot.yaxis.ticker = SingleIntervalTicker(interval=2)
plot.yaxis.axis_label = "CO2 per capita"

# Large, pale year label drawn behind the bubbles.
label = Label(x=1.1, y=18, text=str(years[0]), text_font_size='70pt', text_color='#eeeeee')
plot.add_layout(label)

# Bubbles are coloured by region; the same field drives the legend.
color_mapper = CategoricalColorMapper(palette=Spectral6, factors=regions_list)
r = plot.circle(
    x='gdp',
    y='co2',
    size='population',
    source=source,
    fill_color={'field': 'region', 'transform': color_mapper},
    fill_alpha=0.8,
    line_color='#7c7e71',
    line_width=0.5,
    line_alpha=0.5,
    legend=field('region'),
)
# "@index" is the column produced by reset_index() above.
plot.add_tools(HoverTool(tooltips="@index", show_arrow=True, point_policy='follow_mouse'))

# Snapshot the year list and the per-year tables at definition time so that
# rebinding the globals `years` or `data` elsewhere in the notebook cannot
# break the slider callback.
_bubble_years = list(years)
_bubble_data = dict(data)


def update_notebook(y=0):
    """Show the bubble chart for the year at slider position ``y``.

    ``y`` is a position into the list of years, not a year.  Positions outside
    the list are clamped to the first or last year.  The glyph's data source
    and the background year label are updated, then the change is pushed to
    the displayed plot.
    """
    position = max(0, min(int(y), len(_bubble_years) - 1))
    year = _bubble_years[position]
    # Assign the plain dict directly; no intermediate ColumnDataSource needed.
    r.data_source.data = _bubble_data[year]
    label.text = str(year)
    push_notebook()


In [104]:
# Render the bubble chart. notebook_handle=True asks Bokeh for a notebook
# handle, which the push_notebook() call in update_notebook relies on to
# refresh this output in place.
show(plot, notebook_handle = True)

In [100]:
# The (min, max) slider range is inclusive at both ends, so the last valid
# position is len(years) - 1; len(years) would index past the end of `years`.
interact(update_notebook, y=(0, len(years) - 1))

In [105]:
import numpy as np
from pandas import DataFrame as df

def process_data_line():
    """Load per-country CO2 series for the line chart.

    Reads ``co2_person.csv`` from the current working directory with its first
    column as the row index.  The columns kept are the first ``k - 1`` header
    labels, where ``k`` is the number of labels that convert to ``int``.
    The rows kept run from the first index label through the label at
    position 231 (label slice, both ends included).

    NOTE(review): with an all-numeric header this drops the last column;
    confirm that is intended rather than an off-by-one.

    Returns
    -------
    tuple
        ``(china_df, us_df, mx_df, years)``: the rows labelled ``'China'``,
        ``'United States'`` and ``'Mexico'`` as plain lists of values, and the
        kept column labels as a list of ``int``.

    Raises
    ------
    ValueError
        If one of the kept column labels cannot be converted to ``int``.
    IndexError
        If the table has fewer than 232 rows.
    KeyError
        If one of the three countries is not among the kept rows.
    """
    # Local import: the defining cell only imports the ``DataFrame`` alias.
    import pandas as pd

    last_row = 231  # position of the last row label kept

    def _is_int_label(value):
        try:
            int(value)
        except (TypeError, ValueError):
            return False
        return True

    # ``DataFrame.from_csv`` no longer exists in pandas >= 1.0.  Rows are
    # looked up by country name below, so the index is read as plain labels.
    co2 = pd.read_csv('co2_person.csv', index_col=0)

    columns = list(co2.columns)
    end_index = sum(1 for label in columns if _is_int_label(label)) - 1
    columns_new = columns[0:end_index]

    row_labels = list(co2.index)
    co2 = co2.loc[row_labels[0]:row_labels[last_row], columns_new]

    # Make the column names ints not strings for handling
    years = [int(label) for label in columns_new]
    co2 = co2.rename(columns=dict(zip(columns_new, years)))

    china_df = co2.loc['China', :].values.tolist()
    us_df = co2.loc['United States', :].values.tolist()
    mx_df = co2.loc['Mexico', :].values.tolist()

    return china_df, us_df, mx_df, years


In [106]:
import numpy as np
from bokeh.plotting import figure, show
from bokeh.models import SingleIntervalTicker
from bokeh.io import output_notebook
output_notebook()

# Use a dedicated name for the x values: the bubble-chart cells rely on the
# global `years`, so it must not be rebound here.
china_df, us_df, mx_df, line_years = process_data_line()

# The chart shows CO2 per capita against the year, one line per country.
plot = figure(title='CO2 per capita over time', plot_height=600, plot_width= 750)
plot.xaxis.ticker = SingleIntervalTicker(interval=20)
plot.xaxis.axis_label = "Year"
plot.yaxis.ticker = SingleIntervalTicker(interval=2)
plot.yaxis.axis_label = "CO2 per capita"
df_group = [china_df, us_df, mx_df]
color_group = [['red', 'China'], ['blue', 'United States'], ['green', 'Mexico']]

for values, (line_color, country) in zip(df_group, color_group):
    plot.line(x = line_years, y = values, color = line_color, legend = country)

show(plot, notebook_handle = True)

In [107]:
import numpy as np
from pandas import DataFrame as df

def process_data_bar():
    """Load the CO2 and population tables for the bar chart.

    Reads ``co2_person.csv``, ``population1.csv`` and ``gapminder_regions.csv``
    from the current working directory, using the first column of every file
    as the row index.

    The CO2 header decides which columns are considered: the first ``k - 1``
    labels, where ``k`` is the number of labels that convert to ``int``.
    For both data tables the rows kept run from the table's first index label
    through the label at position 231 (label slice, both ends included).
    Column labels are converted to ``int``, and finally only the columns from
    the fifth-last through the fourth-last considered year are returned.

    NOTE(review): with an all-numeric header the "first k - 1 labels" rule
    drops the last column; confirm that is intended rather than an off-by-one.

    Returns
    -------
    tuple
        ``(co2, population, regions, regions_list)`` where ``population`` has
        been divided by 100000 and ``regions_list`` holds the unique values of
        ``regions.Group``.

    Raises
    ------
    ValueError
        If one of the considered column labels cannot be converted to ``int``.
    IndexError
        If a data table has fewer than 232 rows, or fewer than five columns
        are considered.
    """
    # Local import: the defining cell only imports the ``DataFrame`` alias.
    import pandas as pd

    last_row = 231               # position of the last row label kept
    population_divisor = 100000  # rescales head counts

    def _load(path):
        # ``DataFrame.from_csv`` no longer exists in pandas >= 1.0.  Rows are
        # matched by label across the tables, so the index is read as plain
        # labels.
        return pd.read_csv(path, index_col=0)

    def _is_int_label(value):
        try:
            int(value)
        except (TypeError, ValueError):
            return False
        return True

    co2 = _load('co2_person.csv')
    population = _load('population1.csv')
    regions = _load('gapminder_regions.csv')

    columns = list(co2.columns)
    end_index = sum(1 for label in columns if _is_int_label(label)) - 1
    columns_new = columns[0:end_index]
    years = [int(label) for label in columns_new]
    rename_dict = dict(zip(columns_new, years))

    def _window(frame):
        # Label-based slice on the frame's own index, then integer year labels.
        labels = list(frame.index)
        return frame.loc[labels[0]:labels[last_row], columns_new].rename(columns=rename_dict)

    co2 = _window(co2)
    population = _window(population)

    # Only has an effect if the regions table carries any of the considered
    # labels as column names; kept so the returned frame matches earlier
    # behaviour.
    regions = regions.rename(columns=rename_dict)
    regions_list = list(regions.Group.unique())

    # The columns now carry integer labels, so the slice bounds must be the
    # integer years rather than the original string labels.
    first_year, last_year = years[-5], years[-4]
    co2 = co2.loc[:, first_year:last_year]
    population = population.loc[:, first_year:last_year] / population_divisor

    return co2, population, regions, regions_list

In [114]:
import numpy as np
import pandas as pd
from bokeh.charts import Bar, show
from bokeh.layouts import row
from bokeh.io import output_notebook
output_notebook()

co2_per, population, regions, regions_list = process_data_bar()

# Total per region of (CO2 per person) x (scaled population), using the first
# column of each table and truncating every country's product to an int.
co2_dict = dict()
for country in co2_per.index[:250]:
    # Only countries present in all three tables can be attributed to a region.
    if country not in population.index or country not in regions.index:
        continue
    try:
        emitted = int(
            co2_per.loc[country, :].values.tolist()[0]
            * population.loc[country, :].values.tolist()[0]
        )
        area = regions.Group[country]
        co2_dict[area] = co2_dict.get(area, 0) + emitted
    except (TypeError, ValueError, OverflowError):
        # Missing (NaN), infinite or otherwise non-scalar values cannot be
        # turned into an int; such countries are left out of the totals.
        continue

co2_keys = list(co2_dict.keys())
co2_values = list(co2_dict.values())

# Named bar_data rather than `data`: the bubble chart keeps its per-year
# tables in the global `data`, which must not be overwritten here.
bar_data = {
    'area' : co2_keys,
    'CO2 emission' : co2_values
}

bar2 = Bar(bar_data, values='CO2 emission', label=['area'],
           agg='sum', title="CO2 emissions by region", plot_width=400, legend='top_right')

show(bar2, notebook_handle = True)

E-1010 (CDSVIEW_SOURCE_DOESNT_MATCH): CDSView used by Glyph renderer must have a source that matches the Glyph renderer's data source: GlyphRenderer(id='3f454fde-3c1b-4552-bb2a-d517e748a66d', ...)
E-1010 (CDSVIEW_SOURCE_DOESNT_MATCH): CDSView used by Glyph renderer must have a source that matches the Glyph renderer's data source: GlyphRenderer(id='567337f9-ae8a-441f-94e0-c891a5364335', ...)
E-1010 (CDSVIEW_SOURCE_DOESNT_MATCH): CDSView used by Glyph renderer must have a source that matches the Glyph renderer's data source: GlyphRenderer(id='62e8c4c0-3821-4b4d-ba3d-ece7935ae616', ...)
E-1010 (CDSVIEW_SOURCE_DOESNT_MATCH): CDSView used by Glyph renderer must have a source that matches the Glyph renderer's data source: GlyphRenderer(id='8b9cb60c-86a8-4ef5-be07-31d9c52992c6', ...)
E-1010 (CDSVIEW_SOURCE_DOESNT_MATCH): CDSView used by Glyph renderer must have a source that matches the Glyph renderer's data source: GlyphRenderer(id='9e15cdf6-80f7-4497-926d-ab1db88f0273', ...)
E-1010 (CDSVIEW