In [2]:

from bokeh.models import Plot
from bokeh.models import LogScale

from bokeh.models import LinearAxis
from bokeh.models import SingleIntervalTicker
from bokeh.models import LogAxis
from bokeh.models import LogTicker
from bokeh.models import Range1d
from bokeh.models import DataRange1d

from bokeh.models import Text
from bokeh.models import ColumnDataSource

from bokeh.models import Circle
from bokeh.palettes import Spectral6

from bokeh.io import export_png

import os
import imageio
import pandas as pd
import time
import math
import numpy as np

# Start the wall-clock timer for the whole run
t0 = time.time()

# Candidate encodings for the CSV files; the second entry is the one in use
encoding_options = ['latin1', 'iso8859-1', 'utf-8']
encoding = encoding_options[1]


def _read_country_table(csv_path):
    """Read *csv_path* with the shared encoding and index its rows by 'Country'."""
    table = pd.read_csv(csv_path, encoding=encoding)
    return table.set_index(keys='Country')


fertility_df = _read_country_table('./data/csv/fertility_df.csv')
income_df = _read_country_table('./data/csv/income_df.csv')
population_df = _read_country_table('./data/csv/population_df.csv')

# Turn population into bubble sizes: treat the population as a circle area,
# take the matching radius and shrink it by scale_factor.
# Tweak scale_factor and min_size to change how the bubbles look.
scale_factor = 400
min_size = 3
population_size_df = np.sqrt(population_df / np.pi) / scale_factor

# Every cell that is not >= min_size (missing values included, because a NaN
# comparison is False) is drawn at min_size.
population_size_df = population_size_df.where(population_size_df >= min_size, other=min_size)

# Load the country -> region-group table
regions_df = pd.read_csv('./data/csv/regions_df.csv', encoding=encoding)

# Store 'Group' as a categorical so the distinct group names can be listed once;
# the position of a name in regions_list later selects its colour.
group_categorical = regions_df['Group'].astype('category')
regions_df['Group'] = group_categorical
regions_list = list(group_categorical.cat.categories)

regions_df = regions_df.set_index(keys='Country')

def get_color(r):
    """Return the Spectral6 colour for the region group of row *r*.

    *r* must support ``r['Group']``; the group's position in ``regions_list``
    is used as the index into the ``Spectral6`` palette.
    """
    group_position = regions_list.index(r['Group'])
    return Spectral6[group_position]

# Compute one palette colour per country row and keep it as a column
row_colors = regions_df.apply(get_color, axis=1)
regions_df['region_color'] = row_colors

# Stand-alone series named 'region_color', ready to be joined to the per-year data
region_color_series = regions_df['region_color'].rename('region_color')

# Column labels of the fertility table; the first and last labels are parsed as years
columns_list = list(fertility_df.columns)
first_year = int(columns_list[0])
last_year = int(columns_list[-1])

# NOTE(review): range() stops before last_year, so the final column's year is
# not in years_list - confirm that this is intended.
years_list = list(range(first_year, last_year))

# Children per woman (total fertility): x axis.
# Whole-number bounds taken over every country and every year column.
fertility_min = fertility_df.min().min()
fertility_max = fertility_df.max().max()
x_low = int(math.floor(fertility_min))
x_high = int(math.ceil(fertility_max))

# Pad the range by half of x_low on the left and a tenth of x_high on the right
x_start = x_low-0.5*x_low
x_end = x_high+0.1*x_high
xdr = Range1d(x_start, x_end)

# Tick spacing: the x span split into 9 steps, rounded up to a whole number
x_interval = int(math.ceil((x_high - x_low) / 9))

# Personal income (GDP per capita): y axis.
# Whole-number bounds taken over every country and every year column.
y_low = int(math.floor(income_df.min().min()))
y_high = int(math.ceil(income_df.max().max()))

# Pad the range by half of each bound.
y_start = y_low-0.5*y_low
y_end = y_high+0.5*y_high
if y_start <= 0:
    # The y axis is drawn on a log scale, which cannot start at zero or below;
    # fall back to half of the smallest strictly positive income.
    y_start = 0.5 * income_df[income_df > 0].min().min()

# Use a fixed Range1d, like the x axis. A DataRange1d derives its bounds from
# the plotted data and does not take (start, end) positionally, so the padded
# bounds computed here were not pinning the axis across frames.
ydr = Range1d(y_start, y_end)

# Keyword arguments shared by both axes (unpacked into LinearAxis and LogAxis)
AXIS_FORMATS = {
    # Tick geometry and label fonts
    "minor_tick_in": None,
    "minor_tick_out": None,
    "major_tick_in": None,
    "major_label_text_font_size": "10pt",
    "major_label_text_font_style": "normal",
    "axis_label_text_font_size": "10pt",

    # Colours
    "axis_line_color": '#AAAAAA',
    "major_tick_line_color": '#AAAAAA',
    "major_label_text_color": '#666666',

    # Line caps and widths
    "major_tick_line_cap": "round",
    "axis_line_cap": "round",
    "axis_line_width": 1,
    "major_tick_line_width": 1,
}

# Render one PNG frame per year column into png_dir
png_dir = "./saves/png/"

# Create the output folder up front so the first export cannot fail on a
# missing directory.
os.makedirs(png_dir, exist_ok=True)

for year in columns_list:

    # A frame needs fertility, income and bubble-size data for the same year
    if not ((year in fertility_df) and (year in income_df) and (year in population_size_df)):
        continue

    # One column per glyph property. rename() returns renamed copies, so the
    # columns of the source frames are left untouched.
    fertility_series = fertility_df[year].rename('fertility')
    income_series = income_df[year].rename('income')
    population_series = population_size_df[year].rename('population')

    # Align the three measures and the region colour on the country index
    new_df = pd.concat([fertility_series, income_series, population_series, region_color_series], axis=1)

    # Build the plot
    plot = Plot(

        # Children per woman (total fertility)
        x_range=xdr,

        # Personal income (GDP per capita)
        y_range=ydr,
        y_scale=LogScale(),

        plot_width=800,
        plot_height=400,
        outline_line_color=None,
        toolbar_location=None,
        min_border=20,
    )

    # Build the axes
    xaxis = LinearAxis(ticker=SingleIntervalTicker(interval=x_interval),
                       axis_label="Children per woman (total fertility)",
                       **AXIS_FORMATS)
    yaxis = LogAxis(ticker=LogTicker(), axis_label="Personal income (GDP per capita)",
                    **AXIS_FORMATS)
    plot.add_layout(xaxis, 'below')
    plot.add_layout(yaxis, 'left')

    # Add the background year text (added first, so the bubbles are drawn on top)
    text_source = ColumnDataSource({'year': ['%s' % year]})
    text = Text(x=1, y=150, text='year', text_font_size='150pt', text_color='#EEEEEE')
    plot.add_glyph(text_source, text)

    # Add the circles: one bubble per country, sized by population and
    # coloured by region
    renderer_source = ColumnDataSource(new_df)
    circle_glyph = Circle(
        x='fertility', y='income', size='population',
        fill_color='region_color', fill_alpha=0.8,
        line_color='#7c7e71', line_width=0.5, line_alpha=0.5)
    circle_renderer = plot.add_glyph(renderer_source, circle_glyph)

    # Add the legend by hand: one label and one colour swatch per region.
    # The y positions are powers of ten whose exponent drops by 0.15 per
    # entry, which spaces the rows evenly on the log income axis; the swatch
    # uses a slightly larger exponent (+0.08) than its label.
    text_x = 7
    for i, region in enumerate(regions_list):
        plot.add_glyph(Text(x=text_x, y=int(10**(-0.15*i+5)), text=[region],
                            text_font_size='10pt', text_color='#666666'))
        plot.add_glyph(Circle(x=text_x - 0.1, y=int(10**(-0.15*i+5.08)), fill_color=Spectral6[i],
                              size=10, line_color=None, fill_alpha=0.8))

    # Save as PNG
    export_png(plot, filename=os.path.join(png_dir, "plot_%s.png" % year))
        
# Stitch the exported frames into an animated GIF.
# Every .png found under png_dir is used, so files left over from earlier runs
# are included as well.
images = []
for subdir, dirs, files in os.walk(png_dir):
    # os.walk yields names in no guaranteed order; sort directories and files
    # so the frames are appended in file-name (i.e. year) order.
    dirs.sort()
    for filename in sorted(files):
        filepath = os.path.join(subdir, filename)
        if filepath.endswith(".png"):
            images.append(imageio.imread(filepath))

# Make sure the target folder exists before writing the movie
os.makedirs('./saves/gif', exist_ok=True)
imageio.mimsave('./saves/gif/movie.gif', images)

# Report the elapsed seconds since t0, followed by the finish time
t1 = time.time()
elapsed_seconds = t1 - t0
finished_at = time.ctime(t1)
print(elapsed_seconds, finished_at)

297.7325370311737 Sat Jul 22 14:12:48 2017
