# Gapminder data choropleths

In [1]:
import plotly.plotly as py
import plotly.tools as tls
import pandas as pd
import json
import urllib2
import re

The most famous Gapminder data is featured in [Hans Rosling](https://en.wikipedia.org/wiki/Hans_Rosling)'s bubble charts (see a Plotly version [here](https://plot.ly/~etpinard/191/fig-31b-hans-roslings-bubble-chart-for-the-year-2007/)).

In this notebook, we'll explore the same dataset using plotly choropleths.

In [2]:
# Gapminder data file hosted by Jennifer Bryan -- big thanks to her!
# The file is tab-separated text.
gapminder_url = 'http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/gapminder/data/gapminderDataFiveYear.txt'
df_full = pd.read_csv(gapminder_url, sep='\t')

# Preview the first few rows of the full table.
df_full.head()

Unnamed: 0,country,year,pop,continent,lifeExp,gdpPercap
0,Afghanistan,1952,8425333,Asia,28.801,779.445314
1,Afghanistan,1957,9240934,Asia,30.332,820.85303
2,Afghanistan,1962,10267083,Asia,31.997,853.10071
3,Afghanistan,1967,11537966,Asia,34.02,836.197138
4,Afghanistan,1972,13079460,Asia,36.088,739.981106


Select a year and filter the dataframe down to that year's rows

In [3]:
# Year to map; df_full['year'].unique() lists the years that are available.
the_year = 2007

# Boolean mask that is True on the rows recorded for `the_year`.
i_year = df_full['year'].eq(the_year)

# Keep only the rows corresponding to `the_year`.
df = df_full.loc[i_year]

Define a plot function

In [4]:
def plot(z, title, units):
    """Send a world choropleth of ``z`` through ``py.plot`` and print the result.

    Parameters
    ----------
    z
        Values used to colour the countries; passed unchanged as the trace's
        ``z``. Presumably aligned row-for-row with the module-level
        ``df['country']`` -- confirm at the call site.
    title : str
        Leading part of the figure title. Its lower-cased form, with spaces
        replaced by dashes, is also used as the ``filename`` argument.
    units : str
        Unit label for the colorbar. ``'$'`` is attached as a tick prefix;
        any other value is attached as a tick suffix. Either way it is shown
        on the last tick only.

    Notes
    -----
    Reads the module-level names ``df`` (country names) and ``the_year``
    (for the title). Returns nothing; the value returned by ``py.plot`` is
    printed.
    """
    unit_label = ' ' + units
    if units == '$':
        colorbar = {'tickprefix': unit_label, 'showtickprefix': 'last'}
    else:
        colorbar = {'ticksuffix': unit_label, 'showticksuffix': 'last'}

    # Single choropleth trace keyed on country names.
    trace = {
        'type': 'choropleth',
        'locationmode': 'country names',
        'locations': df['country'],
        'z': z,
        'colorbar': colorbar,
    }

    layout = {
        'title': title + ' by country in ' + str(the_year),
        'titlefont': {'size': 22},
        'geo': {'projection': {'type': 'kavrayskiy7'}},
        'width': 1000,
        'height': 600,
    }

    figure = {'data': [trace], 'layout': layout}

    url = py.plot(
        figure,
        validate=False,
        filename=title.lower().replace(' ', '-'),
        auto_open=False,
    )
    print(url)

Use the plot function for each of the three dependent variables

In [5]:
# `pop` holds raw head counts, while the colorbar is labelled in millions:
# rescale the values so the numbers on the colorbar match their unit label.
plot(df['pop'] / 1e6, 'World population', 'million')
plot(df['lifeExp'], 'Life expectancy', 'year')
plot(df['gdpPercap'], 'GDP per capita', '$')

https://plot.ly/~etpinard/4250
https://plot.ly/~etpinard/4252
https://plot.ly/~etpinard/4254


In [6]:
# Embed the first figure printed above (the 'World population' call).
population_url = 'https://plot.ly/~etpinard/4250'
tls.embed(population_url)

We note that Russia is not part of the dataset.

In [7]:
# Embed the second figure printed above (the 'Life expectancy' call).
life_expectancy_url = 'https://plot.ly/~etpinard/4252'
tls.embed(life_expectancy_url)

In [8]:
# Embed the third figure printed above (the 'GDP per capita' call).
gdp_per_capita_url = 'https://plot.ly/~etpinard/4254'
tls.embed(gdp_per_capita_url)

In [9]:
# Inject CSS styling in the NB
from IPython.display import display, HTML

# Read the stylesheet inside a context manager so the file handle is closed
# as soon as the contents are loaded, rather than being left open.
with open('../_custom.css') as css_file:
    custom_css = css_file.read()

display(HTML(custom_css))