In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load in

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found in it.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# Any results you write to the current directory are saved as output.

# Global Health and Nutrition
### The World Bank: an international financial institution that provides loans to countries of the world for capital projects

This notebook uses the World Bank BigQuery dataset to build interactive visualizations. It first looks at the average age at first marriage for females around the world, and then at life expectancy over time. For more details on the dataset, please visit <a href='https://www.kaggle.com/theworldbank/world-bank-health-population'>here</a>.

#### Import libraries

In [2]:
# Install chart_studio, which a later cell imports as `chart_studio.plotly`.
# NOTE(review): the version is not pinned, so a re-run may install a different release.
!pip install chart_studio



In [3]:
# Upgrade plotly; later cells import plotly.graph_objs and plotly.offline.
# NOTE(review): `--upgrade` without a version pin means a re-run may pick up a different release.
!pip install plotly --upgrade

Requirement already up-to-date: plotly in /opt/conda/lib/python3.6/site-packages (4.3.0)


In [4]:
import bq_helper
from google.cloud import bigquery
from bq_helper import BigQueryHelper

# Build ONE helper for the World Bank health/population public dataset.
# The original constructed two identical BigQueryHelper objects (same project,
# same dataset) under different names, which duplicated the client setup.
ghnp = bq_helper.BigQueryHelper(active_project='bigquery-public-data', dataset_name='world_bank_health_population')

# Later cells use both names (`bq_assistant.head(...)`, `ghnp.query_to_pandas_safe(...)`),
# so keep `bq_assistant` as an alias of the same helper.
bq_assistant = ghnp

# Show the tables available in the dataset as the cell output.
bq_assistant.list_tables()

Using Kaggle's public dataset BigQuery integration.
Using Kaggle's public dataset BigQuery integration.


['country_series_definitions',
 'country_summary',
 'health_nutrition_population',
 'series_summary',
 'series_times']

In [5]:
# Preview the first 5 rows of the `health_nutrition_population` table.
bq_assistant.head('health_nutrition_population', num_rows=5)

Unnamed: 0,country_name,country_code,indicator_name,indicator_code,value,year
0,Mexico,MEX,"Population ages 10-14, male",SP.POP.1014.MA,2267072.0,1960
1,Mexico,MEX,"Population ages 10-14, male",SP.POP.1014.MA,2365295.0,1961
2,Mexico,MEX,"Population ages 10-14, male",SP.POP.1014.MA,2479898.0,1962
3,Mexico,MEX,"Population ages 10-14, male",SP.POP.1014.MA,2606719.0,1963
4,Mexico,MEX,"Population ages 10-14, male",SP.POP.1014.MA,2735074.0,1964


### Average age of first marriages for females around the world since 2000

In [6]:
# Per-country mean of indicator SP.DYN.SMAM.FE (per this section's heading, the age at
# first marriage for females), rounded to 2 decimals and sorted ascending by that mean.
# NOTE(review): the filter is `year > 2000`, which excludes the year 2000 itself, while
# the section heading and the plot title say "since 2000" -- confirm whether `>=` was intended.
qr_averageFirstMarriage = """
select country_name, country_code, round(avg(value), 2) as average
from `bigquery-public-data.world_bank_health_population.health_nutrition_population`
where indicator_code = 'SP.DYN.SMAM.FE' and year > 2000
group by country_name, country_code
order by average
"""

# Run the query through the BigQuery helper; the result is used as a DataFrame
# (columns country_name, country_code, average) by the plotting cell.
# NOTE(review): presumably query_to_pandas_safe can refuse an oversized query and return
# None, in which case `.head()` would fail -- verify against bq_helper's documentation.
averageFirstMarriage = ghnp.query_to_pandas_safe(qr_averageFirstMarriage)
averageFirstMarriage.head()

Unnamed: 0,country_name,country_code,average
0,Niger,NER,16.43
1,Central African Republic,CAF,17.15
2,Chad,TCD,18.3
3,Mali,MLI,18.67
4,Mozambique,MOZ,18.76


In [9]:
# Use iplot() to render figures.
# See details on iplot: https://kite.com/python/docs/plotly.offline.iplot
# It draws plotly graphs inside an IPython notebook without connecting to an external server.

# NOTE(review): in the visible notebook only `iplot` and `init_notebook_mode` are used;
# `py`, `go`, `tools` and `check_output` are never referenced -- confirm whether they are needed.
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly import tools
from plotly.offline import iplot, init_notebook_mode
# Set up plotly's offline notebook mode before the first iplot() call.
init_notebook_mode()
from subprocess import check_output

# Single choropleth trace: one value per country, keyed by the `country_code` column,
# coloured by the per-country `average`, with the country name as hover text.
choropleth_trace = dict(
    type='choropleth',
    locations=averageFirstMarriage.country_code,
    z=averageFirstMarriage.average,
    text=averageFirstMarriage.country_name,
    autocolorscale=True,
    reversescale=True,
    # Thin grey outline around each country.
    marker=dict(line=dict(color='rgb(180,180,180)', width=0.5)),
)
data = [choropleth_trace]

# Globe-style (orthographic) map without frame or coastlines.
layout = dict(
    title='Average Age of First Marriage of Females Since 2000',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection=dict(type='orthographic'),
    ),
)

figure = dict(data=data, layout=layout)

# Show a Plot
iplot(figure)

ImportError: 
The plotly.plotly module is deprecated,
please install the chart-studio package and use the
chart_studio.plotly module instead. 


### Life Expectancy

In [None]:
# Every (country, year) value of indicator SP.DYN.LE00.IN (the indicator plotted under this
# "Life Expectancy" section), rounded to 2 decimals and ordered by year.
# The result columns -- country_name, country_code, year, indicator -- are the ones
# plotIndicatorMapOverTime reads from its `df` argument.
qr_lifeExpectancy = """
select country_name, country_code, year, round(value, 2) as indicator
from `bigquery-public-data.world_bank_health_population.health_nutrition_population`
where indicator_code = 'SP.DYN.LE00.IN'
order by year;
"""

# Run the query through the BigQuery helper and preview the first rows.
# NOTE(review): presumably query_to_pandas_safe can refuse an oversized query and return
# None, in which case `.head()` would fail -- verify against bq_helper's documentation.
lifeExpectancy = ghnp.query_to_pandas_safe(qr_lifeExpectancy)
lifeExpectancy.head()

In [None]:
# https://plot.ly/python/animations/

def plotIndicatorMapOverTime(df,title):
    """Display an animated choropleth world map of an indicator, one frame per year.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns read below: ``year``, ``country_code``,
        ``country_name`` and ``indicator``.
    title : str
        Title shown on the figure.

    Returns
    -------
    None
        The figure is rendered with ``iplot``; nothing is returned.

    Notes
    -----
    Changes relative to the previous version:

    * Removed the unused locals and the first ``layout['sliders']`` assignment,
      which was always overwritten before plotting and indexed ``years[0]``
      (an IndexError for an empty ``df``).
    * Frames are animated with ``redraw=True`` so the choropleth is repainted
      for every frame.
    * Slider steps reference frames by the same string name the frames are
      registered under (``str(year)``).
    * Years are sorted so the animation runs chronologically regardless of the
      row order of ``df``.
    """
    # Chronological frame order, independent of how df happens to be sorted.
    years = sorted(df.year.unique().tolist())

    #Create template Figure
    figure = {
        'data': [],
        'layout': {'title': title},
        'frames': []
    }

    #Update buttons: Play and Pause
    figure['layout']['updatemenus'] = [
        {
            'buttons': [
                {
                    # redraw=True (the plotly.js default): choropleth colours are not
                    # smoothly transitioned, so each frame needs a full redraw to show.
                    'args': [None, {'frame': {'duration':0, 'redraw': True},
                             'fromcurrent': True, 'transition': {'duration': 0, 'easing': 'quadratic-in-out'}}],
                    'label': 'Play',
                    'method': 'animate'
                },
                {
                    # Animating to [None] in immediate mode interrupts the running animation.
                    'args': [[None], {'frame': {'duration': 0, 'redraw': False}, 'mode': 'immediate',
                    'transition': {'duration': 0}}],
                    'label': 'Pause',
                    'method': 'animate'
                }
            ],
            'direction': 'left',
            'pad': {'r': 10, 't': 87},
            'showactive': False,
            'type': 'buttons',
            'x': 0.1,
            'xanchor': 'right',
            'y': 0,
            'yanchor': 'top'
        }
    ]

    # Year slider; one step per frame is appended in the loop below.
    sliders_dict = {
        'active': 0,
        'yanchor': 'top',
        'xanchor': 'left',
        'currentvalue': {
            'font': {'size': 20},
            'prefix': 'Year:',
            'visible': True,
            'xanchor': 'right'
        },
        'transition': {'duration': 300, 'easing': 'cubic-in-out'},
        'pad': {'b': 10, 't': 50},
        'len': 0.9,
        'x': 0.1,
        'y': 0,
        'steps': []
    }

    # Use one colour range for every frame so colours are comparable across years.
    minZValue = df.indicator.min()
    maxZValue = df.indicator.max()

    for year in years:
        dfYear = df[df.year == year]
        frameName = str(year)

        gDataTemp = [ {
            'type': 'choropleth',
            'locations': dfYear.country_code,
            'z': dfYear.indicator,
            'text': dfYear.country_name,
            'autocolorscale': True,
            'marker': {
                'line': {
                    'color':'rgb(180,180,180)',
                    'width':0.5
                }
            },
            'zauto': False,
            'zmin': minZValue,
            'zmax': maxZValue,
        } ]

        figure['frames'].append({'data': gDataTemp, 'name': frameName})

        # The step targets the frame by the exact name it was registered under.
        sliders_dict['steps'].append({
            'args': [
                [frameName],
                {'frame': {'duration': 300, 'redraw': True},
                 'mode': 'immediate',
                 'transition': {'duration': 300}}
            ],
            'label': frameName,
            'method': 'animate'
        })

    # The initial view is the earliest year's frame (left empty when df has no rows).
    if figure['frames']:
        figure['data'] = figure['frames'][0]['data']

    figure['layout']['sliders'] = [sliders_dict]

    iplot(figure)

In [8]:
# Draw the animated life-expectancy map. The cells that define plotIndicatorMapOverTime
# and build `lifeExpectancy` must be run first; the NameError recorded below came from
# running this cell before the function definition had been executed.
plotIndicatorMapOverTime(lifeExpectancy, title='Life Expectancy')

NameError: name 'plotIndicatorMapOverTime' is not defined