# Female life expectancy at birth (2013) by estimated Gross National Income (GNI) per capita, in USD at PPP (2011)

## Setup

In [1]:
from __future__ import unicode_literals, print_function, division

In [2]:
import os

In [3]:
import numpy as np
import pandas as pd

import plotly.plotly as py
from plotly.graph_objs import *
import plotly.tools as tls

  from pkg_resources import resource_stream


In [4]:
import plot_constants as pc

## Specific Configuration

In [5]:
# Input files live in a `data` folder next to the notebook.
DATA_DIR = os.path.join(os.curdir, 'data')

# Spreadsheet path built from DATA_DIR.
# NOTE(review): no cell visible in this notebook reads RAW_FILE — confirm it is still needed.
RAW_FILE = os.path.join(DATA_DIR, 'raw_data_20150701.xlsx')

In [6]:
# Spreadsheet-import settings.
# NOTE(review): none of these four constants is referenced by the cells visible
# in this notebook — confirm whether they are leftovers.
SHEET = 'Figure3'
PARSE_COLS = 'A:C'
SKIPROWS = 0
SKIP_FOOTER = 107 - 48  # evaluates to 59

# Text shown on the figure.
X_LABEL = 'GNI per capita (USD, at PPP)'
Y_LABEL = 'Female Life Expectancy (Years)'

title = 'Female Life Expectancy by Gross National Income'
# NOTE(review): the notebook heading gives 2011 as the GNI year while this
# subtitle says 2010 — confirm which is right.
subtitle = '(Female life expectancy at birth by estimated GNI PPP in USD per capita (2010))'

# Attribution for the data; 'link' and 'label' feed the source annotation on the plot.
source = {
    'label': 'WHO/Europe Health For All Database (April 2014 update)',
    'link': 'http://www.euro.who.int/en/data-and-evidence/databases/european-health-for-all-database-hfa-db',
    'accessed': '14 July 2015',
}

## Data Importation and Munging

In [7]:
# (HTML export in DATA_DIR, name to give its value column)
indicator_files = [
    ('hfa_250.html', 'gni'),
    ('hfa_1012.html', 'life_expectancy'),
]


def load_indicator(file_name, value_column):
    """Read the first table of one HTML export as a (country, value) frame."""
    path = os.path.join(DATA_DIR, file_name)
    frame = pd.read_html(path, skiprows=1, header=0)[0]
    frame.columns = ['country', value_column]
    # '...' entries are treated as missing values.
    frame = frame.replace('...', np.nan)
    # Drop the first four characters of every country label
    # (presumably a fixed-width prefix in the export — confirm against the files).
    frame.country = frame.country.str[4:]
    # Keep only the first 53 rows (presumably the country rows — confirm).
    return frame[0:53]


indicator_frames = [load_indicator(name, column) for name, column in indicator_files]

# No explicit key: merge joins on the column the two frames share, 'country'.
DF = pd.merge(*indicator_frames)
# Keep only countries that have both indicators.
DF.dropna(inplace=True)
DF = pc.clean_mkd(DF, 'country')
DF.head()

Unnamed: 0,country,gni,life_expectancy
3,Austria,47060,83.63
6,Belgium,45840,82.99
8,Bulgaria,6320,77.42
9,Croatia,13550,80.01
10,Cyprus,28570,84.46


## Plotting

In [8]:
filename = 'vaw/fig_02'
plot_height = pc.PLOT_HEIGHT
# NOTE(review): plot_width is assigned but the layout below uses a literal
# width of 1000 — confirm whether that override is intentional.
plot_width = pc.PLOT_WIDTH

# One marker per country, labelled with the country name above the point.
marker_style = Marker(
    symbol=pc.FEMALE_SYMBOL,
    size=10,
    color=pc.NEUTRAL_FILL,
    line=Line(color=pc.NEUTRAL_LINE, width=1),
)
trace = Scatter(
    x=DF['gni'],
    y=DF['life_expectancy'],
    text=DF.country,
    mode='markers+text',
    textposition='top center',
    textfont=Font(family='Arial'),
    marker=marker_style,
)
data = Data([trace])

# Linked data-source credit, centred just below the plotting area.
source_note = Annotation(
    text='Source: <a href="{}">{}</a>'.format(source['link'], source['label']),
    xref='paper',
    yref='paper',
    x=0.5,
    y=-0.1,
    align='center',
    showarrow=False,
)

heading = '<b>{}</b><br>{}'.format(title, subtitle)
layout = Layout(
    title=heading,
    autosize=False,
    width=1000,
    height=plot_height,
    hovermode='closest',
    margin=Margin(l=120, r=120, pad=0),
    xaxis=XAxis(zeroline=False, title=X_LABEL),
    yaxis=YAxis(title=Y_LABEL),
    annotations=Annotations([source_note]),
)

fig = Figure(data=data, layout=layout)
py.iplot(fig, filename=filename, height=plot_height)

In [9]:
# Print whatever pc.check_mkd returns for the country column of the plotted data.
mkd_note = pc.check_mkd(DF, 'country')
print(mkd_note)

¶ The former Yugoslav Republic of Macedonia (MKD is an abbreviation of the ISO).
