# Correlation between Schooling and Adolescent Pregnancy

## Setup

In [1]:
from __future__ import unicode_literals, print_function, division

In [2]:
import os

In [3]:
import pandas as pd

import plotly.plotly as py
from plotly.graph_objs import *
import plotly.tools as tls

  from pkg_resources import resource_stream


In [4]:
import plot_constants as pc

## Specific Configuration

In [5]:
# Input workbook location, expressed relative to the notebook's working directory.
_RAW_BASENAME = 'raw_data_20150701.xlsx'

DATA_DIR = os.path.join('.', 'data')
RAW_FILE = os.path.join(DATA_DIR, _RAW_BASENAME)

In [6]:
# --- Workbook layout: which sheet/cells of the raw file hold this figure's data ---
SHEET = 'Figure21'
PARSE_COLS = 'A:C'
SKIPROWS = 0
# NOTE(review): presumably (last row of the sheet) - (last data row); the
# meaning of 52 and 48 is not recorded here -- TODO confirm against the workbook.
SKIP_FOOTER = 52 - 48

# --- Axis labels ---
X_LABEL = 'Mean years of schooling (female, 2002-2012)'
Y_LABEL = 'Births per 1,000 women aged 15-19 (2010/2015)'

# --- Figure heading and data attribution ---
title = 'Correlation between Schooling and Adolescent Pregnancy'
subtitle = 'Adolescent birth rate by Mean Years of Schooling'
source = {
    'label': 'United Nations Development Programme (UNDP)',
    'link': 'http://hdr.undp.org/en/content/table-4-gender-inequality-index',
    # NOTE(review): 'NNN' looks like a placeholder for the access date -- fill in.
    'accessed': 'NNN',
}

## Data Importation and Munging

In [7]:
# Reader options, all taken from the configuration constants defined earlier
# in the notebook. The keyword names are kept exactly as the notebook uses them.
excel_options = dict(
    sheetname=SHEET,
    parse_cols=PARSE_COLS,
    skiprows=SKIPROWS,
    skip_footer=SKIP_FOOTER,
)

DF = pd.read_excel(RAW_FILE, **excel_options)

# Replace the sheet's header labels with the short names used by the plotting
# cell (accessed there as DF.Country, DF.Schooling, DF.Birthrate).
DF.columns = ['Country', 'Schooling', 'Birthrate']

# Bare last expression: show the loaded frame as the cell output.
DF

Unnamed: 0,Country,Schooling,Birthrate
0,Turkey,6.380445,30.923
1,Bosnia and Herzegovina,7.240337,15.108
2,MKDa,7.876328,18.256
3,Portugal,7.999779,12.573
4,Austria,8.88,4.126
5,Albania,9.050693,15.297
6,Serbia,9.2,16.872
7,Kyrgyzstan,9.27,29.264
8,Spain,9.470815,10.624
9,Malta,9.502173,18.193


## Plotting

In [8]:
# Figure dimensions come from the shared plot-constants module.
plot_height = pc.PLOT_HEIGHT
# NOTE(review): plot_width is assigned but never used in this cell -- the
# layout below fixes the width at 1000. Confirm which value is intended.
plot_width = pc.PLOT_WIDTH
filename = 'vaw/fig_11'

# --- Trace: one marker per row of DF, labelled with the country name ---
marker_outline = Line(color=pc.NEUTRAL_LINE, width=1)
country_marker = Marker(
    color=pc.NEUTRAL_FILL,
    line=marker_outline,
    symbol='circle',
    size=12,
)
label_font = Font(family='Arial')

trace = Scatter(
    x=DF.Schooling,
    y=DF.Birthrate,
    mode='markers+text',
    text=DF.Country,
    textposition='top center',
    textfont=label_font,
    marker=country_marker,
)

data = Data([trace])

# --- Layout: heading, axes, and a linked source note ---
heading = '<b>{}</b><br>{}'.format(title, subtitle)

# Positioned in 'paper' coordinates: horizontally centred (x=0.5), y=-0.1.
source_note = Annotation(
    x=0.5,
    y=-0.1,
    xref='paper',
    yref='paper',
    text='Source: <a href="{}">{}</a>'.format(source['link'],source['label']),
    align='center',
    showarrow=False,
)

side_margins = Margin(l=120, r=120, pad=0)

layout = Layout(
    title=heading,
    autosize=False,
    width=1000,
    height=plot_height,
    hovermode='closest',
    margin=side_margins,
    xaxis=XAxis(title=X_LABEL),
    yaxis=YAxis(dict(zeroline=False), title=Y_LABEL),
    annotations=Annotations([source_note]),
)

# --- Render: send the figure to Plotly under `filename` and embed it inline ---
fig = Figure(data=data, layout=layout)
py.iplot(fig, filename=filename, height=plot_height)