# Assignment #3 - Interactive Plotly

#### Import libraries

In [1]:
import pandas as pd
import numpy as np
from plotly import graph_objs as go
from ipywidgets import IntSlider, interact

#### Datasets required for the purpose of the assignment

In [2]:
# Load the fertility CSV, using its 'Country' column as the row index.
# NOTE(review): absolute path inside one user's home directory — this cell only runs where that exact path exists.
fertility = pd.read_csv(
    '/Users/jordi.hs/Desktop/BTS/Content/DSF/bts-mbds-data-science-foundations/sessions/data/gapminder/fertility.csv',
    index_col='Country',
)

In [3]:
# Load the population CSV, using its 'Country' column as the row index.
# NOTE(review): absolute path inside one user's home directory — this cell only runs where that exact path exists.
population = pd.read_csv(
    '/Users/jordi.hs/Desktop/BTS/Content/DSF/bts-mbds-data-science-foundations/sessions/data/gapminder/population.csv',
    index_col='Country',
)

In [4]:
# Load the life-expectancy CSV, using its 'Country' column as the row index.
# NOTE(review): absolute path inside one user's home directory — this cell only runs where that exact path exists.
life_exp = pd.read_csv(
    '/Users/jordi.hs/Desktop/BTS/Content/DSF/bts-mbds-data-science-foundations/sessions/data/gapminder/life_expectancy.csv',
    index_col='Country',
)

In [5]:
# Load the regions CSV, using its 'Country' column as the row index.
# NOTE(review): absolute path inside one user's home directory — this cell only runs where that exact path exists.
regions = pd.read_csv(
    '/Users/jordi.hs/Desktop/BTS/Content/DSF/bts-mbds-data-science-foundations/sessions/data/gapminder/regions.csv',
    index_col='Country',
)

# Portable variant of the four loads: same CSV files, resolved relative to the working directory.
# The regions path previously read "data/gaapminder/..." — a typo that did not match the other three.
fertility = pd.read_csv("data/gapminder/fertility.csv", index_col="Country")
population = pd.read_csv("data/gapminder/population.csv", index_col="Country")
life_exp = pd.read_csv("data/gapminder/life_expectancy.csv", index_col="Country")
regions = pd.read_csv("data/gapminder/regions.csv", index_col="Country")

#### Dataframe with key indicators (Fertility, Population, Life Expectancy and Regions) for a given year, indexed by Country

In [6]:
def by_year(year):
    """Assemble the indicators of a single year into one DataFrame.

    Parameters
    ----------
    year : int or str
        Year to select; ``str(year)`` is used as the column label in the
        ``population``, ``fertility`` and ``life_exp`` tables.

    Returns
    -------
    pandas.DataFrame
        Columns 'Population', 'Fertility', 'Life expectancy' (that year's
        column from each table) and 'Group' (from ``regions``).

    Notes
    -----
    A label missing from any of the three tables is not handled here;
    the pandas lookup error propagates to the caller.
    """
    column = str(year)  # the tables are looked up by the string form of the year
    indicators = {
        'Population': population[column],
        'Fertility': fertility[column],
        'Life expectancy': life_exp[column],
        'Group': regions['Group'],
    }
    return pd.DataFrame(indicators)

In [7]:
# Snapshot of the indicators for 1994, followed by a preview of its first five rows.
df = by_year(year=1994)
df.head(n=5)

Unnamed: 0,Population,Fertility,Life expectancy,Group
Afghanistan,18553819.0,7.796,51.738,South Asia
Albania,3179442.0,2.772,71.92,Europe & Central Asia
Algeria,27751086.0,3.705,67.674,Middle East & North Africa
American Samoa,51885.0,,,East Asia & Pacific
Andorra,63111.0,,,Europe & Central Asia


#### Scatter plot of Life expectancy vs Fertility, using the Population as bubble size and colored by Group

In [8]:
# Start from an empty figure widget and add one markers-only trace per 'Group' value,
# so each group appears as its own named entry.
fig = go.FigureWidget()
for group_name, sub_df in df.groupby("Group"):
    # Bubble size: square root of the population (missing values counted as 0), divided by 400.
    # NOTE(review): 400 is an unexplained scaling constant — presumably tuned by eye.
    bubble_sizes = np.sqrt(sub_df['Population'].fillna(0)) / 400
    sc = fig.add_scatter(
        name=group_name,
        mode='markers',
        x=sub_df['Fertility'],
        y=sub_df['Life expectancy'],
        marker=dict(size=bubble_sizes),
    )

### 1. Decorate the figure with proper X and Y axis labels, a title, a big text showing the year, and a legend (if not present). Note: If the legend does not show the colors, upgrade plotly to the latest version

In [9]:
# The top-level package is imported here only to read its version string.
import plotly
plotly.__version__ # Version check: displays the installed plotly version (the task note suggests upgrading if the legend shows no colors)

'3.6.1'

In [10]:
# Year printed inside the annotation box of the figure.
year = 1994

# Replace the whole layout of `fig`: plot title, axis titles and a single annotation
# that shows the depicted year in big text.
fig.layout = {
    'title': 'Life expectancy v. Fertility',
    'xaxis': {'title': 'Fertility rate'},
    'yaxis': {'title': 'Life Expectancy (years)'},
    'annotations': [{
        # Point the annotation is attached to.
        'x': 7.5,
        'y': 80,
        'text': str(year),
        'font': {
            'family': 'Courier New, monospace',
            'size': 26,
        },
        'align': 'center',
        # Arrow drawn for the annotation; ax/ay are the arrow offsets passed to plotly.
        'showarrow': True,
        'arrowhead': 2,
        'arrowsize': 1,
        'arrowwidth': 2,
        'arrowcolor': '#636363',
        'ax': 20,
        'ay': -30,
        # Appearance of the box around the text.
        'bordercolor': '#c7c7c7',
        'borderwidth': 2,
        'borderpad': 1,
        'bgcolor': '#ff7f0e',
        'opacity': 0.8,
    }],
}

In [11]:
# Bare expression as the last line of the cell, so the notebook displays the figure widget.
fig

FigureWidget({
    'data': [{'marker': {'size': array([ 0.24429746,  0.64660169, 14.66721514,  0.6926489 ,  1.…

### 2. Create a function `update_year` that receives a year as an integer and updates the data of the existing figure with the values from the selected year. Note: The update might not be very efficient

In [12]:
# Figure widget that update_year redraws in place.
fig1 = go.FigureWidget()


def update_year(year):
    """Redraw ``fig1`` with the indicators of the given year.

    Parameters
    ----------
    year : int
        Year to plot. It must lie between the smallest and largest column
        label of ``life_exp`` (labels are interpreted as integers).

    Returns
    -------
    None
        The function works by side effect: it replaces the traces and the
        layout of the module-level ``fig1``. For an out-of-range year it
        prints 'Invalid year' and leaves ``fig1`` untouched.
    """
    # Convert the column labels once instead of once per bound.
    available_years = life_exp.columns.astype(int)
    if year < available_years.min() or year > available_years.max():
        print('Invalid year')
        return

    update = by_year(year)  # indicators of the selected year
    fig1.data = []  # drop the previous traces so years do not overlap
    for group_name, sub_update in update.groupby("Group"):
        fig1.add_scatter(
            x=sub_update['Fertility'],
            y=sub_update['Life expectancy'],
            mode='markers',
            marker={
                'size': np.sqrt(sub_update['Population'].fillna(0)) / 400
            },
            name=group_name,
        )

    # The layout does not depend on the group, so it is set once per update
    # (it used to be reassigned on every loop iteration, and was never set
    # when there were no groups). The annotation mirrors the one used for the
    # static figure so the plot itself shows which year is displayed.
    fig1.layout = dict(
        title='Life expectancy v. Fertility',
        xaxis=dict(title='Fertility rate'),
        yaxis=dict(title='Life Expectancy (years)'),
        annotations=[dict(
            x=7.5,
            y=80,
            showarrow=True,
            text=str(year),
            font=dict(
                family='Courier New, monospace',
                size=26,
            ),
            align='center',
            arrowhead=2,
            arrowsize=1,
            arrowwidth=2,
            arrowcolor='#636363',
            ax=20,
            ay=-30,
            bordercolor='#c7c7c7',
            borderwidth=2,
            borderpad=1,
            bgcolor='#ff7f0e',
            opacity=0.8,
        )],
    )

### 3. Create a horizontal slider that ranges from the minimum to the maximum year

In [13]:
# Column labels of life_exp interpreted as integers; their extremes bound the slider.
year_columns = life_exp.columns.astype(int)

slider = IntSlider(
    # Range and starting position (starts at the module-level `year`).
    min=year_columns.min(),
    max=year_columns.max(),
    step=1,
    value=year,
    # Presentation.
    description='Year:',
    orientation='horizontal',
    readout=True,
    readout_format='d',
    # Interaction behaviour.
    disabled=False,
    continuous_update=False,
)

### 4. Bind the update_year function to changes in the horizontal slider and use it to interactively change the plot

In [14]:
# Bind the slider to update_year: the slider's value is passed as the `year` argument.
interact(update_year, year=slider)
# Last expression of the cell: displays the figure widget that update_year redraws.
fig1

interactive(children=(IntSlider(value=1994, continuous_update=False, description='Year:', max=2013, min=1964),…

FigureWidget({
    'data': [{'marker': {'size': array([ 0.24429746,  0.64660169, 14.66721514,  0.6926489 ,  1.…