Data Visualization: World Happiness and The Statistics of Countries

In [1]:
#Imports
import plotly as py
import plotly.plotly as plt
import plotly.graph_objs as go
import ipywidgets as widget
import numpy as np
import pandas as pd
from scipy import stats
from IPython.display import HTML

# Switch plotly to offline notebook mode so the figures below are drawn
# with py.offline.iplot directly in the notebook output.
py.offline.init_notebook_mode(connected=True)

In [2]:
'''
The code in this cell is by Damien Kao. Its purpose is to hide the code
cells of the Jupyter notebook for a cleaner presentation.
Source: http://blog.nextgenetics.net/?e=102
'''

# The embedded script defines code_toggle(), which hides or shows every
# `div.input` element and flips the code_show flag. It is registered to run
# once when the document is ready (code_show starts as true, so that first
# call hides the inputs); the link in the rendered text calls it again.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')

In [3]:
# Load the happiness data, indexed by the first column of 2017.csv
happy_df = pd.read_csv("2017.csv", index_col=0)
# Happiness Score Series: selected by position (second column after the index).
# NOTE(review): later cells read this column by name as "Happiness.Score" - confirm
# that position 1 is that column.
happy_score = happy_df.iloc[:, 1]

# Create a world DataFrame containing various info on all countries in the world
# as of 2017. The file writes decimals with a comma, hence decimal=','.
df = pd.read_csv("world.csv", index_col=0, decimal=',')

# Add a column to the world DataFrame: Growth Rate.
# Growth Rate is calculated as a country's Birthrate - Deathrate.
# The subtraction is done on whole columns, so it does not depend on the index
# labels being unique and avoids one label lookup per row.
df["Growth_Rate"] = df["Birthrate"].astype(float) - df["Deathrate"].astype(float)

In [4]:
def update_plot(column, style):
    '''
    Draw a world choropleth map of one country statistic.
    This function redraws the plot each time a new widget selection is made.
    
    Parameters:
    column - the selected Country statistic (a column name of the world DataFrame df)
    style - the map projection in which the plot is presented; matched
            case-insensitively, so 'Mercator' and 'mercator' are the same
    
    Returns: None
    '''
    # plotly.js projection names are all lowercase. Because the figure is
    # plotted with validate=False, a capitalised name such as 'Mercator' is
    # not rejected here and would not be recognised by the renderer.
    projection_type = style.lower()

    #Create the layout for the plotly, set settings for plotly
    layout = dict(
        title = column + " in the World (2017)",
        geo = dict(
            showframe = False,
            showcoastlines = True,
            projection = dict(
                type = projection_type
            )
        )
    )
    
    #Set the data for the plot, set type to choropleth
    data = [ dict(
        type = 'choropleth',
        locations = df['CODE'],
        z = df[column],
        text = df.index.values, #Country (the DataFrame index)
        #Set colors here (follow a gradient)
        colorscale = [
            [0, "rgb(237, 41, 47)"],
            [0.35, "rgb(240, 67, 165)"],
            [0.5, "rgb(203, 106, 244)"],
            [0.6, "rgb(158, 133, 247)"],
            [0.7, "rgb(175, 215, 252)"],
            [1, "rgb(204, 255, 255)"],
        ],
        autocolorscale = False,
        reversescale = True,
        marker = dict(
            line = dict(
                color = 'rgb(180,180,180)',
                width = 0.5
            ) ),
        colorbar = dict(
            autotick = False,
            title = column),
        ) ]
    
    #Plot the data
    fig = dict( data=data, layout=layout )
    py.offline.iplot( fig, validate=False, filename='test.html' )

# Controls for the statistics map: one dropdown picks the column to colour
# by, the other picks the map projection.
options = [
    "Growth_Rate",
    "Population",
    "GDP ($ per capita)",
    "Literacy (%)",
    "Phones (per 1000)",
]
styles = ['orthographic', 'Mercator']
column = widget.Dropdown(
    description="Country Stats",
    options=options,
    value="Growth_Rate",
)
style = widget.Dropdown(
    description="Style",
    options=styles,
    value="Mercator",
)
# Last expression of the cell: the interactive container wires both
# dropdowns to update_plot and is what the cell displays.
widget.interactive(update_plot, column=column, style=style)

A Jupyter Widget

In [5]:
def update_plot(year, style):
    '''
    Draw a world choropleth map of the population projected to a given year.
    This function redraws the plot each time a new widget selection is made.

    The projection is linear: for every year after 2017 each country gains
    Growth_Rate people per whole thousand of its 2017 population. Projected
    populations that would fall to zero or below are shown as zero. The
    world DataFrame df itself is not modified.
    
    Parameters:
    year - the selected year
    style - the map projection in which the plot is presented; matched
            case-insensitively, so 'Mercator' and 'mercator' are the same
    
    Returns: None
    '''
    # plotly.js projection names are all lowercase. Because the figure is
    # plotted with validate=False, a capitalised name such as 'Mercator' is
    # not rejected here and would not be recognised by the renderer.
    projection_type = style.lower()

    #Set the layout settings of the PlotLy
    layout = dict(
        title = 'Population in '+ str(year) +' Based on 2017 Growth Rates',
        geo = dict(
            showframe = False,
            showcoastlines = True,
            projection = dict(
                type = projection_type
            )
        )
    )
    
    #The multiplier for the growth rate, calculated by years since 2017
    multiplier = year-2017
    
    #Work on a copy of the world DataFrame so the original stays untouched
    projected = df.copy()
    base_population = projected['Population']
    #Re-calculate every country's population in one column-wise step.
    #Assigning the whole column at once lets pandas pick the result dtype
    #instead of writing floats one cell at a time into the existing column.
    growth = projected['Growth_Rate'] * multiplier * (base_population // 1000)
    #If a country would have a negative population, its population is zero!
    projected['Population'] = (base_population + growth).clip(lower=0)
            
    #Set the data and color scale gradient
    data = [ dict(
        type = 'choropleth',
        locations = projected['CODE'],
        z = projected['Population'],
        text = projected.index.values, #Country (the DataFrame index)
        colorscale = [
            [0, "rgb(237, 41, 47)"],
            [0.35, "rgb(240, 67, 165)"],
            [0.5, "rgb(203, 106, 244)"],
            [0.6, "rgb(158, 133, 247)"],
            [0.7, "rgb(175, 215, 252)"],
            [.999, "rgb(204, 255, 255)"],
            [1, "rgb(255, 255, 0)"],
        ],
        autocolorscale = False,
        reversescale = True,
        marker = dict(
            line = dict(
                color = 'rgb(180,180,180)',
                width = 0.5
            ) ),
        colorbar = dict(
            autotick = False,
            title = 'Population'),
      ) ]
    
    #Plot the plotly
    fig = dict( data=data, layout=layout )
    py.offline.iplot( fig, validate=False, filename='d3-world-map' )
    
# Controls for the population projection map: a year slider starting at
# 2017 and a dropdown for the map projection.
styles = ['orthographic', 'Mercator']
year = widget.IntSlider(
    description='Year',
    min=2017,
    max=3000,
    value=2017,
)
style = widget.Dropdown(
    description="Style",
    options=styles,
    value="Mercator",
)

# Last expression of the cell: the interactive container wires both
# controls to update_plot and is what the cell displays.
widget.interactive(update_plot, year=year, style=style)

A Jupyter Widget

In [6]:
# Choropleth trace for the happiness map: countries are located by the CODE
# column, coloured by Happiness.Score, and labelled with the DataFrame index
# (the country). Built once here and reused by every redraw.
data = [{
    'type': 'choropleth',
    'locations': happy_df['CODE'],
    'z': happy_df['Happiness.Score'],
    'text': happy_df.index.values,
    # Gradient colour scale, applied reversed (see reversescale)
    'colorscale': [
        [0, "rgb(34, 139, 34)"],
        [0.35, "rgb(55, 48, 117)"],
        [0.5, "rgb(90, 114, 161)"],
        [0.6, "rgb(134, 116, 170)"],
        [0.7, "rgb(183, 160, 175)"],
        [1, "rgb(192, 192, 192)"],
    ],
    'autocolorscale': False,
    'reversescale': True,
    'marker': {
        'line': {
            'color': 'rgb(180,180,180)',
            'width': 0.5,
        },
    },
    'colorbar': {
        'autotick': False,
        'title': 'Happiness Score',
    },
}]

def update_plot(style):
    '''
    Draw the world happiness choropleth map in the chosen projection.
    This function redraws the plot each time a new widget selection is made.
    The trace itself comes from the module-level `data` list.
    
    Parameters:
    style - the map projection in which the plot is presented; matched
            case-insensitively, so 'Mercator' and 'mercator' are the same
    
    Returns: None
    '''
    # plotly.js projection names are all lowercase. Because the figure is
    # plotted with validate=False, a capitalised name such as 'Mercator' is
    # not rejected here and would not be recognised by the renderer.
    projection_type = style.lower()

    #Set layout settings for plotly
    layout = dict(
        title = 'Happiness Scores based on economic, social support, etc.',
        geo = dict(
            showframe = False,
            showcoastlines = True,
            projection = dict(
                type = projection_type
            )
        )
    )

    # Plot the data
    fig = dict( data=data, layout=layout )
    py.offline.iplot( fig, validate=False, filename='d3-world-map' )

# Control for the happiness map: a single dropdown for the map projection.
styles = ['orthographic', 'Mercator']
style = widget.Dropdown(
    description="Style",
    options=styles,
    value="Mercator",
)
# Last expression of the cell: the interactive container wires the dropdown
# to update_plot and is what the cell displays.
widget.interactive(update_plot, style=style)

A Jupyter Widget

In [7]:
# Re-read both files indexed by a shared key so the two tables can be matched.
# NOTE(review): columns 12 and 20 are addressed by position; presumably both are
# the CODE column used for the maps - confirm against the CSV headers.
happy_score = pd.read_csv("2017.csv", index_col=12)
data2 = pd.read_csv("world.csv", index_col=20, decimal=',')

# Keep only the countries that also appear in the happiness data, in the order
# of the world file. Missing (NaN) keys are excluded explicitly so that they are
# never treated as a match.
in_both = data2.index.notna() & data2.index.isin(happy_score.index)
matched = data2.loc[in_both]

# One row per matched country with the variables we care about; Growth_Rate is
# Birthrate - Deathrate, computed column-wise.
stat_columns = ["Population", "GDP ($ per capita)", "Literacy (%)", "Phones (per 1000)"]
scatter = matched[stat_columns].assign(
    Growth_Rate=matched["Birthrate"] - matched["Deathrate"]
)

# Index the rows by each country's happiness score, which is what the scatter
# plots use as the x value.
scatter.index = happy_score.loc[matched.index, "Happiness.Score"].values
scatter = scatter.dropna() #Clean the data; get rid of NaNs



In [8]:
def update_plot(column):
    '''
    Plot happiness score against one country statistic, together with a
    least-squares regression line, and print the fit statistics.
    Called each time a new widget selection is made.
    
    Parameters:
    column - the selected Country statistic (a column of the scatter DataFrame)
    
    Returns: None
    '''
    # x is the happiness score (the index of scatter), y the chosen statistic
    happiness_values = scatter.index.values
    stat_values = scatter.loc[:, column]

    # Ordinary least squares fit of the statistic on the happiness score
    fit_slope, fit_intercept, r_value, p_value, _std_err = stats.linregress(
        happiness_values, stat_values
    )
    fitted_values = [fit_slope * score + fit_intercept for score in happiness_values]

    # Raw observations as black markers
    observed = go.Scatter(
        x=happiness_values,
        y=stat_values,
        mode='markers',
        marker=dict(color='black'),
    )

    # Fitted regression line in blue
    trend = go.Scatter(
        x=happiness_values,
        y=fitted_values,
        mode='lines',
        line=dict(color='blue', width=3),
    )

    # Both axes share the same tick settings and differ only in their title
    axis_style = dict(ticks='', showticklabels=True, zeroline=False)
    layout = go.Layout(
        xaxis=dict(axis_style, title="Happiness"),
        yaxis=dict(axis_style, title=column),
        title="Happiness Score vs. " + column,
        showlegend=False,
        hovermode='closest',
    )

    # Draw the figure
    py.offline.iplot(go.Figure(data=[observed, trend], layout=layout))

    # Report the regression statistics below the figure
    summary = "Slope: " + str(fit_slope) + '\nP-Value: ' + str(p_value) + '\nR-Squared: ' + str(r_value**2)
    print(summary)

# Control for the scatter plot: a dropdown choosing which country statistic
# is plotted against the happiness score.
options = [
    "Growth_Rate",
    "Population",
    "GDP ($ per capita)",
    "Literacy (%)",
    "Phones (per 1000)",
]
column = widget.Dropdown(
    description="Country Stats",
    options=options,
    value="Growth_Rate",
)

# Last expression of the cell: the interactive container wires the dropdown
# to update_plot and is what the cell displays.
widget.interactive(update_plot, column=column)

A Jupyter Widget