In [1]:
import pandas as pd
import os
import geopandas as gpd
import cartopy.io.shapereader as shpreader    
# Note: you might have to install this package.
# Try "pip install cartopy" or "conda install -c conda-forge cartopy"
import country_converter as coco              
# Note: you might have to install this package.
# Try "pip install country_converter --upgrade" or "conda install country_converter"
import seaborn as sns
import geopandas
from geopandas import GeoDataFrame
import json
from math import log
import bokeh
from bokeh.plotting import figure, show
from bokeh.models import LinearInterpolator, GeoJSONDataSource, LinearColorMapper, ColorBar, HoverTool, Slider
from bokeh.models import  ColumnDataSource,Range1d, LabelSet, Label
from bokeh.palettes import brewer, Spectral6, Spectral5
from bokeh.transform import linear_cmap, factor_cmap
from bokeh.layouts import column, row
from ipywidgets import interact, Dropdown, IntSlider, widgets

In [2]:
# Configure Bokeh for notebook output so that later show() calls render inside the notebook.
from bokeh.io import output_notebook
output_notebook()

In [3]:
# Loading datasets:
# The directories default to the original author's local checkout, but can be
# overridden with the DI_DATA_DIR / DI_FINAL_DIR environment variables so the
# notebook can run on another machine without editing this cell.
PATH = os.environ.get(
    'DI_DATA_DIR',
    r'C:\Users\monic\Documents\GitHub\final-project-final-project-carrie-monica-yu\Data')
PATH2 = os.environ.get(
    'DI_FINAL_DIR',
    r'C:\Users\monic\Documents\GitHub\final-project-final-project-carrie-monica-yu\Final dataframes')
FNAME = 'combined.csv'
FNAME2 = 'DI.csv'
# Both CSV files are read from PATH2; PATH is kept for compatibility but is not
# used by the code in this cell.
combined = pd.read_csv(os.path.join(PATH2, FNAME))
DI = pd.read_csv(os.path.join(PATH2, FNAME2))

In [4]:
### Merging the two datasets (i.e., DI and combined) with the shapefile:
def get_worldshp(fname):
    """Read a country shapefile and return its long names and geometries.

    Parameters
    ----------
    fname : path of the shapefile, passed straight to ``gpd.read_file``.

    Returns
    -------
    A frame with the columns 'NAME_LONG' and 'geometry', in which a handful
    of mis-decoded (mojibake) country names have been repaired.
    """
    countries = gpd.read_file(fname)
    countries = countries[['NAME_LONG', 'geometry']]
    # Restore the names whose accented characters were garbled on read:
    countries = countries.replace({'SÃ£o TomÃ© and Principe': 'São Tomé and Principe',
                                   'CuraÃ§ao': 'Curaçao',
                                   'Saint-BarthÃ©lemy': 'Saint-Barthélemy',
                                   "CÃ´te d'Ivoire": "Côte d'Ivoire"})
    # Åland: locate the row by name rather than by a fixed position, so another
    # shapefile version (different row order or length) is not silently
    # corrupted. The pattern requires a non-ASCII first character, which keeps
    # e.g. 'Falkland Islands' from matching.
    names = countries['NAME_LONG'].astype(str)
    aland_rows = names.str.match(r'^[^\x00-\x7f].{0,5}land Islands$')
    if aland_rows.any():
        countries.loc[aland_rows, 'NAME_LONG'] = 'Åland Islands'
    elif len(countries) > 168:
        # Fallback to the original positional fix when no name matched;
        # guarded so short frames no longer raise IndexError.
        countries.iloc[168, 0:1] = 'Åland Islands'
    return countries

# Downloading the Natural Earth medium scale (1:50m) country boundaries:
shp_fname = shpreader.natural_earth(category='cultural',
                                    name='admin_0_countries',
                                    resolution='50m')

# Reading the shapefile and adding a 'Country' column with standardized short names:
df_shp = get_worldshp(fname=shp_fname)
df_shp['Country'] = coco.convert(names=df_shp['NAME_LONG'], to='name_short')

# Attaching geometries to DI (i.e. the data for the 5 components of the EIU Democracy Index).
# A left join keeps every DI row, whether or not a shape was found for it:
DI_shape = pd.merge(DI, df_shp, how='left', on='Country')
DI_gdf = GeoDataFrame(DI_shape, geometry=DI_shape['geometry'], crs='EPSG:4326')

# Attaching geometries to the combined dataset in the same way:
combined_gdf = pd.merge(combined, df_shp, how='left', on='Country')
combined_gdf = GeoDataFrame(combined_gdf, geometry=combined_gdf['geometry'], crs='EPSG:4326')

Indian Ocean Territories not found in regex
Ashmore and Cartier Islands not found in regex
Siachen Glacier not found in regex


In [5]:
# Indicator names of interest: the columns of `combined` at positions 3 through 17.
cols = combined.columns[3:18].tolist()

## Interactive choropleths

### 2020 Democracy Index and its 5 components:

In [6]:
# Serializing the Democracy Index GeoDataFrame to GeoJSON text and wrapping it in a Bokeh source:
_di_geojson_text = DI_gdf.to_json()
DI_geojson = GeoJSONDataSource(geojson=_di_geojson_text)
# The columns at positions 1 through 6 are the ones offered as selectable indicators:
DI_cols = DI_gdf.columns[slice(1, 7)]

In [7]:
def plot_DI(df, Variable):
    """Build a choropleth whose fill colour follows one field of ``df``.

    Parameters
    ----------
    df : data source handed to ``patches`` (supplies "xs", "ys" and the fields).
    Variable : name of the field that drives the fill colour.

    Returns
    -------
    The configured figure; the caller is responsible for showing it.
    """
    # Reversed 9-class RdYlGn palette, shared by the patches and the colour bar.
    color_mapper = LinearColorMapper(palette=brewer['RdYlGn'][9][::-1])

    fig = figure(title="Democracy Index", plot_height=400, plot_width=800)
    fill = dict(field=Variable, transform=color_mapper)
    shapes = fig.patches("xs", "ys",
                         source=df,
                         fill_color=fill,
                         line_color="black",
                         line_width=0.25,
                         fill_alpha=1)

    # A map needs neither grid lines nor axes.
    for grid in (fig.xgrid, fig.ygrid):
        grid.grid_line_color = None
    fig.axis.visible = False

    # Hover tooltip (always reports the overall score).
    hover = HoverTool(renderers=[shapes],
                      tooltips='@Country: Overall Democracy Index score of @Overall')
    fig.add_tools(hover)

    # Colour bar acting as the legend.
    legend_bar = ColorBar(color_mapper=color_mapper, width=8, location=(0, 0),
                          title="Democracy Index")
    fig.add_layout(legend_bar, 'left')
    return fig

In [8]:
@interact(Variable=DI_cols)
def make_plot_for_DI(Variable):
    """Draw and display the Democracy Index choropleth for the selected column."""
    show(plot_DI(Variable=Variable, df=DI_geojson))

interactive(children=(Dropdown(description='Variable', options=('Overall', 'I: Electoral process and pluralism…

### Comparing the Democracy Index with indicators by year:

In [9]:
def pick_year(gdf, year):
    """Return a GeoJSONDataSource built from the rows of ``gdf`` whose "Year" equals ``year``."""
    in_year = gdf["Year"] == year
    return GeoJSONDataSource(geojson=gdf[in_year].to_json())

In [10]:
def choropleth_demo(df):
    """Build the choropleth for the "Democracy Index" field of ``df``.

    Parameters
    ----------
    df : data source handed to ``patches`` (supplies "xs", "ys" and the fields).

    Returns
    -------
    The configured figure; the caller is responsible for showing it.
    """
    # Reversed 9-class RdYlGn palette, shared by the patches and the colour bar.
    color_mapper = LinearColorMapper(palette=brewer['RdYlGn'][9][::-1])

    fig = figure(title='Democracy Index', plot_height=400, plot_width=800)
    fill = dict(field="Democracy Index", transform=color_mapper)
    shapes = fig.patches("xs", "ys",
                         source=df,
                         fill_color=fill,
                         line_color="black",
                         line_width=0.25,
                         fill_alpha=1)

    # A map needs neither grid lines nor axes.
    for grid in (fig.xgrid, fig.ygrid):
        grid.grid_line_color = None
    fig.axis.visible = False

    # Hover tooltip listing the country and its score.
    tooltip_rows = [('Country', '@Country'),
                    ('Democracy Index', '@{Democracy Index}')]
    fig.add_tools(HoverTool(renderers=[shapes], tooltips=tooltip_rows))

    # Colour bar acting as the legend.
    legend_bar = ColorBar(color_mapper=color_mapper, width=8, location=(0, 0),
                          title="Democracy Index")
    fig.add_layout(legend_bar, 'left')
    return fig

In [11]:
def choropleth_var(Variable, df):
    """Build the choropleth for one indicator.

    Parameters
    ----------
    Variable : name of the field in ``df`` that drives the fill colour; also
        used as the plot title, colour-bar title and hover label.
    df : data source handed to ``patches`` (supplies "xs", "ys" and the fields).

    Returns
    -------
    The configured figure; the caller is responsible for showing it.
    """
    # Indicators drawn with the reversed palette; every other indicator uses
    # the palette in its stored order.
    reversed_palette_indicators = (
        "Democracy Index", "GDP per capita, PPP",
        "Foreign Investment", "Life Expectancy",
        "Expected Years of Schooling", "Expected Years of Schooling, Female",
        "Expected Years of Schooling, Male", "Forest Area", "EPI", "HDI")
    palette = brewer['RdYlGn'][9]
    if Variable in reversed_palette_indicators:
        palette = palette[::-1]
    cmap = LinearColorMapper(palette=palette)
    # Plotting choropleth:
    plot = figure(title=f'{Variable}',
                  plot_height=400,
                  plot_width=800)
    countries = plot.patches("xs", "ys", source=df,
                             fill_color={"field": Variable, "transform": cmap},
                             line_color="black", line_width=0.25, fill_alpha=1)
    # Formatting:
    plot.xgrid.grid_line_color = None
    plot.ygrid.grid_line_color = None
    plot.axis.visible = False
    # Adding the hover tooltip. The indicator's value is shown next to the
    # country, as choropleth_demo does for the Democracy Index; the braces let
    # Bokeh resolve field names that contain spaces or punctuation.
    plot.add_tools(HoverTool(renderers=[countries],
                             tooltips=[('Country', '@Country'),
                                       (Variable, '@{' + Variable + '}')]))
    # Adding the legend:
    color_bar = ColorBar(color_mapper=cmap, width=8, location=(0, 0), title=f'{Variable}')
    plot.add_layout(color_bar, 'left')
    return plot

In [12]:
# For every indicator, collect the distinct years in which it has a non-missing value,
# then key those lists by indicator name:
years = [list(combined.loc[combined[col].notna(), "Year"].unique()) for col in cols]
avail_years = dict(zip(cols, years))

In [13]:
# Making the slider (for years) dependent on the dropdown menu (for indicators)
widget_one = Dropdown(options=list(avail_years))
widget_two = IntSlider(max=2020)


def update_slider(*args):
    """Fit the year slider to the years available for the selected indicator.

    The slider's bounds become the first and last available year and its value
    jumps to the last one. Accepts (and ignores) the change notification that
    ipywidgets passes to observers.
    """
    available = avail_years.get(widget_one.value, [])
    if len(available) == 0:
        # Nothing to fit the slider to; leave it as it is.
        return
    lowest, highest = int(min(available)), int(max(available))
    # ipywidgets rejects a min above the current max (and a max below the
    # current min), so the bound that widens the range has to be set first.
    if lowest > widget_two.max:
        widget_two.max = highest
        widget_two.min = lowest
    else:
        widget_two.min = lowest
        widget_two.max = highest
    widget_two.value = highest


# React to a new selection only, not to every trait change of the dropdown:
widget_one.observe(update_slider, names='value')
# Fit the slider to the initially selected indicator as well:
update_slider()

In [14]:
@interact(Variable=widget_one, Year=widget_two)
def plot_choropleth(Variable, Year):
    """Show the Democracy Index map above the selected indicator's map for one year."""
    year_source = pick_year(gdf=combined_gdf, year=Year)
    panels = [choropleth_demo(df=year_source),
              choropleth_var(Variable=Variable, df=year_source)]
    show(column(*panels))

interactive(children=(Dropdown(description='Variable', options=('GDP per capita, PPP', 'Unemployment, total', …

## Line Plots

In [15]:
# Distinct country names, in order of first appearance in `combined`:
countries = pd.unique(combined['Country'])

In [16]:
def compare(df, country_name):
    """Plot one country's Democracy Index next to the yearly average over all rows of ``df``.

    Parameters
    ----------
    df : frame with 'Country', 'Year' and 'Democracy Index' columns.
    country_name : value of 'Country' whose rows form the second line.

    Returns
    -------
    The figure holding both lines; the caller is responsible for showing it.
    """
    # Calculating the global average of the Democracy Index for every year:
    world_avg = df.groupby('Year')['Democracy Index'].mean()
    # Selecting the years and scores of the requested country:
    country_rows = df.loc[df['Country'] == country_name, ['Year', 'Democracy Index']]

    fig = figure(title=f'Democracy Index ({country_name})',
                 x_axis_label='Year',
                 y_axis_label='Democracy Index',
                 plot_height=300,
                 plot_width=350,
                 y_range=(0, 10))
    line_data = {
        'xs': [list(world_avg.index), country_rows['Year']],
        'ys': [world_avg, country_rows['Democracy Index']],
        'label': ['World Average', country_name],
        'col': ["green", "orange"],
    }
    fig.multi_line(source=line_data, xs='xs', ys='ys', color='col',
                   legend_group='label', line_width=3)
    return fig

In [17]:
def year_change(df, country_name, ind):
    """Plot the standardized Democracy Index and one indicator over time for a country.

    Parameters
    ----------
    df : frame with 'Country', 'Year', 'Democracy Index' and ``ind`` columns.
    country_name : value of 'Country' to plot.
    ind : name of the indicator column compared with the Democracy Index.

    Returns
    -------
    The figure holding both lines; the caller is responsible for showing it.
    """
    def _standardize(series):
        # Distance from the series mean, in units of its standard deviation.
        return list((series - series.mean()) / series.std())

    fig = figure(title=f'Variation Trend ({country_name})',
                 x_axis_label='Year',
                 y_axis_label='Number of Standard Deviations',
                 plot_height=300,
                 plot_width=500,
                 y_range=(-3, 3))
    country_rows = df[df['Country'] == country_name]
    year_axis = country_rows['Year']
    # Both series are standardized so their trends can share one axis:
    line_data = {
        'xs': [year_axis, year_axis],
        'ys': [_standardize(country_rows['Democracy Index']),
               _standardize(country_rows[ind])],
        'label': ['Democracy Index', ind],
        'col': ["firebrick", "navy"],
    }
    fig.multi_line(source=line_data, xs='xs', ys='ys', color='col',
                   legend_group='label', line_width=3)
    return fig

In [18]:
@interact(Country=countries, Indicator=cols)
def make_plot_for(Country=countries[0], Indicator=cols[0]):
    """Show the world-average comparison and the trend plot side by side for one country."""
    average_panel = compare(combined, Country)
    trend_panel = year_change(combined, Country, Indicator)
    show(row(average_panel, trend_panel))

interactive(children=(Dropdown(description='Country', options=('Canada', 'United States', 'Austria', 'Belgium'…

## Scatter Plot: Relationship between Democracy Index score, GDP, and HDI score

In [19]:
# Adding the natural log of GDP per capita (PPP) as its own column:
combined['log_GDP'] = combined['GDP per capita, PPP'].map(log)

In [20]:
def big_scatter_plot(df, yr):
    """Scatter HDI against log GDP per capita for one year.

    Both the colour and the size of each point follow its Democracy Index score.

    Parameters
    ----------
    df : frame with 'Year', 'Democracy Index', 'log_GDP', 'HDI' and 'Country' columns.
    yr : value of 'Year' to plot.

    Returns
    -------
    The configured figure; the caller is responsible for showing it.
    """
    df_year = df[df['Year'] == yr]
    # The score range of the selected year anchors both the colour and the size scale:
    di_low = df_year['Democracy Index'].min()
    di_high = df_year['Democracy Index'].max()
    # Making the colors of the points dependent on the Democracy Index score:
    mapper = linear_cmap(field_name='Democracy Index', palette=Spectral6,
                         low=di_low, high=di_high)
    # Likewise, the size of the points depends on the Democracy Index score:
    size_mapper = LinearInterpolator(x=[di_low, di_high], y=[3, 30])
    # The x axis shows the 'log_GDP' column, so it is labelled as a logarithm:
    plot = figure(title="Democracy Index, GDP per capita (PPP), and HDI",
                  x_axis_label='Log of GDP per capita (PPP)',
                  y_axis_label='Human Development Index',
                  plot_height=450, plot_width=800,
                  x_range=(6, 12), y_range=(0, 1.2),
                  tools="hover",
                  tooltips="@Country: Democracy Index score of @{Democracy Index}")
    plot.scatter('log_GDP', 'HDI', source=df_year, fill_alpha=0.6,
                 size={'field': 'Democracy Index', 'transform': size_mapper},
                 line_color=mapper, color=mapper)
    # Adding the legend (color bar):
    color_bar = ColorBar(color_mapper=mapper['transform'], width=8, location=(0, 0),
                         title="Democracy Index")
    plot.add_layout(color_bar, 'right')
    return plot

In [21]:
# A (min, max) pair makes interact build an integer slider over these years.
years = (2010, 2019)


@interact(Year=years)
def draw_big_sp(Year=years[0]):
    """Display the Democracy Index / GDP / HDI scatter plot for ``Year`` and return it."""
    scatter = big_scatter_plot(yr=Year, df=combined)
    show(scatter)
    return scatter

interactive(children=(IntSlider(value=2010, description='Year', max=2019, min=2010), Output()), _dom_classes=(…

## Scatter Plot: Comparing Democracy Index scores against all indicators

In [22]:
# Rows without a region are grouped under the label 'Unknown':
combined['region'] = combined['region'].fillna('Unknown')
# One palette entry per region name, assigned in alphabetical order:
_region_factors = sorted(combined['region'].unique())
index_cmap = factor_cmap('region', palette=Spectral5, factors=_region_factors)

In [23]:
# Different variables have data from different years,
# so we need to make the dropdown menu for year selection depend on the indicator chosen.
options = {'GDP per capita': list(range(2010, 2021)),
           'Unemployment': list(range(2010, 2021)),
           'GINI Index': list(range(2010, 2020)),
           'Foreign Investment': list(range(2010, 2021)),
           'Maternal Mortality ratio': list(range(2010, 2018)),
           'Life Expectancy': list(range(2010, 2020)),
           'Expected Years of Schooling': list(range(2010, 2020)),
           'Expected Years of Schooling, Female': list(range(2010, 2020)),
           'Expected Years of Schooling, Male': list(range(2010, 2020)),
           'Mortality Rate, Infant': list(range(2010, 2020)),
           'CO2 Emissions': list(range(2010, 2019)),
           'Forest Area': list(range(2010, 2021)),
           'Unemployment rate (female to male ratio)': list(range(2010, 2020)),
           'EPI': [2020],
           'HDI': list(range(2010, 2020))}
first_widget = Dropdown(options=list(options))
second_widget = Dropdown()


def update(*args):
    """Offer only the years listed in ``options`` for the selected indicator.

    Accepts (and ignores) the change notification that ipywidgets passes to
    observers; an unknown or missing selection yields an empty year list.
    """
    second_widget.options = options.get(first_widget.value, [])


# React to a new selection only, not to every trait change of the dropdown:
first_widget.observe(update, names='value')
# Fill the year dropdown for the initial selection, so the first render
# already has a year to work with:
update()

In [24]:
def plot_small_scatter(df, variable, year):
    """Scatter one indicator against the Democracy Index for a single year.

    Parameters
    ----------
    df : frame with 'Year', 'Democracy Index', 'region', 'Country' and indicator columns.
    variable : indicator column to put on the y axis (named as after the renaming below).
    year : value of 'Year' to plot.

    Returns
    -------
    The configured figure; the caller is responsible for showing it.
    """
    # Shorter column names for three of the columns:
    renamed = df.rename(columns={'Democracy Index': 'Democracy_Index',
                                 'GDP per capita, PPP': 'GDP per capita',
                                 'Unemployment, total': 'Unemployment'})
    source = ColumnDataSource(data=renamed[renamed['Year'] == year])

    # Underscores are shown as spaces in the title and the axis label.
    pretty_name = variable.replace('_', ' ')
    fig = figure(plot_width=700,
                 plot_height=450,
                 title=f'Democracy Index x {pretty_name}',
                 toolbar_location=None,
                 tools="hover",
                 tooltips="@Country: Democracy Index score of @Democracy_Index")
    fig.scatter('Democracy_Index', variable,
                source=source,
                fill_alpha=0.6,
                size=16,
                fill_color=index_cmap,
                legend_group='region')
    fig.yaxis.axis_label = pretty_name
    fig.xaxis.axis_label = 'Democracy Index'
    fig.legend.location = "top_left"
    return fig

In [25]:
@interact(Variable=first_widget, Year=second_widget)
def make_small_scatter(Variable, Year):
    """Draw and display the indicator-versus-Democracy-Index scatter for the selection."""
    show(plot_small_scatter(df=combined, variable=Variable, year=Year))

interactive(children=(Dropdown(description='Variable', options=('GDP per capita', 'Unemployment', 'GINI Index'…