In [104]:
import os
import pandas as pd
import json
import folium
from ipywidgets.widgets import interact

We first import the topojson

In [105]:
topojson_europe_path = 'topojson/europe.topojson.json'
# Parse the TopoJSON inside a context manager so the file handle is closed
# as soon as loading finishes instead of being left open.
with open(topojson_europe_path) as topojson_file:
    topojson_europe = json.load(topojson_file)

To improve map interaction, we want to add a marker on each country, so that we can display the exact data shown on the map. To do so, we have downloaded a csv of all the countries with the position of their respective capitals. We check that the countries in the topojson match the countries in the csv, and we rename the countries that do not match.
Also, we remove the 'United States' row as it causes a shift in the csv, and we do not need it.
We also remove Vatican City because we don't have data for this country, and it hides Italy's data on the map.

Link for the csv: http://techslides.com/list-of-countries-and-capitals

In [106]:
# Capital coordinates, used to place one marker per country.
# Source: http://techslides.com/list-of-countries-and-capitals
countries = pd.read_csv('./europe_data/country-capitals.csv')
countries.index = countries['CountryName']

# Print every TopoJSON country name that has no matching row in the csv.
for geometry in topojson_europe['objects']['europe']['geometries']:
    topo_name = geometry['properties']['NAME']
    if topo_name not in countries.index:
        print(topo_name)

# Discard the rows whose latitude field contains the text ' D.C.' and relabel
# the index entries that are spelled differently in the TopoJSON.
csv_to_topojson_names = {
    'Moldova': 'Republic of Moldova',
    'Macedonia': 'The former Yugoslav Republic of Macedonia',
}
has_usable_latitude = countries['CapitalLatitude'] != ' D.C.'
countries = countries[has_usable_latitude].rename(csv_to_topojson_names)

The former Yugoslav Republic of Macedonia
Republic of Moldova
Holy See (Vatican City)


Here we define all the functions implemented to output the map we want.

1. The opacity_nan function determines whether we have data for a given country. If there is data, an opacity of 0 is returned for the country; if there is no data, an opacity of 1. With this opacity, we then apply a grey layer (in the function put_nan_layer) to every country, which is ultimately visible only on countries with no data. This lets our map clearly show the countries for which data is unavailable.

2. get_value_or_NaN returns the value of a field, or an 'Unavailable data' message when the field is empty or doesn't exist. This value will be displayed in markers.

3. get_clean_df returns a clean pandas DataFrame given the path of the Excel file we want to load.

4. put_nan_layer puts the grey layer, calling the opacity_nan function.

5. generate_map creates the map we want for a given Excel path and a year. We can specify whether we want the markers displayed or not (by default we do not display them). It calls the built-in choropleth function to draw the density map, and then sets the NaN layer before adding the markers if asked.

6. generate_growth_map does the same as generate_map but shows the evolution between two years passed as arguments.



In [107]:
def opacity_nan(df, country,year):
    """Return the opacity of the "no data" overlay for one country.

    The result is 0 when `country` appears in the index of `df[year]` once
    missing values have been dropped, and 1 otherwise.
    """
    countries_with_data = df[year].dropna().index
    return 0 if country in countries_with_data else 1
    
def get_value_or_NaN(df, country, year):
    """Return the value of `df` at (`country`, `year`) formatted for a marker popup.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index holds country names and whose columns hold years.
    country : row label to look up.
    year : column label to look up.

    Returns
    -------
    str
        The cell value followed by '%', or 'Unavailable data' when the
        country row is missing, the year column is missing, or the cell is
        null.
    """
    # Guard both axes: a missing year column would otherwise raise KeyError
    # instead of producing the documented fallback message.
    if country in df.index and year in df.columns:
        # Single label-based lookup instead of two chained ones.
        value = df.loc[country, year]
        if pd.notnull(value):
            return str(value) + '%'
    return 'Unavailable data'

def get_clean_df(excel_path,title):
    """Load an Excel sheet and return it as a float DataFrame.

    Rows containing any missing cell are dropped right after loading. The
    values of the column named `title` become the row index, the first
    remaining row becomes the column labels, the row labelled 'geo\\time' is
    removed, ':' cells become NaN and everything is cast to float.
    """
    sheet = pd.read_excel(excel_path).dropna()

    # Index rows by the values of the `title` column, then drop that column.
    table = sheet.set_index(sheet[title].values).drop(title, axis=1)

    # Promote the first row to column labels and remove the 'geo\time' row
    # (presumably that same header row -- confirm against the source files).
    table.columns = table.iloc[0]
    table = table.drop('geo\\time', axis=0)

    # ':' is replaced by the string 'NaN', which the float cast turns into NaN.
    table = table.replace(':', 'NaN').astype(float)

    # Align this row label with the spelling used elsewhere in the notebook.
    index_renames = {
        'Former Yugoslav Republic of Macedonia, the' :'The former Yugoslav Republic of Macedonia',
    }
    return table.rename(index_renames)

def put_nan_layer(m, df, year):
    """Add a grey TopoJSON overlay to `m`, styled per country by opacity_nan.

    Every feature is filled with '#5b5858'; its fill opacity is whatever
    opacity_nan returns for that feature's NAME property, `df` and `year`.
    """

    def grey_style(feature):
        # Style dict for one feature of the overlay.
        country = feature['properties']['NAME']
        return {
            'fillColor':  '#5b5858',
            'fillOpacity': opacity_nan(df, country, year),
        }

    overlay = folium.TopoJson(
        topojson_europe,
        'objects.europe',
        style_function=grey_style,
    )
    overlay.add_to(m)
    
def generate_map(excel_path, title, legend_title, year, markers=False):
    """Build a choropleth map of Europe for one year of one Excel dataset.

    Parameters
    ----------
    excel_path : path of the Excel file, passed to get_clean_df.
    title : name of the label column in the sheet, passed to get_clean_df.
    legend_title : layer name; also used (with ' (%)' appended) as legend.
    year : column label of the cleaned frame to plot.
    markers : when truthy, add one marker per TopoJSON country found in the
        `countries` table, with a popup showing the country's value.

    Returns
    -------
    The folium map with the choropleth, the grey "no data" overlay, the
    layer control and, optionally, the markers.
    """
    df = get_clean_df(excel_path, title)

    m_europe = folium.Map([60,30],zoom_start=3.4,min_zoom=3.)

    m_europe.choropleth(
        geo_data=topojson_europe,
        topojson='objects.europe',
        name=legend_title,
        data=df,
        columns=[df.index, year],
        key_on='feature.properties.NAME',
        fill_color='YlOrRd',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=legend_title +' (%)'
    )

    put_nan_layer(m_europe, df, year)

    folium.LayerControl().add_to(m_europe)

    if markers:
        # Convert the coordinate columns to float once, not once per country.
        latitudes = countries['CapitalLatitude'].astype(float)
        longitudes = countries['CapitalLongitude'].astype(float)
        for geometry in topojson_europe['objects']['europe']['geometries']:
            name = geometry['properties']['NAME']
            if name in countries.index:
                folium.Marker(
                    [latitudes.loc[name], longitudes.loc[name]],
                    popup=name + ': ' + get_value_or_NaN(df, name, year),
                    icon=folium.Icon(icon='cloud')
                ).add_to(m_europe)

    return m_europe

def generate_growth_map(excel_path, title, legend_title, year_start, year_end, markers=False):
    """Build a choropleth map of the change of a dataset between two years.

    The plotted value is ``(df[year_end] - df[year_start]) * -1``, i.e. the
    sign is flipped so that a decrease of the underlying rate is positive.

    Parameters
    ----------
    excel_path : path of the Excel file, passed to get_clean_df.
    title : name of the label column in the sheet, passed to get_clean_df.
    legend_title : layer name; also used (with ' (%)' appended) as legend.
    year_start, year_end : column labels (strings) of the cleaned frame.
    markers : when truthy, add one marker per TopoJSON country found in the
        `countries` table, with a popup showing the country's value.

    Returns
    -------
    The folium map with the choropleth, the grey "no data" overlay, the
    layer control and, optionally, the markers.
    """
    df = get_clean_df(excel_path, title)

    m_europe = folium.Map([60,30],zoom_start=3.4,min_zoom=3.)

    # Label of the derived column: '<start>-<end>'. The original repeated
    # year_start on both sides, so the label never named the end year.
    year = year_start + '-' + year_end
    df[year] = (df[year_end] - df[year_start])*-1

    m_europe.choropleth(
        geo_data=topojson_europe,
        topojson='objects.europe',
        name=legend_title,
        data=df,
        columns=[df.index, year],
        key_on='feature.properties.NAME',
        fill_color='RdBu',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=legend_title +' (%)'
    )

    put_nan_layer(m_europe, df, year)

    folium.LayerControl().add_to(m_europe)

    if markers:
        # Convert the coordinate columns to float once, not once per country.
        latitudes = countries['CapitalLatitude'].astype(float)
        longitudes = countries['CapitalLongitude'].astype(float)
        for geometry in topojson_europe['objects']['europe']['geometries']:
            name = geometry['properties']['NAME']
            if name in countries.index:
                folium.Marker(
                    [latitudes.loc[name], longitudes.loc[name]],
                    popup=name + ': ' + get_value_or_NaN(df, name, year),
                    icon=folium.Icon(icon='cloud')
                ).add_to(m_europe)

    return m_europe

This function defines the maps we can query. It is the function that will be used in the interact widget.

In [108]:
def query_map(query_name, sex, year, markers=0):
    """Print the description of a query and return the corresponding map.

    Parameters
    ----------
    query_name : 'Unemployment', 'Young people Inactivity' or
        'Long Term Unemployment'.
    sex : suffix of the Excel file name and prefix of the legend
        (the widget passes 'Total', 'Male' or 'Female').
    year : year to plot; converted to str, so an int is accepted.
    markers : forwarded to generate_map.

    Returns
    -------
    The map built by generate_map, or None when `query_name` is not one of
    the supported queries.
    """
    year = str(year)  # allows to pass year as int argument

    # query name -> (description file, Excel path prefix, sheet title, legend label)
    queries = {
        'Unemployment': (
            './europe_data/unemployment_description.txt',
            './europe_data/unemployment_15_74_',
            'Unemployment rate by sex',
            ' Unemployment in ',
        ),
        'Young people Inactivity': (
            './europe_data/no_activity_15_24_people_description.txt',
            './europe_data/no_activity_15_24_people_',
            'Young people neither in employment nor in education and training by sex',
            ' Young people Inactivity in ',
        ),
        'Long Term Unemployment': (
            './europe_data/long_term_unemployment_description.txt',
            './europe_data/long_term_unemployment_',
            'Long-term unemployment rate by sex',
            ' Long Term Unemployment in ',
        ),
    }

    if query_name not in queries:
        return None

    description_path, excel_prefix, title, legend_label = queries[query_name]

    # The context manager closes the file even if reading or printing fails.
    with open(description_path, 'r') as description_file:
        print(description_file.read())

    return generate_map(excel_prefix + sex + '.xls', title=title,
                        legend_title=sex + legend_label + year, year=year, markers=markers)
    
    

Here is the widget that lets us show the maps we created. We can display maps for the global unemployment ratio, Young people Inactivity and Long Term Unemployment. For each of these maps, we can choose a sex and a year and display the corresponding map. A description of how the ratio is calculated for each request is displayed when running the widget.
We recommend opening it in a Python notebook to be able to choose among all the available maps, and to get the description.
Otherwise, here are some maps we generated:

[Total unemployment in 2016 (with markers)](./europe_data/tot_unempl_2016_mark.html)

[Male unemployment in 2016 (with markers):](./europe_data/male_unempl_2016_mark.html)

[Female unemployment in 2016 (with markers):](./europe_data/female_unempl_2016_mark.html)

[Total Young people Inactivity in 2016 (with markers):](./europe_data/total_young_inactivity_2016_mark.html)

[Total Long Term Unemployment in 2016 (with markers):](./europe_data/total_long_term_2016_mark.html)

In [109]:
# Build the 2016 female-unemployment map with markers and export it to HTML.
export_path = './europe_data/female_unempl_2016_mark.html'
mymap = query_map(
    query_name='Unemployment',
    sex='Female',
    year=2016,
    markers=1,
)
mymap.save(export_path)

Unemployment rate represents unemployed persons as a percentage of the labour force. The labour force is the total number of people employed and unemployed. Unemployed persons comprise persons aged 15 to 74 who were: 
a. without work during the reference week,
b. currently available for work, i.e. were available for paid employment or self-employment before the end of the two weeks following the reference week, 
c. actively seeking work, i.e. had taken specific steps in the four weeks period ending with the reference week to seek paid employment or self-employment or who found a job to start later, i.e. within a period of, at most, three months.
The indicator is based on the EU Labour Force Survey.



In [111]:
# Interactive selector: one dropdown per query_map argument.
interact(
    query_map,
    query_name=['Unemployment', 'Young people Inactivity', 'Long Term Unemployment'],
    sex=['Total', 'Male', 'Female'],
    year=range(2005, 2017),
    markers=[0, 1],
)

<function __main__.query_map>

In [112]:
def query_growth_map(query_name, sex, year_start, year_end, markers=0):
    """Print the description of a growth query and return the corresponding map.

    Parameters
    ----------
    query_name : 'Employment Growth' or 'Young people Activity Growth'.
    sex : suffix of the Excel file name and prefix of the legend
        (the widget passes 'Total', 'Male' or 'Female').
    year_start, year_end : bounds of the period; converted to str, so ints
        are accepted. `year_start` must be strictly before `year_end`.
    markers : forwarded to generate_growth_map.

    Returns
    -------
    The map built by generate_growth_map, or None when the years are not in
    increasing order or `query_name` is not one of the supported queries.
    """
    year_start = str(year_start)
    year_end = str(year_end)

    # Nothing to plot unless the period is strictly increasing.
    if int(year_start) >= int(year_end):
        return None

    # query name -> (description file, Excel path prefix, sheet title, legend label)
    queries = {
        'Employment Growth': (
            './europe_data/unemployment_description.txt',
            './europe_data/unemployment_15_74_',
            'Unemployment rate by sex',
            ' Employment growth between ',
        ),
        'Young people Activity Growth': (
            # The original printed the unemployment description here; use the
            # description of the dataset that is actually plotted.
            './europe_data/no_activity_15_24_people_description.txt',
            './europe_data/no_activity_15_24_people_',
            'Young people neither in employment nor in education and training by sex',
            ' Young Activity growth between ',
        ),
    }

    if query_name not in queries:
        return None

    description_path, excel_prefix, title, legend_label = queries[query_name]

    # The context manager closes the file even if reading or printing fails.
    with open(description_path, 'r') as description_file:
        print(description_file.read())

    return generate_growth_map(excel_prefix + sex + '.xls', title=title,
                               legend_title=sex + legend_label + year_start + ' and ' + year_end,
                               year_start=year_start, year_end=year_end, markers=markers)

Below is defined a new widget that lets us choose two different years and see how a country improved its employment ratio over this time span.

We managed to have a 'positive' color (blue) when the employment ratio increased in a country, and a 'negative' color (red) when the employment ratio decreased. The scale being automatically defined in the built-in choropleth function, we couldn't fix the zero to the white color.
It might have been judicious to define our own choropleth function in order to be more precise on the color map choice.

Here is an example of the maps we can obtain (if you're not running the python notebook and hence can't have access to the widget):

[Difference of employment ratio between 2005 and 2016 (with markers):](./europe_data/total_growth_employment_2005_2016_mark.html)


In [113]:
# Interactive selector: one dropdown per query_growth_map argument.
interact(
    query_growth_map,
    query_name=['Employment Growth', 'Young people Activity Growth'],
    sex=['Total', 'Male', 'Female'],
    year_start=range(2005, 2017),
    year_end=range(2005, 2017),
    markers=[0, 1],
)

<function __main__.query_growth_map>