In [None]:
# Import libraries
import os
import pandas as pd
import json
import numpy as np
import folium
from ipywidgets import interact


#pd.options.mode.chained_assignment = None  # default='warn'
%matplotlib inline

# Exercise 2
## Setting up the DataFrame of the Swiss unemployment

In [None]:
# Read the first 52 data rows; the first two rows form the column header and
# the first two columns form the row index.
swiss_unemployment_data = pd.read_csv(
    'swiss_unemployment_rate_year.csv',
    encoding='utf-16',
    header=[0, 1],
    index_col=[0, 1],
    nrows=52,
)

In [None]:
# Treat the "..." placeholder as a missing value (NaN).
swiss_unemployment_data = swiss_unemployment_data.replace("...", np.nan)

In [None]:
# Name the two row-index levels so that rows can be selected by canton and by sex.
swiss_unemployment_data.index = swiss_unemployment_data.index.set_names(['Canton', 'Sexe'])
swiss_unemployment_data

We can see that the numbers in the DataFrame above contain the apostrophe character, which makes it impossible to convert them to numerical values. We therefore remove that character from the swiss_unemployment_data DataFrame.
Then we convert every DataFrame entry to int, except for the 'Canton' index, which is left as a string, and the 'Taux de chômage' columns, which are kept as floats.

In [None]:
# Remove every apostrophe found inside the string cells so that the numbers
# can be converted to numerical types afterwards.
swiss_unemployment_data = swiss_unemployment_data.replace(
    to_replace=["'"],
    value='',
    regex=True,
)


Now we convert the values of the columns to numerical values in order to be able to apply numerical operations on them.

In [None]:
# Cast each column according to the measure named in its second header level.
for column_key in swiss_unemployment_data.columns:
    measure = column_key[1]
    if measure == "Taux de chômage des jeunes":
        # The youth unemployment rate is stored as float.
        swiss_unemployment_data[column_key] = swiss_unemployment_data[column_key].astype('float')
    elif measure != "Taux de chômage":
        # Every column other than the two rate columns is cast to int.
        swiss_unemployment_data[column_key] = swiss_unemployment_data[column_key].astype('int')


Now we calculate the size of the active population and the size of the active young population. To do so we refer to the definition of the unemployment rate according to the Swiss Confederation, which states that it is the number of people looking for a job divided by the size of the active population. The size of the active population is thus obtained by dividing the number of people looking for a job ('Demandeurs d'emploi') by the unemployment rate ('Taux de chômage'); since the rate is expressed in percent, it is first multiplied by 0.01. We will add a new column for each period investigated in the swiss_unemployment_data DataFrame.

In [None]:
def active_population_column(df, years=range(2005, 2018), month='Janvier'):
    """Add the active-population columns to ``df`` for every requested period.

    For each period ``'<month> <year>'`` two columns are written into ``df``
    (the frame is modified in place):

    * ``(period, 'Active population')``: "Demandeurs d'emploi" divided by the
      'Taux de chômage' (a percentage), rounded to the nearest integer.
    * ``(period, 'Active young population')``: "Chômeurs jeunes" divided by the
      'Taux de chômage des jeunes' (a percentage), rounded, kept as float.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are (period, measure) pairs.
    years : iterable of int, optional
        Years to process. Defaults to 2005-2017.
    month : str, optional
        Month prefix of the period labels. Defaults to 'Janvier'.

    Returns
    -------
    None
    """
    for year in years:
        period_name = month + " " + str(year)

        job_seekers = df[period_name, "Demandeurs d'emploi"].astype('int')
        rate = 0.01 * df[period_name, 'Taux de chômage']
        # Round *before* casting to int: casting first truncates the value and
        # turns the subsequent rounding into a no-op.
        df[period_name, 'Active population'] = \
            job_seekers.divide(rate).round().astype('int')

        young_unemployed = df[period_name, "Chômeurs jeunes"].astype('float')
        young_rate = 0.01 * df[period_name, 'Taux de chômage des jeunes']
        # Kept as float so that missing youth rates can stay NaN.
        df[period_name, 'Active young population'] = \
            young_unemployed.divide(young_rate).astype('float').round()
# Add the 'Active population' and 'Active young population' columns to the main DataFrame (in place).
active_population_column(swiss_unemployment_data)

Now we are going to add a new row per canton that gathers the statistics of the women ('Femmes') and men ('Hommes') categories. For the unemployment rate of young people for both sexes, we assume that the total active population is representative of the active population of young people, and we recompute the rate from the counts summed over both sexes.

In [None]:
# Sum the rows of each canton to obtain the figures for both sexes.
tot_df = swiss_unemployment_data.groupby('Canton').sum()

# Rates must not be summed: recompute them from the summed counts.
# A single pass over the periods is enough; each assignment is column-wide.
for year in range(2005, 2018):
    month = 'Janvier'
    period = month + ' ' + str(year)

    tot_df[period, 'Taux de chômage'] = (
        100 * tot_df[period, "Demandeurs d'emploi"].divide(tot_df[period, 'Active population'])
    ).round(1)

    young_rate = (
        100 * tot_df[period, "Chômeurs jeunes"].divide(tot_df[period, 'Active young population'])
    ).round(1)
    # A zero active young population yields +/-inf; treat that as missing.
    tot_df[period, 'Taux de chômage des jeunes'] = young_rate.replace([np.inf, -np.inf], np.nan)
    # The youth rate of this canton is explicitly set to missing.
    tot_df.loc['Appenzell Rhodes-Intérieures', (period, 'Taux de chômage des jeunes')] = np.nan

# Label the aggregated rows and make 'Sexe' the second index level again.
tot_df['Sexe'] = 'Both'
tot_df = tot_df.set_index('Sexe', append=True)

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows the same way.
swiss_unemployment_data = pd.concat([swiss_unemployment_data, tot_df])

Now that we have a clean DataFrame with data for every canton and for every gender, we are going to compute extra unemployment rates based on definitions that differ from the one used by the Swiss Confederation.
We are going to compute the unemployment rate for
- 'Long range unemployed' people ('Chômeurs de longue durée')
- 'Registered unemployed' people ('Chômeurs inscrits')

In [None]:
# Express the additional rates in percent, like 'Taux de chômage', so that all
# rate columns share one unit (the map legend reads 'Unemployment Rate (%)').
for year in range(2005, 2018):
    month = 'Janvier'
    period = month + ' ' + str(year)
    active_population = swiss_unemployment_data[period, 'Active population']

    swiss_unemployment_data[period, 'Long range unemployment rate'] = \
        100 * swiss_unemployment_data[period, 'Chômeurs de longue durée'].divide(active_population)

    swiss_unemployment_data[period, 'Registered unemployment rate'] = \
        100 * swiss_unemployment_data[period, 'Chômeurs inscrits'].divide(active_population)

            


We now sort the columns so that they are all ordered by year.

In [None]:
# Order the columns by both header levels, then order the rows by their index.
swiss_unemployment_data = (
    swiss_unemployment_data
    .sort_index(axis=1, level=[0, 1])
    .sort_index()
)

In [None]:
# Display the resulting DataFrame.
swiss_unemployment_data

## Matching of cantons name between DataFrame and TopoJson

We load a TopoJson in order to define the cantons of Switzerland.

In [None]:
ch_cantons_path = 'topojson/ch-cantons.topojson.json'

# Use a context manager so the file handle is closed once the JSON is parsed,
# and read it as UTF-8 so accented canton names do not depend on the locale.
with open(ch_cantons_path, encoding='utf-8') as ch_cantons_file:
    ch_cantons_json = json.load(ch_cantons_file)

# Collect the name of every canton geometry rather than assuming there are 26.
canton_names_json = [
    geometry['properties']['name']
    for geometry in ch_cantons_json['objects']['cantons']['geometries']
]
canton_names_json

Looking at the way the cantons are named in the JSON file above, we can point out that the canton names are not specified in the exact same way as in the swiss_unemployment_data DataFrame that we created earlier. Therefore the idea now is to print the canton names specified in the JSON and to compare it with the one used as index in the DataFrame.

First we get the number of cantons specified in the JSON file. There are 26 cantons, which matches the data we have in the swiss_unemployment_data DataFrame.

In [None]:
# Number of canton geometries contained in the TopoJSON.
len(ch_cantons_json['objects']['cantons']['geometries'])

Now we compare all the names of the JSON along with the names of our DataFrame. It shows the changes that we are going to apply to our DataFrame indexes.

In [None]:
# Sort the TopoJSON names in ascending order, then pair each one with the canton
# found at every third row of the DataFrame index.
canton_names_json.sort()
name_pairs = []
for canton_position in range(26):
    json_name = canton_names_json[canton_position]
    dataframe_name = swiss_unemployment_data.index[3 * canton_position][0]
    name_pairs.append((json_name, " -> ", dataframe_name))
name_pairs

We can see that the order in which the canton names are specified in the JSON is approximately the same as the one used for indexing the swiss_unemployment_data DataFrame, except for a few names that need to be permuted: 'Aargau' needs to be inserted after 'Appenzell Innerrhoden' and 'Bern/Berne' needs to be inserted before 'Basel-Landschaft'. We therefore carry out these slight modifications, and then we add the canton names specified in the JSON to the DataFrame.

In [None]:
# Modification of the indexing order of swiss_unemployment_data.
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the same row
# slices in the same order.

# Changing Argovie's position: rows 6-8 are moved in front of rows 0-5.
swiss_unemployment_data = pd.concat([
    swiss_unemployment_data.iloc[6:9],
    swiss_unemployment_data.iloc[0:6],
    swiss_unemployment_data.iloc[9:],
])

# Changing Bern's position: rows 9-11 are moved behind rows 12-17.
swiss_unemployment_data = pd.concat([
    swiss_unemployment_data.iloc[:9],
    swiss_unemployment_data.iloc[12:18],
    swiss_unemployment_data.iloc[9:12],
    swiss_unemployment_data.iloc[18:],
])

Below we just check that the matching between the canton names of the swiss_unemployment_data DataFrame and the ones of the JSON is perfect.

In [None]:
# Pair each TopoJSON name with the canton found at every third DataFrame row.
[
    (canton_names_json[canton_position], " -> ", swiss_unemployment_data.index[3 * canton_position][0])
    for canton_position in range(26)
]


Now we will add a column to the swiss_unemployment_data DataFrame that specifies the canton names as they are defined in the TopoJSON. This makes it easy to link the DataFrame to the latter.

In [None]:
# Repeat every TopoJSON canton name three times in a row.
redim_ch_cantons_json = [
    canton_names_json[canton_position]
    for canton_position in range(26)
    for _ in range(3)
]

swiss_unemployment_data['Json canton'] = redim_ch_cantons_json

# Move the last column (the one just added) to the first position.
cols = swiss_unemployment_data.columns.tolist()
swiss_unemployment_data = swiss_unemployment_data[cols[-1:] + cols[:-1]]

In [None]:
# Show the first rows of the DataFrame.
swiss_unemployment_data.head()

## Creation of an interactive map
The interactive map that is developed in this section is a folium map centered on coordinates [46.8414, 8.24523] which is more or less the center of Switzerland. The map is shown below.

In [None]:
# Base map centred on [46.8414, 8.24523], with the zoom restricted to levels 7-9.
swiss_map = folium.Map(
    location=[46.8414, 8.24523],
    zoom_start=8,
    min_zoom=7,
    max_zoom=9,
)
swiss_map

We are now going to design a function called `map_creator` that takes various parameters as arguments and returns a choropleth map of the unemployment rate per Swiss canton. The user can interact with the map to choose the parameters that suit him/her thanks to the ipywidgets library. Three parameters can be defined: 
- The year (between 2005 and 2017)
- The sex (Both, Female, Male)
- The unemployment computation method

In [None]:
def map_creator(year, sex, attribute):
    """Build a choropleth map of an unemployment indicator per Swiss canton.

    Reads the notebook-level ``swiss_unemployment_data`` DataFrame and the
    ``ch_cantons_json`` TopoJSON.

    Parameters
    ----------
    year : int
        Year of the ``'Janvier <year>'`` period to display.
    sex : str
        Value of the second index level to select. 'Male' and 'Female' are
        translated to 'Hommes' and 'Femmes'; any other value is used as is.
        Ignored when ``attribute`` is 'Inequality Male - Female'.
    attribute : str
        Name of the column to display. 'Unemployment rate' and
        'Young unemployment rate' are translated to their French column names;
        'Inequality Male - Female' displays the 'Femmes' value minus the
        'Hommes' value of 'Taux de chômage'; any other value is used as is.

    Returns
    -------
    folium.Map
        A new map carrying the choropleth layer.
    """
    # Translate the English widget labels; other values pass through unchanged.
    sex = {'Male': 'Hommes', 'Female': 'Femmes'}.get(sex, sex)
    attribute = {
        'Unemployment rate': 'Taux de chômage',
        'Young unemployment rate': 'Taux de chômage des jeunes',
    }.get(attribute, attribute)

    # Declaration of the Swiss map
    swiss_map = folium.Map(location=[46.8414, 8.24523], zoom_start=8, max_zoom=9, min_zoom=7)

    period = 'Janvier ' + str(year)
    row_sex = swiss_unemployment_data.index.get_level_values(level=1)

    if attribute == 'Inequality Male - Female':
        # Work on a copy of the male rows and overwrite the rate with the
        # female-minus-male difference (rows are paired by position).
        attribute = 'Taux de chômage'
        data_female = swiss_unemployment_data[row_sex == 'Femmes']
        data = swiss_unemployment_data[row_sex == 'Hommes'].copy()
        female_serie = data_female[period, attribute]
        male_serie = data[period, attribute]
        data[period, attribute] = female_serie.values - male_serie.values
        # The displayed value is a difference of rates, not a rate.
        legend_name = 'Female minus male unemployment rate (percentage points)'
    else:
        # Otherwise we select the rows of the requested sex
        data = swiss_unemployment_data[row_sex == sex]
        legend_name = 'Unemployment Rate (%)'

    # Here is the creation of the choropleth
    choropleth_options = dict(
        geo_data=ch_cantons_json,
        topojson='objects.cantons',
        name='choropleth',
        data=data,
        columns=['Json canton', (period, attribute)],
        key_on='feature.properties.name',
        fill_color='YlOrRd',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=legend_name,
    )
    # Map.choropleth is deprecated in favour of folium.Choropleth; fall back to
    # the method only when the installed folium has no Choropleth class.
    if hasattr(folium, 'Choropleth'):
        folium.Choropleth(**choropleth_options).add_to(swiss_map)
    else:
        swiss_map.choropleth(**choropleth_options)
    return swiss_map

Now you can try the `map_creator` function for different parameters, feel free to change the year, the sex or the computation method.

In [None]:
attribute_list = ['Unemployment rate', 'Young unemployment rate', 'Registered unemployment rate',
                  'Long range unemployment rate', 'Inequality Male - Female']

# interact binds each keyword to the map_creator parameter of the same name,
# so the keywords must be spelled exactly year / sex / attribute.
# Lists are passed so that each parameter gets a dropdown.
interact(map_creator,
         year=list(range(2005, 2018)),
         sex=['Both', 'Male', 'Female'],
         attribute=attribute_list);

## Map gallery
Some preconfigured map configurations are shown below. Especially:
- The unemployment rate as defined by the Swiss confederation (number of people looking for a job divided by the number of active people)
- The unemployment rate without counting for people already having a job (number of people registered as unemployed divided by the number of active people)
- The unemployment rate of young people
- The long range unemployment rate

### The unemployment rate as defined by the Swiss confederation

#### For both genders

In [None]:
# 'Taux de chômage' for the 'Both' rows, period 'Janvier 2017'.
new_map = map_creator(year=2017, sex='Both', attribute='Taux de chômage')
new_map

#### For male

In [None]:
# 'Taux de chômage' for the 'Hommes' rows, period 'Janvier 2017'.
map_creator(year=2017, sex='Hommes', attribute='Taux de chômage')

#### For female

In [None]:
# 'Taux de chômage' for the 'Femmes' rows, period 'Janvier 2017'.
map_creator(year=2017, sex='Femmes', attribute='Taux de chômage')

### The unemployment rate without counting for people already having a job

#### For both genders

In [None]:
# 'Registered unemployment rate' for the 'Both' rows, period 'Janvier 2017'.
map_creator(year=2017, sex='Both', attribute='Registered unemployment rate')

We can see that the difference between counting all people looking for a job and counting only people who do not have a job is not very significant. The color of some cantons changes, but the general trend remains the same.

### The unemployment rate of young people

#### For both genders

In [None]:
# Youth unemployment rate for the 'Both' rows, period 'Janvier 2017'.
map_creator(year=2017, sex='Both', attribute='Young unemployment rate')

### The long range unemployment rate 

In [None]:
# 'Long range unemployment rate' for the 'Both' rows, period 'Janvier 2017'.
map_creator(year=2017, sex='Both', attribute='Long range unemployment rate')