In [None]:
import pandas as pd
import folium 
import seaborn
import json
# Used for downloading and handling files:
import gzip
import urllib
# Used for country IDs
import pycountry
import jenkspy
# Numpy, Scipy, matplotlib
%pylab inline

In [None]:
# Display the installed folium version so the environment used for the maps is recorded.
folium.__version__

# 01 - Unemployment in European countries

As stated on the Eurostat website, the unemployment rate is defined as:
>Unemployment rates represent unemployed persons as a percentage of the labour force. The labour force is the total number of people employed and unemployed. Unemployed persons comprise persons aged 15 to 74 who were: a. without work during the reference week, b. currently available for work, i.e. were available for paid employment or self-employment before the end of the two weeks following the reference week, c. actively seeking work, i.e. had taken specific steps in the four weeks period ending with the reference week to seek paid employment or self-employment or who found a job to start later, i.e. within a period of, at most, three months. This table does not only show unemployment rates but also unemployed in 1000 and as % of the total population.

### With xls files

In [None]:
# Download the Eurostat table and keep a local copy next to the notebook.
eurostat_xls_url = 'http://ec.europa.eu/eurostat/tgm/web/_download/Eurostat_Table_tsdec450NoFlagNoDesc_f375eece-81ae-4fc9-b4ef-5ab165fe8c72.xls'
urllib.request.urlretrieve(eurostat_xls_url, 'eurostat.xls')

# Cell contents that should be parsed as missing values.
na_values = [':', '(i)', '(p)']

# Skip the 3 leading rows and the 9 trailing rows around the table, then give the
# country column a readable name.
# NOTE(review): newer pandas releases spell this keyword `skipfooter` -- confirm
# against the pandas version this notebook is pinned to.
df_eu = (
    pd.read_excel('eurostat.xls', skiprows=range(3), skip_footer=9,
                  na_values=na_values, header=0)
      .rename(columns={'geo\\time': 'country'})
)
print(df_eu.shape)
df_eu.head(10)

In [None]:
# Path to the TopoJSON file describing the European country shapes.
eu_topo_path = r'topojson/europe.topojson.json'

# Parse the file into a dict; the context manager guarantees the file handle
# is closed as soon as parsing finishes (or fails).
with open(eu_topo_path) as eu_topo_file:
    eu_json = json.load(eu_topo_file)

In [None]:
# Plain overlay of the European TopoJSON on a base map, as a visual check of the shapes.
m_eu = folium.Map([51, 15], zoom_start=4)

# folium reads the file object while constructing the layer, so the handle can be
# closed right after; the context manager makes sure it is not left open.
with open(eu_topo_path) as eu_topo_file:
    folium.TopoJson(eu_topo_file,
                    'objects.europe',
                    name='topojson',
                   ).add_to(m_eu)
m_eu

`Folium` interprets NaNs as 0 values and plots them onto our choropleth map if we don't remove them. Therefore, we'll remove any countries from our `eu_json` that don't have any data for our year of interest, 2016. Although we don't have the value for Switzerland in this dataset, later data from amstat shows that the unemployment rate for Switzerland in 2016 was 3.3%. We have to be careful here to take the rate that does not include job seekers who are already employed, so that we compare like with like between the two datasets. As we've only got one value, we'll fill it in "by hand".

In [None]:
# Fill in Switzerland's 2016 rate by hand *before* working out which countries lack
# data. Doing it afterwards would put Switzerland on the removal list and strip its
# shape from the map even though a value is now available for it.
# NOTE(review): row label 34 is assumed to be the Switzerland row -- confirm against df_eu.
df_eu.loc[34, '2016'] = 3.3 # todo: get value w/o job holders for comparison

# Countries that still have no 2016 value.
rm_countries = df_eu.loc[df_eu['2016'].isnull(), 'country'].tolist()
print(rm_countries)

# removing all countries w/o 2016 data from eu_json
# (slice assignment replaces the list contents in place, so eu_json itself is updated)
geometries = eu_json['objects']['europe']['geometries']
geometries[:] = [d for d in geometries
                 if d.get('properties').get('NAME') not in rm_countries]
df_eu.loc[:, ['country','2016']]

In [None]:
# Earlier attempt with hand-picked quantiles, kept for reference:
#q = list(df_eu['2016'].quantile([.05, .235, .655, .774, .9455]))

# We choose to use natural jenks breaks, todo: add some more text about this
# The breaks are computed over the non-missing 2016 rates, split into 5 classes.
rates_2016 = df_eu.loc[:, '2016'].dropna().values
breaks = jenkspy.jenks_breaks(rates_2016, nb_class=5)

# Base map centred on Europe, then the unemployment choropleth on top of it.
m_europe = folium.Map(location=[46, 15], tiles='cartodbpositron', zoom_start=4)
m_europe.choropleth(
    geo_data=eu_json,
    data=df_eu.reset_index(),
    columns=['country', '2016'],
    threshold_scale=breaks,
    key_on='feature.properties.NAME',  # join on the country name stored in the TopoJSON
    topojson='objects.europe',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Unemployment Rate (%)',
)

m_europe

### With tsv files ! not working !
Still need to convert the country codes into country names: there is a mismatch between the codes used in the Eurostat data and the ISO standard used by `pycountry`.

In [None]:
# Downloading and reading in the data
# The download is currently disabled, so 'tipsun20.tsv.gz' has to be present in the
# working directory already.
#urllib.request.urlretrieve(
       # 'http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?file=data/tipsun20.tsv.gz', 
        #'tipsun20.tsv.gz')

# Read the gzipped TSV inside a context manager so the file handle is always closed.
with gzip.open('tipsun20.tsv.gz') as tsv_file:
    df = pd.read_csv(tsv_file, sep='\t')

# Re-organising the columns

df = df.rename(columns={'sex,age,unit,geo\\time': 'country'})
#df = df.set_index('country')
# filter out extra data, only interested in total unemployment rate
# (the filtered frame keeps its original row labels, so they are no longer contiguous)
df = df[ df.country.str.contains('TOTAL') ]
df.head()

In [None]:
# Quick check of the pycountry lookup: fetch the record for alpha-2 code 'GB' and show its name.
pycountry.countries.get(alpha_2='GB').name

In [None]:
# WIP
# Replace each composite key in 'country' (comma-separated, geo code last) with the
# country name that pycountry knows for that code.
#
# Iterate over the frame's actual row labels: after the 'TOTAL' filter the labels are
# no longer 0..n-1, so positional counters are not valid .loc keys.
for row_label in df.index:
    # The geo code is whatever follows the last comma; this does not depend on the
    # exact length of the other key fields.
    geo_code = str(df.loc[row_label, 'country']).rsplit(',', 1)[-1].strip()
    try:
        country = pycountry.countries.get(alpha_2=geo_code)
    except LookupError:
        # Some pycountry versions raise (KeyError) instead of returning None.
        country = None

    if country is None:
        print('No country code for:', geo_code)
    else:
        df.loc[row_label, 'country'] = country.name

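Eurostat deviates from the ISO 3166-1 alpha-2 country codes for two countries: it uses `EL` for Greece (ISO: `GR`) and `UK` for the United Kingdom (ISO: `GB`). Estonia's code, `EE`, is the same in both.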

In [None]:
# Eurostat's codes for Greece ('EL') and the United Kingdom ('UK') are not ISO 3166-1
# alpha-2 codes, so pycountry cannot resolve them directly. Map them to their ISO
# equivalents and write the resulting country names into the frame.
eurostat_to_iso = {'EL': 'GR', 'UK': 'GB'}
for eurostat_code, iso_code in eurostat_to_iso.items():
    rows = df['country'].str.contains(eurostat_code, regex=False, na=False)
    # Assign through .loc with a boolean mask so the write lands in df itself;
    # assigning to an attribute of df[mask] would only change a temporary copy.
    df.loc[rows, 'country'] = pycountry.countries.get(alpha_2=iso_code).name
df

# 02 - Unemployment in Swiss cantons

In [None]:
# Path to the TopoJSON file describing the Swiss canton shapes.
canton_topo_path = r'topojson/ch-cantons.topojson.json'

# Parse the file into a dict; the context manager guarantees the file handle
# is closed as soon as parsing finishes (or fails).
with open(canton_topo_path) as canton_topo_file:
    canton_json = json.load(canton_topo_file)

In [None]:
# Plain overlay of the canton TopoJSON on a base map, as a visual check of the shapes.
m_ch = folium.Map([47,8.3], zoom_start=8)

# folium reads the file object while constructing the layer, so the handle can be
# closed right after; the context manager makes sure it is not left open.
with open(canton_topo_path) as canton_topo_file:
    folium.TopoJson(canton_topo_file,
                    'objects.cantons',
                    name='topojson',
                   ).add_to(m_ch)
m_ch

In [None]:
#io = 'Unemployment_Rates_CH_2016.xlsx'
io = 'ch_test.xlsx'

# There is monthly data available for 2016, but we're only going to keep
# the yearly average so we can compare to eurostat.
# Read the sheet (2 leading rows and 1 trailing row skipped), keep the two columns
# of interest and drop incomplete rows in one chain.
# NOTE(review): newer pandas releases spell this keyword `skipfooter` -- confirm
# against the pandas version this notebook is pinned to.
df_ch = (
    pd.read_excel(io, skiprows=range(2), skip_footer=1, header=0)
      .loc[:, ['Canton', 'Total']]
      .dropna()
)
df_ch['Total'] = df_ch['Total'].astype('float64', copy=False, errors='ignore')

# Canton ids in the order in which the geometries appear in the TopoJSON.
ids = [canton['id'] for canton in canton_json['objects']['cantons']['geometries']]

# We just need to change order of canton ids so it's the same as in our df:
# take 'FR' out and put it back at position 19.
ids.insert(19, ids.pop(ids.index('FR')))
df_ch['ID'] = ids
df_ch

In [None]:
# Natural (Jenks) breaks over the cantonal rates, split into 5 classes, used as the
# thresholds of the colour scale.
canton_rates = df_ch.loc[:, 'Total'].dropna().values
breaks = jenkspy.jenks_breaks(canton_rates, nb_class=5)

# Base map centred on Switzerland, then the cantonal unemployment choropleth on top.
m_switzerland = folium.Map(location=[47.1, 8.39], tiles='cartodbpositron', zoom_start=7)
m_switzerland.choropleth(
    geo_data=canton_json,
    data=df_ch,
    columns=['ID', 'Total'],
    threshold_scale=breaks,
    key_on='feature.id',  # join on the canton id stored in the TopoJSON
    topojson='objects.cantons',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Unemployment Rate Switzerland (%)',
)

m_switzerland