## Task 3

In [4]:
import pandas as pd
import folium
import json
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [5]:
# Paths to the Excel exports holding the unemployment-rate tables used below:
# one file per breakdown (by nationality, by age class, and by both combined).
UNEMPLOYMENT_RATE_BY_NATIONALITY = 'data/Unemployment_Rate_Nationality-1year.xlsx'
UNEMPLOYMENT_RATE_BY_AGE = 'data/Unemployment_Rate_Age-1year.xlsx'
UNEMPLOYMENT_RATE_COMBINED = 'data/Unemployment_Rate-Age+Nationality.xlsx'

https://python-visualization.github.io/folium/quickstart.html

https://github.com/wrobstory/vincent

https://altair-viz.github.io/

http://nbviewer.jupyter.org/gist/BibMartin/4b9784461d2fa0d89353

http://jeffpaine.github.io/geojson-topojson/

### Data processing

In [6]:
# TODO: describe how the data was obtained for replicability!
def get_dataframe_rate(path, cols_to_drop, rename_pair, new_index, drop_last=True):
    '''
    Load and clean an .xlsx export of unemployment rates in a generalized manner.

    The first row of the sheet (header text) is always removed; when
    ``drop_last`` is true the last row (usually the total) is removed as well,
    so that only the cantonal rows remain.

    Parameters:
    path -- path to the excel file containing the necessary data
    cols_to_drop -- labels of the columns to remove from the loaded sheet
    rename_pair -- mapping {old column name: new column name}
    new_index -- column names meant to form the (multi)index; currently unused,
                 the frame is returned with the index it was loaded with
    drop_last -- whether the last row should be dropped too (default True)

    Returns:
    df -- processed dataframe (a new object; surviving rows keep their labels)
    '''
    # NOTE(review): `convert_float` was deprecated in pandas 1.3 and removed in
    # pandas 2.0; it is kept here to preserve the loaded dtypes. Confirm the
    # pandas version in use before upgrading.
    raw = pd.read_excel(path, convert_float=False)

    # Drop the unnecessary columns without mutating the frame in place.
    pruned = raw.drop(cols_to_drop, axis=1)

    # Trim by position rather than by label, so the result does not depend on
    # the frame carrying a default 0..n-1 index.
    if drop_last:
        cantonal = pruned.iloc[1:-1]  # header text and total row removed
    else:
        cantonal = pruned.iloc[1:]  # no total row present, only header text

    return cantonal.rename(columns=rename_pair)

def get_dataframe_rate_nationality(path=UNEMPLOYMENT_RATE_BY_NATIONALITY):
    '''
    Shortcut returning the pruned dataframe for the unemployment rate by nationality.

    Parameters:
    path -- path to the excel file containing the data for unemployment rate by nationality
    '''
    return get_dataframe_rate(
        path,
        cols_to_drop=['Mois'],
        rename_pair={"Nationalité": 'Nationality'},
        new_index=['Canton', 'Nationality'],
    )

def get_dataframe_rate_age(path=UNEMPLOYMENT_RATE_BY_AGE):
    '''
    Shortcut returning the pruned dataframe for the unemployment rate by age.

    Parameters:
    path -- path to the excel file containing the data for unemployment rate by age
    '''
    return get_dataframe_rate(
        path,
        cols_to_drop=['Mois', 'Unnamed: 2'],
        rename_pair={"Classes d'âge 15-24, 25-49, 50 ans et plus": 'Age category'},
        new_index=['Canton', 'Age category'],
    )

def get_dataframe_rate_combined(path=UNEMPLOYMENT_RATE_COMBINED):
    '''
    Shortcut returning the pruned dataframe for the unemployment rate by
    composite data with age and nationality.

    This export has no total row, so only the header-text row is dropped.

    Parameters:
    path -- path to the excel file containing the data for unemployment rate by age and nationality
    '''
    return get_dataframe_rate(
        path,
        cols_to_drop=['Unnamed: 3', 'Monat'],
        rename_pair={
            "Altersklassen 15-24, 25-49, 50 und mehr": 'Age category',
            "Nationalität": 'Nationality',
            "Kanton": 'Canton',
        },
        new_index=['Canton', 'Nationality', 'Age category'],
        drop_last=False,
    )

In [7]:
# Load the three cleaned tables from their default file paths.
u_rate_nationality = get_dataframe_rate_nationality()
u_rate_age = get_dataframe_rate_age()
u_rate_combined = get_dataframe_rate_combined()

In [8]:
#u_rate_nationality.loc[('Zurich', 'Suisses'),'Janvier 2017']

## Map visualization

In [9]:
# Load the TopoJSON description of the cantons. The context manager closes the
# file handle as soon as parsing is done instead of leaving it open.
with open('topojson/ch-cantons.topojson.json') as topo_file:
    ch_topo = json.load(topo_file)

In [10]:
# Canton identifiers, in the order their geometries appear in the TopoJSON file.
canton_id = [canton['id'] for canton in ch_topo['objects']['cantons']['geometries']]

In [11]:
# Pair each TopoJSON canton id with the canton name found at the same position
# in the nationality table. Stored as a list because a bare zip() is a one-shot
# iterator: once consumed, re-running the cell that reads it would silently
# produce an empty mapping.
# NOTE(review): the pairing is purely positional — it assumes the table lists the
# cantons in the same order as the TopoJSON geometries; verify against the data.
canton_id_name_fr = list(zip(canton_id, u_rate_nationality.reset_index()['Canton'].drop_duplicates()))

In [12]:
# Pair each TopoJSON canton id with the canton name found at the same position
# in the combined (age + nationality) table. Stored as a list because a bare
# zip() is a one-shot iterator: once consumed, re-running the cell that reads it
# would silently produce an empty mapping.
# NOTE(review): the pairing is purely positional — it assumes the table lists the
# cantons in the same order as the TopoJSON geometries; verify against the data.
canton_id_name_de = list(zip(canton_id, u_rate_combined.reset_index()['Canton'].drop_duplicates()))

In [13]:
# Nested {'Canton': {canton name: canton id}} lookups, the per-column form that
# DataFrame.replace accepts through its `to_replace` argument.
cantons_pairs_fr = {'Canton': {name: cid for cid, name in canton_id_name_fr}}
cantons_pairs_de = {'Canton': {name: cid for cid, name in canton_id_name_de}}

In [14]:
cantons_pairs_de

{'Canton': {'Aargau': 'AG',
  'Appenzell Ausserrhoden': 'AR',
  'Appenzell Innerrhoden': 'AI',
  'Basel-Landschaft': 'BL',
  'Basel-Stadt': 'BS',
  'Bern': 'BE',
  'Freiburg': 'FR',
  'Genf': 'GE',
  'Glarus': 'GL',
  'Graubünden': 'GR',
  'Jura': 'JU',
  'Luzern': 'LU',
  'Neuenburg': 'NE',
  'Nidwalden': 'NW',
  'Obwalden': 'OW',
  'Schaffhausen': 'SH',
  'Schwyz': 'SZ',
  'Solothurn': 'SO',
  'St. Gallen': 'SG',
  'Tessin': 'TI',
  'Thurgau': 'TG',
  'Uri': 'UR',
  'Waadt': 'VD',
  'Wallis': 'VS',
  'Zug': 'ZG',
  'Zürich': 'ZH'}}

In [15]:
# Base map centred on latitude 46.8, longitude 8.3 at zoom level 8.
# NOTE(review): the 'Mapbox Bright' tile set may not be available in newer
# folium releases — confirm against the installed version.
m_switzerland = folium.Map([46.8,8.3], tiles='Mapbox Bright', zoom_start=8)
m_switzerland

In [16]:
def replace_canton_with_id(dataframe, canton_pairs):
    """Return a copy of ``dataframe`` with canton names replaced by canton ids.

    The index is reset first, so the previous index ends up as an ordinary
    column of the result; the input frame itself is left untouched.

    Parameters:
    dataframe -- table containing the column(s) named in ``canton_pairs``
    canton_pairs -- nested mapping {column name: {old value: new value}}

    Returns:
    the re-indexed dataframe with the replacements applied
    """
    flattened = dataframe.reset_index()
    return flattened.replace(to_replace=canton_pairs)

In [17]:
# Swap canton names for canton ids in all three tables (the `_fr` lookup for the
# age and nationality tables, the `_de` lookup for the combined one).
# NOTE: the variables are rebound to the converted frames, so this cell is not
# idempotent — every run resets the index again and pushes it into a new column.
u_rate_age = replace_canton_with_id(u_rate_age, cantons_pairs_fr)
u_rate_nationality = replace_canton_with_id(u_rate_nationality, cantons_pairs_fr)
u_rate_combined = replace_canton_with_id(u_rate_combined, cantons_pairs_de)

In [18]:
# One choropleth layer per nationality group for November 2016. The layers are
# identical except for the rows they select and the layer name, so they are
# produced in a loop ("Suisses" first, then "Etrangers").
for group_label in ("Suisses", "Etrangers"):
    m_switzerland.choropleth(
        geo_data=ch_topo,
        name=group_label,
        topojson='objects.cantons',
        data=u_rate_nationality[u_rate_nationality.Nationality == group_label],
        columns=['Canton', 'Novembre 2016'],
        key_on='feature.id',
        fill_color='YlGn',
        fill_opacity=0.5,
        line_opacity=0.2,
        legend_name="Unemployment Rate (%)",
    )

# Layer switcher so the two layers can be toggled on the map.
folium.LayerControl().add_to(m_switzerland)

<folium.map.LayerControl at 0x1e2c0418da0>

In [19]:
m_switzerland

In [20]:
#folium.Map.save(m_switzerland, "map.html")

In [21]:
#from IPython.display import HTML
#HTML("map.html")

http://nbviewer.jupyter.org/github/python-visualization/folium/blob/master/examples/TimeSliderChoropleth.ipynb

In [22]:
import ipywidgets as widgets
from IPython.core.display import HTML

In [23]:
u_rate_nationality.head(1)

Unnamed: 0,index,Canton,Nationality,Octobre 2016,Novembre 2016,Décembre 2016,Janvier 2017,Février 2017,Mars 2017,Avril 2017,Mai 2017,Juin 2017,Juillet 2017,Août 2017,Septembre 2017,Total
0,1,ZH,Etrangers,5.8,6.1,6.4,6.6,6.5,6.3,6,5.7,5.6,5.5,5.3,5.3,5.9


In [24]:
# English month abbreviation -> French month name, listed in the order the
# months appear in the data (October through September).
english2french_month = dict([
    ('Oct', 'Octobre'),
    ('Nov', 'Novembre'),
    ('Dec', 'Décembre'),
    ('Jan', 'Janvier'),
    ('Feb', 'Février'),
    ('Mar', 'Mars'),
    ('Apr', 'Avril'),
    ('May', 'Mai'),
    ('Jun', 'Juin'),
    ('Jul', 'Juillet'),
    ('Aug', 'Août'),
    ('Sep', 'Septembre'),
])

In [25]:
def show_data(x):
    """Display the nationality choropleth map for the month that ``x`` falls in.

    A new map is built on every call with one layer per nationality group,
    then a heading with the selected month and the map itself are displayed.

    Parameters:
    x -- date-like object providing ``strftime`` (e.g. ``datetime.date``)
    """
    # Month columns are labelled "<French month name> <year>".
    french_month = english2french_month[x.strftime('%b')]
    column_label = french_month + ' ' + x.strftime('%Y')

    month_map = folium.Map([46.8,8.3], tiles='Mapbox Bright', zoom_start=8)

    # The two layers differ only in the rows selected and in the layer name.
    for group_label in ("Suisses", "Etrangers"):
        month_map.choropleth(
            geo_data=ch_topo,
            name=group_label,
            topojson='objects.cantons',
            data=u_rate_nationality[u_rate_nationality.Nationality == group_label],
            columns=['Canton', column_label],
            key_on='feature.id',
            fill_color='YlGn',
            fill_opacity=0.5,
            line_opacity=0.2,
            legend_name="Unemployment Rate (%)",
        )

    folium.LayerControl().add_to(month_map)

    display(HTML('<h3>'+column_label+'</h3>'))
    display(month_map)

In [26]:
def f(x):
    """Forward ``x`` to ``show_data``; used as the callback of the month slider."""
    show_data(x)

In [27]:
import datetime

# First day of every month from October 2016 through September 2017.
dates = [
    datetime.date(year, month, 1)
    for year, months in ((2016, range(10, 13)), (2017, range(1, 10)))
    for month in months
]

In [28]:
# (label, value) pairs for the month selector: the label is formatted with
# '%b-%Y', the value is the date object itself.
options = [(i.strftime('%b-%Y'), i) for i in dates]

In [29]:
# Slider widget for picking one of the (label, date) entries in `options`.
w = widgets.SelectionSlider(
    options=options,
    description='Select month',
    disabled = False,
    readout = True,
    continuous_update=False
)

In [30]:
widgets.interact(f, x=w)

<function __main__.f>

In [31]:
import copy
# https://github.com/topojson/topojson/wiki/Introduction
def create_canton_topos(originalTopoJSON):
    """Split a TopoJSON topology into one topology per canton.

    Each element of the result is a deep copy of ``originalTopoJSON`` whose
    ``objects.cantons.geometries`` list holds a single geometry (the geometry
    object taken from the original topology); everything else is duplicated
    unchanged. The input is not modified.

    Parameters:
    originalTopoJSON -- parsed TopoJSON dict with an ``objects.cantons`` collection

    Returns:
    list of single-canton topologies, in the order of the original geometries
    """
    def _single_canton(geometry):
        # Full copy so the per-canton topologies stay independent of the source.
        clone = copy.deepcopy(originalTopoJSON)
        clone["objects"]["cantons"]["geometries"] = [geometry]
        return clone

    return [
        _single_canton(geometry)
        for geometry in originalTopoJSON["objects"]["cantons"]["geometries"]
    ]

In [32]:
# One single-canton topology per canton, so each canton can be added to a map
# as its own layer.
canton_topos = create_canton_topos(ch_topo)

In [33]:
import vincent
vincent.core.initialize_notebook()

In [34]:
# Scratch lookup: the 'Septembre 2017' values of the age table for canton 'ZH'.
# NOTE(review): `tmp` is not referenced anywhere else in the visible notebook.
tmp = u_rate_age[u_rate_age.Canton=='ZH']['Septembre 2017']

In [35]:
def make_pie_age(canton, date):
    """Build a vincent pie-chart specification of unemployment by age class.

    Parameters:
    canton -- value to match in the ``Canton`` column of ``u_rate_age``
    date -- label of the month column to plot

    Returns:
    pie_dict -- the chart specification as a dict, parsed from vincent's JSON
    """
    age_labels = ['15-24','25-49','50+']
    canton_rates = u_rate_age[u_rate_age.Canton==canton][date]

    # Rows are labelled by position: first row -> '15-24', second -> '25-49', ...
    rate_by_age = {age_labels[pos]: rate for pos, rate in enumerate(canton_rates)}

    chart = vincent.Pie(rate_by_age, width=100, height=100)
    chart.legend('Age range')

    return json.loads(chart.to_json())

def make_pie_nationality(canton, date):
    """Build a vincent pie-chart specification of unemployment by nationality.

    Parameters:
    canton -- value to match in the ``Canton`` column of ``u_rate_nationality``
    date -- label of the month column to plot

    Returns:
    pie_dict -- the chart specification as a dict, parsed from vincent's JSON
    """
    group_labels = ['Foreign','Swiss']
    canton_rates = u_rate_nationality[u_rate_nationality.Canton==canton][date]

    # Rows are labelled by position: first row -> 'Foreign', second -> 'Swiss'.
    rate_by_group = {group_labels[pos]: rate for pos, rate in enumerate(canton_rates)}

    chart = vincent.Pie(rate_by_group, width=100, height=100)
    chart.legend('Nationality')

    return json.loads(chart.to_json())

In [36]:
def make_grouped_bar_combined(canton, date):
    """Build a vincent grouped-bar specification of unemployment by nationality and age.

    Cells holding the placeholder ``'...'`` are replaced by -1 and then left
    out of the chart data.

    Parameters:
    canton -- value to match in the ``Canton`` column of ``u_rate_combined``
    date -- label of the month column to plot

    Returns:
    bar_dict -- the chart specification as a dict, parsed from vincent's JSON
    """
    age_labels = ['15-24','25-49','50+']
    canton_rows = u_rate_combined[u_rate_combined.Canton==canton].replace('...',-1)

    def _rates_by_age(nationality_label):
        # Rows are labelled by position; -1 marks a value that was missing.
        rows = canton_rows[canton_rows.Nationality==nationality_label]
        return {
            age_labels[pos]: rate
            for pos, rate in enumerate(rows[date])
            if rate != -1
        }

    foreigners = _rates_by_age('Ausländer')
    swiss = _rates_by_age('Schweizer')

    chart = vincent.GroupedBar(pd.DataFrame([foreigners, swiss], index=['Foreigners', 'Swiss']))
    chart.legend(title='Unemployment rate by nationality and age')
    chart.axis_titles(x='Nationality', y='Unemployment rate')
    chart.common_axis_properties(title_size=10)
    chart.width = 250
    chart.height = 200

    return json.loads(chart.to_json())

http://nbviewer.jupyter.org/github/python-visualization/folium/blob/master/examples/Popups.ipynb

In [37]:
import branca

In [38]:
#http://nbviewer.jupyter.org/github/python-visualization/folium/tree/master/examples/
#http://nbviewer.jupyter.org/github/python-visualization/folium/blob/master/examples/Popups.ipynb

# Fresh map with one clickable layer per canton; each layer's popup shows the
# grouped bar chart (nationality x age class) for that canton.
m_switzerland = folium.Map([46.8,8.3], tiles='Mapbox Bright', zoom_start=8)

for canton in canton_topos:
    # Every single-canton topology holds exactly one geometry; its id names the layer.
    name = canton["objects"]["cantons"]["geometries"][0]['id']

    tj = folium.TopoJson(canton, 'objects.cantons', name=name)

    # The combined table has German column headers, hence 'September 2017'.
    v_bar_combined = folium.Vega(make_grouped_bar_combined(name,'September 2017'), width=500, height=250)

    popup = folium.Popup(max_width=500)
    popup.add_child(v_bar_combined)

    tj.add_child(popup)
    tj.add_to(m_switzerland)

#folium.LayerControl().add_to(m_switzerland)
#display(m_switzerland)

In [39]:
# The map is too big to show inline, but a browser can render the saved file.
m_switzerland.save("map.html")

In [40]:
from bokeh.charts import Donut, show, output_file, Scatter, Bar
from bokeh.sampledata.olympics2014 import data
import bokeh

def bokeh_pie_nationality(canton, date):
    """Build a Bokeh donut chart of unemployment by nationality for one canton.

    Parameters:
    canton -- value to match in the ``Canton`` column of ``u_rate_nationality``
    date -- label of the month column used for the values

    Returns:
    the configured ``Donut`` chart, with its toolbar disabled and hidden
    """
    canton_rows = u_rate_nationality[u_rate_nationality.Canton==canton]

    donut = Donut(
        canton_rows,
        values=date,
        label=['Nationality'],
        text_font_size='12pt',
        hover_text='Unemployment by nationality',
        height=220,
        width=220,
    )

    # No toolbar or logo on the chart.
    donut.toolbar.disabled = True
    donut.toolbar.logo = None
    donut.toolbar_location = None
    donut.title.text = "Unemployment (%) by nationality"

    return donut

The bokeh.charts API has moved to a separate 'bkcharts' package.

This compatibility shim will remain until Bokeh 1.0 is released.
After that, if you want to use this API you will have to install
the bkcharts package explicitly.

  warn(message)


In [41]:
def bokeh_pie_age(canton, date):
    """Build a Bokeh donut chart of unemployment by age class for one canton.

    The numeric age-category codes 1.0 / 2.0 / 3.0 are relabelled as
    '15-24' / '25-49' / '50+' before plotting.

    Parameters:
    canton -- value to match in the ``Canton`` column of ``u_rate_age``
    date -- label of the month column used for the values

    Returns:
    the configured ``Donut`` chart, with its toolbar disabled and hidden
    """
    canton_rows = u_rate_age[u_rate_age.Canton==canton]
    labelled_rows = canton_rows.replace({'Age category': {1.0: '15-24', 2.0: '25-49', 3.0:'50+'}})

    donut = Donut(
        labelled_rows,
        values=date,
        label=['Age category'],
        text_font_size='12pt',
        hover_text='Unemployment by age',
        height=220,
        width=220,
    )

    # No toolbar or logo on the chart.
    donut.toolbar.disabled = True
    donut.toolbar.logo = None
    donut.toolbar_location = None
    donut.title.text = "Unemployment (%) by age"

    return donut

http://bokeh.pydata.org/en/0.11.0/docs/user_guide/charts.html

In [42]:
def bokeh_bar_combined(canton, date):
    """Build a Bokeh grouped bar chart of unemployment by age class and nationality.

    Cells holding the placeholder ``'...'`` are plotted as 0, and the numeric
    age-category codes 1.0 / 2.0 / 3.0 are relabelled as '15-24' / '25-49' / '50+'.

    Parameters:
    canton -- value to match in the ``Canton`` column of ``u_rate_combined``
    date -- label of the month column used for the values

    Returns:
    the configured ``Bar`` chart, with its toolbar disabled and hidden
    """
    canton_rows = u_rate_combined[u_rate_combined.Canton==canton].replace('...',0)
    labelled_rows = canton_rows.replace({'Age category': {1.0: '15-24', 2.0: '25-49', 3.0:'50+'}})

    chart = bokeh.charts.Bar(
        labelled_rows,
        values=date,
        label=['Age category'],
        group=['Nationality'],
        legend='top_right',
    )

    # No toolbar or logo on the chart.
    chart.toolbar.disabled = True
    chart.toolbar.logo = None
    chart.toolbar_location = None

    # Title, axis labels and font sizes.
    chart.title.text = "Unemployment % by age category, nationality"
    chart.yaxis.axis_label = "Unemployment rate [%]"
    chart.axis.axis_label_text_font_size = '12pt'
    chart.title.align = 'center'
    chart.title.text_font_size = '12pt'
    chart.xaxis.major_label_text_font_size = '12pt'

    # Overall chart size.
    chart.height = 350
    chart.width = 440

    return chart
    

In [43]:
from bokeh.embed import file_html
from bokeh.resources import CDN

In [44]:
#http://nbviewer.jupyter.org/github/python-visualization/folium/tree/master/examples/
#http://nbviewer.jupyter.org/github/python-visualization/folium/blob/master/examples/Popups.ipynb

# Fresh map with one clickable layer per canton; each popup embeds three Bokeh
# charts (nationality pie, age pie, grouped bars) rendered as HTML.
m_switzerland = folium.Map([46.8,8.3], tiles='Mapbox Bright', zoom_start=8)

# `cnt` (1-based) is only used to give every embedded HTML document its own title.
for cnt, canton in enumerate(canton_topos, start=1):
    # Every single-canton topology holds exactly one geometry; its id names the layer.
    name = canton["objects"]["cantons"]["geometries"][0]['id']

    tj = folium.TopoJson(canton, 'objects.cantons', name=name)

    # Each variable now holds the chart its name announces. Previously the two
    # pies were assigned to each other's variables, so the documents were titled
    # 'age…' for the nationality chart and vice versa.
    v_pie_nationality = bokeh_pie_nationality(name, 'Septembre 2017')
    html_nationality = file_html(v_pie_nationality, CDN, 'nationality'+name+str(cnt))

    v_pie_age = bokeh_pie_age(name, 'Septembre 2017')
    html_age = file_html(v_pie_age, CDN, 'age'+name+str(cnt))

    # The combined table has German column headers, hence 'September 2017'.
    v_bar_grouped = bokeh_bar_combined(name, 'September 2017')
    html_grouped = file_html(v_bar_grouped, CDN, 'grouped'+name+str(cnt))

    # Popup layout, unchanged on screen: heading, nationality pie floated left,
    # age pie floated right, grouped bars below.
    combined_html = (
        '<p style="font-family: Verdana; text-align: center;"> Statistics for canton '+name+'</p>'
        + '<figure style="width:90; max-width:90; max-height:90; float:left;">'+html_nationality+'</figure>'
        + '<figure style="width:90; max-width:90; max-height:90; float:right;">'+html_age+'</figure>'
        + '<figure style="width:95; max-width:95; max-height:95; float:left; padding-top:100;">'+html_grouped+'</figure>'
    )

    combined = branca.element.IFrame(html=combined_html, width=450, height=380)

    popup = folium.Popup(combined, max_width=450)

    tj.add_child(popup)
    tj.add_to(m_switzerland)

# The map is too big to show inline, but a browser can render the saved file.
m_switzerland.save("map-full.html")

In [45]:
from IPython.display import HTML
# The first positional argument of HTML() is raw markup, so HTML("map-full.html")
# would only render the literal text "map-full.html". Passing the path through
# `filename` makes IPython read and display the saved file.
HTML(filename="map-full.html")

In [46]:
# The first positional argument of HTML() is raw markup, so HTML("map.html")
# would only render the literal text "map.html". Passing the path through
# `filename` makes IPython read and display the saved file.
HTML(filename="map.html")