In [1]:
import pandas as pd
import numpy as np
import requests

# ADA Lab 3 - Interactive Viz

## Importing the data

We only need two columns of the Excel file: 'University' and 'Approved Amount'.

As stated in the P3 documentation, we can remove all entries that have a missing value for 'University' because these grants are not given in Switzerland.
After that, we remove the remaining entries that have no usable 'Approved Amount' (they contain the placeholder text 'data not included in P3'). We can do this without affecting the result because they make up only about 0.2 % of the remaining data.

In [2]:
# Read the grant export and keep only the two columns the analysis uses
fundings = (
    pd.read_excel('grant.xlsx')
    .loc[:, ['University', 'Approved Amount']]
)

In [3]:
# Filter out unusable rows, printing the frame shape after every step
print(fundings.shape)

# Step 1: keep only the grants that name a university
fundings_cleaned = fundings.loc[fundings['University'].notnull()]
print(fundings_cleaned.shape)

# Step 2: keep only the grants whose amount is not the P3 placeholder text
fundings_final = fundings_cleaned.loc[
    ~(fundings_cleaned['Approved Amount'] == 'data not included in P3')
]
print(fundings_final.shape)

(63969, 2)
(50988, 2)
(50878, 2)


## Extracting the canton

The get_canton function takes a university name as input and returns the canton for that university. It first executes a text search on the Google Places API with the university string as a query. Afterwards it uses the returned place id to get the place details which contain the canton.

This function is called for all distinct universities in the dataframe and the University - Canton mapping is saved as .csv.

After adding a couple of cantons manually, the .csv file is loaded into a dataframe and joined with the grant entries on the university name, so that each grant has a value for canton.

Then a groupby and sum on canton suffices to get the amount of granted money per canton.

In [4]:
def get_canton(university):
    """Look up the canton of a university through the Google Places API.

    A text search for ``university`` is issued first; the place id of the
    first hit is then used for a place-details request, whose address
    components are scanned for the ``administrative_area_level_1`` entry.

    Parameters
    ----------
    university : str
        Free-text name used as the search query.

    Returns
    -------
    str or None
        The ``short_name`` of the first-level administrative area
        (e.g. ``'GE'``). ``None`` when the input is not a non-empty string,
        when the search has no results, when the details carry no such
        component, or when any request/parsing step fails (the error is
        printed, not raised).
    """
    from credentials import places_api_key

    # Nothing sensible to search for (e.g. NaN coming out of a DataFrame column)
    if not isinstance(university, str) or not university.strip():
        return None

    try:
        # Get placeid from university name.
        # The query goes through `params` so that requests URL-encodes it;
        # formatted straight into the URL, a '&', '#' or '+' inside a name
        # would truncate or alter the query.
        r = requests.get(
            'https://maps.googleapis.com/maps/api/place/textsearch/json',
            params={'key': places_api_key, 'query': university},
            timeout=10,  # never let one slow lookup block the whole apply()
        )
        r.raise_for_status()

        results = r.json()['results']
        if not results:
            return None
        placeid = results[0]['place_id']

        # Get info on placeid
        r2 = requests.get(
            'https://maps.googleapis.com/maps/api/place/details/json',
            params={'key': places_api_key, 'placeid': placeid},
            timeout=10,
        )
        r2.raise_for_status()

        # Parse json and return canton. Membership is tested instead of
        # comparing the whole list, so the order of the type tags is irrelevant.
        for el in r2.json()['result']['address_components']:
            if 'administrative_area_level_1' in el['types']:
                return el['short_name']
        return None
    except Exception as e:
        # Best effort: one failed lookup must not abort the whole mapping run
        print('get_canton failed for {!r}: {}'.format(university, e))
        return None

In [11]:
# List of all the distinct universities that remain after cleaning
universities = pd.DataFrame({'University': fundings_final['University'].unique()})

# NOTE(review): the first distinct value is dropped by position; which entry
# that is, and why it is excluded, is not recorded in the notebook - TODO confirm
# and replace with an explicit filter on the value.
# .copy() turns the slice into an independent frame, so adding the 'Canton'
# column afterwards does not trigger pandas' SettingWithCopyWarning.
universities = universities.iloc[1:].copy()

In [12]:
# Resolve every university name to a canton via get_canton (one lookup per row),
# then display the resulting University -> Canton table
universities['Canton'] = universities['University'].map(get_canton)
universities

Unnamed: 0,University,Canton
1,Université de Genève - GE,GE
2,"NPO (Biblioth., Museen, Verwalt.) - NPO",
3,Universität Basel - BS,BS
4,Université de Fribourg - FR,FR
5,Universität Zürich - ZH,ZH
6,Université de Lausanne - LA,VD
7,Universität Bern - BE,BE
8,"Eidg. Forschungsanstalt für Wald,Schnee,Land -...",
9,Université de Neuchâtel - NE,NE
10,ETH Zürich - ETHZ,ZH


In [25]:
# Persist the University -> Canton mapping.
# - index=False: the row index is not data; writing it produces an
#   'Unnamed: 0' column when the file is read back.
# - mode 'x' (exclusive create): the file is completed by hand after it has
#   been generated, so an existing file is never overwritten. Delete the file
#   to force a regeneration.
# - newline='': pandas asks for this when it is handed an open text file.
try:
    with open('uni_canton_mapping.csv', 'x', newline='', encoding='utf-8') as mapping_file:
        universities.to_csv(mapping_file, index=False)
except FileExistsError:
    print('uni_canton_mapping.csv already exists - keeping it (delete the file to regenerate)')

In [22]:
# Load the University -> Canton mapping from disk.
# Only the two columns used downstream are read, so a saved index column
# ('Unnamed: 0') cannot leak into the merge and the per-canton sums.
cantons = pd.read_csv('uni_canton_mapping.csv', usecols=['University', 'Canton'])

In [5]:
# Attach a canton to every grant: inner join of the mapping with the cleaned
# grants on the shared 'University' column (grants without a mapping row drop out)
canton_uni = cantons.merge(
    fundings_final,
    on='University',
    how='inner',
    sort=False,
)

In [6]:
# Convert the amounts in one vectorised call instead of element by element.
canton_uni['Approved Amount'] = pd.to_numeric(canton_uni['Approved Amount'])

# Sum only the amount column per canton. Summing the whole frame also
# aggregates every other column that survived the merge (e.g. 'University',
# or an index column read back from the CSV), which makes the two-name column
# assignment fail or silently mislabel data.
# NOTE: this rebinds canton_uni to the aggregated table, so the merge cell has
# to be re-run before this cell can be executed again.
canton_uni = (
    canton_uni
    .groupby('Canton', as_index=False)['Approved Amount']
    .sum()
    .rename(columns={'Approved Amount': 'Granted Money'})
)

## Building the map

In [32]:
from choropleth import buildChoroplethMap

In [33]:
# Build the choropleth map from the per-canton totals ('Canton', 'Granted Money').
# NOTE(review): the output recorded for this cell is `KeyError: 'VD'`; the lookup
# that fails happens inside choropleth.buildChoroplethMap, which is not visible
# here - TODO investigate before sharing the notebook.
buildChoroplethMap(canton_uni)

KeyError: 'VD'

## Parameters :

 - dataFrame : the data frame containing canton values
 - scale : color scales on the map
 - outputFileName : the name of the output map file