In [1]:
# imports (may be more than needed)
import pandas as pd
import numpy as np
import glob # to find all files in folder
from datetime import datetime
from datetime import date, time
from dateutil.parser import parse
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pyplot import show
import itertools
import requests
from bs4 import BeautifulSoup
import json

%matplotlib inline
sns.set_context('notebook')
pd.options.mode.chained_assignment = None  # default='warn'


In [39]:
# set to true if you want to write the data to csv
do_persist = False
def write_to_csv(df, file_name):
    if do_persist:
        df.to_csv(file_name)
        print('done writing')
        return True
    else:
        return False

In [3]:
data = pd.read_csv('data/GrantExport.csv', delimiter=';')
data.dtypes

﻿"Project Number"                int64
Project Title                   object
Project Title English           object
Responsible Applicant           object
Funding Instrument              object
Funding Instrument Hierarchy    object
Institution                     object
University                      object
Discipline Number                int64
Discipline Name                 object
Discipline Name Hierarchy       object
Start Date                      object
End Date                        object
Approved Amount                 object
Keywords                        object
dtype: object

In [4]:
data.head()

Unnamed: 0,"﻿""Project Number""",Project Title,Project Title English,Responsible Applicant,Funding Instrument,Funding Instrument Hierarchy,Institution,University,Discipline Number,Discipline Name,Discipline Name Hierarchy,Start Date,End Date,Approved Amount,Keywords
0,1,Schlussband (Bd. VI) der Jacob Burckhardt-Biog...,,Kaegi Werner,Project funding (Div. I-III),Project funding,,Nicht zuteilbar - NA,10302,Swiss history,Human and Social Sciences;Theology & religious...,01.10.1975,30.09.1976,11619.0,
1,4,Batterie de tests à l'usage des enseignants po...,,Massarenti Léonard,Project funding (Div. I-III),Project funding,Faculté de Psychologie et des Sciences de l'Ed...,Université de Genève - GE,10104,Educational science and Pedagogy,"Human and Social Sciences;Psychology, educatio...",01.10.1975,30.09.1976,41022.0,
2,5,"Kritische Erstausgabe der ""Evidentiae contra D...",,Kommission für das Corpus philosophorum medii ...,Project funding (Div. I-III),Project funding,Kommission für das Corpus philosophorum medii ...,"NPO (Biblioth., Museen, Verwalt.) - NPO",10101,Philosophy,Human and Social Sciences;Linguistics and lite...,01.03.1976,28.02.1985,79732.0,
3,6,Katalog der datierten Handschriften in der Sch...,,Burckhardt Max,Project funding (Div. I-III),Project funding,Abt. Handschriften und Alte Drucke Bibliothek ...,Universität Basel - BS,10302,Swiss history,Human and Social Sciences;Theology & religious...,01.10.1975,30.09.1976,52627.0,
4,7,Wissenschaftliche Mitarbeit am Thesaurus Lingu...,,Schweiz. Thesauruskommission,Project funding (Div. I-III),Project funding,Schweiz. Thesauruskommission,"NPO (Biblioth., Museen, Verwalt.) - NPO",10303,Ancient history and Classical studies,Human and Social Sciences;Theology & religious...,01.01.1976,30.04.1978,120042.0,


In [5]:
# take only the relevant cols and give them nicer names
grants = data[['University', 'Approved Amount']]
grants.rename(columns={'University': 'university', 'Approved Amount' : 'amount'}, inplace=True)
grants.dtypes

university    object
amount        object
dtype: object

In [6]:
# drop nulls
len(grants)

63969

In [7]:
grants = grants.replace(to_replace='Nicht zuteilbar - NA', value=np.nan)
grants = grants.dropna()
len(grants)

48393

In [8]:
grants['amount'] = pd.to_numeric(grants.amount, errors='coerce')

In [9]:
universities = grants.groupby(by='university', axis=0, as_index=False).sum()
universities.sort_values('amount', ascending=False)

Unnamed: 0,university,amount
70,Université de Genève - GE,1.838237e+09
68,Universität Zürich - ZH,1.826843e+09
6,ETH Zürich - ETHZ,1.635597e+09
65,Universität Bern - BE,1.519373e+09
64,Universität Basel - BS,1.352251e+09
71,Université de Lausanne - LA,1.183291e+09
5,EPF Lausanne - EPFL,1.175316e+09
69,Université de Fribourg - FR,4.575262e+08
72,Université de Neuchâtel - NE,3.832046e+08
39,"NPO (Biblioth., Museen, Verwalt.) - NPO",3.341306e+08


In [10]:
delim = ' - '
universities['university_name'] = [fn.split(delim)[0].strip() for fn in universities['university'].values]
universities['abbrev'] = [fn.split(delim)[1].strip() if len(fn.split(delim)) > 1 else np.nan for fn in universities['university'].values]
universities.set_index('university', inplace=True)
universities.head(1)

Unnamed: 0_level_0,amount,university_name,abbrev
university,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AO Research Institute - AORI,3435621.0,AO Research Institute,AORI


In [11]:
# run the python file that defines the functions to access the api's
%run map_universities.py

In [12]:
# create a new column with the canton in it.
def canton_for_university_query(uni):
    q = str(uni.university_name) + ' ' + str(uni.abbrev) + ' switzerland'
    # remove some characters
    to_remove = ['(', ')', ',', '.', '-', '+', '&']
    [q.replace(ch, ' ') for ch in to_remove]
    return canton_for_university(q)

universities['canton'] = universities.apply(canton_for_university_query, axis=1)
len(universities['canton']) - sum(pd.isnull(universities['canton']))

***************************************
query: AO Research Institute AORI switzerland
{'results': [], 'html_attributions': [], 'status': 'ZERO_RESULTS'}
{'results': [], 'html_attributions': [], 'status': 'ZERO_RESULTS'}
***************************************
query: Allergie- und Asthmaforschung SIAF switzerland
{'results': [{'geometry': {'location': {'lng': 9.8200409, 'lat': 46.7954192}, 'viewport': {'southwest': {'lng': 9.81931865, 'lat': 46.79532020000001}, 'northeast': {'lng': 9.82028165, 'lat': 46.79571619999999}}}, 'formatted_address': 'Obere Str. 22, 7270 Davos Platz, Switzerland', 'types': ['point_of_interest', 'establishment'], 'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/generic_business-71.png', 'place_id': 'ChIJL3RyxgGkhEcR24v33tCoi5A', 'name': 'Schweiz. Institut f. Allergie- u. Asthmaforschung', 'id': 'ea435d78b7507556ed24683ca26f040f93a8d840', 'reference': 'CmRSAAAAni8nyzfHFwOwEodapiCFEL9anYzWlx8ttwPGNuyBQunBIZONV1j9_B0GgBAnOPR89m34BCCudBgoFX8PRA42w-eUOH5kYL



{'results': [{'geometry': {'location': {'lng': 9.026636499999999, 'lat': 46.1968913}, 'viewport': {'southwest': {'lng': 9.026546699999997, 'lat': 46.19671865}, 'northeast': {'lng': 9.026905900000001, 'lat': 46.19694885}}}, 'formatted_address': 'Viale Officina 3, Bellinzona, Switzerland', 'photos': [{'width': 3200, 'photo_reference': 'CoQBdwAAAL_NSTF_cr_12x4IbOy5zVG4ZCYMpRwd5UfRveokVFlTO2tdi21qc18afgCFclJGiTbVTFDu-Wkb27qURmPaeALGzlBO5vLJbTlR5jwLTf_oFgScndb5PvO2GcnE6eC9MAUuee242joi4NYOVuKK6Ir1Oof5tHz2ywlZ1C-UB09QEhD9atlqWqdsZrPCnPm_Ci1EGhQ5E3B_YWgIGsDGm7Tr_NKtsdhp9g', 'html_attributions': ['<a href="https://maps.google.com/maps/contrib/113223259282903616271/photos">Daniel Vasile Tishchenko</a>'], 'height': 2106}], 'types': ['hospital', 'point_of_interest', 'establishment'], 'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/doctor-71.png', 'place_id': 'ChIJe7akFTNKhEcRKvjBP5odqdA', 'name': 'Ente Ospedaliero Cantonale', 'id': 'a9a3d6bfc2b8c10377892588918227aa403b7fd9', 'reference'



***************************************
query: Pädagogische Hochschule Schaffhausen PHSH switzerland
{'results': [{'geometry': {'location': {'lng': 8.645286199999997, 'lat': 47.7073984}, 'viewport': {'southwest': {'lng': 8.644982900000002, 'lat': 47.70726765000001}, 'northeast': {'lng': 8.645387299999998, 'lat': 47.70779065}}}, 'formatted_address': 'Ebnatstrasse 80, 8200 Schaffhausen, Switzerland', 'types': ['point_of_interest', 'establishment'], 'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/school-71.png', 'place_id': 'ChIJaz2FyteBmkcRVaBtCVv5Zqg', 'name': 'Pädagogische Hochschule Schaffhausen (PHSH)', 'id': '2dd708f308987b7b0155b763b1f39ee53e03cdc0', 'reference': 'CmRSAAAAnTYGCzJiHRQbF7V_2ZDtbEQHlyUu4MpQDYMCcsqb9OvetEEqbaPfHev4pnIzynWaLGXjxKCrtUeJFqiQnRFi1v2rkKDtb15p6MzvhJrzxN9AWVyoy9tjocs9vUde7epxEhBefbxuUVEFj7QtSo4sLKjwGhRRcOjuuz8YVUH9c-MRVR10EO4XeQ'}], 'html_attributions': [], 'status': 'OK'}
{'results': [{'geometry': {'location': {'lng': 8.645286199999997, 'lat': 47.

61

In [13]:
len(universities[~pd.isnull(universities['canton'])])

61

In [14]:
universities[pd.isnull(universities['canton'])]

Unnamed: 0_level_0,amount,university_name,abbrev,canton
university,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AO Research Institute - AORI,3435621.0,AO Research Institute,AORI,
Eidg. Material und Prüfungsanstalt - EMPA,58574520.0,Eidg. Material und Prüfungsanstalt,EMPA,
Firmen/Privatwirtschaft - FP,111686700.0,Firmen/Privatwirtschaft,FP,
Forschungsanstalten Agroscope - AGS,33115720.0,Forschungsanstalten Agroscope,AGS,
Forschungskommission SAGW,100000.0,Forschungskommission SAGW,,
Istituto Svizzero di Roma - ISR,141000.0,Istituto Svizzero di Roma,ISR,
"NPO (Biblioth., Museen, Verwalt.) - NPO",334130600.0,"NPO (Biblioth., Museen, Verwalt.)",NPO,
Physikal.-Meteorolog. Observatorium Davos - PMOD,12098440.0,Physikal.-Meteorolog. Observatorium Davos,PMOD,
Pädagogische Hochschule Nordwestschweiz - PHFHNW,3476142.0,Pädagogische Hochschule Nordwestschweiz,PHFHNW,
Schweizer Kompetenzzentrum Sozialwissensch. - FORS,34735820.0,Schweizer Kompetenzzentrum Sozialwissensch.,FORS,


### map by hand (using google & wikipedia):
- Schweizer Kompetenzzentrum Sozialwissensch. -> lausanne -> VD
- Weitere Institute -> translates to 'other institutes' -> nan
- Forschungsanstalten Agroscope -> not in one place -> nan
- Haute école pédagogique BE, JU, NE -> situated in JU but belongs to BE, JU & NE -> JU or 1/3 for each?
- Swiss Institute of Bioinformatics -> all over the place -> nan
- Firmen/Privatwirtschaft -> similar to 'other institutions' -> nan
- Forschungsinstitut für Opthalmologie -> in Sitten -> VS
- Eidg. Forschungsanstalt für Wald,Schnee,Land -> all over the place -> nan
- Istituto Svizzero di Roma -> in ROM (italy) -> nan
- Pädag. Hochschule Tessin (Teilschule SUPSI) -> TI
- Pädagogische Hochschule Nordwestschweiz -> office in Windisch -> AG
- Physikal.-Meteorolog. Observatorium Davos -> GR
- Instituto Ricerche Solari Locarno -> TI
- Staatsunabh. Theologische Hochschule Basel -> BS
- Fachhochschule Nordwestschweiz (ohne PH) -> same as 'Pädagogische Hochschule Nordwestschweiz' -> AG
- Forschungskommission SAGW -> does not exist anymore -> nan
- NPO (Biblioth., Museen, Verwalt.) -> several institutions -> nan
- Swiss Center for Electronics and Microtech. -> Neuchâtel -> NE
- Eidg. Material und Prüfungsanstalt -> all over the place (BE, ZH, SG) -> nan
- Weitere Spitäler -> several hospitals -> nan
- 'AO Research Institute - AORI' -> Davos -> GR

In [15]:
universities[pd.isnull(universities['canton'])].index

Index(['AO Research Institute - AORI',
       'Eidg. Material und Prüfungsanstalt - EMPA',
       'Firmen/Privatwirtschaft - FP', 'Forschungsanstalten Agroscope - AGS',
       'Forschungskommission SAGW', 'Istituto Svizzero di Roma - ISR',
       'NPO (Biblioth., Museen, Verwalt.) - NPO',
       'Physikal.-Meteorolog. Observatorium Davos - PMOD',
       'Pädagogische Hochschule Nordwestschweiz - PHFHNW',
       'Schweizer Kompetenzzentrum Sozialwissensch. - FORS',
       'Staatsunabh. Theologische Hochschule Basel - STHB',
       'Swiss Institute of Bioinformatics - SIB', 'Weitere Institute - FINST',
       'Weitere Spitäler - ASPIT', 'Zürcher Fachhochschule (ohne PH) - ZFH'],
      dtype='object', name='university')

In [29]:
universities.at['Weitere Spitäler - ASPIT', 'canton']

nan

In [31]:
# do the mapping
manual_map = {
        'Schweizer Kompetenzzentrum Sozialwissensch. - FORS' : 'VD',
        'Pädag. Hochschule Tessin (Teilschule SUPSI) - ASP' : 'TI',
        'Pädagogische Hochschule Nordwestschweiz - PHFHNW' : 'AG',
        'Physikal.-Meteorolog. Observatorium Davos - PMOD' : 'GR',
        'Instituto Ricerche Solari Locarno - IRSOL' : 'TI',
        'Staatsunabh. Theologische Hochschule Basel - STHB' : 'BS',
        'Zürcher Fachhochschule (ohne PH) - ZFH' : 'AG',
        'AO Research Institute - AORI' : 'GR'
    }
for uni_index, ctn in manual_map.items():
    if pd.isnull(universities.at[uni_index, 'canton']):
        universities.set_value(uni_index, 'canton', ctn)
universities[pd.isnull(universities['canton'])]

Unnamed: 0_level_0,amount,university_name,abbrev,canton
university,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Eidg. Material und Prüfungsanstalt - EMPA,58574520.0,Eidg. Material und Prüfungsanstalt,EMPA,
Firmen/Privatwirtschaft - FP,111686700.0,Firmen/Privatwirtschaft,FP,
Forschungsanstalten Agroscope - AGS,33115720.0,Forschungsanstalten Agroscope,AGS,
Forschungskommission SAGW,100000.0,Forschungskommission SAGW,,
Istituto Svizzero di Roma - ISR,141000.0,Istituto Svizzero di Roma,ISR,
"NPO (Biblioth., Museen, Verwalt.) - NPO",334130600.0,"NPO (Biblioth., Museen, Verwalt.)",NPO,
Swiss Institute of Bioinformatics - SIB,11583220.0,Swiss Institute of Bioinformatics,SIB,
Weitere Institute - FINST,9256736.0,Weitere Institute,FINST,
Weitere Spitäler - ASPIT,10749810.0,Weitere Spitäler,ASPIT,


In [32]:
# how many did we match?
print(str(round((100/ len(universities) ) * len(universities[~pd.isnull(universities['canton'])])) )+ '%')

88%


In [41]:
write_to_csv(universities, 'uni_mappings/unis_mapped(54)_25_10_2016.csv')

done writing


True

In [34]:
canton_grants = universities.groupby(by='canton', axis=0).sum()
canton_grants.sort_values('amount', ascending=False)

Unnamed: 0_level_0,amount
canton,Unnamed: 1_level_1
ZH,3597297000.0
VD,2401656000.0
GE,1877102000.0
BE,1555048000.0
BS,1392498000.0
FR,459073700.0
NE,401897600.0
AG,171031000.0
TI,115262300.0
SG,91194100.0


In [40]:
write_to_csv(canton_grants, 'all_canton_grants.csv')

done writing


True

In [36]:
# total grants of unmapped unis
unmatched_grants = universities[pd.isnull(universities['canton'])]['amount'].sum()
unmatched_grants

569338301.61

In [38]:
# which is how many % of all grants?
total_grants = universities.amount.sum()
print(str(round((100/ total_grants ) * unmatched_grants, 2) )+ '%')

4.44%
