# HW3: Where is the Röstigraben?

In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests as rq

## Data loading and preparation

In [2]:
# Placeholder strings in the export that must be read as missing values:
na = ['Nicht zuteilbar - NA',       # Not classifiable
      'data not included in P3']    # Missing value
# Load the grant export, keeping only file columns 0, 7 and 13; column 0
# becomes the index. The code below relies on the 'University' and
# 'Approved Amount' columns being among them.
df = pd.read_csv('data/P3_GrantExport.csv', delimiter=';', na_values=na, usecols=[0,7,13], index_col=0)
# Institutions that cannot be assigned to a canton or are not within Switzerland:
no_chance = ['NPO (Biblioth., Museen, Verwalt.) - NPO',
             'Firmen/Privatwirtschaft - FP',
             'Weitere Institute - FINST',
             'Weitere Spitäler - ASPIT',
             'Istituto Svizzero di Roma - ISR']
# Use the canonical lower-case `np.nan`; the upper-case alias `np.NAN` is not
# available in NumPy 2.x.
df = df.replace(no_chance, np.nan)
# Retain only rows that have both an approved amount and a usable institution.
df = df.dropna(subset=['Approved Amount', 'University'])
# Swiss canton codes (the file has no header row; its column is named 'Name'):
cantons = pd.read_csv('data/cantons_codes.csv', header=None, names=['Name'])

In [3]:
# Distinct institution names found in the grants, missing values excluded.
unis = df['University'].dropna().drop_duplicates()

In [5]:
# Use the Google Places API to retrieve the canton and the coordinates of
# every institution.
# Requires the third-party client: pip install python-google-places
from googleplaces import GooglePlaces, types, lang
# The API key is imported from a separate `keys` module instead of being
# written in this notebook.
from keys import GOOGLE_KEY
# Client object used for the text searches further down in this cell.
google_places = GooglePlaces(GOOGLE_KEY)

def search_google(place, canton_codes=None):
    """Return the canton code and coordinates of a Google Places result.

    Parameters
    ----------
    place : object
        A search result exposing ``get_details()``, ``details`` and
        ``geo_location`` (e.g. an element of ``text_search(...).places``).
    canton_codes : iterable of str, optional
        Codes accepted as cantons. When omitted, every value of the
        module-level ``cantons`` frame is used.

    Returns
    -------
    tuple
        ``(canton, location)``. ``canton`` is the first address component
        ``short_name`` that is a known canton code, or ``None`` when no
        component matches or the details carry no address components.
        ``location`` is ``place.geo_location`` unchanged.
    """
    if canton_codes is None:
        canton_codes = cantons.values.ravel()
    # Build the lookup once rather than scanning an array for every component.
    known_codes = set(canton_codes)

    place.get_details()  # fetch the detailed record read from place.details below
    location = place.geo_location
    # Guard against details that lack address components: report "no canton"
    # instead of raising KeyError/TypeError.
    components = (place.details or {}).get('address_components') or []
    for address in components:
        canton = address.get('short_name')
        if canton in known_codes:
            return canton, location
    return None, location

# Institution name -> {'canton': ..., 'location': {'lat': ..., 'lng': ...}}.
# The result is cached in data/unis.json so the Google API is only queried
# when no usable cache exists.
locations = {}
try:
    locations = pd.read_json('data/unis.json').to_dict()
except (OSError, ValueError):
    # Cache missing or unreadable: rebuild it. Only these errors trigger the
    # rebuild; a bare `except:` would also hide typos and keyboard interrupts.
    for name in unis.values:
        if name in locations:
            continue
        # Search on the part of the name before ' - ', restricted to Switzerland.
        r = google_places.text_search(query=name.split(' - ')[0]+' Schweiz')
        if r and r.places:
            canton, location = search_google(r.places[0]) # 1st result from query
            locations[name] = {'canton': canton, 'location': location}
    # Hand-entered values; they take precedence over the search results.
    manual = { 
    #'Eidg. Material und Prüfungsanstalt':{'canton':'ZH','location':{'lat':47.429403 ,'lng':8.614036}},
    'Physikal.-Meteorolog. Observatorium Davos - PMOD':{'canton':'GR', 'location':{'lat':46.814241, 'lng':9.844508}},
    'Forschungsinstitut für Opthalmologie - IRO':{'canton':'VS', 'location':{'lat':46.233131,  'lng':7.383104}},
    'Swiss Institute of Bioinformatics - SIB':{'canton':'VD', 'location':{'lat':46.519433, 'lng':6.574533}},
    'Pädag. Hochschule Tessin (Teilschule SUPSI) - ASP':{'canton':'TI', 'location':{'lat':46.023528, 'lng':8.917150}},
    'Staatsunabh. Theologische Hochschule Basel - STHB':{'canton':'BL', 'location':{'lat':47.577821, 'lng':7.650187}},
    'Forschungskommission SAGW':{'canton':'LU', 'location': {'lat':47.050179, 'lng':8.312586}}, 
    }
    # Merge into the in-memory dict as well, so that a first run (cache miss)
    # and later runs (cache hit) expose the same `locations`.
    locations.update(manual)
    pd.DataFrame(locations).to_json('data/unis.json')

In [None]:
# Attach the canton of each grant's institution.
# A vectorised lookup replaces the row loop, which assigned through
# `df.iloc[i]['Canton']` (a temporary copy, addressed by index label instead of
# position) and stored the whole location record rather than the canton code.
# Institutions with no entry in `locations` get NaN instead of raising KeyError.
canton_by_uni = {name: info.get('canton') for name, info in locations.items()}
df['Canton'] = df['University'].map(canton_by_uni)

In [7]:
# Number of institutions that have an entry in `locations`
len(locations)

71

In [None]:
# Place one marker per located institution on an interactive map.
import folium
# Initial view uses the coordinates below (presumably the centre of
# Switzerland - confirm) at zoom level 8.
map1 = folium.Map(location=[46.798333, 8.231944], zoom_start=8)

for uni_name, info in locations.items():
    coords = info['location']
    # The popup shows the institution name when the marker is clicked.
    marker = folium.Marker([coords['lat'], coords['lng']], popup=uni_name)
    marker.add_to(map1)

# Also write the map to a standalone HTML file.
map1.save('swiss_universities.html')

In [None]:
# Bare last expression: the notebook displays the map as this cell's output
map1