# Get geolocations for universities

In [1]:
# import packages

import numpy as np
import pandas as pd
import geopandas as gpd
import geopy

In [2]:
# load the semicolon-separated university table and preview its first rows

universities = pd.read_csv(
    '../data/universities.csv',
    sep=';',
)
universities.head()

Unnamed: 0,University,Street,Number,Postal code,City,Staff_number
0,Wageningen University,Droevendaalsesteeg,4,6708PB,WAGENINGEN,3247
1,Radboud University,Houtlaan,4,6525XZ,NIJMEGEN,3903
2,University of Groningen,Broerstraat,5,9712CP,GRONINGEN,5146
3,Maastricht University,Minderbroedersberg,4,6211LK,MAASTRICHT,4565
4,Open University,Valkenburgerweg,177,6419AT,HEERLEN,668


In [3]:
# build a single address column: "<street> <number>, <postal code>, <city>, Nederland"

street_and_number = universities['Street'] + ' ' + universities['Number'].astype(str)
postal_code_and_city = universities['Postal code'] + ', ' + universities['City']
universities['Address'] = street_and_number + ', ' + postal_code_and_city + ', Nederland'
universities.head()

Unnamed: 0,University,Street,Number,Postal code,City,Staff_number,Address
0,Wageningen University,Droevendaalsesteeg,4,6708PB,WAGENINGEN,3247,"Droevendaalsesteeg 4, 6708PB, WAGENINGEN, Nede..."
1,Radboud University,Houtlaan,4,6525XZ,NIJMEGEN,3903,"Houtlaan 4, 6525XZ, NIJMEGEN, Nederland"
2,University of Groningen,Broerstraat,5,9712CP,GRONINGEN,5146,"Broerstraat 5, 9712CP, GRONINGEN, Nederland"
3,Maastricht University,Minderbroedersberg,4,6211LK,MAASTRICHT,4565,"Minderbroedersberg 4, 6211LK, MAASTRICHT, Nede..."
4,Open University,Valkenburgerweg,177,6419AT,HEERLEN,668,"Valkenburgerweg 177, 6419AT, HEERLEN, Nederland"


In [5]:
# get geo coordinates from addresses

from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

# NOTE(review): "my-example" is a placeholder user agent - consider replacing it with a
# name that identifies this project before running many requests.
geolocator = Nominatim(user_agent="my-example")

# 1 - wrap the geocoder so consecutive calls are at least one second apart
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

# 2 - create location column (one geocoding request per address)
universities['Location'] = universities['Address'].apply(geocode)

# 3 - create (latitude, longitude, altitude) tuple from location column;
#     rows without a location get None
universities['point'] = universities['Location'].apply(lambda loc: tuple(loc.point) if loc else None)

# 4 - split point column into latitude, longitude and altitude columns.
#     Rows without a point are padded with NaN and the column names are given
#     explicitly, so the frame always has exactly three columns - even when the
#     first lookup (or every lookup) failed, or the table is empty.
coordinate_columns = ['latitude', 'longitude', 'altitude']
missing_point = (np.nan, np.nan, np.nan)
coordinate_rows = [
    point if isinstance(point, tuple) else missing_point
    for point in universities['point']
]
universities[coordinate_columns] = pd.DataFrame(
    coordinate_rows,
    index=universities.index,
    columns=coordinate_columns,
)


In [6]:
# drop unnecessary columns
# DataFrame.drop returns a new frame, so the result is assigned back; otherwise the
# helper columns would still be present when `universities` is saved afterwards.
# errors='ignore' keeps the cell re-runnable once the columns are already gone.
helper_columns = ['Street', 'Number', 'Postal code', 'City', 'Location', 'point', 'altitude']
universities = universities.drop(columns=helper_columns, errors='ignore')
universities

Unnamed: 0,University,Staff_number,Address,latitude,longitude
0,Wageningen University,3247,"Droevendaalsesteeg 4, 6708PB, WAGENINGEN, Nede...",51.986389,5.668009
1,Radboud University,3903,"Houtlaan 4, 6525XZ, NIJMEGEN, Nederland",51.816893,5.865491
2,University of Groningen,5146,"Broerstraat 5, 9712CP, GRONINGEN, Nederland",53.219246,6.563101
3,Maastricht University,4565,"Minderbroedersberg 4, 6211LK, MAASTRICHT, Nede...",50.847129,5.686418
4,Open University,668,"Valkenburgerweg 177, 6419AT, HEERLEN, Nederland",50.878702,5.95765
5,TU Eindhoven,3606,"Den Dolech 2, 5612AZ, EINDHOVEN, Nederland",51.447888,5.485949
6,Tilburg University,2144,"Warandelaan 2, 5037AB, TILBURG, Nederland",51.563249,5.042238
7,University van Amsterdam,5697,"Spui 21, 1012WX, AMSTERDAM, Nederland",52.368585,4.890201
8,VU Amsterdam,4354,"De Boelelaan 1105, 1081HV, AMSTERDAM, Nederland",52.333962,4.865197
9,University Twente,3251,"Drienerlolaan 5, 7522NB, ENSCHEDE, Nederland",52.23918,6.856275


In [None]:
# write the universities table to a CSV file in the current working directory

universities.to_csv(path_or_buf='universities.csv')