# Get geolocations for universities

In [None]:
# import packages

import numpy as np
import pandas as pd
import geopandas as gpd
import geopy

In [None]:
# load the university table; the source file is semicolon-delimited

universities = pd.read_csv(
    filepath_or_buffer='../data/universities.csv',
    delimiter=';',
)
universities.head()

In [None]:
# combine the separate address parts into one 'Address' column of the form
# "<Street> <Number>, <Postal code>, <City>, Nederland"

# 'Number' is cast to str so it can be joined with the text columns
street_line = universities['Street'] + ' ' + universities['Number'].astype(str)
universities['Address'] = (
    street_line
    + ', ' + universities['Postal code']
    + ', ' + universities['City']
    + ', Nederland'
)
universities.head()

In [None]:
# get geo coordinates from addresses

from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

# NOTE(review): "my-example" looks like a placeholder user agent; Nominatim's
# usage policy asks for one that identifies the application - confirm/replace.
geolocator = Nominatim(user_agent="my-example")

# 1 - wrap the geocoder so consecutive calls are at least 1 second apart
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

# 2 - create location column (one geocoding request per address)
universities['Location'] = universities['Address'].apply(geocode)

# 3 - create (latitude, longitude, altitude) tuple from location column;
#     addresses without a geocoding result get None instead of a tuple
universities['point'] = universities['Location'].apply(lambda loc: tuple(loc.point) if loc else None)

# 4 - split point column into latitude, longitude and altitude columns
# A None entry (address not found) cannot be expanded into three columns and
# would make the split fail, so such rows are filled with NaN coordinates.
# Naming the columns explicitly also keeps the split working for an empty table.
coordinate_columns = ['latitude', 'longitude', 'altitude']
missing_point = (np.nan, np.nan, np.nan)
point_rows = [pt if isinstance(pt, tuple) else missing_point for pt in universities['point']]
universities[coordinate_columns] = pd.DataFrame(point_rows, index=universities.index, columns=coordinate_columns)



In [None]:
# remove the raw address parts and the intermediate geocoding columns;
# the result is stored in a new frame, 'universities' itself is not modified
helper_columns = ['Street', 'Number', 'Postal code', 'City', 'Location', 'point', 'altitude']
universities_geo = universities.drop(helper_columns, axis='columns')

In [None]:
# preview the first five rows of the cleaned table
universities_geo.head(n=5)

In [None]:
# write the geocoded table to a new CSV file
# (pandas defaults apply: comma-separated, row index written as the first column)

universities_geo.to_csv(path_or_buf='../data/universities_geo.csv')