In [1]:
import pandas as pd
import pycountry
from geopy.geocoders import Nominatim, ArcGIS
from geopy.extra.rate_limiter import RateLimiter

In [2]:
def remove_third_element(point):
    """Trim a coordinate tuple down to its first two components.

    Args:
        point: Any value. Only ``tuple`` instances are shortened.

    Returns:
        ``point[:2]`` when ``point`` is a tuple; otherwise ``point`` itself,
        unchanged (for example ``None`` or a list passes straight through).
    """
    return point[:2] if isinstance(point, tuple) else point

def get_code(name):
    """Resolve an area name to the ``alpha_2`` code pycountry reports for it.

    Args:
        name: Value handed to ``pycountry.countries.lookup``.

    Returns:
        The ``alpha_2`` attribute of the matched country, or ``None`` when
        the lookup raises ``LookupError`` (no match for ``name``).
    """
    try:
        country = pycountry.countries.lookup(name)
    except LookupError:
        # Unresolvable names are reported as None rather than raised.
        return None
    return country.alpha_2
    
def get_location(row):
    """Geocode one area, adding its country code to the query when known.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing a
            'GeoAreaName' entry and a 'Country Code' entry.

    Returns:
        Whatever the module-level ``geocode`` callable returns for the query
        "<GeoAreaName>, <Country Code>", or for the bare area name when the
        country code is missing or empty.
    """
    code = row['Country Code']
    # A missing code can arrive as None, NaN or pd.NA depending on how pandas
    # stored the column. NaN is truthy, so a bare truthiness test would build
    # the query "<name>, nan". pd.notna is checked first so that bool() is
    # never evaluated on pd.NA, which raises.
    has_code = bool(pd.notna(code)) and bool(code)
    query = f"{row['GeoAreaName']}, {code}" if has_code else row['GeoAreaName']
    return geocode(query)


In [3]:
# Read the SDG extract from the working directory; row 0 of the file supplies
# the column names. The index is then renumbered from zero.
dataset = pd.read_csv(
    '2d SDG Dataset.csv',
    header=0,
    low_memory=False,
)
dataset = dataset.reset_index(drop=True)

In [4]:
# One row per distinct (GeoAreaCode, GeoAreaName) pair, with a 'Country Code'
# column holding what get_code returns for the area name (None when the name
# cannot be resolved).
geonames = (
    dataset.loc[:, ['GeoAreaCode', 'GeoAreaName']]
    .drop_duplicates()
    .assign(**{'Country Code': lambda areas: areas['GeoAreaName'].apply(get_code)})
)

In [5]:
# Geocoding client, wrapped so that consecutive requests are spaced by at
# least min_delay_seconds (here: one second).
geolocator = ArcGIS(user_agent="DAEN-690-Capstone")
geocode = RateLimiter(
    geolocator.geocode,
    min_delay_seconds=1,
)

# Geocode every row (one request per area), then keep each result's point as
# a plain tuple; rows with no geocoding result get None.
geonames['Location'] = geonames.apply(get_location, axis='columns')
geonames['Point'] = geonames['Location'].apply(
    lambda found: tuple(found.point) if found else None
)

In [6]:
# pd.set_option("Display.max_rows", None)
# geonames

In [7]:
# Show every row and the full cell contents whenever a frame is displayed.
# The canonical lowercase option names are used instead of relying on
# pandas' case-insensitive pattern matching of option keys.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

# Drop the raw geocoder result column, then keep only rows where every
# remaining column (including 'Country Code' and 'Point') is populated.
geoinfo = geonames.drop(columns='Location').dropna(how='any')

# Shorten each point tuple to its first two components. This reads from
# geoinfo itself rather than geonames, so the step no longer depends on
# index alignment between the two frames.
geoinfo['Point'] = geoinfo['Point'].apply(remove_third_element)

# Split the two-component tuples into separate columns with a single frame
# construction, instead of building one pd.Series per row via apply.
geoinfo[['translate_y', 'translate_x']] = pd.DataFrame(
    geoinfo['Point'].tolist(),
    index=geoinfo.index,
    columns=['translate_y', 'translate_x'],
)

# Persist the lookup table without the DataFrame index.
geoinfo.to_csv('GeoLocationInfo.csv', index=False)

In [8]:
# Render the finished lookup table as this cell's output.
geoinfo

Unnamed: 0,GeoAreaCode,GeoAreaName,Country Code,Point,translate_y,translate_x
0,4,Afghanistan,AF,"(33.831137065, 66.024711797)",33.831137,66.024712
24,8,Albania,AL,"(41.134553284, 20.064206431)",41.134553,20.064206
48,12,Algeria,DZ,"(28.144113769, 2.679965933)",28.144114,2.679966
72,16,American Samoa,AS,"(-14.30068806, -170.718116122)",-14.300688,-170.718116
96,20,Andorra,AD,"(42.545303201, 1.576286302)",42.545303,1.576286
120,24,Angola,AO,"(-12.293655626, 17.545335193)",-12.293656,17.545335
144,28,Antigua and Barbuda,AG,"(17.077664637, -61.798710121)",17.077665,-61.79871
168,31,Azerbaijan,AZ,"(40.290161969, 47.528707407)",40.290162,47.528707
192,32,Argentina,AR,"(-35.495758184, -65.071542108)",-35.495758,-65.071542
216,36,Australia,AU,"(-25.70993157, 134.484031198)",-25.709932,134.484031
