In [1]:
import pandas as pd
import numpy as np
import geopy

## 1. Import dataset

In [2]:
l_cols= ['Name',
 'Street Address',
 'City',
 'State',
 'Zipcode']

In [3]:
df = pd.read_csv('/kaggle/input/canada-museums/museums list CAN.csv', encoding = "ISO-8859-1",usecols=l_cols)
df = df[df.City=='Toronto']
df = df.iloc[:30]
df.head()

Unnamed: 0,Name,Street Address,City,State,Zipcode
9,Bizune Event Gallery,452 Richmond St W,Toronto,ON,M5V 1Y1
33,Gallery 129,129 Ossington Ave,Toronto,ON,M6J 2Z6
40,Campbell House,160 Queen St W,Toronto,ON,M5H 3H3
51,Magic Pony Gallery,680 Queen St W,Toronto,ON,M6J 1E5
56,Columbus Centre (Joseph D. Carrier Art Gallery),901 Lawrence Ave W,Toronto,ON,M6A 1C3


In [4]:
df['Country'] = 'Canada'
l_cols_concat = ['Street Address','City','State','Zipcode','Country']
df['unique_address'] = df['Name'].str.cat(others=df[l_cols_concat], sep=',',na_rep='')
df.head()

Unnamed: 0,Name,Street Address,City,State,Zipcode,Country,unique_address
9,Bizune Event Gallery,452 Richmond St W,Toronto,ON,M5V 1Y1,Canada,"Bizune Event Gallery,452 Richmond St W,Toronto..."
33,Gallery 129,129 Ossington Ave,Toronto,ON,M6J 2Z6,Canada,"Gallery 129,129 Ossington Ave,Toronto,ON,M6J 2..."
40,Campbell House,160 Queen St W,Toronto,ON,M5H 3H3,Canada,"Campbell House,160 Queen St W,Toronto,ON,M5H 3..."
51,Magic Pony Gallery,680 Queen St W,Toronto,ON,M6J 1E5,Canada,"Magic Pony Gallery,680 Queen St W,Toronto,ON,M..."
56,Columbus Centre (Joseph D. Carrier Art Gallery),901 Lawrence Ave W,Toronto,ON,M6A 1C3,Canada,Columbus Centre (Joseph D. Carrier Art Gallery...


In [5]:
address1 = df['unique_address'].iloc[0]
address1

'Bizune Event Gallery,452 Richmond St W,Toronto,ON,M5V 1Y1,Canada'

## 2. Google Maps API

In [6]:
from geopy.geocoders import GoogleV3
GM_API_KEY = 'AIzaSyDpfGWc3ZzDzlUq8KKJp3spJw7rIhOAapQ'
geolocator = GoogleV3(api_key=GM_API_KEY)

### single address

In [7]:
location = geolocator.geocode(address1)
print('Latitude: '+str(location.latitude)+', Longitude: '+str(location.longitude))

Latitude: 43.64786549999999, Longitude: -79.39632309999999


## all column

In [8]:
def service_geocode(g_locator, address):
    location = g_locator.geocode(address)
    if location!=None:
      return (location.latitude, location.longitude)
    else:
      return np.NaN

In [9]:
df['LAT_LON'] = df['unique_address'].apply(lambda x: service_geocode(geolocator,x))
df['LAT_LON'].isnull().sum()

0

In [10]:
df[['unique_address','LAT_LON']].head()

Unnamed: 0,unique_address,LAT_LON
9,"Bizune Event Gallery,452 Richmond St W,Toronto...","(43.64786549999999, -79.39632309999999)"
33,"Gallery 129,129 Ossington Ave,Toronto,ON,M6J 2...","(43.6469146, -79.4198136)"
40,"Campbell House,160 Queen St W,Toronto,ON,M5H 3...","(43.6510504, -79.3873108)"
51,"Magic Pony Gallery,680 Queen St W,Toronto,ON,M...","(43.64647679999999, -79.4081582)"
56,Columbus Centre (Joseph D. Carrier Art Gallery...,"(43.71306939999999, -79.4566389)"


## 3. OpenStreetMap API

In [11]:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="eugenia.anello@gmail.com")

In [12]:
## single address

location = geolocator.geocode(address1)
print(location)

None


In [13]:
l_cols_concat = ['City','State','Country']
df['unique_address_osm'] = df['Street Address'].str.cat(others=df[l_cols_concat], sep=',',na_rep='')

In [14]:
address1_osm = df['unique_address_osm'].iloc[0]
location = geolocator.geocode(address1_osm)
print('Latitude: '+str(location.latitude)+', Longitude: '+str(location.longitude))

Latitude: 43.647843, Longitude: -79.3972978


In [15]:
## all column

df['LAT_LON_osm'] = df['unique_address_osm'].apply(lambda x: service_geocode(geolocator,x))
print(df['LAT_LON_osm'].isnull().sum())

0


In [16]:
df[['unique_address_osm','LAT_LON','LAT_LON_osm']].head()

Unnamed: 0,unique_address_osm,LAT_LON,LAT_LON_osm
9,"452 Richmond St W,Toronto,ON,Canada","(43.64786549999999, -79.39632309999999)","(43.647843, -79.3972978)"
33,"129 Ossington Ave,Toronto,ON,Canada","(43.6469146, -79.4198136)","(43.6470929, -79.4197301)"
40,"160 Queen St W,Toronto,ON,Canada","(43.6510504, -79.3873108)","(43.6510466, -79.3873221)"
51,"680 Queen St W,Toronto,ON,Canada","(43.64647679999999, -79.4081582)","(43.6466132, -79.4074726)"
56,"901 Lawrence Ave W,Toronto,ON,Canada","(43.71306939999999, -79.4566389)","(43.71287255, -79.45729951731573)"


## 4. ArcGIS API

In [17]:
from geopy.geocoders import ArcGIS
#ARCGIS_API_KEY = "AAPKfad909e25c864a129ce38cc9728c4e2buHsS2w2jZGmBn4-SSAaLBWF4PKeXspVNBMurEgN0_x1rh8yCX9GZiCKOYqR0Udac"
geolocator_arcgis = ArcGIS()

In [18]:
location = geolocator_arcgis.geocode(address1)
print('Latitude: '+str(location.latitude)+', Longitude: '+str(location.longitude))

Latitude: 43.647981017664, Longitude: -79.397477330359


In [19]:
df['LAT_LON_arcgis'] = df['unique_address'].apply(lambda x: service_geocode(geolocator_arcgis,x))
print(df['LAT_LON_arcgis'].isnull().sum())

0


In [20]:
df[['unique_address','LAT_LON','LAT_LON_osm','LAT_LON_arcgis']].head()

Unnamed: 0,unique_address,LAT_LON,LAT_LON_osm,LAT_LON_arcgis
9,"Bizune Event Gallery,452 Richmond St W,Toronto...","(43.64786549999999, -79.39632309999999)","(43.647843, -79.3972978)","(43.647981017664, -79.397477330359)"
33,"Gallery 129,129 Ossington Ave,Toronto,ON,M6J 2...","(43.6469146, -79.4198136)","(43.6470929, -79.4197301)","(43.647103683859, -79.419675627453)"
40,"Campbell House,160 Queen St W,Toronto,ON,M5H 3...","(43.6510504, -79.3873108)","(43.6510466, -79.3873221)","(43.650910492824, -79.387155938608)"
51,"Magic Pony Gallery,680 Queen St W,Toronto,ON,M...","(43.64647679999999, -79.4081582)","(43.6466132, -79.4074726)","(43.646665897056, -79.407481634713)"
56,Columbus Centre (Joseph D. Carrier Art Gallery...,"(43.71306939999999, -79.4566389)","(43.71287255, -79.45729951731573)","(43.71349, -79.45696)"


In [21]:
df[df['LAT_LON']==df['LAT_LON_arcgis']].shape[0]

0

In [22]:
df[df['LAT_LON']==df['LAT_LON_osm']].shape[0]

0

## Measure distance between two places

In [23]:
from geopy.distance import geodesic
point1 = df.LAT_LON_arcgis.iloc[0]
point2 = df.LAT_LON_arcgis.iloc[1]
distance = geodesic(point1, point2)
print('The distance between {} and {} is {} meters'.format(df.Name.iloc[0],df.Name.iloc[1],distance.meters))

The distance between Bizune Event Gallery and Gallery 129 is 1793.5975842643695 meters


## Geopandas DataFrame

In [24]:
df['LAT'] = df['LAT_LON_arcgis'].apply(lambda x: x[0])
df['LON'] = df['LAT_LON_arcgis'].apply(lambda x: x[1])
df[['LAT_LON_arcgis','LAT','LON']].head()

Unnamed: 0,LAT_LON_arcgis,LAT,LON
9,"(43.647981017664, -79.397477330359)",43.647981,-79.397477
33,"(43.647103683859, -79.419675627453)",43.647104,-79.419676
40,"(43.650910492824, -79.387155938608)",43.65091,-79.387156
51,"(43.646665897056, -79.407481634713)",43.646666,-79.407482
56,"(43.71349, -79.45696)",43.71349,-79.45696


In [25]:
import geopandas as gpd
from shapely.geometry import Point

In [26]:
df['LAT_LON_arcgis'] = df['LAT_LON'].apply(Point)

In [27]:
gdf = gpd.GeoDataFrame(df,geometry=df.LAT_LON_arcgis,crs='4326')
gdf.sample(3)

Unnamed: 0,Name,Street Address,City,State,Zipcode,Country,unique_address,LAT_LON,unique_address_osm,LAT_LON_osm,LAT_LON_arcgis,LAT,LON,geometry
33,Gallery 129,129 Ossington Ave,Toronto,ON,M6J 2Z6,Canada,"Gallery 129,129 Ossington Ave,Toronto,ON,M6J 2...","(43.6469146, -79.4198136)","129 Ossington Ave,Toronto,ON,Canada","(43.6470929, -79.4197301)",POINT (43.6469146 -79.4198136),43.647104,-79.419676,POINT (43.64691 -79.41981)
171,Perth/Dupont,1589 Dupont Street,Toronto,ON,M6P 3S5,Canada,"Perth/Dupont,1589 Dupont Street,Toronto,ON,M6P...","(43.66504690000001, -79.4537239)","1589 Dupont Street,Toronto,ON,Canada","(43.665038499999994, -79.45382384869933)",POINT (43.66504690000001 -79.4537239),43.665034,-79.453837,POINT (43.66505 -79.45372)
67,Testaments of Honour Historical Archives,134 Lawton Blvd,Toronto,ON,M4V 2A4,Canada,"Testaments of Honour Historical Archives,134 L...","(43.6943557, -79.3974552)","134 Lawton Blvd,Toronto,ON,Canada","(43.69437695, -79.39754919571769)",POINT (43.6943557 -79.3974552),43.694344,-79.397563,POINT (43.69436 -79.39746)


In [28]:
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 30 entries, 9 to 379
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   Name                30 non-null     object  
 1   Street Address      30 non-null     object  
 2   City                30 non-null     object  
 3   State               30 non-null     object  
 4   Zipcode             29 non-null     object  
 5   Country             30 non-null     object  
 6   unique_address      30 non-null     object  
 7   LAT_LON             30 non-null     object  
 8   unique_address_osm  30 non-null     object  
 9   LAT_LON_osm         30 non-null     object  
 10  LAT_LON_arcgis      30 non-null     object  
 11  LAT                 30 non-null     float64 
 12  LON                 30 non-null     float64 
 13  geometry            30 non-null     geometry
dtypes: float64(2), geometry(1), object(11)
memory usage: 3.5+ KB


In [29]:
gdf.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich