**Goal:**

Get coordinates from addresses (geocoding). Let's see how to obtain location coordinates for free in Python using the geopy and geocoder libraries.

In [None]:
import pandas as pd 

**About the dataset:**

The dataset is a simplified version of the [real estate listings for Georgia (US) published on Kaggle](https://www.kaggle.com/datasets/yellowj4acket/real-estate-georgia).

In [None]:
# Read only the columns used in this walkthrough from the listings CSV
listing_columns = [
  'id', 'countyId', 'cityId', 'price', 'pricePerSquareFoot',
  'city', 'streetAddress', 'zipcode', 'longitude', 'latitude',
  'bathrooms', 'bedrooms', 'hasGarage', 'pool', 'homeType', 'county',
]
df = pd.read_csv(
  'sample_data/real_estate_georgia_small.csv',
  usecols=listing_columns,
)

# Show the (rows, columns) shape, then preview the first rows
display(df.shape)
df.head()

(399, 16)

Unnamed: 0,id,countyId,cityId,price,pricePerSquareFoot,city,streetAddress,zipcode,longitude,latitude,bathrooms,bedrooms,hasGarage,pool,homeType,county
0,31503-110785431,17,55064,169900,74,Waycross,103 Hog Creek Rd,31503,-82.232475,31.200703,3,4,0,0,SINGLE_FAMILY,Brantley County
1,31503-76611082,18,55064,159000,56,Waycross,605 Ware St,31503,-82.365105,31.213835,2,4,0,0,SINGLE_FAMILY,Ware County
2,31503-93126153,19,55064,45000,62,Waycross,1961 Mount Pleasant Rd,31503,-82.327118,31.210838,1,1,0,0,SINGLE_FAMILY,Ware County
3,31503-110785598,20,55064,165000,102,Waycross,39 Joyce Rd,31503,-82.2313,31.169851,2,2,0,0,SINGLE_FAMILY,Brantley County
4,31503-2101070583,21,55064,128593,0,Waycross,0 Swamp Rd,31503,-82.405525,31.045984,0,0,0,0,LOT,Ware County


**GeoPy**

Documentation: https://geopy.readthedocs.io/en/stable/

In [None]:
from geopy.geocoders import Nominatim
# Create the geocoder client that the following cells call via geolocator.geocode(...).
# user_agent -> name of your app; replace "sample app" with your own application's name
geolocator = Nominatim(user_agent="sample app")

In [None]:
# A street name/number on its own may lead to errors because it is not unique
# across cities, so the query combines the street address with the zipcode
address_query = '1961 Mount Pleasant Rd 31503'
location_from_address = geolocator.geocode(address_query)
print(location_from_address)

1961, Mount Pleasant Road, Waycross, Ware County, Georgia, 31503, United States


In [None]:
# Inspect the raw response dictionary attached to the result
# (bounding box, class/type, display name, lat/lon, OSM identifiers, ...)
location_from_address.raw

{'boundingbox': ['31.212425547625',
  '31.212525547625',
  '-82.326661870167',
  '-82.326561870167'],
 'class': 'place',
 'display_name': '1961, Mount Pleasant Road, Waycross, Ware County, Georgia, 31503, United States',
 'importance': 0.42099999999999993,
 'lat': '31.212475547625072',
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
 'lon': '-82.3266118701665',
 'osm_id': 9527168,
 'osm_type': 'way',
 'place_id': 293134502,
 'type': 'house'}

In [None]:
# The dataset already ships longitude and latitude, but if this data was missing
# it could be read from the geocoded result's point:
geocoded_point = location_from_address.point
print(geocoded_point.latitude)
print(geocoded_point.longitude)

31.212475547625072
-82.3266118701665


In [None]:
# the response also includes bounding-box data; for a single address it is
# just a very small box around the geocoded point rather than a real boundary
print(location_from_address.raw.get('boundingbox'))

['31.212425547625', '31.212525547625', '-82.326661870167', '-82.326561870167']


In [None]:
# Geocoding an area (here a county) gives a bounding box that spans the area
county_query = 'Brantley County'
location_county = geolocator.geocode(county_query)
county_bbox = location_county.raw.get('boundingbox')
print(county_bbox)

['31.01063', '31.373188', '-82.284531', '-81.731694']


In [None]:
# if we want to create a column for the boundaries, we could do the following

def get_boundaries(place_name):
  """Look up the bounding box of a place in Georgia (US) with the geocoder.

  Args:
    place_name: Name of the place to look up, e.g. 'Ware County'. The suffix
      ' Georgia US' is appended to narrow the search to the state.

  Returns:
    The 'boundingbox' entry of the geocoder's raw response (a list of four
    coordinate strings in the outputs shown in this notebook), or None when
    place_name is not a non-empty string, nothing matches, or the lookup fails.
  """
  # Missing values (NaN/None) reach this function when it is mapped over a
  # DataFrame column; skip them instead of failing on the string concatenation.
  if not isinstance(place_name, str) or not place_name.strip():
    return None

  query = place_name + ' Georgia US'
  try:
    match = geolocator.geocode(query)
  except Exception as ex:
    # Best-effort lookup: report the failure and carry on with the other places.
    print(place_name, ex)
    return None

  # geocode() yields None when the service finds no match for the query.
  if match is None:
    print(place_name, 'no geocoding match found')
    return None
  return match.raw.get('boundingbox')


# Geocode each distinct county only once and reuse the result for all of its
# rows: mapping get_boundaries over the column directly would send one request
# per row, repeating the same lookup for every listing in a county.
county_bboxes = {
  name: get_boundaries(name)
  for name in df['county'].dropna().unique()
}
# Rows with a missing county get None; rows of the same county share one list object.
df['bbox'] = df['county'].map(county_bboxes.get)
df.head()

Unnamed: 0,id,countyId,cityId,price,pricePerSquareFoot,city,streetAddress,zipcode,longitude,latitude,bathrooms,bedrooms,hasGarage,pool,homeType,county,bbox
0,31503-110785431,17,55064,169900,74,Waycross,103 Hog Creek Rd,31503,-82.232475,31.200703,3,4,0,0,SINGLE_FAMILY,Brantley County,"['31.01063', '31.373188', '-82.284531', '-81.7..."
1,31503-76611082,18,55064,159000,56,Waycross,605 Ware St,31503,-82.365105,31.213835,2,4,0,0,SINGLE_FAMILY,Ware County,"['30.568481', '31.469048', '-82.701332', '-82...."
2,31503-93126153,19,55064,45000,62,Waycross,1961 Mount Pleasant Rd,31503,-82.327118,31.210838,1,1,0,0,SINGLE_FAMILY,Ware County,"['30.568481', '31.469048', '-82.701332', '-82...."
3,31503-110785598,20,55064,165000,102,Waycross,39 Joyce Rd,31503,-82.2313,31.169851,2,2,0,0,SINGLE_FAMILY,Brantley County,"['31.01063', '31.373188', '-82.284531', '-81.7..."
4,31503-2101070583,21,55064,128593,0,Waycross,0 Swamp Rd,31503,-82.405525,31.045984,0,0,0,0,LOT,Ware County,"['30.568481', '31.469048', '-82.701332', '-82...."


**Geocoder**

Documentation: https://geocoder.readthedocs.io/

*geocoder* supports over 20 providers, including Google Maps and OSM. Let's use *arcgis* for this example.

In [None]:
# !pip install geocoder
import geocoder

# Geocode a free-form address through the ArcGIS provider and show the result
arcgis_query = '1961 Mount Pleasant Rd 31503'
location_arcgis = geocoder.arcgis(arcgis_query)
location_arcgis

<[OK] Arcgis - Geocode [1961 Mount Pleasant Rd, Waycross, Georgia, 31503]>

In [None]:
# Print the latitude first, then the longitude, each on its own line
print(location_arcgis.lat, location_arcgis.lng, sep='\n')

31.210808982705146
-82.32707603942933


In [None]:
# .json summarises the result as a plain dictionary
# (address, bbox, lat/lng, confidence, status, raw payload, ...)
location_arcgis.json

{'address': '1961 Mount Pleasant Rd, Waycross, Georgia, 31503',
 'bbox': {'northeast': [31.211808982705147, -82.32607603942932],
  'southwest': [31.209808982705145, -82.32807603942933]},
 'confidence': 9,
 'lat': 31.210808982705146,
 'lng': -82.32707603942933,
 'ok': True,
 'quality': 'PointAddress',
 'raw': {'extent': {'xmax': -82.32607603942932,
   'xmin': -82.32807603942933,
   'ymax': 31.211808982705147,
   'ymin': 31.209808982705145},
  'feature': {'attributes': {'Addr_Type': 'PointAddress', 'Score': 100},
   'geometry': {'x': -82.32707603942933, 'y': 31.210808982705146}},
  'name': '1961 Mount Pleasant Rd, Waycross, Georgia, 31503'},
 'score': 100,
 'status': 'OK'}

In [None]:
# Bounding box of the match, given as 'northeast' and 'southwest' [lat, lng] corners
location_arcgis.bbox

{'northeast': [31.211808982705147, -82.32607603942932],
 'southwest': [31.209808982705145, -82.32807603942933]}

**Creating a column for the coordinates**

In [None]:
# Pair every row's longitude with its latitude: one (longitude, latitude) tuple per listing
lon_lat_pairs = [(lon, lat) for lon, lat in zip(df['longitude'], df['latitude'])]
df['coordinates'] = lon_lat_pairs
df.head()

Unnamed: 0,id,countyId,cityId,price,pricePerSquareFoot,city,streetAddress,zipcode,longitude,latitude,bathrooms,bedrooms,hasGarage,pool,homeType,county,bbox,coordinates
0,31503-110785431,17,55064,169900,74,Waycross,103 Hog Creek Rd,31503,-82.232475,31.200703,3,4,0,0,SINGLE_FAMILY,Brantley County,"['31.01063', '31.373188', '-82.284531', '-81.7...","(-82.23247528, 31.20070267)"
1,31503-76611082,18,55064,159000,56,Waycross,605 Ware St,31503,-82.365105,31.213835,2,4,0,0,SINGLE_FAMILY,Ware County,"['30.568481', '31.469048', '-82.701332', '-82....","(-82.36510468, 31.21383476)"
2,31503-93126153,19,55064,45000,62,Waycross,1961 Mount Pleasant Rd,31503,-82.327118,31.210838,1,1,0,0,SINGLE_FAMILY,Ware County,"['30.568481', '31.469048', '-82.701332', '-82....","(-82.32711792, 31.21083832)"
3,31503-110785598,20,55064,165000,102,Waycross,39 Joyce Rd,31503,-82.2313,31.169851,2,2,0,0,SINGLE_FAMILY,Brantley County,"['31.01063', '31.373188', '-82.284531', '-81.7...","(-82.23130035, 31.1698513)"
4,31503-2101070583,21,55064,128593,0,Waycross,0 Swamp Rd,31503,-82.405525,31.045984,0,0,0,0,LOT,Ware County,"['30.568481', '31.469048', '-82.701332', '-82....","(-82.40552521, 31.04598427)"


In [None]:
# Wrap each (longitude, latitude) tuple of the coordinates column in a Shapely Point
from shapely.geometry import Point

coordinate_points = df['coordinates'].map(Point)
df['coordinates'] = coordinate_points
df.head()

Unnamed: 0,id,countyId,cityId,price,pricePerSquareFoot,city,streetAddress,zipcode,longitude,latitude,bathrooms,bedrooms,hasGarage,pool,homeType,county,bbox,coordinates
0,31503-110785431,17,55064,169900,74,Waycross,103 Hog Creek Rd,31503,-82.232475,31.200703,3,4,0,0,SINGLE_FAMILY,Brantley County,"['31.01063', '31.373188', '-82.284531', '-81.7...",POINT (-82.23247528 31.20070267)
1,31503-76611082,18,55064,159000,56,Waycross,605 Ware St,31503,-82.365105,31.213835,2,4,0,0,SINGLE_FAMILY,Ware County,"['30.568481', '31.469048', '-82.701332', '-82....",POINT (-82.36510468 31.21383476)
2,31503-93126153,19,55064,45000,62,Waycross,1961 Mount Pleasant Rd,31503,-82.327118,31.210838,1,1,0,0,SINGLE_FAMILY,Ware County,"['30.568481', '31.469048', '-82.701332', '-82....",POINT (-82.32711792 31.21083832)
3,31503-110785598,20,55064,165000,102,Waycross,39 Joyce Rd,31503,-82.2313,31.169851,2,2,0,0,SINGLE_FAMILY,Brantley County,"['31.01063', '31.373188', '-82.284531', '-81.7...",POINT (-82.23130035 31.1698513)
4,31503-2101070583,21,55064,128593,0,Waycross,0 Swamp Rd,31503,-82.405525,31.045984,0,0,0,0,LOT,Ware County,"['30.568481', '31.469048', '-82.701332', '-82....",POINT (-82.40552521 31.04598427)
