# Geopy exploration
Perform operations for feature engineering using geographic locations and addresses.

In [1]:
# Nominatim is a geocoder from OpenStreetMap
from geopy.geocoders import Nominatim

In [2]:
# Create the Nominatim geocoder client that the cells below reuse.
# NOTE(review): 'experiment' is a very generic user_agent; the Nominatim usage
# policy asks for an application-specific identifier - consider changing it.
geolocator = Nominatim(user_agent='experiment')

In [3]:
# Sanity check: resolve a Washington, DC street address to coordinates.
test_address = "1600 Pennsylvania Ave NW, Washington, DC 20500"
location = geolocator.geocode(test_address)
print(f"{location.latitude} {location.longitude}")

38.897699700000004 -77.03655315


In [4]:
# Repeat the lookup for an address in Manizales and print the geocoder's answer
# next to hand-entered reference coordinates (presumably taken from Google Maps).
test_address = "carrera 17 #64A-236, manizales"
reference_point = (5.058536847796866, -75.48085823371233)
location = geolocator.geocode(test_address)
print("True coordinates: {}, {}".format(*reference_point))
print("Geopy coordinates: {}, {}".format(location.latitude, location.longitude))

True coordinates: 5.058536847796866, -75.48085823371233
Geopy coordinates: 7.8373142, -75.4134266


Looking up a random address and comparing the result with the Google Maps coordinates, the geocoder seems pretty decent for an open-source tool.

# Performing operations on geolocations
It seems geopy has a function that computes the distance between two coordinates.

In [5]:
import geopy.distance

In [9]:
# Geocode two addresses in Manizales and measure the geodesic distance between them.
test_address1 = "carrera 17 #64A-236, manizales"
test_address2 = "av. Santander # 65-11, manizales"

# Geocode each address once and reuse the result: every geocode() call is a
# separate network request to the Nominatim service.
location1 = geolocator.geocode(test_address1)
location2 = geolocator.geocode(test_address2)

# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on `.latitude`.
if location1 is None or location2 is None:
    raise ValueError("Nominatim could not geocode one of the test addresses")

location1_lat, location1_lon = location1.latitude, location1.longitude
location2_lat, location2_lon = location2.latitude, location2.longitude

print((location1_lat, location1_lon))
print((location2_lat, location2_lon))

# Last expression of the cell: the distance in kilometres is shown as the cell output.
geopy.distance.geodesic(
    (location1_lat, location1_lon),
    (location2_lat, location2_lon)
).km

(7.8373142, -75.4134266)
(5.0621462, -75.4931847)


307.0284327125413

The answer should be ~1.2 km.

In [7]:
from math import (
    sin,
    cos,
    sqrt,
    atan2,
    radians
)

def get_distance(coord1: tuple, coord2: tuple, radius: float = 6373.0) -> float:
    """Return the great-circle (haversine) distance between two points.

    Args:
        coord1: (latitude, longitude) of the first point, in decimal degrees.
        coord2: (latitude, longitude) of the second point, in decimal degrees.
        radius: Radius of the sphere; the result is expressed in the same
            unit. Defaults to 6373.0, an approximate Earth radius in km.

    Returns:
        The distance along the surface of the sphere, in the unit of ``radius``.
    """
    lat1, lon1 = radians(coord1[0]), radians(coord1[1])
    lat2, lon2 = radians(coord2[0]), radians(coord2[1])

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    # Haversine term; mathematically it lies in [0, 1].
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push it marginally outside that range
    # (e.g. for near-antipodal points), which would make the square roots
    # below raise ValueError - clamp it first.
    a = min(1.0, max(0.0, a))

    # Central angle between the two points, in radians.
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return radius * c

In [10]:
# Haversine distance for two coordinate pairs: first the pair geocoded by geopy,
# then a hand-entered pair of coordinates for comparison.
coordinate_pairs = [
    ((location1_lat, location1_lon), (location2_lat, location2_lon)),
    (
        (5.0585686659663605, -75.48090029196025),
        (5.056548974862902, -75.48552225138951),
    ),
]
for start, end in coordinate_pairs:
    print(get_distance(start, end))

308.8072968090393
0.5592067120951097


The issue was not the distance function. It is instead related to the precision of the OpenStreetMap coordinates.