# Overview
* visualize geospatial data using Folium
* play with Haversine formula (great-circle distance)

# Dependencies

In [2]:
# general
import os

# data
import pandas as pd
import numpy as np

# geo
from math import radians, cos, sin, asin, sqrt
import math

# viz
import folium
from folium.plugins import MarkerCluster
import matplotlib as plt

# Get Data

In [3]:
df_ports = pd.read_csv("./data/ports.csv")
df_ports.head()

Unnamed: 0,port,lat,long
0,6,42.387051,-71.057504
1,7,22.81233,120.193849
2,10,10.200166,-61.701978
3,11,36.4,126.483333
4,13,29.897303,122.089303


# Funcs

## geospatial

In [4]:
def haversine(lat1, lon1, lat2, lon2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    r = 6371 # Radius of earth in kilometers. Use 3956 for miles
    
    return c * r

def bool_coords_within_radius(radius, center_lat, center_long, test_lat, test_long):
    '''
    test whether a given point is within a given radius of a center point
    
    radius (float): length of radius in kilometers
    center_lat (float): latitude of center
    center_long (float): longitude of center
    test_lat (float): latitude of test point
    test_long (float): longitude of test point
    '''
    
    dist = haversine(center_lat, center_long, test_lat, test_long)
    
    return dist <= radius

## visualization

In [9]:
def geo_visualization(df):

    # initialize the map
    map_viz = folium.Map(
        location=[df.iloc[0]['lat'], df.iloc[0]['long']],
        zoom_start=9
    )

    # add ports
    for idx, row in df.iterrows():

        # add popup
        # ie include all other details in df if provided
        ls_popup = [f"<b>index</b>:&nbsp{idx}<br>"] + [f"<b>{col}</b>:&nbsp{val}<br>" for col, val in row.iteritems()]

        # marker
        folium.CircleMarker(
            location=[row['lat'], row['long']],
            color='#43d9de', 
            radius=10, # this scales with the map (ie it always looks the same)
            popup="".join(ls_popup)
        ).add_to(map_viz)

    # map will display
    return map_viz

In [10]:
# testing
geo_visualization(df_ports)

# Calculate the Distance between Each Port Duo

In [11]:
%%time
# distances between each port

# create N x N array
# for each combination, calculate the distance
# take minimum of that final array

# create N x N array
port_count = len(df_ports.index)
arr_dist = np.empty((port_count, port_count))
arr_dist[:] = np.NaN

# # just upper right (no diag)
# for row in range(0, port_count - 1):
#     for col in range(row + 1, port_count):
#         arr_dist[row][col] = haversine(
#             df_ports.iloc[row]['lat'], df_ports.iloc[row]['long'],
#             df_ports.iloc[col]['lat'], df_ports.iloc[col]['long']
#         )

# entire matrix
# more straightforward to take min/max/avg
for row in range(port_count):
    for col in range(port_count):
        if row != col:
            arr_dist[row][col] = haversine(
                df_ports.iloc[row]['lat'], df_ports.iloc[row]['long'],
                df_ports.iloc[col]['lat'], df_ports.iloc[col]['long']
            )

Wall time: 9.63 s


In [12]:
df_port_dist = pd.DataFrame(
    np.column_stack((arr_dist, np.nanmin(arr_dist, axis=1)))
)
df_port_dist

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,113,114,115,116,117,118,119,120,121,122
0,,12673.711776,3693.048980,11075.301764,11865.498836,11276.452112,6309.101775,4928.582728,11588.353422,2669.813484,...,2637.111873,15004.973897,9566.265052,5773.634708,15972.566244,12690.476506,2412.644512,16555.133358,14587.628150,513.820695
1,12673.711776,,16338.461821,1627.566565,810.084676,1419.951896,9770.374562,10171.107279,1086.730106,15309.093236,...,13118.514168,3004.798234,3338.247600,10723.007991,3519.295290,2920.769288,13132.860642,4086.461724,3157.721393,160.910127
2,3693.048980,16338.461821,,14762.988705,15538.013645,14922.615696,7756.990030,6857.194079,15252.681365,1249.071521,...,4067.237180,17751.916082,13240.074583,6855.377855,18686.604600,15451.519377,3947.878757,19346.178251,17107.858998,1024.433393
3,11075.301764,1627.566565,14762.988705,,830.503389,611.659654,9050.003541,9146.678450,633.140459,13689.885830,...,11538.872321,4547.254161,1771.325966,9956.322686,5141.920443,3726.765894,11539.389434,5594.655404,4581.134826,72.444273
4,11865.498836,810.084676,15538.013645,830.503389,,667.974769,9318.134738,9583.399941,299.063749,14499.084813,...,12364.370749,3727.027047,2584.107783,10255.463896,4313.046715,3160.277244,12367.712235,4862.526032,3794.136287,159.789996
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117,15972.566244,3519.295290,18686.604600,5141.920443,4313.046715,4820.614014,11034.308674,12045.193665,4556.152321,18594.787429,...,16559.577922,1070.240730,6846.061038,11907.339075,,3488.534516,16607.129077,1974.999799,1666.588910,879.898157
118,12690.476506,2920.769288,15451.519377,3726.765894,3160.277244,3126.056341,7734.358177,8600.045083,3161.691626,15133.575223,...,14348.472291,2419.707177,5344.041433,8666.495278,3488.534516,,14251.386211,5216.244172,1903.739706,1399.057385
119,2412.644512,13132.860642,3947.878757,11539.389434,12367.712235,11939.587895,8717.258330,7321.632522,12153.772226,2721.490385,...,226.811045,16067.711332,9794.909512,8179.019437,16607.129077,14251.386211,,15861.645931,15939.762955,61.435741
120,16555.133358,4086.461724,19346.178251,5594.655404,4862.526032,5504.570362,12923.837261,13797.474495,5156.777263,18552.524138,...,15712.271653,2932.047283,7002.774409,13828.056631,1974.999799,5216.244172,15861.645931,,3575.700784,846.531744
