# Import

In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import numpy as np
import sqlite3
import requests
import json
from math import radians, cos, sin, asin, sqrt

# Read in referral and geocode data

In [2]:
# Referral table; the merge cell below joins it on its `address_provider`
# and `address_hospital` columns.
hosp = pd.read_csv(filepath_or_buffer='../data/neo4j/full_refs_hosp.csv')

# Geocode lookups, one per address type. Presumably each holds one row per
# address with its coordinates -- TODO confirm against the CSVs.
hosp_addresses = pd.read_csv(
    filepath_or_buffer='../data/geocodes/hospital_address_geocodes.csv'
)
prov_addresses = pd.read_csv(
    filepath_or_buffer='../data/geocodes/provider_address_geocodes.csv'
)

In [3]:
# Attach provider geocodes first, then hospital geocodes. Column names shared
# by the two sides of the second join are disambiguated with the
# `_provider` / `_hospital` suffixes.
# NOTE(review): both joins use the default inner merge, so referrals whose
# address has no geocode row are dropped silently -- confirm that is intended.
hosp_geo = pd.merge(
    pd.merge(hosp, prov_addresses, on='address_provider'),
    hosp_addresses,
    on='address_hospital',
    suffixes=('_provider', '_hospital'),
)

# Write Haversine formula to calculate miles between coordinates

In [4]:
def haversine(lon1, lat1, lon2, lat2, radius=3956):
    """
    Calculate the great-circle distance between two points on a sphere.

    Note the argument order: longitude comes before latitude for each point.

    Parameters
    ----------
    lon1, lat1 : float
        Longitude and latitude of the first point, in decimal degrees.
    lon2, lat2 : float
        Longitude and latitude of the second point, in decimal degrees.
    radius : float, optional
        Radius of the sphere; the result is expressed in the same unit.
        Defaults to 3956 (miles), the value this notebook has always used.
        Pass 6371 for kilometers. (The commonly cited mean Earth radius is
        about 3958.8 mi, so the default slightly underestimates distances.)

    Returns
    -------
    float
        Distance along the sphere's surface, in the unit of ``radius``.
        NaN inputs propagate to a NaN result.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, (lon1, lat1, lon2, lat2))

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2

    # Rounding error can push sqrt(a) a hair above 1 for (near-)antipodal
    # points, which would make asin() raise ValueError. Clamp it. This is a
    # comparison rather than min() so that a NaN stays NaN instead of being
    # replaced by 1.0.
    root = sqrt(a)
    if root > 1.0:
        root = 1.0

    return 2 * asin(root) * radius

# Apply Haversine formula to hosp_geo and save CSV

In [5]:
# Provider-to-hospital distance for every referral row, using haversine()
# with its default radius (miles).
# Iterating the four coordinate columns in lockstep avoids iterrows(), which
# materialises a full Series per row of this wide frame just to read four
# scalars. haversine() takes longitude before latitude.
hosp_geo['distance_miles'] = [
    haversine(lng_prov, lat_prov, lng_hosp, lat_hosp)
    for lng_prov, lat_prov, lng_hosp, lat_hosp in zip(
        hosp_geo['lng_provider'],
        hosp_geo['lat_provider'],
        hosp_geo['lng_hospital'],
        hosp_geo['lat_hospital'],
    )
]

In [7]:
# Preview the first five rows to sanity-check the new distance_miles column.
hosp_geo.head(n=5)

Unnamed: 0,from_npi,referral_id,entity_type_code_provider,name,taxonomy_code_provider,classification_provider,specialization_provider,organization_provider,address_line1_provider,address_line2_provider,...,address_hospital,patient_count,transaction_count,average_day_wait,std_day_wait,lat_provider,lng_provider,lat_hospital,lng_hospital,distance_miles
0,1790730448,13418843,1.0,"STAFFORD, JAMES",2085R0202X,Radiology,Diagnostic Radiology,,210 25TH AVE N STE 1204,,...,"313 N MAIN ST, ASHLAND CITY, TN 37015",65,67,24.552,50.253,36.148908,-86.808618,36.277115,-87.065626,16.832839
1,1790762219,13418844,1.0,"GOODIN, ELLIS",2085R0202X,Radiology,Diagnostic Radiology,,210 25TH AVE N STE 1204,,...,"313 N MAIN ST, ASHLAND CITY, TN 37015",61,63,17.238,35.569,36.148908,-86.808618,36.277115,-87.065626,16.832839
2,1821060526,19402345,1.0,"WATERS, RONALD",2085R0202X,Radiology,Diagnostic Radiology,,210 25TH AVE N STE 1204,,...,"313 N MAIN ST, ASHLAND CITY, TN 37015",62,66,12.697,29.35,36.148908,-86.808618,36.277115,-87.065626,16.832839
3,1306993282,55969969,1.0,"SHIPMAN, JASON",2085R0202X,Radiology,Diagnostic Radiology,,210 25TH AVE N STE 1204,,...,"313 N MAIN ST, ASHLAND CITY, TN 37015",80,81,10.864,22.144,36.148908,-86.808618,36.277115,-87.065626,16.832839
4,1639226582,87799698,1.0,"PIERRE, KETSIA",2085R0202X,Radiology,Diagnostic Radiology,,210 25TH AVE N STE 1204,,...,"313 N MAIN ST, ASHLAND CITY, TN 37015",66,66,20.258,49.529,36.148908,-86.808618,36.277115,-87.065626,16.832839


In [8]:
# Persist the merged frame, including distance_miles, without writing the
# DataFrame index as a column.
hosp_geo.to_csv(
    path_or_buf='../data/neo4j/hosp_geo.csv',
    index=False,
)