# Import

In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import numpy as np
import sqlite3
import requests
import json
from math import radians, cos, sin, asin, sqrt

# Read in referrals and geocoded addresses

In [2]:
# Load the referral table and the two address-geocode tables that are
# joined onto it in the next cell.
referrals_big = pd.read_csv(
    filepath_or_buffer='../data/neo4j/referrals_big.csv'
)
to_addresses = pd.read_csv(
    filepath_or_buffer='../data/geocodes/to_address_geocodes_big.csv'
)
from_addresses = pd.read_csv(
    filepath_or_buffer='../data/geocodes/from_address_geocodes_big.csv'
)

In [3]:
# Attach coordinates for both ends of each referral. Each merge uses pandas'
# default inner join, so referrals without a matching geocode row drop out.
# The suffixes on the second merge disambiguate column names shared by the
# two geocode tables (e.g. lat_from / lat_to).
ref_geo = pd.merge(
    pd.merge(referrals_big, from_addresses, on='address_from'),
    to_addresses,
    on='address_to',
    suffixes=('_from', '_to'),
)

# Write Haversine formula to calculate miles between coordinates

In [4]:
def haversine(lon1, lat1, lon2, lat2, radius=3956):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees).

    Parameters
    ----------
    lon1, lat1 : float
        Longitude and latitude of the first point, in decimal degrees.
    lon2, lat2 : float
        Longitude and latitude of the second point, in decimal degrees.
    radius : float, optional
        Radius of the sphere; the result is expressed in the same unit.
        Defaults to 3956 (miles). Pass 6371 for kilometers.

    Returns
    -------
    float
        Great-circle distance between the two points. NaN if any
        coordinate is NaN.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2

    # Floating-point rounding can push `a` a hair above 1 for (near-)antipodal
    # points, which would make asin raise ValueError. Clamp it to 1. A plain
    # comparison (rather than min()) leaves NaN untouched so it propagates.
    if a > 1.0:
        a = 1.0

    c = 2 * asin(sqrt(a))
    return c * radius

# Apply Haversine formula to ref_geo

In [5]:
# Great-circle distance in miles between the "from" and "to" coordinates of
# every referral. Zipping the four coordinate columns avoids the per-row
# Series that iterrows() constructs, while producing the same values.
ref_geo['distance_miles'] = [
    haversine(lng_from, lat_from, lng_to, lat_to)
    for lng_from, lat_from, lng_to, lat_to in zip(
        ref_geo['lng_from'],
        ref_geo['lat_from'],
        ref_geo['lng_to'],
        ref_geo['lat_to'],
    )
]

In [6]:
# Preview the first five rows to sanity-check the new distance column.
ref_geo.head(n=5)

Unnamed: 0,from_npi,referral_id,entity_from,name_from,organization_from,taxonomy_code_from,classification_from,specialization_from,address_line1_from,address_line2_from,...,transaction_count,average_day_wait,std_day_wait,address_from,address_to,lat_from,lng_from,lat_to,lng_to,distance_miles
0,1821080961,18854880,1.0,"SHERWOOD, WILLIAM",,207Q00000X,Family Medicine,,302 N CONGRESS BLVD,,...,238,9.924,40.266,"302 N CONGRESS BLVD, SMITHVILLE, TN 37166","400 E PUBLIC SQUARE, ALEXANDRIA, TN 37012",35.969171,-85.807472,36.077559,-86.033322,14.665166
1,1679677199,104890727,2.0,,"CRIPPS, HOOPER & RHODY, PLLC",207Q00000X,Family Medicine,,302 N CONGRESS BLVD,,...,758,0.0,0.0,"302 N CONGRESS BLVD, SMITHVILLE, TN 37166","400 E PUBLIC SQUARE, ALEXANDRIA, TN 37012",35.969171,-85.807472,36.077559,-86.033322,14.665166
2,1841282779,131966153,1.0,"RHODY, KEVIN",,207Q00000X,Family Medicine,,302 N CONGRESS BLVD,,...,236,7.695,36.447,"302 N CONGRESS BLVD, SMITHVILLE, TN 37166","400 E PUBLIC SQUARE, ALEXANDRIA, TN 37012",35.969171,-85.807472,36.077559,-86.033322,14.665166
3,1770575607,146169993,1.0,"COOPER, STEVEN",,207Q00000X,Family Medicine,,302 N CONGRESS BLVD,,...,320,2.725,18.756,"302 N CONGRESS BLVD, SMITHVILLE, TN 37166","400 E PUBLIC SQUARE, ALEXANDRIA, TN 37012",35.969171,-85.807472,36.077559,-86.033322,14.665166
4,1093741464,29450644,2.0,,"ADVANCED DIAGNOSTIC IMAGING, PC",207R00000X,Internal Medicine,,3024 BUSINESS PARK CIR,,...,60,36.3,43.394,"3024 BUSINESS PARK CIR, GOODLETTSVILLE, TN 37072","400 E PUBLIC SQUARE, ALEXANDRIA, TN 37012",36.332108,-86.70083,36.077559,-86.033322,41.132856


In [7]:
# Persist the geocoded referrals; the DataFrame index is not written.
ref_geo.to_csv(
    path_or_buf='../data/neo4j/referrals_big_geo.csv',
    index=False,
)