In [1]:
# add any other imports you need here!
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from math import radians, cos, sin, asin, sqrt, exp

In [2]:
# use this to compute distance, not Euclidean Distance!
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance, in miles, between two points.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in decimal degrees.

    Returns
    -------
    float
        Distance along the sphere's surface in miles.
    """
    R = 3959.87433  # Earth radius in miles; use 6372.8 for kilometers

    dLat = radians(lat2 - lat1)
    dLon = radians(lon2 - lon1)
    lat1 = radians(lat1)
    lat2 = radians(lat2)

    a = sin(dLat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dLon / 2) ** 2
    # Floating-point rounding can nudge `a` just outside [0, 1] (e.g. for
    # near-antipodal points), which would make sqrt/asin raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))

    return R * c

In [3]:
def logistic(x, steepness=4):
    """Return the logistic sigmoid 1 / (1 + exp(-steepness * x)).

    Parameters
    ----------
    x : float
        Input value.
    steepness : float, optional
        Multiplier applied to ``x`` before the sigmoid; defaults to 4.

    Returns
    -------
    float
        A value in [0, 1]; exactly 0.5 at ``x == 0``.
    """
    z = steepness * x
    if z >= 0:
        return 1 / (1 + exp(-z))
    # For negative z, exp(-z) can overflow; exp(z) only underflows to 0.0,
    # so use the algebraically equivalent form exp(z) / (1 + exp(z)).
    ez = exp(z)
    return ez / (1 + ez)

# Optimal Center Formula
- parameters:

    1. `county_lat`: a double containing the latitude of the county's population center
    2. `county_long`: a double containing the longitude of the county's population center
    3. `potential_center_lat`: a double containing the latitude of the potential center
    4. `potential_center_long`: a double containing the longitude of the potential center
    5. `county_classification`: an int from 0-8 representing how much mental health need is required (8 = highest need, 0 = lowest need)

- return: should return a double representing a score of how optimal this center is
- Your formula should weight distance more heavily (maybe exponentially), and weight the need more linearly
- Also, make sure to normalize your value to the range [0, 1] after applying your formula!
    - 1 = very optimal center
    - 0 = not optimal center

In [4]:
def optimal_center_formula(county_lat, county_long, potential_center_lat, potential_center_long, county_classification):
    """Score how well a potential center serves one county, as a value in [0, 1].

    The score is the product of two factors:

    1. Distance score. The haversine distance (miles) between the county's
       population center and the potential center is divided by
       LARGEST_DISTANCE and passed through an inverted, vertically stretched
       logistic curve: ``2 * (1 - logistic(d / LARGEST_DISTANCE))``. The
       result is 1 at zero distance and decays towards 0 as the distance
       grows: roughly 0.68 at 500 miles, 0.24 at 1500 miles and 0.04 at
       3000 miles.

    2. Need score. The classification is divided by MAX_CLASSIFICATION and
       square-rooted. The square root is concave, so mid-range need is lifted
       (a classification of 4 gives about 0.71 rather than 0.5).

    Parameters
    ----------
    county_lat, county_long : float
        Latitude/longitude of the county's population center, in degrees.
    potential_center_lat, potential_center_long : float
        Latitude/longitude of the potential center, in degrees.
    county_classification : int
        Need level from 0 to MAX_CLASSIFICATION (8). Values outside that
        range are clamped to it so the score stays a real number in [0, 1].

    Returns
    -------
    float
        1 means a very optimal center for this county, 0 means not optimal.
    """
    LARGEST_DISTANCE = 3000  # rough distance across the US, in miles
    MAX_CLASSIFICATION = 8   # highest need classification

    distance = haversine(county_lat, county_long, potential_center_lat, potential_center_long)

    # logistic(d / L) rises from 0.5 at d = 0 towards 1, so (1 - logistic)
    # lies in [0, 0.5]; multiplying by 2 stretches that to [0, 1].
    distance_score = 2 * (1 - logistic(distance / LARGEST_DISTANCE))

    # Clamp before the root: a negative ratio would give a complex number and
    # a ratio above 1 would push the final score past 1.
    need_ratio = min(1.0, max(0.0, county_classification / MAX_CLASSIFICATION))
    need_score = need_ratio ** (1 / 2)

    score = distance_score * need_score
    return score

## Use this Method for Testing!

In [5]:
def test_optimal_center_formula():
    """Print the optimal-center score for four hand-picked scenarios.

    Each scenario pairs a county population center with a potential center
    and a need classification; the printed label states the range the score
    is expected to fall in. Nothing is asserted: compare the output by eye.
    """
    fayetteville_center = (35.148310, -86.579200)

    # (label, county (lat, long), center (lat, long), need classification)
    scenarios = [
        # close coordinates, high need
        ("Testcase 1 (should return a value close to 1): ",
         (39.746151, -84.207549), (39.686560, -83.924960), 8),
        # far coordinates, low need
        ("Testcase 2 (should return a value close to 0): ",
         (33.733953, -117.862880), fayetteville_center, 0),
        # far coordinates, even though need is high
        ("Testcase 3 (should return a lower value (~0.1-0.3)): ",
         (40.560780, -112.379217), fayetteville_center, 8),
        # close coordinates, moderate need
        ("Testcase 4 (should return moderate value (~0.5-0.9)): ",
         (30.554707, -97.711430), (30.533330, -97.694740), 4),
    ]

    for label, (c_lat, c_long), (p_lat, p_long), need in scenarios:
        print(label, optimal_center_formula(c_lat, c_long, p_lat, p_long, need))

In [7]:
# Run the four scenarios; check each printed score against the range named in its label.
test_optimal_center_formula()

Testcase 1 (should return a value close to 1):  0.9896153641712377
Testcase 2 (should return a value close to 0):  0.0
Testcase 3 (should return a lower value (~0.1-0.3)):  0.2524899797517759
Testcase 4 (should return moderate value (~0.5-0.9)):  0.7062675100039082
