In [1]:
import pandas as pd
import gmaps
import gmaps.geojson_geometries

# API key setup: a `config` module must be importable and must define
# 'gkey', the Google Maps API key, before this notebook is run.

from config import gkey

In [2]:
# First, we need to clean the horribly formatted initial dataset into something usable.

# Read the raw export in one go and keep it as a list with one string per line.
path = 'resources/centers-uncleaned.csv'
with open(path, 'r') as csv_file:
    output = csv_file.read().split('\n')

all_name = []
all_city = []
all_state = []
all_zip = []


def _parse_area(field):
    """Split a 'City, ST 12345' style field into (city, state, zipcode).

    Returns None when the field does not end in five ASCII digits, has no
    comma, or has fewer than two whitespace-separated tokens after the first
    comma.
    """
    tail = field[-5:]
    # Require a full five-character tail: all() over an empty or short slice
    # is vacuously true and would accept blank fields.
    if len(tail) != 5 or not all(ch in '1234567890' for ch in tail):
        return None
    parts = field.split(',')
    if len(parts) < 2:
        return None
    # split() already ignores leading whitespace, so no character is sliced
    # off; 'City,ST 12345' keeps its full state token.
    tokens = parts[1].split()
    if len(tokens) < 2:
        return None
    return parts[0], tokens[0], tokens[1]


for line in output:
    info = line.split(';')
    # Rows without a second field have no center name to record.
    if len(info) < 2:
        continue
    name = info[1]

    # Search the fields after the name for the first one that parses as a
    # city / state / zip combination. The result is computed fresh for every
    # row, so a row without a location is skipped instead of inheriting the
    # location found on an earlier row.
    parsed = next(
        (hit for hit in map(_parse_area, info[2:]) if hit is not None),
        None,
    )
    if parsed is None:
        continue

    city, state, zipcode = parsed
    all_name.append(name)
    all_city.append(city)
    all_state.append(state)
    all_zip.append(zipcode)

In [3]:
# Create a nicely formatted dataframe to work with

# ZIP lookup table: ZIP is read as text, and the columns are renamed so that
# 'Zip' lines up with the key used in the centers data.
zips_df = pd.read_csv('resources/zips.csv', dtype = {'ZIP' : 'str'}).rename(
    columns = {'ZIP' : 'Zip', 'LAT' : 'Lat', 'LNG' : 'Lon'}
)

# Assemble the parsed lists into a frame and attach Lat / Lon by Zip.
# The inner join drops any center whose Zip has no match in the lookup table.
centers_df = pd.DataFrame(
    {'Name' : all_name, 'City' : all_city, 'State' : all_state, 'Zip' : all_zip}
).merge(zips_df, on = 'Zip', how = 'inner')

# Persist the cleaned, merged data.
out_path = 'resources/centers.csv'
centers_df.to_csv(out_path)

In [4]:
# Preview the first rows of the merged centers frame.
centers_df.head()

Unnamed: 0,Name,City,State,Zip,Lat,Lon
0,Henry County Day Treatment,Abbeville,AL,36310,31.595148,-85.208852
1,SpectraCare,Abbeville,AL,36310,31.595148,-85.208852
2,Rehab Day Montezuma Complex,Andalusia,AL,36420,31.172401,-86.565077
3,South Central Alabama CMHC,Andalusia,AL,36420,31.172401,-86.565077
4,Grandview Behavioral Health Center,Anniston,AL,36201,33.64916,-85.879642


In [5]:
# Hand gmaps the API key imported from config as gkey.
gmaps.configure(api_key = gkey)

In [6]:
# Count centers per ZIP and keep each ZIP's coordinates on the same row, so
# that every heatmap weight belongs to the location it is drawn at.
# Building locations with drop_duplicates() and weights with a separate
# groupby('Zip') would pair them by position even though the first keeps
# first-appearance order and the second is sorted by Zip.
zip_counts = (
    centers_df
    .groupby(by = ['Zip', 'Lat', 'Lon'])['Name']
    .count()
    .reset_index(name = 'Count')
)
locations = zip_counts[['Lat', 'Lon']]
# Still a Series of per-ZIP counts indexed by Zip, now row-aligned with locations.
num_centers = zip_counts.set_index('Zip')['Count']

fig = gmaps.figure()
heat_layer = gmaps.heatmap_layer(locations, weights = num_centers, max_intensity = 75, point_radius = 6)
fig.add_layer(heat_layer)
fig

Figure(layout=FigureLayout(height='420px'))

In [7]:
# Leave out rows whose State is GU, VI or AS, then count centers per State.
excluded_mask = centers_df['State'].isin(['GU', 'VI', 'AS'])
state_df = centers_df.loc[~excluded_mask].groupby(by = 'State')['Name'].count()

# Save the per-state counts and show them as the cell output.
state_df.to_csv('resources/centers_bystate.csv')
state_df

State
AK     80
AL    133
AR    204
AZ    305
CA    670
CO    150
CT    205
DC     36
DE     23
FL    408
GA    185
HI     37
IA    130
ID    168
IL    327
IN    262
KS     99
KY    201
LA    164
MA    266
MD    247
ME    172
MI    304
MN    201
MO    190
MS    143
MT     70
NC    252
ND     28
NE    109
NH     48
NJ    256
NM     60
NV     46
NY    723
OH    511
OK    131
OR    115
PA    508
PR     66
RI     52
SC     96
SD     43
TN    231
TX    306
UT     96
VA    226
VT     56
WA    231
WI    359
WV    106
WY     46
Name: Name, dtype: int64

In [8]:
# Load gmaps' 'us-states' geometry and start a fresh figure for the state map.
state_geojson = gmaps.geojson_geometries.load_geometry('us-states')
fig = gmaps.figure()

# Extremes of the per-state center counts, used to scale counts for coloring.
min_centers = min(state_df)
max_centers = max(state_df)
# The span is max - min so that (count - min_centers) / range_centers is
# non-negative; with the operands reversed the span would be negative.
range_centers = max_centers - min_centers

def calculate_color(count):
    """Map a center count to a hex color string for gmaps.

    The count is shifted by the module-level ``min_centers`` and divided by
    the module-level ``range_centers``; the result is passed to ``viridis``
    and the color it returns is converted with ``to_hex`` (alpha dropped).

    NOTE(review): ``viridis`` and ``to_hex`` are not imported in the cells
    shown here -- presumably ``matplotlib.cm.viridis`` and
    ``matplotlib.colors.to_hex``; confirm and import them before calling.
    """
    fraction = (count - min_centers) / range_centers
    return to_hex(viridis(fraction), keep_alpha = False)

# Placeholder for per-state fill colors; nothing is appended to it yet.
# TODO: fill this from calculate_color and pass it to the layer.
colors = []

# Iterating a Series yields its values, so this prints each state's count.
for center_count in state_df:
    print(center_count)

# The layer is created without any fill colors, then shown as the cell output.
centers_layer = gmaps.geojson_layer(state_geojson)
fig.add_layer(centers_layer)
fig

80
133
204
305
670
150
205
36
23
408
185
37
130
168
327
262
99
201
164
266
247
172
304
201
190
143
70
252
28
109
48
256
60
46
723
511
131
115
508
66
52
96
43
231
306
96
226
56
231
359
106
46


Figure(layout=FigureLayout(height='420px'))