# Imports

In [110]:
import pandas as pd
import numpy as np
import geopandas as gpd
import folium
from shapely.ops import unary_union
import matplotlib.pyplot as plt

### Color Pallete

In [2]:
c = {
    'green': '#83bca9',
    'darkgreen': '#3e5641'
}

## Load in USA Parks data

In [109]:
usa_parks = gpd.read_file("../data/USA_Parks/v10/park_dtl.gdb/")

In [4]:
usa_parks.columns

Index(['NAME', 'FEATTYPE', 'MNFC', 'SQMI', 'Shape_Length', 'Shape_Area',
       'geometry'],
      dtype='object')

## Load in Places Data

In [5]:
places_df = pd.read_csv('../data/PLACES__Local_Data_for_Better_Health__Place_Data_2023_release_20240504.csv')

In [6]:
places_tact = pd.read_csv('../data/PLACES__Local_Data_for_Better_Health__Census_Tract_Data_2023_release_20240504.csv')

In [7]:
data_dict = pd.read_csv('../data/PLACES_and_500_Cities__Data_Dictionary_20240504.csv')

In [8]:
# usa_parks.loc[usa_parks['NAME'].str.contains('Cumberland')]

In [9]:
# usa_parks.loc[usa_parks['NAME'] == 'Cumberland Gap National Historical Park'].plot()

In [10]:
places_df.columns

Index(['Year', 'StateAbbr', 'StateDesc', 'LocationName', 'DataSource',
       'Category', 'Measure', 'Data_Value_Unit', 'Data_Value_Type',
       'Data_Value', 'Data_Value_Footnote_Symbol', 'Data_Value_Footnote',
       'Low_Confidence_Limit', 'High_Confidence_Limit', 'TotalPopulation',
       'Geolocation', 'LocationID', 'CategoryID', 'MeasureId',
       'DataValueTypeID', 'Short_Question_Text'],
      dtype='object')

#### Get CRS

In [11]:
usa_parks['geometry'].crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

#### Create places geopandas DF 

In [12]:
places_gdf = gpd.GeoDataFrame(places_df)

In [13]:
places_tact.columns

Index(['Year', 'StateAbbr', 'StateDesc', 'CountyName', 'CountyFIPS',
       'LocationName', 'DataSource', 'Category', 'Measure', 'Data_Value_Unit',
       'Data_Value_Type', 'Data_Value', 'Data_Value_Footnote_Symbol',
       'Data_Value_Footnote', 'Low_Confidence_Limit', 'High_Confidence_Limit',
       'TotalPopulation', 'Geolocation', 'LocationID', 'CategoryID',
       'MeasureId', 'DataValueTypeID', 'Short_Question_Text'],
      dtype='object')

In [14]:
places_df.columns

Index(['Year', 'StateAbbr', 'StateDesc', 'LocationName', 'DataSource',
       'Category', 'Measure', 'Data_Value_Unit', 'Data_Value_Type',
       'Data_Value', 'Data_Value_Footnote_Symbol', 'Data_Value_Footnote',
       'Low_Confidence_Limit', 'High_Confidence_Limit', 'TotalPopulation',
       'Geolocation', 'LocationID', 'CategoryID', 'MeasureId',
       'DataValueTypeID', 'Short_Question_Text'],
      dtype='object')

In [15]:
places_df.groupby('Category').count()

Unnamed: 0_level_0,Year,StateAbbr,StateDesc,LocationName,DataSource,Measure,Data_Value_Unit,Data_Value_Type,Data_Value,Data_Value_Footnote_Symbol,Data_Value_Footnote,Low_Confidence_Limit,High_Confidence_Limit,TotalPopulation,Geolocation,LocationID,CategoryID,MeasureId,DataValueTypeID,Short_Question_Text
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Disability,386022,386022,386022,386022,386022,386022,386022,386022,378728,7294,7294,378728,378728,386022,386022,386022,386022,386022,386022,386022
Health Outcomes,718680,718680,718680,718680,718680,718680,718680,718680,705143,13537,13537,705143,705143,718680,718680,718680,718680,718680,718680,718680
Health Risk Behaviors,222406,222406,222406,222406,222406,222406,222406,222406,218227,4179,4179,218227,218227,222406,222406,222406,222406,222406,222406,222406
Health Status,165438,165438,165438,165438,165438,165438,165438,165438,162312,3126,3126,162312,162312,165438,165438,165438,165438,165438,165438,165438
Prevention,562222,562222,562222,562222,562222,562222,562222,562222,551819,10403,10403,551819,551819,562222,562222,562222,562222,562222,562222,562222


In [16]:
places_df.groupby('Measure').count()

Unnamed: 0_level_0,Year,StateAbbr,StateDesc,LocationName,DataSource,Category,Data_Value_Unit,Data_Value_Type,Data_Value,Data_Value_Footnote_Symbol,Data_Value_Footnote,Low_Confidence_Limit,High_Confidence_Limit,TotalPopulation,Geolocation,LocationID,CategoryID,MeasureId,DataValueTypeID,Short_Question_Text
Measure,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
All teeth lost among adults aged >=65 years,56928,56928,56928,56928,56928,56928,56928,56928,55895,1033,1033,55895,55895,56928,56928,56928,56928,56928,56928,56928
Any disability among adults aged >=18 years,55146,55146,55146,55146,55146,55146,55146,55146,54104,1042,1042,54104,54104,55146,55146,55146,55146,55146,55146,55146
Arthritis among adults aged >=18 years,55146,55146,55146,55146,55146,55146,55146,55146,54104,1042,1042,54104,54104,55146,55146,55146,55146,55146,55146,55146
Binge drinking among adults aged >=18 years,55146,55146,55146,55146,55146,55146,55146,55146,54104,1042,1042,54104,54104,55146,55146,55146,55146,55146,55146,55146
Cancer (excluding skin cancer) among adults aged >=18 years,55146,55146,55146,55146,55146,55146,55146,55146,54104,1042,1042,54104,54104,55146,55146,55146,55146,55146,55146,55146
Cervical cancer screening among adult women aged 21-65 years,56968,56968,56968,56968,56968,56968,56968,56968,55915,1053,1053,55915,55915,56968,56968,56968,56968,56968,56968,56968
Cholesterol screening among adults aged >=18 years,55146,55146,55146,55146,55146,55146,55146,55146,54104,1042,1042,54104,54104,55146,55146,55146,55146,55146,55146,55146
Chronic kidney disease among adults aged >=18 years,55146,55146,55146,55146,55146,55146,55146,55146,54104,1042,1042,54104,54104,55146,55146,55146,55146,55146,55146,55146
Chronic obstructive pulmonary disease among adults aged >=18 years,55146,55146,55146,55146,55146,55146,55146,55146,54104,1042,1042,54104,54104,55146,55146,55146,55146,55146,55146,55146
Cognitive disability among adults ages >=18 years,55146,55146,55146,55146,55146,55146,55146,55146,54104,1042,1042,54104,54104,55146,55146,55146,55146,55146,55146,55146


#### Measure to Focus on

In [17]:
measures = ['Cancer (excluding skin cancer) among adults aged >=18 years',
           'Cholesterol screening among adults aged >=18 years',
           'Coronary heart disease among adults aged >=18 years',
           'Current asthma among adults aged >=18 years',
           'Current lack of health insurance among adults aged 18-64 years',
           'Depression among adults aged >=18 years',
           'Diagnosed diabetes among adults aged >=18 years',
           'Fair or poor self-rated health status among adults aged >=18 years',
           'High blood pressure among adults aged >=18 years',
           'Obesity among adults aged >=18 years',
           'Stroke among adults aged >=18 years']

In [18]:
#### Filter the DF by measures

In [19]:
places_filtered = places_df.loc[places_df['Measure'].isin(measures)]

In [20]:
places_filtered.head(1)

Unnamed: 0,Year,StateAbbr,StateDesc,LocationName,DataSource,Category,Measure,Data_Value_Unit,Data_Value_Type,Data_Value,...,Data_Value_Footnote,Low_Confidence_Limit,High_Confidence_Limit,TotalPopulation,Geolocation,LocationID,CategoryID,MeasureId,DataValueTypeID,Short_Question_Text
3,2021,AK,Alaska,Lakes,BRFSS,Health Outcomes,Obesity among adults aged >=18 years,%,Crude prevalence,36.7,...,,32.5,41.2,8364,POINT (-149.3066764 61.60526948),242832,HLTHOUT,OBESITY,CrdPrv,Obesity


# Create base map of parks

In [21]:
# Tile options: "Cartodb Positron"   |   "Stadia.StamenWatercolor"
# "Stadia.StamenTerrain"   |   "CyclOSM"   |   "Esri.WorldImagery"
# "Esri.NatGeoWorldMap"   |   "USGS.USImagery"

In [22]:
# us center point: 44 58 N, 103°46'W
us_map = folium.Map(tiles = "Cartodb Positron",
                     location = (52.97, -105.77),
                     zoom_start = 3)

In [57]:
names_with_mult = []
for i, r in usa_parks.loc[usa_parks['NAME'].str.contains('Cumberland')].groupby('NAME').count().iterrows():
    if r['FEATTYPE'] > 1:
#         polygons = [poly1[0], poly1[1], poly2[0], poly2[1]]
#         boundary = gpd.GeoSeries(cascaded_union(polygons))
#         boundary.plot(color = 'red')
        names_with_mult.append(i)
#     print(i, r['FEATTYPE'])
# for _, r in usa_parks.loc[usa_parks['NAME'].str.contains('Cumberland')].iterrows():

In [118]:
for park in names_with_mult:
    polygons = []
    idxs = []
    for i, r in usa_parks.loc[usa_parks['NAME'].str.contains('Cumberland')].iterrows():
        if r['NAME'] == park:
            polygons.append(r['geometry'])
            idxs.append(i) #     get indexes to drop
            row = r
            
    boundary = gpd.GeoSeries(unary_union(polygons)) # Create new boundary
    usa_parks = usa_parks.drop(idxs) #     Drop old rows
    usa_parks = pd.concat([usa_parks,       # Add new row with combined polygon
                            pd.DataFrame(index=(usa_parks.index.max()+1,),
                                         data = {
                                row.index[0]: row.values[0],  
                                row.index[1]: row.values[1], 
                                row.index[2]: row.values[2], 
                                row.index[3]: row.values[3], 
                                row.index[4]: row.values[4], 
                                row.index[5]: row.values[5],
                                row.index[6]: boundary[0]
                            })]
)
    
#     print("***\n", park, polygons, "*\n", idxs)

In [119]:
usa_parks.loc[usa_parks['NAME'].str.contains('Cumberland')].groupby('NAME').count()

Unnamed: 0_level_0,FEATTYPE,MNFC,SQMI,Shape_Length,Shape_Area,geometry
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Cumberland Bay State Park,1,1,1,1,1,1
Cumberland Falls State Resort Park,1,1,1,1,1,1
Cumberland Gap National Historical Park,1,1,1,1,1,1
Cumberland Greenbelt Park,1,1,1,1,1,1
Cumberland Island National Seashore,1,1,1,1,1,1
Cumberland Marsh State Natural Area Preserve,1,1,1,1,1,1
Cumberland Mountain State Park,1,1,1,1,1,1
Cumberland Park,1,1,1,1,1,1
Cumberland State Forest,1,1,1,1,1,1
Lake Cumberland State Resort Park,1,1,1,1,1,1


In [48]:
print(mult.keys)

<built-in method keys of dict object at 0x00000220DC31C680>


In [120]:
for _, r in usa_parks.loc[usa_parks['NAME'].str.contains('Cumberland')].iterrows():
    # Without simplifying the representation of each borough,
    # the map might not be displayed
#     sim_geo = gpd.GeoSeries(r["geometry"]).simplify(tolerance=0.0005) # Set tolerance 
#     geo_j = sim_geo.to_json()
    geo_j = folium.GeoJson(data=r['geometry'], style_function=lambda x: {"fillColor": c['green'],
                                                                 'color':c['darkgreen'],
                                                                'weight': 0.5})
    folium.Popup(r["NAME"]).add_to(geo_j)
    geo_j.add_to(us_map)

In [121]:
us_map