# Imports

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import folium
from shapely.ops import unary_union
import matplotlib.pyplot as plt
from shapely import wkt

# Constants

In [50]:
CRS = 4326
CRS_METERS = 32616
COLORS = {
    'green': '#83bca9',
    'darkgreen': '#3e5641'
}

## Load in USA Parks data

In [29]:
usa_parks = gpd.read_file("../data/USA_Parks/v10/park_dtl.gdb/")
usa_parks = usa_parks.to_crs(epsg=CRS) # convert to const CRS value (32616)

## Load US State Boundaries

In [30]:
us_states = pd.read_csv("../data/United_States_Boundary_Files.csv")
us_states['geometry'] = us_states['the_geom'].apply(wkt.loads) # create geometry column with safety
us_states_gdf = gpd.GeoDataFrame(us_states, crs = CRS) # convert to GeoPd DF with const CRS value (4326)

### Find parks only in or bordering TN

In [31]:
tn_parks = gpd.sjoin(usa_parks, # sjoin to find the parks that 'intersect' or are in and on the border of TN
                     us_states_gdf.loc[us_states_gdf['STUSPS'] == 'TN'],
                     predicate = 'intersects')

tn_parks['name'] = tn_parks['NAME_left'] # rename name column
tn_parks = tn_parks[['name', 'FEATTYPE', 'SQMI', 'geometry']] # keep only neccessary columns

In [128]:
tn_parks.to_file('../data/tn_parks.geojson', driver='GeoJSON')

# Create a map of the TN parks

In [6]:
# tn_map = folium.Map(tiles = "Cartodb Positron",
#                      location = (36, -86.1),
#                      zoom_start = 7,
#                      prefer_canvas=True)

In [7]:
# add feature group
# tn_layer = folium.FeatureGroup(name = 'Tennessee Parks', show = True)

# for i, park in tn_parks.iterrows():    # iterate throug rows of TN parks GeoDF
#     textbox = folium.Popup(
#             park['name']
#     )
    
#     folium.GeoJson(
#             park['geometry'],
#             popup = textbox,
#             style_function=lambda x: {"fillColor": c['green'],
#                                                                  'color':COLORS['darkgreen'],
#                                                                 'weight': 0.5}).add_to(tn_layer)

# tn_layer.add_to(tn_map)
# folium.LayerControl().add_to(tn_map)

# Load in Places Data

In [135]:
places_df = pd.read_csv('../data/PLACES__Local_Data_for_Better_Health__Place_Data_2023_release_20240504.csv')
places_tact = pd.read_csv('../data/PLACES__Local_Data_for_Better_Health__Census_Tract_Data_2023_release_20240504.csv')
data_dict = pd.read_csv('../data/PLACES_and_500_Cities__Data_Dictionary_20240504.csv')

#### Create places geopandas DF 

In [136]:
places_df['geometry'] = places_df['Geolocation'].apply(wkt.loads) # create geometry column with safety
places_gdf = gpd.GeoDataFrame(places_df, crs = CRS) # convert to GeoPd DF with const CRS value (4326)
places_tact['geometry'] = places_df['Geolocation'].apply(wkt.loads) # create geometry column with safety
tact_gdf = gpd.GeoDataFrame(places_df, crs = CRS) # convert to GeoPd DF with const CRS value (4326)

### Find Places data only in TN and filter out unneccessary columns

In [37]:
tn_places = places_gdf.loc[places_gdf['StateAbbr'] == 'TN'][['Year',
                                                             'Category', 'Measure',
                                                             'Data_Value', 'TotalPopulation',
                                                            'geometry', 'LocationID', 'MeasureId']]

In [141]:
tn_tact = tact_gdf.loc[tact_gdf['StateAbbr'] == 'TN'][['Year',
                                                             'Category', 'Measure',
                                                             'Data_Value', 'TotalPopulation',
                                                            'geometry', 'LocationID', 'MeasureId']]

In [132]:
tn_places.to_file('../data/tn_health_points.geojson', driver='GeoJSON')

In [142]:
tn_tact.to_file('../data/tn_tact.geojson', driver='GeoJSON')

####  Find the min distance between each Health data point and the nearest park. Save as CSV

In [38]:
# reproject to get distances in meters
# tn_health_loc = tn_places[['LocationID', 'geometry']].to_crs(epsg=CRS_METERS) # create smaller Df to iterate through
# tn_parks_loc = tn_parks[['name', 'geometry']].to_crs(epsg=CRS_METERS) # create smaller Df to iterate through

In [40]:
# min_dist = [] # empty list to hold the min distances to create a new column out of
# for i, r_health in tn_health_loc.iterrows(): # iterate through each row of the TN health data points
#     distances = [] # create empty list to hold the distnace from the 
#                    # health point and each state park boundary
#     for idx, r_park in tn_parks_loc.iterrows(): # iterate though each row of the TN parks data
#         distances.append(r_health['geometry'].distance(r_park['geometry'])) # add the distance from 
#                                                                             # the health point top
#                                                                             # each park to the list
#     min_dist.append(np.min(distances)) # find the distance from the health point to the nearest park

# tn_health_loc['distance'] = min_dist # create new column in the TN health points DF
# tn_health_loc.to_csv('../data/TN_health_data_with_distances_to_parks.csv') # save as csv for future recals

# Load in TN Helath Distances CSV

In [41]:
tn_health_distances = pd.read_csv('../data/TN_health_data_with_distances_to_parks.csv')

In [56]:
tn_places = tn_places.to_crs(epsg=CRS_METERS)
tn_places_distances = tn_places.merge(right = tn_health_distances[['LocationID', 'distance']], on='LocationID', how='inner')

#### Measures to Focus on

In [126]:
# list of measures to focus on
measures = ['CHD',
           'CASTHMA',
           'ACCESS2',
           'DEPRESSION',
           'DIABETES',
           'BPHIGH',
           'OBESITY',
           'STROKE']

#### Filter the DF by measures

In [79]:
tn_places_distances = tn_places_distances.loc[tn_places_distances['MeasureId'].isin(measures)]

In [80]:
tn_places_distances['distance'].agg(['min', 'max'])

min        0.000000
max    52456.962023
Name: distance, dtype: float64

In [120]:
tn_places_distances.to_csv('../data/TN_Health_data_cleaned.csv')

In [77]:
tn_places_distances.groupby('MeasureId').count()

Unnamed: 0_level_0,Year,Category,Measure,Data_Value,TotalPopulation,geometry,LocationID,distance
MeasureId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ACCESS2,63492,63492,63492,63344,63492,63492,63492,63492
BPHIGH,63492,63492,63492,63344,63492,63492,63492,63492
CANCER,63492,63492,63492,63344,63492,63492,63492,63492
CASTHMA,63492,63492,63492,63344,63492,63492,63492,63492
CHD,63492,63492,63492,63344,63492,63492,63492,63492
CHOLSCREEN,63492,63492,63492,63344,63492,63492,63492,63492
DEPRESSION,63492,63492,63492,63344,63492,63492,63492,63492
DIABETES,63492,63492,63492,63344,63492,63492,63492,63492
GHLTH,63492,63492,63492,63344,63492,63492,63492,63492
OBESITY,63492,63492,63492,63344,63492,63492,63492,63492


In [85]:
measure_dfs = {}
for measure in measures:
    new_df = tn_places_distances.loc[tn_places_distances['MeasureId'] == measure]

    measure_dfs[f"{measure.lower()}_tn_df"] = new_df

In [102]:
cancer_tn_groupby = measure_dfs['cancer_tn_df'][['distance', 'Data_Value']].groupby(pd.cut(measure_dfs['cancer_tn_df']['distance'],
                                                                                           20))['Data_Value']

  cancer_tn_groupby = measure_dfs['cancer_tn_df'][['distance', 'Data_Value']].groupby(pd.cut(measure_dfs['cancer_tn_df']['distance'], 20))['Data_Value']


In [133]:
def create_3groups(x):
        if x <= 5000:
            return 'close'
        elif x <= 15000:
            return 'medium'
        else:
            return 'far'
        
def create_5groups(x):
        if x <= 2000:
            return '2k'
        elif x <= 5000:
            return '5k'
        elif x <= 10000:
            return '10k'
        elif x <= 20000:
            return '20k'
        else:
            return 'far'

In [134]:
# create distance groups
tn_places_distances['distance_category3'] = tn_places_distances['distance'].apply(lambda r: create_3groups(r))
tn_places_distances['distance_category5'] = tn_places_distances['distance'].apply(lambda r: create_5groups(r))

tn_places_distances.to_csv('../data/TN_Health_data_cleaned.csv')

In [None]:
# for key in measure_dfs_keys:
#     measure_dfs[key][['distance',
#                       'Data_Value']].groupby(pd.cut(measure_dfs['cancer_tn_df']['distance'],
#                                                                         20))['Data_Value']

In [None]:
walkability = 