This script takes the preprocessed Social Explorer data and merges it horizontally (column-wise, keyed on FIPS) so that it can be used to generate the visualization in our final report, and to serve as a tool for validating and viewing both our underlying data and the LLM results.

Towards the end of the script, you can set the hm_category variable to any of the column names printed in the cell output below; the script then generates a heatmap highlighting the feature you want to visualize.

To draw proper county and state borders, you need to download the Census cartographic boundary shapefiles:
https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html

In [86]:
import pandas as pd

# Load each preprocessed Social Explorer table. Every table is stripped of
# rows containing missing values and re-indexed from 0. All tables except the
# environment one also drop their 'Qualifying Name' column, so the merged
# frame keeps a single copy of it (the environment table's).
demographic_df = (
    pd.read_pickle('../data/social_explorer_processed/demographic_df.pkl')
    .drop(columns='Qualifying Name')
    .dropna()
    .reset_index(drop=True)
)

environment_df = (
    pd.read_pickle('../data/social_explorer_processed/environment_df.pkl')
    .dropna()
    .reset_index(drop=True)
)

housing_df = (
    pd.read_pickle('../data/social_explorer_processed/housing_df.pkl')
    .drop(columns='Qualifying Name')
    .dropna()
    .reset_index(drop=True)
)
# Columns are stored as strings (they feed the QA pairs), so any column that
# should be filtered on numerically has to be cast first; this is one example.
housing_df = housing_df.astype({'Median Family Income (5-year ACS)': int})

crime_df = (
    pd.read_pickle('../data/social_explorer_processed/crime_df.pkl')
    .drop(columns='Qualifying Name')
    .dropna()
    .reset_index(drop=True)
)

# Left-join everything onto the environment table by FIPS, then replace the
# gaps left by rows without a match in a given table with 0.
merged_df = (
    environment_df
    .merge(demographic_df, on='FIPS', how='left')
    .merge(housing_df, on='FIPS', how='left')
    .merge(crime_df, on='FIPS', how='left')
    .fillna(0)
)
merged_df.columns

Index(['FIPS', 'Qualifying Name', 'Total Area', 'Open Water %', 'Ice/Snow %',
       'Developed Open Space %', 'Developed Low Intensity %',
       'Developed Medium Intensity %', 'Developed High Intensity %',
       'Barren Land %', 'Deciduous Forest %', 'Evergreen Forest %',
       'Mixed Forest %', 'Shrub/Scrub %', 'Grassland/Herbaceous %',
       'Pasture/Hay %', 'Cultivated Crops %', 'Woody Wetlands %',
       'Emergent Herbaceous Wetlands %', '2020 Total Population',
       '2020 Population Density', 'White 2020 %',
       'African American or Black 2020 %',
       'American Indian and Alaska Native 2020 %', 'Asian 2020 %',
       'Native Hawaiian and Other Pacific Islander 2020 %', 'Other 2020 %',
       'Two or More Races 2020 %', 'Hispanic 2020 %',
       'Median Family Income (5-year ACS)', 'Fair Market Rent for One Bedroom',
       'Fair Market Rent for Two Bedrooms',
       'Fair Market Rent for Three Bedrooms',
       'Fair Market Rent for Four Bedrooms',
       'Total Viol

In [91]:
import geopandas as gpd
import folium
from folium import LinearColormap

# ``df`` is an alias of merged_df (not a copy), so the FIPS normalization
# below is visible through both names.
df = merged_df
# Normalize FIPS to a zero-padded 5-character string so it can match the
# shapefile's GEOID. A plain astype(str) on a numeric FIPS column would turn
# e.g. 1001 into '1001', which never equals '01001', and the inner merge
# below would silently drop those counties. zfill is a no-op for values that
# are already 5+ characters long.
# NOTE(review): assumes county-level FIPS codes (2-digit state + 3-digit
# county) — confirm against the preprocessing step.
df['FIPS'] = df['FIPS'].astype(str).str.zfill(5)

county_shapefile_path = '../mapVis/county_shape_files/cb_2018_us_county_500k.shp'
county_geo_data = gpd.read_file(county_shapefile_path)

# Expose the county identifier under a 'FIPS' column (as strings) so it can
# be used as the join key; prefer GEOID when the shapefile provides it.
if 'GEOID' in county_geo_data.columns:
    county_geo_data['FIPS'] = county_geo_data['GEOID'].astype(str)
elif 'FIPS' in county_geo_data.columns:
    county_geo_data['FIPS'] = county_geo_data['FIPS'].astype(str)
else:
    raise ValueError("FIPS column not found in county shapefile")

# Inner join (the merge default): only counties present in both the
# shapefile and the merged data are kept.
merged_county_data = county_geo_data.merge(df, on='FIPS')
state_shapefile_path = '../mapVis/state_shape_files/cb_2018_us_state_500k.shp'
state_geo_data = gpd.read_file(state_shapefile_path)


# Columns shown in the county hover tooltip, in display order.
# NOTE(review): 'NAME' is presumably the county-name column supplied by the
# county shapefile; the remaining names are columns of the merged data.
_TOOLTIP_FIELDS = [
    'NAME', 'Total Area', 'Open Water %', 'Ice/Snow %',
    'Developed Open Space %', 'Developed Low Intensity %',
    'Developed Medium Intensity %', 'Developed High Intensity %',
    'Barren Land %', 'Deciduous Forest %', 'Evergreen Forest %',
    'Mixed Forest %', 'Shrub/Scrub %', 'Grassland/Herbaceous %',
    'Pasture/Hay %', 'Cultivated Crops %', 'Woody Wetlands %',
    'Emergent Herbaceous Wetlands %', '2020 Total Population',
    '2020 Population Density', 'White 2020 %',
    'African American or Black 2020 %',
    'American Indian and Alaska Native 2020 %', 'Asian 2020 %',
    'Native Hawaiian and Other Pacific Islander 2020 %', 'Other 2020 %',
    'Two or More Races 2020 %', 'Hispanic 2020 %',
    'Total Violent and Property Crimes %',
    'Median Family Income (5-year ACS)',
    'Fair Market Rent for One Bedroom',
    'Fair Market Rent for Two Bedrooms',
    'Fair Market Rent for Three Bedrooms',
    'Fair Market Rent for Four Bedrooms',
]


def gen_vis(hm_category, output_path='../mapVis/median_family_example.html'):
    """Render a county-level heat map of one column and save it as HTML.

    Reads the module-level ``merged_county_data`` (county geometries joined
    with the merged data) and ``state_geo_data`` (state outlines).

    Args:
        hm_category: Name of the ``merged_county_data`` column whose values
            drive the county fill colors and the legend caption.
        output_path: Where the generated HTML map is written. Defaults to
            the path the script has always used, so existing calls keep
            producing the same file.

    Raises:
        KeyError: If ``hm_category`` is not a column of
            ``merged_county_data``.
    """
    if hm_category not in merged_county_data.columns:
        raise KeyError(
            f"{hm_category!r} is not a column of merged_county_data"
        )

    values = merged_county_data[hm_category]
    min_value = values.min()
    max_value = values.max()

    # Green at the minimum, yellow at the midpoint, red at the maximum.
    color_scale = LinearColormap(
        colors=['green', 'yellow', 'red'],
        index=[min_value, (min_value + max_value) / 2, max_value],
        vmin=min_value,
        vmax=max_value,
        caption=hm_category,
    )

    # NOTE(review): the initial view is fixed; the coordinates look like
    # central Alabama — confirm this is the intended default.
    m = folium.Map(location=[32.806671, -86.791130], zoom_start=7)

    # State outlines: thick black borders with no fill.
    folium.GeoJson(
        state_geo_data,
        name='State Boundaries',
        style_function=lambda feature: {
            'color': 'black',
            'weight': 3,
            'fillOpacity': 0
        }
    ).add_to(m)

    # Build the FIPS -> value lookup once instead of filtering the whole
    # frame for every county feature. setdefault keeps the first row seen
    # for a FIPS code, matching the previous ``.values[0]`` behavior.
    rate_by_fips = {}
    for fips, rate in zip(merged_county_data['FIPS'], values):
        rate_by_fips.setdefault(fips, rate)

    def style_function(feature):
        """Return the fill/border style for one county feature."""
        rate = rate_by_fips[feature['properties']['FIPS']]
        return {
            'fillColor': color_scale(rate),
            'color': 'black',
            'weight': 1,
            'fillOpacity': 0.7
        }

    folium.GeoJson(
        merged_county_data.__geo_interface__,
        name='County Boundaries',
        style_function=style_function,
        tooltip=folium.GeoJsonTooltip(
            fields=_TOOLTIP_FIELDS,
            # Labels equal the column names, except the county name column.
            aliases=['County:'] + _TOOLTIP_FIELDS[1:],
            localize=True
        )
    ).add_to(m)

    color_scale.add_to(m)
    folium.LayerControl().add_to(m)
    m.save(output_path)

# Set hm_category below to the name of the column you want to visualize as a
# heat map. A few example choices:
# hm_category = 'Total Violent and Property Crimes %'
# hm_category = 'White 2020 %'
# hm_category = 'Asian 2020 %'
# hm_category = 'African American or Black 2020 %'
hm_category = 'Median Family Income (5-year ACS)'
# Build the map for the chosen column; gen_vis saves the result as an HTML file.
gen_vis(hm_category)