In [1]:
import geopandas as gpd
import pandas as pd
import folium

In [2]:
# Read the US census table and force the NAME column to plain strings.
us_data_df = pd.read_csv('US_DATA.csv', low_memory=False).astype({'NAME': str})

In [3]:
# Pair every census column code with its descriptive label; the labels are
# easier to understand and make it simpler to pick which columns to select.
us_columns_df = pd.read_csv('US_COLUMNS.csv', low_memory=False)
column_aliases = {
    code: description
    for code, description in zip(us_columns_df['Column Name'], us_columns_df['Label'])
}

# Show the full code -> label mapping.
print("Matched up:")
for col_name, label in column_aliases.items():
    print(f"{col_name}: {label}")

Matched up:
GEO_ID: Geography
NAME: Geographic Area Name
DP1_0001C: Count!!SEX AND AGE!!Total population
DP1_0002C: Count!!SEX AND AGE!!Total population!!Under 5 years
DP1_0003C: Count!!SEX AND AGE!!Total population!!5 to 9 years
DP1_0004C: Count!!SEX AND AGE!!Total population!!10 to 14 years
DP1_0005C: Count!!SEX AND AGE!!Total population!!15 to 19 years
DP1_0006C: Count!!SEX AND AGE!!Total population!!20 to 24 years
DP1_0007C: Count!!SEX AND AGE!!Total population!!25 to 29 years
DP1_0008C: Count!!SEX AND AGE!!Total population!!30 to 34 years
DP1_0009C: Count!!SEX AND AGE!!Total population!!35 to 39 years
DP1_0010C: Count!!SEX AND AGE!!Total population!!40 to 44 years
DP1_0011C: Count!!SEX AND AGE!!Total population!!45 to 49 years
DP1_0012C: Count!!SEX AND AGE!!Total population!!50 to 54 years
DP1_0013C: Count!!SEX AND AGE!!Total population!!55 to 59 years
DP1_0014C: Count!!SEX AND AGE!!Total population!!60 to 64 years
DP1_0015C: Count!!SEX AND AGE!!Total population!!65 to 69 years
DP

In [4]:
# Read the county shapefile; every regional frame below is sliced from it.
shapefile_path = 'tl_2023_us_county/tl_2023_us_county.shp'
gdf_counties = gpd.read_file(filename=shapefile_path)

In [5]:
# State FIPS codes (matched against the shapefile's STATEFP column) per region.
regions = {
    "North East": ['09', '23', '25', '33', '44', '50', '34', '36', '42'],
    "Midwest": ['17', '18', '26', '39', '55', '19', '20', '27', '29', '31', '38', '46'],
    "South Atlantic": ['10', '12', '13', '24', '37', '45', '51', '54','11'],
    "South Central":  ['01', '21', '28', '47', '05', '22', '40', '48'],
    "West": ['04', '08', '32', '35', '49', '02', '56', '06', '15', '41', '53','30', '16']
}

# Slice the national county frame into one GeoDataFrame per region.
region_frames = {
    region_name: gdf_counties[gdf_counties['STATEFP'].isin(state_fips)]
    for region_name, state_fips in regions.items()
}
gdf_north_east = region_frames["North East"]
gdf_midwest = region_frames["Midwest"]
gdf_south_atlantic = region_frames["South Atlantic"]
gdf_south_central = region_frames["South Central"]
gdf_west = region_frames["West"]

# Sanity check: number of counties that landed in each region.
for region_name, region_gdf in region_frames.items():
    print(f"{region_name} Counties: {region_gdf.shape[0]} counties")

# Optionally save each region as its own shapefile (if needed).
# Each region gets a distinct file name: South Atlantic and South Central
# previously shared 'south_counties.shp', so the second write replaced the first.
region_shapefiles = {
    "North East": 'north_east_counties.shp',
    "Midwest": 'midwest_counties.shp',
    "South Atlantic": 'south_atlantic_counties.shp',
    "South Central": 'south_central_counties.shp',
    "West": 'west_counties.shp',
}
for region_name, output_path in region_shapefiles.items():
    region_frames[region_name].to_file(output_path)

North East Counties: 218 counties
Midwest Counties: 1055 counties
South Atlantic Counties: 588 counties
South Central Counties: 834 counties
West Counties: 449 counties


In [6]:
def _merge_region_with_census(region_gdf, census_df):
    """Trim a regional county frame and left-join the census table onto it.

    Args:
        region_gdf: County GeoDataFrame for one region. Must contain
            'GEOIDFQ', 'geometry' and the county name under either 'NAME'
            (fresh from the shapefile) or 'County' (already renamed).
        census_df: Census table with a 'GEO_ID' column to join on.

    Returns:
        A ``(trimmed, merged)`` pair: the region frame reduced to
        'County', 'GEOIDFQ' and 'geometry', and that frame left-joined to
        ``census_df`` on GEOIDFQ == GEO_ID.
    """
    # Rename first, then select: rename() ignores a missing 'NAME', so running
    # this cell a second time is a no-op instead of a KeyError on 'NAME'.
    trimmed = region_gdf.rename(columns={'NAME': 'County'})[['County', 'GEOIDFQ', 'geometry']]
    # The county name is already called 'County' here, so the census 'NAME'
    # column does not collide and pandas adds no '_x'/'_y' suffixes; no
    # NAME_x/NAME_y clean-up is needed afterwards.
    # NOTE(review): assumes the census table has no 'County' column — confirm.
    merged = trimmed.merge(census_df, left_on='GEOIDFQ', right_on='GEO_ID', how='left')
    return trimmed, merged


##################### NORTH EAST
gdf_north_east, north_gdf_merged = _merge_region_with_census(gdf_north_east, us_data_df)

##################### MIDWEST
gdf_midwest, midwest_gdf_merged = _merge_region_with_census(gdf_midwest, us_data_df)

##################### SOUTH ATLANTIC
gdf_south_atlantic, south_atlantic_gdf_merged = _merge_region_with_census(gdf_south_atlantic, us_data_df)

##################### SOUTH CENTRAL
gdf_south_central, south_central_gdf_merged = _merge_region_with_census(gdf_south_central, us_data_df)

##################### WEST
gdf_west, west_gdf_merged = _merge_region_with_census(gdf_west, us_data_df)

In [8]:
# Census column codes used for each map theme. Every code has the form
# 'DP1_<4-digit number>P', so the lists are generated from the numbers.
selected_housing_occupancy = [f'DP1_{number:04d}P' for number in range(148, 156)]

selected_sex_age = [f'DP1_{number:04d}P' for number in range(2, 25)]

# 0078-0084 are consecutive; 0096 is the one extra column outside that run.
selected_race = [f'DP1_{number:04d}P' for number in (*range(78, 85), 96)]

selected_housing_tenure = [f'DP1_{number:04d}P' for number in (159, 160)]

# Row 0 of the census table is used as the per-column description lookup.
descriptions = us_data_df.loc[0]

def simplify_column_name(column_name):
    """Return a short readable label for a census column code.

    Args:
        column_name: Column code to look up in ``descriptions``.

    Returns:
        'Percent <last segment>' when the column's description contains
        'Percent!!' (segments are separated by '!!'); otherwise
        ``column_name`` unchanged.

    Raises:
        KeyError: If ``column_name`` is not present in ``descriptions``.
    """
    description = descriptions[column_name]
    # A missing description is not a string (e.g. NaN), and the `in` test
    # would raise TypeError on it; treat that as "no match" and fall back
    # to the raw column code.
    if isinstance(description, str) and 'Percent!!' in description:
        last_segment = description.rsplit('!!', 1)[-1]
        return f"Percent {last_segment}"
    return column_name

# Readable labels for each group of selected columns, in the same order.
simplified_selected_housing_occupancy = list(map(simplify_column_name, selected_housing_occupancy))
simplified_selected_sex_age = list(map(simplify_column_name, selected_sex_age))
simplified_selected_race = list(map(simplify_column_name, selected_race))
simplified_selected_housing_tenure = list(map(simplify_column_name, selected_housing_tenure))

# Print the simplified names for verification
#print("Simplified Housing Occupancy Names:", simplified_selected_housing_occupancy)
#print("Simplified Sex and Age Names:", simplified_selected_sex_age)
#print("Simplified Race Names:", simplified_selected_race)
#print("Simplified Housing Tenure Names:", simplified_selected_housing_tenure)

In [9]:
# Every selected census column, in the order the groups are declared.
selected_columns = selected_housing_occupancy + selected_sex_age + selected_race + selected_housing_tenure


def _coerce_and_trim(merged_gdf, keep):
    """Make the selected columns float32 and drop every column not in ``keep``.

    The numeric conversion is assigned into ``merged_gdf`` itself; values that
    cannot be parsed as numbers become NaN (``errors='coerce'``). The trimmed
    frame is returned as a new object.
    """
    for code in selected_columns:
        merged_gdf[code] = pd.to_numeric(merged_gdf[code], errors='coerce').astype('float32')
    unwanted = [name for name in merged_gdf.columns if name not in keep]
    return merged_gdf.drop(columns=unwanted)


# NOTE(review): only the North East frame keeps the 'GEOIDFQ' and 'GEO_ID'
# identifier columns; the other regions drop them — confirm this is intended.
north_columns_to_keep = ['County', 'GEOIDFQ', 'GEO_ID', 'NAME','geometry'] + selected_housing_occupancy + selected_sex_age + selected_race + selected_housing_tenure
columns_to_keep = ['County', 'NAME','geometry'] + selected_housing_occupancy + selected_sex_age + selected_race + selected_housing_tenure

##################### NORTH EAST
north_gdf_merged = _coerce_and_trim(north_gdf_merged, north_columns_to_keep)

##################### MIDWEST
midwest_gdf_merged = _coerce_and_trim(midwest_gdf_merged, columns_to_keep)

##################### SOUTH ATLANTIC
south_atlantic_gdf_merged = _coerce_and_trim(south_atlantic_gdf_merged, columns_to_keep)

##################### SOUTH CENTRAL
south_central_gdf_merged = _coerce_and_trim(south_central_gdf_merged, columns_to_keep)

##################### WEST
west_gdf_merged = _coerce_and_trim(west_gdf_merged, columns_to_keep)

In [10]:
# Function to define color logic for Housing Occupancy (or other categories)
def get_occupancy_color(occupancy_percentage):
    """Pick the map fill color for a housing-occupancy percentage.

    Args:
        occupancy_percentage: Value to classify; anything ``float()`` accepts.

    Returns:
        'red' below 50, 'yellow' from 50 up to (but not including) 75,
        'green' at 75 or above, and 'gray' when the value is missing, NaN,
        or cannot be converted to a float.
    """
    try:
        value = float(occupancy_percentage)
    except (TypeError, ValueError):
        # float() raises TypeError for None / pd.NA / other non-numeric objects
        # and ValueError for unparsable strings; both mean "no usable value".
        return 'gray'
    if pd.isna(value):
        # Covers real NaN as well as strings such as 'nan' that float() accepts.
        return 'gray'
    if value < 50:
        return 'red'  # Low occupancy
    if value < 75:
        return 'yellow'  # Moderate occupancy
    return 'green'  # High occupancy
    

def get_age_color(age_percentage):
    """Pick the map fill color for an age-group percentage.

    Args:
        age_percentage: Value to classify; anything ``float()`` accepts.

    Returns:
        'green' below 20, 'yellow' from 20 up to (but not including) 25,
        'red' at 25 or above, and 'gray' when the value is missing, NaN,
        or cannot be converted to a float.
    """
    try:
        value = float(age_percentage)
    except (TypeError, ValueError):
        # float() raises TypeError for None / pd.NA / other non-numeric objects
        # and ValueError for unparsable strings; both mean "no usable value".
        return 'gray'
    if pd.isna(value):
        # Covers real NaN as well as strings such as 'nan' that float() accepts.
        return 'gray'
    if value < 20:
        return 'green'  # Low share
    if value < 25:
        return 'yellow'  # Moderate share
    return 'red'  # High share

# Function to define color logic for Race
def get_race_color(race_percentage):
    """Pick the map fill color for a race percentage.

    Args:
        race_percentage: Value to classify; anything ``float()`` accepts.

    Returns:
        'red' above 85, 'yellow' above 65 up to and including 85, 'green'
        at 65 or below, and 'gray' when the value is missing, NaN, or cannot
        be converted to a float.
    """
    try:
        value = float(race_percentage)
    except (TypeError, ValueError):
        # float() raises TypeError for None / pd.NA / other non-numeric objects
        # and ValueError for unparsable strings; both mean "no usable value".
        return 'gray'
    if pd.isna(value):
        # Covers real NaN as well as strings such as 'nan' that float() accepts.
        return 'gray'
    if value > 85:
        return 'red'
    if value > 65:
        return 'yellow'
    return 'green'

    
# Function to define color logic for Housing Tenure
def get_tenure_color(tenure_percentage):
    """Pick the map fill color for a housing-tenure percentage.

    Args:
        tenure_percentage: Value to classify; anything ``float()`` accepts.

    Returns:
        'red' below 50, 'yellow' from 50 up to (but not including) 75,
        'green' at 75 or above, and 'gray' when the value is missing, NaN,
        or cannot be converted to a float.
    """
    try:
        value = float(tenure_percentage)
    except (TypeError, ValueError):
        # float() raises TypeError for None / pd.NA / other non-numeric objects
        # and ValueError for unparsable strings; both mean "no usable value".
        return 'gray'
    if pd.isna(value):
        # Covers real NaN as well as strings such as 'nan' that float() accepts.
        return 'gray'
    if value < 50:
        return 'red'  # Low share
    if value < 75:
        return 'yellow'  # Moderate share
    return 'green'  # High share

In [11]:
# For each theme: (color column to create, census column it is derived from,
# function that turns a percentage into a color name).
_color_rules = [
    ('occupancy_color', 'DP1_0148P', get_occupancy_color),
    ('age_color', 'DP1_0023P', get_age_color),
    ('race_color', 'DP1_0078P', get_race_color),
    ('tenure_color', 'DP1_0159P', get_tenure_color),
]

# Add the four color columns to every regional frame, region by region.
_regional_frames = [
    north_gdf_merged,
    midwest_gdf_merged,
    south_atlantic_gdf_merged,
    south_central_gdf_merged,
    west_gdf_merged,
]
for _frame in _regional_frames:
    for _color_column, _source_column, _classify in _color_rules:
        _frame[_color_column] = _frame[_source_column].apply(_classify)

In [15]:
# Function to add a layer to the map with the color column for styling
def add_layer(data, layer_name, columns, color_column, show=False):
    """Build a folium FeatureGroup holding one styled GeoJson overlay.

    Args:
        data: Geo data passed straight to ``folium.GeoJson``; its features
            must carry 'County', every name in ``columns`` and ``color_column``.
        layer_name: Name given to the FeatureGroup.
        columns: List of property names shown in the tooltip after 'County'.
        color_column: Property whose value is used as each feature's fill color.
        show: Forwarded to ``folium.FeatureGroup``.

    Returns:
        The FeatureGroup containing the GeoJson overlay.
    """
    def _style(feature):
        # Fill comes from the per-feature color column; outline is fixed.
        return {
            'fillColor': feature['properties'][color_column],
            'color': 'black',
            'weight': .7,
            'fillOpacity': .5,
        }

    # Tooltip shows the county first, then each column under its readable label.
    tooltip_fields = ['County', *columns]
    tooltip_aliases = ['County:', *(simplify_column_name(col) for col in columns)]

    layer = folium.FeatureGroup(name=layer_name, show=show)
    overlay = folium.GeoJson(
        data=data,
        tooltip=folium.GeoJsonTooltip(
            fields=tooltip_fields,
            aliases=tooltip_aliases,
            localize=True,
        ),
        style_function=_style,
    )
    overlay.add_to(layer)
    return layer

In [None]:
# Northeast: housing occupancy map, written out as HTML.
m_occupancy = folium.Map(zoom_start=6, location=[41.2033, -73.0877])
add_layer(
    north_gdf_merged,
    'Housing Occupancy',
    selected_housing_occupancy,
    'occupancy_color',
    show=True,
).add_to(m_occupancy)
m_occupancy.add_child(folium.LayerControl())
m_occupancy.save('northeast_housing_occupancy_map.html')

In [None]:
# Northeast: age group map, written out as HTML.
m_age = folium.Map(zoom_start=6, location=[41.2033, -73.0877])
add_layer(
    north_gdf_merged,
    'Age Group',
    selected_sex_age,
    'age_color',
    show=True,
).add_to(m_age)
m_age.add_child(folium.LayerControl())
m_age.save('northeast_age_map.html')

In [None]:
# Northeast: race map, written out as HTML.
m_race = folium.Map(zoom_start=6, location=[41.2033, -73.0877])
add_layer(
    north_gdf_merged,
    'Race',
    selected_race,
    'race_color',
    show=True,
).add_to(m_race)
m_race.add_child(folium.LayerControl())
m_race.save('northeast_race_map.html')

In [None]:
# Northeast: housing tenure map, written out as HTML.
m_tenure = folium.Map(zoom_start=6, location=[41.2033, -73.0877])
add_layer(
    north_gdf_merged,
    'Housing Tenure',
    selected_housing_tenure,
    'tenure_color',
    show=True,
).add_to(m_tenure)
m_tenure.add_child(folium.LayerControl())
m_tenure.save('northeast_housing_tenure_map.html')

In [None]:
# Midwest: housing occupancy map, written out as HTML.
m_occupancy = folium.Map(zoom_start=5, location=[41.5932, -87.4071])
add_layer(
    midwest_gdf_merged,
    'Housing Occupancy',
    selected_housing_occupancy,
    'occupancy_color',
    show=True,
).add_to(m_occupancy)
m_occupancy.add_child(folium.LayerControl())
m_occupancy.save('midwest_housing_occupancy_map.html')

In [None]:
# Midwest: age group map, written out as HTML.
m_age = folium.Map(zoom_start=5, location=[41.5932, -87.4071])
add_layer(
    midwest_gdf_merged,
    'Age Group',
    selected_sex_age,
    'age_color',
    show=True,
).add_to(m_age)
m_age.add_child(folium.LayerControl())
m_age.save('midwest_age_map.html')

In [None]:
# Midwest: race map, written out as HTML.
m_race = folium.Map(zoom_start=5, location=[41.5932, -87.4071])
add_layer(
    midwest_gdf_merged,
    'Race',
    selected_race,
    'race_color',
    show=True,
).add_to(m_race)
m_race.add_child(folium.LayerControl())
m_race.save('midwest_race_map.html')

In [None]:
# Midwest: housing tenure map, written out as HTML.
m_tenure = folium.Map(zoom_start=5, location=[41.5932, -87.4071])
add_layer(
    midwest_gdf_merged,
    'Housing Tenure',
    selected_housing_tenure,
    'tenure_color',
    show=True,
).add_to(m_tenure)
m_tenure.add_child(folium.LayerControl())
m_tenure.save('midwest_housing_tenure_map.html')

In [None]:
# South Atlantic: housing occupancy map, written out as HTML.
m_occupancy = folium.Map(zoom_start=6, location=[33.7490, -84.3880])
add_layer(
    south_atlantic_gdf_merged,
    'Housing Occupancy',
    selected_housing_occupancy,
    'occupancy_color',
    show=True,
).add_to(m_occupancy)
m_occupancy.add_child(folium.LayerControl())
m_occupancy.save('south_atlantic_housing_occupancy_map.html')

In [None]:
# South Atlantic: age group map, written out as HTML.
m_age = folium.Map(zoom_start=5, location=[33.7490, -84.3880])
add_layer(
    south_atlantic_gdf_merged,
    'Age Group',
    selected_sex_age,
    'age_color',
    show=True,
).add_to(m_age)
m_age.add_child(folium.LayerControl())
m_age.save('south_atlantic_age_map.html')

In [None]:
# South Atlantic: race map, written out as HTML.
m_race = folium.Map(zoom_start=5, location=[33.7490, -84.3880])
add_layer(
    south_atlantic_gdf_merged,
    'Race',
    selected_race,
    'race_color',
    show=True,
).add_to(m_race)
m_race.add_child(folium.LayerControl())
m_race.save('south_atlantic_race_map.html')

In [None]:
# South Atlantic: housing tenure map, written out as HTML.
m_tenure = folium.Map(zoom_start=5, location=[33.7490, -84.3880])
add_layer(
    south_atlantic_gdf_merged,
    'Housing Tenure',
    selected_housing_tenure,
    'tenure_color',
    show=True,
).add_to(m_tenure)
m_tenure.add_child(folium.LayerControl())
m_tenure.save('south_atlantic_housing_tenure_map.html')

In [None]:
# South Central: housing occupancy map, written out as HTML.
m_occupancy = folium.Map(zoom_start=5, location=[31.9686, -99.9018])
add_layer(
    south_central_gdf_merged,
    'Housing Occupancy',
    selected_housing_occupancy,
    'occupancy_color',
    show=True,
).add_to(m_occupancy)
m_occupancy.add_child(folium.LayerControl())
m_occupancy.save('south_central_housing_occupancy_map.html')

In [None]:
# South Central: age group map, written out as HTML.
m_age = folium.Map(zoom_start=5, location=[31.9686, -99.9018])
add_layer(
    south_central_gdf_merged,
    'Age Group',
    selected_sex_age,
    'age_color',
    show=True,
).add_to(m_age)
m_age.add_child(folium.LayerControl())
m_age.save('south_central_age_map.html')

In [None]:
# South Central: race map, written out as HTML.
m_race = folium.Map(zoom_start=5, location=[31.9686, -99.9018])
add_layer(
    south_central_gdf_merged,
    'Race',
    selected_race,
    'race_color',
    show=True,
).add_to(m_race)
m_race.add_child(folium.LayerControl())
m_race.save('south_central_race_map.html')

In [None]:
# South Central: housing tenure map, written out as HTML.
m_tenure = folium.Map(zoom_start=5, location=[31.9686, -99.9018])
add_layer(
    south_central_gdf_merged,
    'Housing Tenure',
    selected_housing_tenure,
    'tenure_color',
    show=True,
).add_to(m_tenure)
m_tenure.add_child(folium.LayerControl())
m_tenure.save('south_central_housing_tenure_map.html')

In [13]:
# West: housing occupancy map, written out as HTML.
m_occupancy = folium.Map(zoom_start=6, location=[37.7749, -122.4194])
add_layer(
    west_gdf_merged,
    'Housing Occupancy',
    selected_housing_occupancy,
    'occupancy_color',
    show=True,
).add_to(m_occupancy)
m_occupancy.add_child(folium.LayerControl())
m_occupancy.save('west_housing_occupancy_map.html')

In [14]:
# West: age group map, written out as HTML.
m_age = folium.Map(zoom_start=5, location=[37.7749, -122.4194])
add_layer(
    west_gdf_merged,
    'Age Group',
    selected_sex_age,
    'age_color',
    show=True,
).add_to(m_age)
m_age.add_child(folium.LayerControl())
m_age.save('west_age_map.html')

In [16]:
# West: race map, written out as HTML.
# NOTE(review): the longitude here is -122.41948, while the West occupancy and
# age maps use -122.4194 — confirm which value is intended.
m_race = folium.Map(zoom_start=5, location=[37.7749, -122.41948])
add_layer(
    west_gdf_merged,
    'Race',
    selected_race,
    'race_color',
    show=True,
).add_to(m_race)
m_race.add_child(folium.LayerControl())
m_race.save('west_race_map.html')

In [17]:
# West: housing tenure map, written out as HTML.
# NOTE(review): the longitude here is -122.41948, while the West occupancy and
# age maps use -122.4194 — confirm which value is intended.
m_tenure = folium.Map(zoom_start=5, location=[37.7749, -122.41948])
add_layer(
    west_gdf_merged,
    'Housing Tenure',
    selected_housing_tenure,
    'tenure_color',
    show=True,
).add_to(m_tenure)
m_tenure.add_child(folium.LayerControl())
m_tenure.save('west_housing_tenure_map.html')