####                                      Challenge 4: Developing Post-Disaster Housing Response
####                                                             Dashboard

Nur Afsa Syeda

## Environment Setup

In [1]:
#load libraries
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import contextily as ctx
import statsmodels.api as sm
import seaborn as sns
import folium
from folium import Choropleth
import plotly.express as px
import plotly.graph_objects as go
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import dash
from dash import dcc, html, Input, Output
import webbrowser

In [2]:
# Input locations, relative to the working directory
file_path = './data_FL.csv'  # tabular data file
shapefile_path = './tl_rd22_12_tract/tl_rd22_12_tract.shp'  # shapefile
county_name_path = './st12_fl_cou2020.txt'  # county name lookup file

def read_dataframe(filename, delimiter='|', **read_csv_kwargs):
    """Load a delimited text file into a pandas DataFrame.

    Parameters
    ----------
    filename : str or os.PathLike
        Path of the file to read. The extension is matched
        case-insensitively: ``.csv`` files are parsed with pandas' default
        separator, ``.txt`` files are parsed with ``delimiter``.
    delimiter : str, default '|'
        Field separator, used for ``.txt`` files only.
    **read_csv_kwargs
        Extra keyword arguments forwarded unchanged to ``pandas.read_csv``
        (for example ``dtype=str`` to keep zero-padded codes as text).

    Returns
    -------
    pandas.DataFrame
        The parsed file contents.

    Raises
    ------
    ValueError
        If the file name ends in neither ``.csv`` nor ``.txt``.
    """
    # str() lets pathlib.Path inputs through; lower() makes "DATA.CSV" match.
    extension_probe = str(filename).lower()

    if extension_probe.endswith('.csv'):
        return pd.read_csv(filename, **read_csv_kwargs)
    if extension_probe.endswith('.txt'):
        return pd.read_csv(filename, delimiter=delimiter, **read_csv_kwargs)
    raise ValueError("Unsupported file format. Only CSV and TXT files are supported.")

def read_shapefile(shapefile_path):
    """Read the shapefile at ``shapefile_path`` with geopandas and return the result."""
    return gpd.read_file(shapefile_path)

In [3]:
# Load the main data table
data = read_dataframe(filename=file_path)

In [4]:
# Load the pipe-delimited county name lookup
county_name_data = read_dataframe(filename=county_name_path, delimiter='|')

In [5]:
# Load the shapefile
shapefile_gdf = read_shapefile(shapefile_path=shapefile_path)

In [6]:
# Cast the join keys on both sides to str so they compare as text
county_key_columns = ['STATEFP', 'COUNTYFP']
data_key_columns = ['state_fips_code', 'county_fips_code']

county_name_data[county_key_columns] = county_name_data[county_key_columns].astype(str)
data[data_key_columns] = data[data_key_columns].astype(str)

# Attach the county-name columns to every data row; rows without a matching
# (state, county) pair are kept and get NaN in the added columns (left join)
data = data.merge(
    county_name_data,
    left_on=data_key_columns,
    right_on=county_key_columns,
    how='left',
)

## Data wrangling

In [7]:
# Sentinel codes that are treated as missing values
missing_values = [
    -666666666,
    -222222222,
]

In [8]:
# Build a new DataFrame in which every sentinel code is replaced with NaN
data_clean = data.replace(to_replace=missing_values, value=np.nan)

In [9]:
# Binary-encode the YES/NO columns as 1/0.
# Series.map turns any value that is not a key of the mapping into NaN.
yes_no_codes = {'YES': 1, 'NO': 0}
for column in (
    'economic_distress_pop_agg',
    'economic_distress_simple_agg',
    'investment_areas',
):
    data_clean[column] = data_clean[column].map(yes_no_codes)

In [10]:
# Give the key columns (and a few others) human-readable names.
# Sources noted for the three risk measures:
#   FEMA (2014-2021) - Expected building loss rate (Natural Hazards Risk Index)
#   FEMA (2014-2021) - Expected population loss rate (Natural Hazards Risk Index)
#   DOE (2018) - Energy burden (percentile)
data_clean = data_clean.rename(
    {
        # Risk measures
        'expected_building_loss_rate_natural_hazards_risk_index_percentile': 'Building Loss Rate Percentile',
        'expected_population_loss_rate_natural_hazards_risk_index_percentile': 'Population Loss Rate Percentile',
        'energy_burden_percentile': 'Energy Burden Percentile',
        # Age groups (male)
        's0101_c04_022e': 'Population under 18 years (male)',
        's0101_c04_026e': 'Population over 18 years (male)',
        's0101_c04_028e': 'Population over 60 years (male)',
        # Age groups (female)
        's0101_c06_022e': 'Population under 18 years (female)',
        's0101_c06_026e': 'Population over 18 years (female)',
        's0101_c06_028e': 'Population over 60 years (female)',
        # Poverty
        's1701_c03_011e': 'Percentage below poverty (male)',
        's1701_c03_012e': 'Percentage below poverty (female)',
        # Housing cost
        's2503_c01_024e': 'Housing cost (total)',
        's2503_c03_024e': 'Housing cost for owners',
        's2503_c05_024e': 'Housing cost for renters',
    },
    axis='columns',
)

## Understanding vulnerability to disasters

In [11]:
# Columns that feed the clustering
clustering_features = [
    'Building Loss Rate Percentile',
    'Population Loss Rate Percentile',
    'Energy Burden Percentile',
]

# Pull those columns, turn +/-inf into NaN, and keep only complete rows.
# The trailing copy() yields an independent frame for later column assignment.
risk_data_clustering = (
    data_clean[clustering_features]
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
    .copy()
)

In [12]:
# Standardize the features before clustering
scaler = StandardScaler()
risk_data_normalized = scaler.fit_transform(risk_data_clustering)

# Partition the rows into three K-Means clusters
kmeans = KMeans(n_clusters=3, random_state=42)
risk_data_clustering['cluster'] = kmeans.fit_predict(risk_data_normalized)

# The high-risk cluster is the one whose centroid has the largest mean
# across the standardized features
cluster_centers = kmeans.cluster_centers_
high_risk_cluster = cluster_centers.mean(axis=1).argmax()
centroid = cluster_centers[high_risk_cluster]

# Euclidean distance from each member of the high-risk cluster to its centroid
in_high_risk_cluster = risk_data_clustering['cluster'] == high_risk_cluster
high_risk_cluster_points = risk_data_normalized[in_high_risk_cluster]
distances_to_centroid = np.linalg.norm(high_risk_cluster_points - centroid, axis=1)

# Store the distances; rows outside the high-risk cluster stay NaN
risk_data_clustering.loc[in_high_risk_cluster, 'distance_to_centroid'] = distances_to_centroid

# Keep the five high-risk rows with the smallest distance to the centroid
top_5_high_risk_areas = (
    risk_data_clustering[in_high_risk_cluster]
    .sort_values(by='distance_to_centroid')
    .head(5)
)

In [13]:
# Display table: identifier columns plus the clustering features for the
# selected rows, with their distance to the centroid appended (index-aligned)
display_columns = ['geoid', 'county', 'COUNTYNAME'] + clustering_features
top_5_high_risk_areas_display = data_clean.loc[
    top_5_high_risk_areas.index, display_columns
].assign(distance_to_centroid=top_5_high_risk_areas['distance_to_centroid'])

In [14]:
# Distinct county names among the top 5 high-risk areas.
# COUNTYNAME can be NaN for rows that found no match in the left merge with
# the county-name file; those are dropped here because a NaN entry cannot be
# sorted by name or shown as a dropdown label.
unique_counties = top_5_high_risk_areas_display['COUNTYNAME'].dropna().unique()

# All rows of the cleaned dataset that belong to one of those counties
all_census_tracts_in_counties = data_clean[data_clean['COUNTYNAME'].isin(unique_counties)].copy()

In [15]:
# Cast both join keys to str so they compare as text
all_census_tracts_in_counties['geoid'] = all_census_tracts_in_counties['geoid'].astype(str)
shapefile_gdf['GEOID'] = shapefile_gdf['GEOID'].astype(str)

# Inner join: keep only shapes that have a matching row in the selected counties
merged_gdf = shapefile_gdf.merge(
    all_census_tracts_in_counties,
    how='inner',
    left_on='GEOID',
    right_on='geoid',
)

## Dashboard

In [16]:
# Sort the county names alphabetically (case-insensitive, ignoring padding).
# Non-string entries such as NaN are skipped: they have no .strip() and
# cannot serve as dropdown labels.
unique_counties_sorted = sorted(
    (county for county in unique_counties if isinstance(county, str)),
    key=lambda x: x.strip().lower(),
)

# Initialize Dash app
app = dash.Dash(__name__)

# Define layout of the dashboard.
# Style dictionaries use camelCased keys throughout, as Dash expects.
app.layout = html.Div([
    html.H1(
        'Disaster Risk and Demographics Dashboard',
        style={'textAlign': 'center', 'marginBottom': '20px'},
    ),

    # Header row: map title next to the county selector
    html.Div([
        # Map title (filled in by a callback)
        html.H3(id='map-title', style={'marginRight': '50px'}),

        # Dropdown to select a county
        html.Div([
            html.Label('Select County:', style={'marginRight': '10px', 'fontWeight': 'bold'}),
            dcc.Dropdown(
                id='county-dropdown',
                options=[{'label': county, 'value': county} for county in unique_counties_sorted],
                # Fall back to no selection when there is no county to offer
                value=unique_counties_sorted[0] if unique_counties_sorted else None,
                # Clearing the selection would send None to every callback
                clearable=False,
                style={'width': '200px'},
            ),
        ], style={'display': 'flex', 'alignItems': 'center'}),
    ], style={
        'display': 'flex',
        'justifyContent': 'center',
        'alignItems': 'center',
        'marginTop': '20px',
        'marginBottom': '20px',
    }),

    # Two risk maps side by side
    html.Div([
        # Building Loss Percentile Map
        html.Div([
            dcc.Graph(id='building-loss-map'),
        ], style={'width': '48%', 'display': 'inline-block'}),

        # Population Loss Percentile Map
        html.Div([
            dcc.Graph(id='population-loss-map'),
        ], style={'width': '48%', 'display': 'inline-block'}),
    ], style={
        'display': 'flex',
        'justifyContent': 'space-between',
        'marginBottom': '20px',
        'marginTop': '20px',
    }),

    # Demographic plots for the selected county, in two columns
    html.Div([
        # First column
        html.Div([
            dcc.Graph(id='age-male-plot'),
            dcc.Graph(id='age-female-plot'),
            dcc.Graph(id='housing-costs-plot'),
        ], style={'width': '48%', 'display': 'inline-block', 'verticalAlign': 'top'}),

        # Second column
        html.Div([
            dcc.Graph(id='poverty-levels-plot'),
            dcc.Graph(id='poverty-levels-map-male'),
            dcc.Graph(id='poverty-levels-map-female'),
        ], style={'width': '48%', 'display': 'inline-block', 'verticalAlign': 'top'}),
    ], style={'display': 'flex', 'justifyContent': 'space-between', 'padding': '10px'}),
])

# Callback: keep the map title in sync with the county dropdown
@app.callback(
    Output('map-title', 'children'),
    [Input('county-dropdown', 'value')]
)
def update_map_title(selected_county):
    """Return the heading text for the currently selected county."""
    return 'High-Risk Areas: {}'.format(selected_county)

def _loss_rate_choropleth(tracts, color_column, color_scale):
    """Build a choropleth of ``tracts`` coloured by ``color_column``."""
    figure = px.choropleth(
        tracts,
        geojson=tracts.geometry,
        locations=tracts.index,
        color=color_column,
        color_continuous_scale=color_scale,
        hover_name='COUNTYNAME',
        projection='mercator'
    )
    # Zoom to the plotted shapes and hide the base map
    figure.update_geos(fitbounds="locations", visible=False)
    figure.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
    return figure


# Callback: redraw both risk maps for the county chosen in the dropdown
@app.callback(
    [Output('building-loss-map', 'figure'),
     Output('population-loss-map', 'figure')],
    [Input('county-dropdown', 'value')]
)
def update_map(selected_county):
    """Return the building-loss and population-loss maps for ``selected_county``."""
    # Rows of the merged GeoDataFrame that belong to the selected county
    county_rows = merged_gdf[merged_gdf['COUNTYNAME'] == selected_county]

    return (
        _loss_rate_choropleth(county_rows, 'Building Loss Rate Percentile', 'YlOrRd'),
        _loss_rate_choropleth(county_rows, 'Population Loss Rate Percentile', 'Blues'),
    )


def _age_pie(county_data, categories, title):
    """Build a pie chart from the per-column sums of ``categories``."""
    totals = [county_data[category].sum() for category in categories]
    return px.pie(
        names=categories,
        values=totals,
        title=title,
        labels={'names': 'Age Group', 'values': 'Population Percentage (%)'}
    )


def _poverty_choropleth(county_data, geojson, column, legend_label, title):
    """Build a choropleth of ``county_data`` coloured by the poverty ``column``."""
    figure = px.choropleth(
        county_data,
        geojson=geojson,
        locations=county_data.index,
        color=column,
        hover_name='geoid',
        labels={column: legend_label},
        title=title
    )
    # Zoom to the plotted shapes and hide the base map
    figure.update_geos(fitbounds="locations", visible=False)
    return figure


# Callback to update demographic plots based on selected county
@app.callback(
    [Output('age-male-plot', 'figure'),
     Output('age-female-plot', 'figure'),
     Output('poverty-levels-plot', 'figure'),
     Output('poverty-levels-map-male', 'figure'),
     Output('poverty-levels-map-female', 'figure'),
     Output('housing-costs-plot', 'figure')],
    [Input('county-dropdown', 'value')]
)
def update_demographic_plots(selected_county):
    """Build the six demographic figures for ``selected_county``.

    Returns a tuple in the order of the callback outputs: male age pie,
    female age pie, poverty bar chart, male poverty map, female poverty map,
    housing-cost bar chart.
    """
    county_data = merged_gdf[merged_gdf['COUNTYNAME'] == selected_county]

    # Plots 1-2: age demographics by sex.
    # NOTE(review): judging by the column names, "over 18" seems to include
    # "over 60", so the pie slices may overlap - confirm the definitions.
    fig_age_male = _age_pie(
        county_data,
        ['Population under 18 years (male)',
         'Population over 18 years (male)',
         'Population over 60 years (male)'],
        'Age Demographics (Male)',
    )
    fig_age_female = _age_pie(
        county_data,
        ['Population under 18 years (female)',
         'Population over 18 years (female)',
         'Population over 60 years (female)'],
        'Age Demographics (Female)',
    )

    # Plot 3: poverty levels by sex, one group of bars per geoid
    fig_poverty_levels = px.bar(
        county_data,
        x='geoid',
        y=['Percentage below poverty (male)', 'Percentage below poverty (female)'],
        labels={'value': 'Poverty Percentage (%)', 'variable': 'Sex'},
        title='Poverty Levels by Sex'
    )

    # Plots 4-5: poverty maps. The GeoJSON mapping is built once and shared,
    # instead of serializing the same geometries for each map.
    county_geojson = county_data.geometry.__geo_interface__
    fig_poverty_map_male = _poverty_choropleth(
        county_data,
        county_geojson,
        'Percentage below poverty (male)',
        'Poverty (%) - Male',
        'Poverty Levels (Male)',
    )
    fig_poverty_map_female = _poverty_choropleth(
        county_data,
        county_geojson,
        'Percentage below poverty (female)',
        'Poverty (%) - Female',
        'Poverty Levels (Female)',
    )

    # Plot 6: housing costs for owners and renters, one group of bars per geoid
    fig_housing_costs = px.bar(
        county_data,
        x='geoid',
        y=['Housing cost for owners', 'Housing cost for renters'],
        labels={'value': 'Housing Cost ($)', 'variable': 'Housing Type'},
        title='Housing Costs'
    )

    return (
        fig_age_male,
        fig_age_female,
        fig_poverty_levels,
        fig_poverty_map_male,
        fig_poverty_map_female,
        fig_housing_costs,
    )

In [17]:
# Define a port
port = 8051

# Run the app (only when executed as a script or notebook, not on import)
if __name__ == '__main__':
    import os
    from threading import Timer

    # With debug=True the Werkzeug reloader re-executes this script in a child
    # process marked by WERKZEUG_RUN_MAIN == 'true'. Open the browser from the
    # parent process only, so a single tab appears and reloads do not add more.
    # The short delay gives the server time to start listening.
    if os.environ.get('WERKZEUG_RUN_MAIN') != 'true':
        Timer(1.0, webbrowser.open, args=(f'http://127.0.0.1:{port}',)).start()

    # Newer Dash releases name the entry point `run`; fall back to the older
    # `run_server` when `run` is not available.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(port=port, debug=True)