Data Visualization and Exploration
CPSC 5530
CRN 21428
Hunter Harris: zgt795
Project: Chattanooga Fire Stations and Incident Analysis

Import Libraries

In [90]:
import numpy as np
import pandas as pd
import folium
from folium.plugins import HeatMapWithTime
import plotly.express as px

Load Data

In [91]:
# Read the three source CSVs from the working directory.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids the "Columns (...) have mixed types"
# DtypeWarning that the default chunked parser emits for these files.
fire_stations_df = pd.read_csv("Fire_Station_data.csv", low_memory=False)
fire_incidents_df = pd.read_csv("Fire_Incidents_Detailed_Updated.csv", low_memory=False)
property_df = pd.read_csv("Hamilton_County_Property_Appraisal_Information.csv", low_memory=False)


Columns (2,18) have mixed types.Specify dtype option on import or set low_memory=False.


Columns (3,5,8,17,71) have mixed types.Specify dtype option on import or set low_memory=False.



Set DataFrame Options

In [92]:
# Show every column when a DataFrame is rendered (None removes the column cap)
pd.options.display.max_columns = None

Inspect Data Sets

In [93]:
# Print DataFrame information.
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line after each report.
fire_incidents_df.info()
print(fire_incidents_df['Specific Incident Code and Description'].value_counts().head(50))
fire_stations_df.info()
property_df.info()

# Print the first rows of each DataFrame
print(fire_incidents_df.head(5))
print(fire_stations_df.head(5))
print(property_df.head(5))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 661178 entries, 0 to 661177
Data columns (total 31 columns):
 #   Column                                           Non-Null Count   Dtype  
---  ------                                           --------------   -----  
 0   Incident Number                                  661178 non-null  object 
 1   Incident Date                                    661178 non-null  object 
 2   Shift Color                                      223014 non-null  object 
 3   Specific Incident Code and Description           655269 non-null  object 
 4   Generic Incident Code                            655269 non-null  float64
 5   Generic Incident Description                     655269 non-null  object 
 6   Incident Category                                651483 non-null  object 
 7   StreetNumber                                     661178 non-null  int64  
 8   StreetPrefix                                     120149 non-null  object 
 9   StreetName     

Clean Fire Station DataFrame

In [94]:
# Drop unneeded columns (selected by their position in the raw CSV layout)
fire_stations_df = fire_stations_df.drop(columns=fire_stations_df.columns[[1, 2, 3, 9, 10]])

# Extract the station number from NAME and rename the column
fire_stations_df['NAME'] = pd.to_numeric(fire_stations_df['NAME'].str.extract(r'(\d+)', expand=False))
fire_stations_df = fire_stations_df.rename(columns={'NAME': 'Station'})

# Add missing phone numbers.
# Assigning the result back (instead of a chained inplace replace) keeps this
# working under pandas Copy-on-Write, and fillna avoids the np.NAN alias that
# NumPy 2.0 removed.
fire_stations_df['PHONE_NO'] = fire_stations_df['PHONE_NO'].fillna("423-643-5600")

# Separate GPS coordinates into separate columns.
# regex= is passed explicitly: the implicit default changed between pandas
# versions, and with regex=False the escaped pattern 'POINT \(' never matches.
station_coords = (
    fire_stations_df['the_geom']
    .str.replace(r'POINT \(', ' ', regex=True)
    .str.replace(')', ' ', regex=False)
    .str.strip()
    .str.split(" ", n=1, expand=True)
)
# The first token is stored as Lng and the second as Lat; both remain strings.
fire_stations_df["Lng"] = station_coords[0]
fire_stations_df["Lat"] = station_coords[1]

# Drop original GPS column
# NOTE(review): dropped by position (first remaining column) exactly as before;
# presumably this is 'the_geom' -- confirm against the CSV layout.
fire_stations_df = fire_stations_df.drop(columns=fire_stations_df.columns[[0]])


The default value of regex will change from True to False in a future version.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.



Clean Fire Incident DataFrame

In [95]:
# Years kept for the analysis (also read by later plotting cells)
years = [2019, 2020, 2021]

# Drop duplicate incident numbers, keeping the first occurrence.
# .copy() gives an independent frame so the column assignments below do not
# raise SettingWithCopyWarning.
fire_incidents_df = fire_incidents_df.drop_duplicates(subset=['Incident Number'], keep='first').copy()
acc_df = fire_incidents_df.reset_index(drop=True)

# Convert incident date to datetime objects and create Year / Month columns
incident_dates = pd.to_datetime(fire_incidents_df['Incident Date'])
fire_incidents_df['Incident Date'] = incident_dates
fire_incidents_df['Year'] = incident_dates.dt.year
fire_incidents_df['Month'] = incident_dates.dt.month

# Keep only specified years (copy again: the filtered frame is modified below)
fire_incidents_df = fire_incidents_df[fire_incidents_df['Year'].isin(years)].copy()

# Extract station number and rename column.
# The number is written back into the existing column before renaming so the
# column keeps its position (columns are dropped by position further down).
fire_incidents_df['Responding Unit District Station'] = pd.to_numeric(
    fire_incidents_df['Responding Unit District Station'].str.extract(r'(\d+)', expand=False)
)
fire_incidents_df = (
    fire_incidents_df
    .rename(columns={'Responding Unit District Station': 'Station'})
    .dropna(subset=['Station'])       # rows without a parsable station number
    .astype({'Station': int})
)

# Keep only records of known Fire Stations from fire_stations_df
known_stations = fire_stations_df['Station']
fire_incidents_df = fire_incidents_df[fire_incidents_df['Station'].isin(known_stations)].copy()

# Add missing data: treat absent values in these columns as 0.
# fillna replaces the np.NAN alias, which NumPy 2.0 removed.
col = ['Incident Category', 'Civilian Casualty', 'Firefighter Casualty', 'Property Loss', 'Property Value']
fire_incidents_df[col] = fire_incidents_df[col].fillna(0)

# Drop unneeded columns (selected by position in the current column order)
fire_incidents_df = fire_incidents_df.drop(columns=fire_incidents_df.columns[[0, 2, 4, 11, 16, 18]])

# Drop rows without coordinates / location and renumber the index
fire_incidents_df = (
    fire_incidents_df
    .dropna(subset=['Latitude', 'Longitude', 'Location'])
    .reset_index(drop=True)
)

Property Value and Fire Incident Combination

In [96]:
# Add Address column of combined information
fire_incidents_df['Address'] = fire_incidents_df['StreetNumber'].astype(str) + " " + fire_incidents_df['StreetPrefix'].astype(str) + " " + fire_incidents_df['StreetName'].astype(str) + " " + fire_incidents_df['StreetType'].astype(str)
fire_incidents_df['Address'].replace(" nan ", " ", inplace=True, regex=True)

# Drop duplicate addresses
property_df.drop_duplicates(subset=['PropStNum', 'PropDirPfx','PropStName','PropSfx'], keep='first', inplace=True)

# Add Address column of combined information
property_df['Address'] = property_df['PropStNum'].astype(str).replace('\.0', '', regex=True) + " " + property_df['PropDirPfx'].astype(str) + " " + property_df['PropStName'].astype(str) + " " +  + property_df['PropSfx'].astype(str)
property_df['Address'].replace(" nan ", " ", inplace=True, regex=True)
property_df['Address'].replace("nan ", "", inplace=True, regex=True)
property_df['Address'].replace(" nan", "", inplace=True, regex=True)

# Count values of Fire incidents with corresponding property information
new = fire_incidents_df['Address'].isin(property_df['Address'].tolist())
print(new.value_counts())

# Merge DataFrames on Address
pv_fi_df = pd.merge(fire_incidents_df, property_df, on=['Address'])

# Drop null values
pv_fi_df = pv_fi_df.dropna(subset=['Generic Incident Description', 'CalcAcres', 'LandValue','BuildingValue', 'AppraisedValue'])
pv_fi_df = pv_fi_df.reset_index(drop=True)

# Print descriptive statistics
print(pv_fi_df['AppraisedValue'].describe().describe().apply(lambda x: format(x, 'f')))

# Bin Property Appraisal values
bins = [6199, 80520, 1335400, 7225905.721413, 664995721]
labels = ['Low', 'Medium', 'Medium High', 'High']
pv_fi_df['AppraisedBucket'] = pd.cut(pv_fi_df['AppraisedValue'], bins, labels=labels)
pv_fi_df['Count'] = 1

True     40520
False    14798
Name: Address, dtype: int64
count            8.000000
mean      86361751.286378
std      233902752.167269
min           6200.000000
25%          80478.000000
50%        1335300.000000
75%        7237113.161304
max      664995720.000000
Name: AppraisedValue, dtype: object


In [97]:
# Years present in the incident data, as plain Python ints. The same list
# drives both the heatmap frames and their labels so the two always have the
# same length and order.
heatmap_years = [int(year) for year in sorted(fire_incidents_df['Year'].unique())]

# Create list for mapping coordinates: one list of unique [lat, lng] pairs per year
lat_long_list = [
    fire_incidents_df.loc[fire_incidents_df['Year'] == year, ['Latitude', 'Longitude']]
    .drop_duplicates()
    .sort_values(['Latitude', 'Longitude'])
    .values.tolist()
    for year in heatmap_years
]

# Build Base Map
base_map = folium.Map(location=[35.043631, -85.309677], control_scale=True, zoom_start=11)

# Marker icon markup shared by every station
station_icon_html = '\n                  <img src="https://i.imgur.com/zhm3d2b.png" width="40" height="40"/>\n'

# Add labels for Fire Stations (one marker with an HTML popup per station row)
for _, station in fire_stations_df.iterrows():
    html=f"""
        <h1> Station # {station['Station']}</h1>
        <ul>
            <li>Address: {station['ADDRESS']}</li>
            <li>City: {station['CITY']}</li>
            <li>State: {station['STATE']}</li>
            <li>Zip: {station['ZIP_CODE']}</li>
            <li>Phone #: {station['PHONE_NO']}</li>
        </ul>
        """
    iframe = folium.IFrame(html=html, width=250, height=215)
    popup = folium.Popup(iframe, max_width=2650)
    folium.Marker(
        # float() accepts the coordinates whether they are stored as text or numbers
        location=[float(station['Lat']), float(station['Lng'])],
        popup=popup,
        icon=folium.DivIcon(html=station_icon_html),
    ).add_to(base_map)

# Save Map for fire stations only
base_map.save(outfile= "Fire_Department_Only.html")

# Add Heatmap to basemap
HeatMapWithTime(lat_long_list, radius=10, index=heatmap_years, auto_play=True, max_opacity=0.3).add_to(base_map)

# Save Map for heatmap and fire station
base_map.save(outfile= "Fire_Department_Heatmap_2019_2021.html")

# Show map
base_map

Incident Counts Over Time Visualization

In [98]:
# Group by year and station
fire_incidents_year_df = fire_incidents_df.groupby(['Year', 'Station']).size()
fire_incidents_year_df = fire_incidents_year_df.reset_index()
fire_incidents_year_df.rename(columns={0: 'Incidents'}, inplace=True)

# convert dates to datetime object in year format
fire_incidents_year_df['Year'] = pd.to_datetime(fire_incidents_year_df['Year'], format="%Y")

# Plot data
fig = px.line(fire_incidents_year_df, x="Year", y="Incidents", color="Station", height=400, title="Incident Calls From 2019-2021")
fig.update_layout(xaxis = dict(tickmode = 'array', tickvals = years, ticktext = years))

# Save and show figure
fig.write_html("Incident_Count_Time.html")
fig.show()

Incident Types Over Time

In [99]:
# Group by Year and Generic Incident Description
fire_incidents_type_year_df = fire_incidents_df.groupby(['Year', 'Generic Incident Description']).size()
fire_incidents_type_year_df = fire_incidents_type_year_df.reset_index()
fire_incidents_type_year_df.rename(columns={0: 'Count'}, inplace=True)

# Plot data
fig = px.bar(fire_incidents_type_year_df, x='Generic Incident Description', y= 'Count', color='Generic Incident Description', animation_frame="Year", animation_group="Generic Incident Description", height=500, title="Incident Types From 2019-2021")

# Update plot layout
fig.update_layout(margin=dict(l=20, r=20, t=35, b=200))
fig['layout']['updatemenus'][0]['pad']=dict(r= 10, t= 150)
fig['layout']['sliders'][0]['pad']=dict(r= 10, t= 175,)

# Save and show figure
fig.write_html("Incident_Type_Time.html")
fig.show()

Incidents by Property Value, Station, and Incident Type

In [100]:
# Rename descriptions to shorter versions
pv_fi_df['Generic Incident Description'].replace("Rescue & Emergency Medical Service (EMS)", "Rescue & EMS", inplace=True)
pv_fi_df['Generic Incident Description'].replace("False Alarm & False Call", "False Alarm/Call", inplace=True)
pv_fi_df['Generic Incident Description'].replace("Hazardous Condition (no fire)", "Hazardous Condition", inplace=True)
pv_fi_df['Generic Incident Description'].replace("Overpressure Rupture, Explosion, Overheat (no fire)", "Rupture, Explosion, Overheat (no fire)", inplace=True)
pv_fi_df['Generic Incident Description'].replace("Severe Weather & Natural Disaster", "Severe Weather/Natural Disaster", inplace=True)

# Plot data
fig = px.sunburst(pv_fi_df, values='Count', path=['Station', 'AppraisedBucket'], hover_name="Station", height=700, title="Incidents by Property Value and Station")
fig.update_layout(margin=dict(t=35, l=0, r=0, b=0)).update_traces(texttemplate="%{label}<br>(%{percentEntry:.2%})")

# Save and show figure
fig.write_html("Property_Value_Station.html")
fig.show()

# Group by for visualization
pv_fi_df_grouped = pv_fi_df.groupby(['AppraisedBucket', 'Generic Incident Description']).size()
pv_fi_df_grouped = pv_fi_df_grouped.reset_index()
pv_fi_df_grouped.rename(columns={0: 'Count'}, inplace=True)

# plot data
fig = px.bar(pv_fi_df_grouped, x='AppraisedBucket', y= 'Count', color='Generic Incident Description', height=500, title="Incident Type by Appraisal")
fig.update_layout(xaxis_title="Appraisal Category")

# Save and show figure
fig.write_html("Property_Value_Incident.html")
fig.show()