Data Visualization and Exploration
CPSC 5530
CRN 21428
Hunter Harris: zgt795
Assignment 3: Geospatial Visualization 2

Import Libraries

In [412]:
import pandas as pd
import numpy as np
from bokeh.io import show
from bokeh.models import LogColorMapper, ColumnDataSource, LogTicker, ColorBar
from bokeh.palettes import Reds256 as palette
from bokeh.layouts import column
from bokeh.plotting import figure
from bokeh.sampledata.us_counties import data as counties

# Flip the order of the imported Reds256 palette and freeze it as a tuple
# (presumably so larger values map to the darker reds; confirm against the
# rendered colour bar).
palette = tuple(palette[::-1])

Read CSV Data

In [413]:
# Load the raw incident records; the path is relative, so the CSV must sit in
# the notebook's working directory.
fire_df = pd.read_csv(filepath_or_buffer="California_Fire_Incidents.csv")

Inspect Data

In [414]:
# Summarise columns, non-null counts and dtypes.
# DataFrame.info() writes its report straight to stdout and returns None, so
# wrapping it in print() would append a stray "None" line to the output.
fire_df.info()
print(fire_df)
# Check the counties
print(fire_df['Counties'].value_counts())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1636 entries, 0 to 1635
Data columns (total 40 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   AcresBurned           1633 non-null   float64
 1   Active                1636 non-null   bool   
 2   AdminUnit             1636 non-null   object 
 3   AirTankers            28 non-null     float64
 4   ArchiveYear           1636 non-null   int64  
 5   CalFireIncident       1636 non-null   bool   
 6   CanonicalUrl          1636 non-null   object 
 7   ConditionStatement    284 non-null    object 
 8   ControlStatement      111 non-null    object 
 9   Counties              1636 non-null   object 
 10  CountyIds             1636 non-null   object 
 11  CrewsInvolved         171 non-null    float64
 12  Dozers                123 non-null    float64
 13  Engines               191 non-null    float64
 14  Extinguished          1577 non-null   object 
 15  Fatalities           

Clean Data

In [415]:
# NOTE: this cell overwrites fire_df, so it has to be run on the freshly loaded
# CSV frame. Re-running it on its own output fails because the 'Counties'
# column has been renamed by then.

# Keep only acres burned and counties, and treat missing values as 0.
# fillna() is applied non-inplace: calling it with inplace=True on a frame that
# was just selected out of another frame can raise SettingWithCopyWarning.
fire_df = fire_df[['AcresBurned', 'Counties']].fillna(0)

# Group by county and sum the acres burned; reset_index() turns the county
# name back into a regular column.
fire_df = fire_df.groupby(['Counties'])['AcresBurned'].sum().reset_index()

# Rename the counties column so it matches the 'county' key of map_data below
fire_df = fire_df.rename(columns={'Counties': 'county'})

# Restrict the Bokeh county sample data to California.
# This deliberately rebinds the imported `counties` name to the filtered dict.
counties = {code: county for code, county in counties.items() if county["state"] == "ca"}

# One entry per county: boundary longitudes, boundary latitudes and the name,
# all taken in the same iteration order so the three lists stay aligned.
county_xs = [np.asarray(county["lons"]) for county in counties.values()]
county_ys = [np.asarray(county["lats"]) for county in counties.values()]
county_names = [county['name'] for county in counties.values()]
map_data = pd.DataFrame(dict(x=county_xs, y=county_ys, county=county_names))

# Merge county location data with acres burned by county.
# how='right' keeps every county present in map_data, including those with no
# recorded fires (their AcresBurned is NaN after the merge).
fire_df = pd.merge(fire_df, map_data, on='county', how='right')

# Counties without any fire records get 0 acres burned instead of NaN
fire_df = fire_df.fillna(0)

print(fire_df)

             county  AcresBurned  \
0           Alameda         6387   
1            Alpine            0   
2            Amador         6285   
3             Butte       190702   
4         Calaveras         2648   
5            Colusa       459316   
6      Contra Costa         6884   
7         Del Norte        38407   
8         El Dorado       114411   
9            Fresno       214411   
10            Glenn       413201   
11         Humboldt        22925   
12         Imperial            0   
13             Inyo        31884   
14             Kern       119464   
15            Kings        54377   
16             Lake       582784   
17           Lassen       151797   
18      Los Angeles       194580   
19           Madera        44505   
20            Marin          349   
21         Mariposa       217852   
22        Mendocino       512712   
23           Merced        13641   
24            Modoc       179192   
25             Mono        35885   
26         Monterey       15

In [416]:
# Wrap the merged frame in a ColumnDataSource so the glyphs and the hover
# tooltips can refer to its columns by name ('x', 'y', 'county', 'AcresBurned').
source = ColumnDataSource(data=fire_df)

# Logarithmic colour scale; low=1 because a log scale cannot start at 0
# (counties with 0 acres fall below the lower bound of the mapper).
color_mapper = LogColorMapper(palette=palette, low=1)

# Define tools for figure
TOOLS = "pan,wheel_zoom,reset,hover,save"

# Figure and Tooltip Details
# `width`/`height` are used instead of `plot_width`/`plot_height`: the latter
# were deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(
    width=450,
    height=500,
    title="California Wildfires from 2013-2020",
    tools=TOOLS,
    x_axis_location=None,
    y_axis_location=None,
    tooltips=[
        ("County", "@county"),
        ("Acres Burned", "@AcresBurned"),
        ("(Long, Lat)", "($x, $y)"),
    ],
)

# Map Details: one patch per county, filled according to acres burned
p.patches(
    'x', 'y',
    source=source,
    fill_color={'field': 'AcresBurned', 'transform': color_mapper},
    fill_alpha=0.8,
    line_color="white",
    line_width=0.7,
)
p.grid.grid_line_color = None
p.hover.point_policy = "follow_mouse"

# Add Color Bar relating the number of acres burned to a color
color_bar = ColorBar(
    color_mapper=color_mapper,
    ticker=LogTicker(),
    label_standoff=15,
    width=10,
    border_line_color=None,
    title='Number of Acres Burned',
    padding=30,
    major_label_text_font_size='10px',
)
p.add_layout(color_bar, 'right')

# Display visualization
show(column(p,))