---
Total Wildfires in the USA from 1992 to 2015
--
1. There has been a boom-and-bust cycle in the total number of wildfires each year. 
2. However, there has been an increase in the totals for certain states, particularly in the mid-south to south-east.  
3. Most notably, Georgia, Texas, and Mississippi had the most wildfires during this time.  

---
Sources: 
---
1. https://www.kaggle.com/rtatman/188-million-us-wildfires/kernels 
2. https://plot.ly/python/choropleth-maps/#united-states-choropleth-map
3. https://ezgif.com/apng-maker

In [0]:
# load libraries
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from scipy.stats import linregress
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
from scipy.stats import sem

import sqlite3
import gmaps
from config import (gkey)

In [0]:
# conn = sqlite3.connect('./FPA_FOD_20170508.sqlite')

# c = conn.cursor()

# data = c.execute('Select * from Fires;')
# master_df = pd.DataFrame(data.fetchall())
# master_df.head()



In [0]:
# master_header_df = master_df.rename(columns={
#     0:"OBJECTID", 
#     1:"FOD_ID", 
#     2:"FPA_ID", 
#     3:"SOURCE_SYSTEM_TYPE", 
#     4:"SOURCE_SYSTEM", 
#     5:"NWCG_REPORTING_AGENCY", 
#     6:"NWCG_REPORTING_UNIT_ID",  
#     7:"NWCG_REPORTING_UNIT_NAME",  
#     8:"SOURCE_REPORTING_UNIT", 
#     9:"SOURCE_REPORTING_UNIT_NAME", 
#     10:"LOCAL_FIRE_REPORT_ID", 
#     11:"LOCAL_INCIDENT_ID",  
#     12:"FIRE_CODE",  
#     13:"FIRE_NAME", 
#     14:"ICS_209_INCIDENT_NUMBER", 
#     15:"ICS_209_NAME", 
#     16:"MTBS_ID",  
#     17:"MTBS_FIRE_NAME",  
#     18:"COMPLEX_NAME", 
#     19:"FIRE_YEAR",  
#     20:"DISCOVERY_DATE", 
#     21:"DISCOVERY_DOY",  
#     22:"DISCOVERY_TIME", 
#     23:"STAT_CAUSE_CODE", 
#     24:"STAT_CAUSE_DESCR",  
#     25:"CONT_DATE", 
#     26:"CONT_DOY",  
#     27:"CONT_TIME", 
#     28:"FIRE_SIZE", 
#     29:"FIRE_SIZE_CLASS", 
#     30:"LATITUDE", 
#     31:"LONGITUDE", 
#     32:"OWNER_CODE", 
#     33:"OWNER_DESCR", 
#     34:"STATE",  
#     35:"COUNTY",  
#     36:"FIPS_CODE", 
#     37:"FIPS_NAME", 
#     38:"Shape",
# })
# master_header_df.head()



In [0]:
# group_main_df = master_header_df[["FOD_ID", "FIRE_NAME", "FIRE_YEAR", "DISCOVERY_DATE", "DISCOVERY_DOY", "DISCOVERY_TIME", 
#                                "STAT_CAUSE_CODE", "STAT_CAUSE_DESCR", "FIRE_SIZE", "FIRE_SIZE_CLASS", 
#                                 "LATITUDE", "LONGITUDE", "STATE", "COUNTY"]]



In [0]:
# size_over_one = group_main_df.loc[group_main_df["FIRE_SIZE"]>1,:]



In [0]:
# Created Main database for group to start using.  THIS DATABASE INCLUDES AK, PR, HI
# ===============================================================
# size_over_one.to_csv("Fire_Data_Over_1.csv")



In [0]:
# Read the fire records CSV (one directory above the notebook) into a DataFrame.
file = "../Fire_Data_Over_1.csv"
df = pd.read_csv(filepath_or_buffer=file)



In [0]:
# Keep only the contiguous United States: drop Alaska (AK), Hawaii (HI) and
# Puerto Rico (PR).
# .copy() makes us_df an independent frame, so columns added to it in later
# cells do not trigger pandas' SettingWithCopyWarning.
us_df = df.loc[~df["STATE"].isin(["AK", "HI", "PR"])].copy()



In [0]:
# Split the latitude and longitude extent of the fires into three
# equal-width bands each; the edges and labels are consumed by pd.cut later.

# Latitude: extent, band width, then the four bin edges.
lat_min = us_df["LATITUDE"].min()
lat_max = us_df["LATITUDE"].max()
lat_interval = (lat_max - lat_min) / 3
lat_bin = [lat_min + step * lat_interval for step in range(3)] + [lat_max]

# Longitude: same construction.
lon_min = us_df["LONGITUDE"].min()
lon_max = us_df["LONGITUDE"].max()
lon_interval = (lon_max - lon_min) / 3
lon_bin = [lon_min + step * lon_interval for step in range(3)] + [lon_max]

# Band labels, ordered from the smallest coordinate to the largest.
lat_name = ["South", "Central", "North"]
lon_name = ["West", "Central", "East"]



In [0]:
# Label every fire West / Central / East according to its longitude band.
# include_lowest=True closes the first bin on its left edge; without it the
# fire located exactly at lon_min (the first bin edge) matches no bin and is
# labelled NaN.
# assign() returns a new frame instead of writing into us_df in place, which
# avoids SettingWithCopyWarning when us_df is a slice of another frame.
us_df = us_df.assign(
    LON_REGION=pd.cut(
        us_df["LONGITUDE"], bins=lon_bin, labels=lon_name, include_lowest=True
    )
)



In [0]:
# Label every fire South / Central / North according to its latitude band.
# include_lowest=True closes the first bin on its left edge; without it the
# fire located exactly at lat_min (the first bin edge) matches no bin and is
# labelled NaN.
# assign() returns a new frame instead of writing into us_df in place, which
# avoids SettingWithCopyWarning when us_df is a slice of another frame.
us_df = us_df.assign(
    LAT_REGION=pd.cut(
        us_df["LATITUDE"], bins=lat_bin, labels=lat_name, include_lowest=True
    )
)
us_df.head()



In [0]:
# Count the fires in each latitude band and draw them as horizontal bars.
lat_df = us_df.groupby("LAT_REGION")
lat_list_bar = lat_df["FOD_ID"].count()

lat_list_bar.plot(kind="barh", facecolor="red", align="edge")
plt.title("Wildfires by Latitude")  # spelling corrected from "Lattitude"
# Horizontal bars: the counts run along x, the bands along y.
plt.xlabel("Number of Fires")
plt.ylabel("Latitude Region")
plt.grid()
plt.savefig("Wildfires_By_Latitude.png", bbox_inches="tight")
plt.show()



In [0]:
# Count the fires in each longitude band and draw them as vertical bars.
lon_df = us_df.groupby("LON_REGION")
lon_list_bar = lon_df["FOD_ID"].count()

lon_list_bar.plot(kind="bar", facecolor="blue", align="edge")
plt.title("Wildfires by Longitude")
# Vertical bars: the bands run along x and the counts along y, so the axis
# labels are assigned accordingly.
plt.xlabel("Longitude Region")
plt.ylabel("Number of Fires")
plt.grid()
plt.xticks(rotation=45)
plt.savefig("Wildfires_By_Longitude.png", bbox_inches="tight")
plt.show()



In [0]:
# Scatter of every fire: longitude on x, burned area on y.
size_fig, size_ax = plt.subplots()
size_ax.scatter(
    us_df["LONGITUDE"],
    us_df["FIRE_SIZE"],
    marker="o",
    facecolors="red",
    edgecolors="black",
)
size_ax.set_title("Longitude Vs. Wildfire Size")
size_ax.set_xlabel("Longitude")
size_ax.set_ylabel("Fire Size (Acres)")
size_ax.grid()
size_fig.savefig("Longitude_Vs_Wildfire_Size.png", bbox_inches="tight")
plt.show()



In [0]:
# Register the Google Maps API key (imported from config) with gmaps.
gmaps.configure(api_key=gkey)

# Coordinates of every fire, as floats, in (latitude, longitude) column order.
locations = us_df.loc[:, ["LATITUDE", "LONGITUDE"]].astype(float)

# Fire size as floats; used as the heat-map weight of each location.
fire_size = us_df.loc[:, "FIRE_SIZE"].astype(float)



In [0]:
# Weighted heat map of fire locations on a Google map.

# Upper bound of the heat-map intensity scale.
# NOTE(review): presumably the largest FIRE_SIZE in us_df — confirm.
heat_max_intensity = 558198

fig = gmaps.figure()

# All layer options are passed once, at construction; dissipating=False is
# the setting intended to keep the heat map from dissipating on zoom.
heat_layer = gmaps.heatmap_layer(
    locations,
    weights=fire_size,
    dissipating=False,
    max_intensity=heat_max_intensity,
    point_radius=1,
)

fig.add_layer(heat_layer)

fig



In [0]:
# Restrict to fires larger than 10,000 acres and plot their size by longitude.
larger = us_df[us_df["FIRE_SIZE"] > 10000]

large_fig, large_ax = plt.subplots()
large_ax.scatter(
    larger["LONGITUDE"],
    larger["FIRE_SIZE"],
    marker="o",
    facecolors="red",
    edgecolors="black",
)
large_ax.set_title("Longitude Vs. Wildfires > 10,000 Acres")
large_ax.set_xlabel("Longitude")
large_ax.set_ylabel("Fire Size (Acres)")
large_ax.grid()
large_fig.savefig("Lng_vs_fire_gtrtenk.png", bbox_inches="tight")
plt.show()



In [0]:
# Count the fires above 10,000 acres in each longitude band (vertical bars).
lon_df = larger.groupby("LON_REGION")
lon_list_bar = lon_df["FOD_ID"].count()

lon_list_bar.plot(kind="bar", facecolor="blue", align="edge")
plt.title("Wildfires > 10,000 Acres by Longitude")
# Vertical bars: the bands run along x and the counts along y, so the axis
# labels are assigned accordingly.
plt.xlabel("Longitude Region")
plt.ylabel("Number of Fires")
plt.grid()
plt.xticks(rotation=45)
plt.savefig("Fire_grt_tenK_vs_lon.png", bbox_inches="tight", pad_inches=0.5)
plt.show()



In [0]:
# Count the fires above 10,000 acres in each latitude band (horizontal bars).
lat_df = larger.groupby("LAT_REGION")
lat_list_bar = lat_df["FOD_ID"].count()

lat_list_bar.plot(kind="barh", facecolor="red", align="edge")
plt.title("Wildfires > 10,000 Acres by Latitude")  # spelling corrected from "Lattitude"
# Horizontal bars: the counts run along x, the bands along y.
plt.xlabel("Number of Fires")
plt.ylabel("Latitude Region")
plt.grid()
plt.savefig("Fire_grt_tenK_vs_lat.png", bbox_inches="tight")
plt.show()



In [0]:
# Mean fire size per year, with the standard error of the mean as error bars.
# The per-year grouping is built inside this cell so that it runs on a fresh
# kernel instead of relying on objects left over from other cells.
# NOTE(review): groups us_df (AK/HI/PR excluded), like the preceding charts —
# confirm this is the intended population.
size_by_year = us_df.groupby("FIRE_YEAR")["FIRE_SIZE"]
means = size_by_year.mean()
standard_errors = size_by_year.sem()

fig, ax = plt.subplots()
x_axis = means.index  # the fire years, in the same order as the means
ax.errorbar(x_axis, means, yerr=standard_errors, fmt="o", color="red")

plt.title("Wildfire Size By Year")
plt.xlabel("Year")
plt.ylabel("Average Size of Fire")
plt.grid()
plt.savefig("Wildfire_Size_By_Year.png", bbox_inches="tight")

plt.show()




In [None]:
# ======================================================================
# Victor's code starts here
# ======================================================================

In [None]:


# Read the fire records CSV from the Resources folder and preview it.
fp = "../Resources/Fire_Data_Over_1.csv"
map_df = pd.read_csv(filepath_or_buffer=fp)
map_df.head()

In [None]:
# Puerto Rico is dropped because it won't show up on the US choropleth map.
not_puerto_rico = map_df["STATE"].ne("PR")
new = map_df[not_puerto_rico]
new.head()

In [None]:
# Sort by year, then state, and renumber the rows.
# The dropna result is now kept (chained) rather than discarded, and it is
# limited to the two columns this analysis counts on: dropping rows with a
# gap in *any* column would also remove fires that merely lack optional
# fields.
df = (
    new
    .sort_values(["FIRE_YEAR", "STATE"])
    .dropna(subset=["FIRE_YEAR", "STATE"])
    .reset_index(drop=True)
)
df.head()

In [None]:
# Two-column view (year, state) plus fire counts per state and per year.
group_df = df.loc[:, ["FIRE_YEAR", "STATE"]]

# Per-state counts as a plain list, in the group order of the groupby.
state_totals = df.groupby("STATE")["FIRE_YEAR"].count().tolist()

# Per-year grouping and the number of fires recorded in each year.
years = df.groupby("FIRE_YEAR")
year_totals = years["STATE"].count()

group_df.head()

In [None]:
# Distinct (year, state) pairs across the whole period.
unique_df = group_df.drop_duplicates().reset_index(drop=True)

# Distinct (year, state) pairs for 2015 only; used as the list of states.
unique_states = (
    group_df[group_df["FIRE_YEAR"].eq(2015)]
    .drop_duplicates()
    .reset_index(drop=True)
)

unique_df.head()

In [None]:
# Table of total fires per year.
# The years are taken from the data itself (the index of year_totals) rather
# than from a fixed range, so the two columns always have matching lengths.
years = year_totals.index.tolist()

# "Totals" is stored as thousands-separated text for display; use
# year_totals for any numeric work.
years_df = pd.DataFrame({
    "Year": years,
    "Totals": year_totals.map("{:,}".format),
})
years_df.head()

In [None]:
# Line graph of yearly fire totals with a least-squares trend line.

# Columns of the per-year table, selected by position.
year = years_df.iloc[:, 0]
totals = years_df.iloc[:, 1]

# Linear regression of fire count against year; the fitted values give the
# dashed trend line.
trend = stats.linregress(years, year_totals)
m_slope, m_int, m_r, m_p, m_std_err = trend
m_fit = m_int + m_slope * year

fig, ax = plt.subplots(nrows=1, sharex=True)
fig.suptitle(
    "Total number of wildfires in the USA from 1992 to 2015",
    fontsize=12,
    fontweight="bold",
)

ax.plot(years, year_totals, marker="s", color="Red", linewidth=1)
ax.plot(year, m_fit, "r--", linewidth=1)
ax.grid(alpha=0.5)
ax.set_xlabel("Years")
ax.set_ylabel("Number of Wildfires")

fig.savefig("graphs/total_wildfires_years.png")
plt.show()

# Show the regression statistics as the cell output.
trend

In [None]:
# Table of total fires per state, largest first, plus the ten largest.

# Count per state straight from the grouped data, so each state is paired
# with its own total by label. This does not require every state to have a
# fire in one particular year, nor two separately built lists to line up.
state_counts = df.groupby("STATE")["FIRE_YEAR"].count()
states = pd.Series(state_counts.index, name="STATE")

states_df = pd.DataFrame({
    "States": states,
    "Totals": state_counts.to_numpy(),
})
states_df = states_df.sort_values(["Totals"], ascending=False).reset_index(drop=True)

# Pick out the top 10 states: head(10) takes rows 0-9 (a [0:9] slice would
# stop after nine).
top_10 = states_df.head(10).rename(columns={"States": "Top 10 States"})

states_df.head()

In [None]:
# Bar chart of the states with the most wildfires.

# One bar per row of top_10. Bars are left-aligned at 0, 1, 2, ... so each
# tick is shifted by 0.4 to sit under its bar.
top_10_states = top_10["Top 10 States"]
x_axis = np.arange(len(top_10))
tick_locations = list(x_axis + 0.4)

plt.bar(x_axis, top_10["Totals"], color='r', alpha=0.5, align="edge")
plt.xticks(tick_locations, top_10_states)

# Title, axis labels and a light grid.
plt.title("Top Ten States with the Most Wildfires (1992 - 2015)")
plt.xlabel("States")
plt.ylabel("Number of Wildfires")
plt.grid(alpha=0.5)

# Tighten the layout, write the figure to disk, then display it.
plt.tight_layout()
plt.savefig("graphs/total_wildfires_states.png")
plt.show()

In [None]:
# Number of wildfires for every (year, state) pair.
df = df[["FIRE_YEAR", "STATE"]]

# Build the table directly from the grouped counts, so each total carries its
# own year and state labels instead of being attached to another frame by row
# position. The cell can also be re-run safely, because it no longer reads
# the previous contents of unique_df.
unique_df = (
    df.groupby(["FIRE_YEAR", "STATE"])["STATE"]
    .count()
    .reset_index(name="Totals")
)
totals = unique_df["Totals"].tolist()
unique_df.head()

In [None]:
# Reshape to one row per state and one column per year (pivot table of Totals).
unique_df = unique_df.pivot_table(
    values="Totals",
    index=["STATE"],
    columns="FIRE_YEAR",
).reset_index()
unique_df.head()

In [None]:
# Choropleth map of total wildfires per state, using the plotly library and API.

# State codes as text, and totals as numbers: z is the value plotly maps onto
# the colour scale. pd.to_numeric leaves numeric totals unchanged and converts
# them back if they were stored as strings; states_df itself is not modified.
state_codes = states_df["States"].astype(str)
wildfire_totals = pd.to_numeric(states_df["Totals"])

# Colour gradient: (position on the 0-1 scale, colour) pairs.
scl = [[0.0, 'rgb(255,215,0)'],
       [0.2, 'rgb(255,165,0)'],
       [0.4, 'rgb(255,140,0)'],
       [0.6, 'rgb(255,115,0)'],
       [0.8, 'rgb(200,50,0)'],
       [1.0, 'rgb(139,0,0)']]

# Trace definition for the choropleth map.
data = [dict(
    type='choropleth',
    colorscale=scl,
    autocolorscale=False,
    locations=state_codes,
    z=wildfire_totals,
    locationmode='USA-states',
    marker=dict(
        line=dict(
            color='rgb(255,255,255)',
            width=2,
        ),
    ),
    colorbar=dict(
        title="Number of Wildfires",
    ),
)]

# Title and map settings. "<br>" is the line-break tag for plotly text; the
# malformed "</br>" has been replaced with it.
layout = dict(
    title='USA Wildfires 1992-2015 <br><br>Source: Kaggle',
    geo=dict(
        scope='usa',
        projection=dict(type='albers usa'),
        showlakes=True,
        lakecolor='rgb(255, 255, 255)',
    ),
)

# Show the map
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='d3-cloropleth-map')

---
Sources: 
---
1. https://www.kaggle.com/rtatman/188-million-us-wildfires/kernels 
2. https://plot.ly/python/choropleth-maps/#united-states-choropleth-map
3. https://ezgif.com/apng-maker