In [1]:
import pandas as pd
# Load the incident table used by the county-level analysis below.
CA = pd.read_csv('mapdataall2.csv')

In [2]:
CA.shape

(1730, 23)

In [3]:
# Read the county reference file with the fixed-width reader and re-save it as
# CSV (to_csv defaults apply: the index and a header row are written).
# The CSV is read back in a later cell with header=None and its second column
# is split on ','.
# NOTE(review): that later split suggests the .txt is itself comma-delimited, so
# a direct pd.read_csv(..., header=None) would probably replace this round trip
# -- confirm against the file before changing.
# `df` is only a temporary here; it is rebound to a column subset of CA below.
df = pd.read_fwf('national_county.txt')
df.to_csv('national_county.csv')

In [4]:
CA.columns

Index(['incident_name', 'incident_is_final', 'incident_date_last_update',
       'incident_date_created', 'incident_administrative_unit',
       'incident_administrative_unit_url', 'incident_county',
       'incident_location', 'incident_acres_burned', 'incident_containment',
       'incident_control', 'incident_cooperating_agencies',
       'incident_longitude', 'incident_latitude', 'incident_type',
       'incident_id', 'incident_url', 'incident_date_extinguished',
       'incident_dateonly_extinguished', 'incident_dateonly_created',
       'is_active', 'calfire_incident', 'notification_desired'],
      dtype='object')

In [5]:
# Keep only the fields needed for the county roll-up and the per-fire charts.
analysis_columns = [
    'incident_acres_burned',
    'incident_county',
    'incident_date_extinguished',
    'incident_containment',
    'incident_name',
]
df = CA[analysis_columns]

In [6]:
df.isnull().sum()

incident_acres_burned           0
incident_county                11
incident_date_extinguished    116
incident_containment            0
incident_name                   0
dtype: int64

In [7]:
df.shape

(1730, 5)

In [8]:
# Lift pandas' display truncation for both columns and rows.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

### Exploding a Column into multiple rows

In [9]:
type(df.incident_county[0])

str

In [10]:
df.isnull().sum()

incident_acres_burned           0
incident_county                11
incident_date_extinguished    116
incident_containment            0
incident_name                   0
dtype: int64

In [11]:
# Discard incidents that have no county recorded (11 rows in the null count above).
df = df[df['incident_county'].notna()]

In [12]:
df.isnull().sum()

incident_acres_burned           0
incident_county                 0
incident_date_extinguished    116
incident_containment            0
incident_name                   0
dtype: int64

In [13]:
# Missing extinguished timestamps are replaced with the literal 'Active'.
# `df` is the result of filtering another frame, so assigning a column into it
# in place raises SettingWithCopyWarning; `assign` returns a fresh frame instead.
df = df.assign(
    incident_date_extinguished=df['incident_date_extinguished'].fillna('Active')
)

In [14]:
df.isnull().sum()

incident_acres_burned         0
incident_county               0
incident_date_extinguished    0
incident_containment          0
incident_name                 0
dtype: int64

In [15]:
# One entry per (incident, county): split the ', '-separated county string into
# positional columns, key every row by the incident's other fields, then stack
# the columns into rows (with stack's default dropna the empty padding cells
# of single-county incidents disappear).
county_lists = df.incident_county.str.split(', ').tolist()
incident_keys = [
    df.incident_acres_burned,
    df.incident_date_extinguished,
    df.incident_containment,
    df.incident_name,
]
new_df = pd.DataFrame(county_lists, index=incident_keys).stack()

In [16]:
new_df.head()

incident_acres_burned  incident_date_extinguished  incident_containment  incident_name   
37                     2018-01-09 13:46:00         100                    Bridge Fire   0            Shasta
122                    2009-05-25 00:00:00         100                   Pala Fire      0         San Diego
406                    2013-02-28 20:00:00         100                   River Fire     0              Inyo
30                     2013-04-22 09:00:00         100                   Fawnskin Fire  0    San Bernardino
274                    2013-05-01 07:00:00         100                   Gold Fire      0            Madera
dtype: object

In [17]:
# Turn the four incident-key index levels back into columns and discard the
# innermost level created by stack() (the county's position within its incident).
# The county values end up in a column labelled 0, renamed in the next cell.
# The previous call listed level 0 twice (once by position, once by name) and
# only gave this result because the list length happened to equal the number of
# index levels; dropping the stack level explicitly does not rely on that.
new_df = new_df.droplevel(-1).reset_index()

In [18]:
new_df.rename(columns = {0:'incident_county'}, inplace = True)

In [19]:
new_df['incident_county'] = new_df['incident_county'] + " County"

In [20]:
new_df['STNAME'] = 'CA'

In [21]:
new_df.shape

(1769, 6)

In [22]:
new_df.nlargest(10, 'incident_acres_burned')

Unnamed: 0,incident_acres_burned,incident_date_extinguished,incident_containment,incident_name,incident_county,STNAME
1105,410203,2019-01-04 09:33:00,100,Ranch Fire (Mendocino Complex),Colusa County,CA
1106,410203,2019-01-04 09:33:00,100,Ranch Fire (Mendocino Complex),Glenn County,CA
1107,410203,2019-01-04 09:33:00,100,Ranch Fire (Mendocino Complex),Lake County,CA
1108,410203,2019-01-04 09:33:00,100,Ranch Fire (Mendocino Complex),Mendocino County,CA
1683,396624,2020-10-01 10:29:00,100,SCU Lightning Complex,Santa Clara County,CA
1684,396624,2020-10-01 10:29:00,100,SCU Lightning Complex,Alameda County,CA
1685,396624,2020-10-01 10:29:00,100,SCU Lightning Complex,Contra Costa County,CA
1686,396624,2020-10-01 10:29:00,100,SCU Lightning Complex,San Joaquin County,CA
1687,396624,2020-10-01 10:29:00,100,SCU Lightning Complex,Stanislaus County,CA
1695,363220,2020-10-02 10:38:00,100,"LNU Lightning Complex (includes Hennessey, Gam...",Napa County,CA


In [23]:
test = pd.read_csv('national_county.csv', header = None)

In [24]:
test.rename(columns = {0: "x", 1 : "y"}, inplace = True)

In [25]:
test2 = pd.DataFrame(test.y.str.split(',').tolist())


In [26]:
test2.rename(columns = {0: "STNAME", 1 : "x", 2 : "y", 3 : "incident_county"}, inplace = True)

In [27]:
test2.drop([4], axis = 1 ,inplace = True)

In [28]:
test2["FIPS"] = test2['x'] + test2['y']

In [29]:
test2.drop(['x', 'y'], axis = 1, inplace = True)

In [30]:
testCA = test2[test2["STNAME"] == 'CA']

In [31]:
testCA.head()

Unnamed: 0,STNAME,incident_county,FIPS
186,CA,Alameda County,6001
187,CA,Alpine County,6003
188,CA,Amador County,6005
189,CA,Butte County,6007
190,CA,Calaveras County,6009


In [32]:
# Look up each row's FIPS code by county name; names missing from the
# reference table map to NaN.
county_to_fips = testCA.set_index('incident_county')['FIPS'].to_dict()
new_df['FIPS'] = new_df['incident_county'].map(county_to_fips)

In [33]:
import numpy as np

In [34]:
# Roll the incident rows up to one row per county: total acres, mean
# containment, and the number of incident rows. The count is kept in the
# 'incident_name' column, which the maps below label "Number of Incidents".
# Aggregations are given by name: passing the builtin sum / np.mean callables is
# deprecated in recent pandas, and 'size' is the row count that len produced.
subset = (
    new_df.groupby('incident_county')
    .agg({
        'incident_acres_burned': 'sum',
        'incident_containment': 'mean',
        'incident_name': 'size',
    })
    .reset_index()
)

In [35]:
# Attach the FIPS code to every county row; the map uses it as the location key.
fips_by_county = testCA.set_index('incident_county')['FIPS'].to_dict()
subset['FIPS'] = subset['incident_county'].map(fips_by_county)

In [36]:
subset.incident_name.sum()

1769

In [None]:
from urllib.request import urlopen
import json
import pandas as pd
import plotly.express as px

# County outlines (GeoJSON); rows of `subset` are matched to them via 'FIPS'.
COUNTY_GEOJSON_URL = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
with urlopen(COUNTY_GEOJSON_URL) as response:
    counties = json.load(response)

# Captions for legend and hover; in `subset` the 'incident_name' column holds
# the per-county incident count.
acres_labels = {'incident_acres_burned':'Acres Burned', 'FIPS' : 'County Code', 
                'incident_name' : 'Number of Incidents'}
map_center = {"lat": 37.3, "lon": -119.4179}

fig = px.choropleth_mapbox(
    subset,
    geojson=counties,
    locations='FIPS',
    color='incident_acres_burned',
    color_continuous_scale="inferno_r",
    hover_name='incident_county',
    mapbox_style="carto-positron",
    zoom=4.8,
    center=map_center,
    hover_data=['incident_name'],
    opacity=0.5,
    labels=acres_labels,
    title='Acres Burned (By County)',
)

# Save a standalone HTML copy, then render inline.
fig.write_html("firemap2.html")
fig.show()


In [None]:
# Mean of the per-county containment averages held in `subset`. Each county
# counts once regardless of how many incidents it had, so this is not an
# incident-weighted statewide figure.
CA_avg_contained = subset.incident_containment.mean()
CA_avg_contained

In [None]:
subset.head()

In [None]:
from urllib.request import urlopen
import json
import pandas as pd
import plotly.express as px

# Fetch the county outlines (GeoJSON) that the 'FIPS' column is matched against.
geojson_source = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
with urlopen(geojson_source) as response:
    counties = json.load(response)

# All keyword arguments for the containment map, collected in one place.
containment_map_kwargs = dict(
    geojson=counties,
    locations='FIPS',
    color='incident_containment',
    color_continuous_scale="Plasma_r",
    hover_name='incident_county',
    hover_data=['incident_name'],
    mapbox_style="carto-positron",
    zoom=4.8,
    center={"lat": 37.3, "lon": -119.4179},
    opacity=0.5,
    labels={'incident_containment':'Percent Contained', 'FIPS' : 'County Code',
            'incident_name' : 'Number of Incidents'},
    title='Average Percent Of Wildfires Contained (By County)',
)
fig = px.choropleth_mapbox(subset, **containment_map_kwargs)

# Save a standalone HTML copy, then render inline.
fig.write_html("firemap3.html")
fig.show()


In [None]:
burned_by_county = subset.nlargest(10, 'incident_acres_burned')

In [None]:
import plotly.express as px

fig = px.bar(burned_by_county, y="incident_county", x="incident_acres_burned", color="incident_county", orientation="h", 
             hover_name="incident_county", labels = {'incident_county' : 'County', 'incident_acres_burned' : 'Acres Burned'},
             color_discrete_sequence = px.colors.sequential.matter_r, title = 'Top 10 CA Counties Ranked By Acres Burned'
            )

fig.write_html("barchart_counties.html")
fig.show()

In [None]:
duplicate = df.copy()

In [None]:
duplicate['incident_name'] = df['incident_name'].copy().str.split('(').str[0]

In [None]:
# duplicate['date_extinguished'] = pd.to_datetime(duplicate['incident_date_extinguished']).dt.strftime('%Y-%m-%d')

In [None]:
duplicate.head()

In [None]:
burned_by_name = duplicate.nlargest(10, 'incident_acres_burned')

In [None]:
import plotly.express as px

fig = px.bar(burned_by_name, y="incident_name", x="incident_acres_burned", color="incident_name", orientation="h", 
             hover_name="incident_name", hover_data = ['incident_county','incident_date_extinguished'], labels = {'incident_date_extinguished' : 'Date Extinguished', 'incident_name' : 'Incident Name', 'incident_acres_burned' : 'Acres Burned', 'incident_county' : 'County'},
             color_discrete_sequence = px.colors.sequential.matter_r, title = 'Top 10 CA Fires Ranked By Acres Burned'
            )

fig.write_html("barchart_names.html")
fig.show()

In [None]:
# Raise NumPy's summarisation threshold to the maximum so long arrays (such as
# the .unique() result in the next cell) are printed in full.
import sys
import numpy
numpy.set_printoptions(threshold=sys.maxsize)

In [None]:
df.incident_name.unique()

In [None]:
top_20_largest = pd.read_excel('top20_acres.xlsx')
top_20_largest = top_20_largest[1:21]

In [None]:
top_20_deadliest = pd.read_excel('top20_deadliest.xlsx')
top_20_deadliest = top_20_deadliest[1:21]

In [None]:
top_20_destructive = pd.read_excel('top20_destruction.xlsx')
top_20_destructive = top_20_destructive[1:21]

In [None]:
top_20_largest.rename(columns = {'Unnamed: 1' : 'incident_name', 'Unnamed: 2' : 'incident_date', 
                                   'Unnamed: 3' : 'incident_county', 'Unnamed: 4' : 'incident_acres_burned', 
                                   'Unnamed: 5' : 'Structures Destroyed', 'Unnamed: 6' : 'Deaths'}, inplace = True)

In [None]:
top_20_deadliest.rename(columns = {'Unnamed: 1' : 'incident_name', 'Unnamed: 2' : 'incident_date', 
                                   'Unnamed: 3' : 'incident_county', 'Unnamed: 4' : 'incident_acres_burned', 
                                   'Unnamed: 5' : 'Structures Destroyed', 'Unnamed: 6' : 'Deaths'}, inplace = True)

In [None]:
top_20_destructive.rename(columns = {'Unnamed: 1' : 'incident_name', 'Unnamed: 2' : 'incident_date', 
                                   'Unnamed: 3' : 'incident_county', 'Unnamed: 4' : 'incident_acres_burned', 
                                   'Unnamed: 5' : 'Structures Destroyed', 'Unnamed: 6' : 'Deaths'}, inplace = True)

In [None]:
top_20_largest['incident_cause'] = top_20_largest['incident_name'].str.split('(').str.get(1)
top_20_largest['incident_cause'] = top_20_largest['incident_cause'].str.extract('([a-zA-Z ]+)', expand=False).str.strip()
top_20_largest['incident_name'] = top_20_largest['incident_name'].str.extract('([a-zA-Z ]+)', expand=False).str.strip()
top_20_largest['incident_county'] = top_20_largest['incident_county'].str.replace('\n', '')

In [None]:
top_20_deadliest['incident_cause'] = top_20_deadliest['incident_name'].str.split('(').str.get(1)
top_20_deadliest['incident_cause'] = top_20_deadliest['incident_cause'].str.extract('([a-zA-Z ]+)', expand=False).str.strip()
top_20_deadliest['incident_name'] = top_20_deadliest['incident_name'].str.extract('([a-zA-Z ]+)', expand=False).str.strip()

In [None]:
top_20_destructive['incident_cause'] = top_20_destructive['incident_name'].str.split('(').str.get(1)
top_20_destructive['incident_cause'] = top_20_destructive['incident_cause'].str.extract('([a-zA-Z ]+)', expand=False).str.strip()
top_20_destructive['incident_name'] = top_20_destructive['incident_name'].str.extract('([a-zA-Z ]+)', expand=False).str.strip()

In [None]:
top_20_largest['incident_cause'] = top_20_largest['incident_cause'].replace(['Undetermined'],'Unknown')

In [None]:
from collections import Counter

labels = top_20_largest['incident_cause']
largest_labels = list(top_20_largest['incident_cause'].unique())
largest_values = list(Counter(labels).values())

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# One colour per cause slice, in label order.
colors = ['gold', 'mediumturquoise', 'OrangeRed', 'LightSkyBlue', 'MediumOrchid']

# pull is a fraction of the pie radius; only the third slice is pulled out.
largest_pie = go.Pie(
    labels=largest_labels,
    values=largest_values,
    pull=[0, 0, 0.2, 0, 0],
    name='Wildfire Cause',
    sort=False,
)
slice_outline = {'color': '#000000', 'width': 2}

fig = make_subplots(rows=1, cols=1, specs=[[{'type': 'domain'}]])
fig.add_trace(largest_pie, row=1, col=1)
fig.update_traces(
    hoverinfo="label+percent+name",
    textfont_size=16,
    marker={'colors': colors, 'line': slice_outline},
)

fig.update_layout(title='Causes of the Top 20 Largest California Wildfires')
fig.show()

In [None]:
top_20_deadliest['incident_cause'] = top_20_deadliest['incident_cause'].replace(['Undetermined'],'Unknown')
top_20_deadliest['incident_cause'] = top_20_deadliest['incident_cause'].replace(['Arson', 'Vehicle'],'Human Related')
top_20_deadliest['incident_cause'] = top_20_deadliest['incident_cause'].replace(['Powerline', 'Power Lines'],'Powerlines')

In [None]:
labels = top_20_deadliest['incident_cause']
deadliest_labels = list(top_20_deadliest['incident_cause'].unique())
deadliest_values = list(Counter(labels).values())

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# One colour per cause slice, in label order.
colors = ['gold', 'mediumturquoise', 'darkorange', 'LightSkyBlue', 'MediumOrchid', 'OrangeRed', 'Pink']

# pull is a fraction of the pie radius; only the sixth slice is pulled out.
deadliest_pie = go.Pie(
    labels=deadliest_labels,
    values=deadliest_values,
    pull=[0, 0, 0, 0, 0, 0.2, 0],
    name='Wildfire Cause',
    sort=False,
)
black_outline = dict(color='#000000', width=2)

fig = make_subplots(rows=1, cols=1, specs=[[{'type': 'domain'}]])
fig.add_trace(deadliest_pie, row=1, col=1)
fig.update_traces(
    hoverinfo="label+percent+name",
    textfont_size=16,
    marker=dict(colors=colors, line=black_outline),
)

fig.update_layout(title='Causes of the Top 20 Deadliest California Wildfires')

fig.show()

In [None]:
top_20_destructive['incident_cause'] = top_20_destructive['incident_cause'].replace(['Undetermined'],'Unknown')
top_20_destructive['incident_cause'] = top_20_destructive['incident_cause'].replace(['Arson', 'Vehicle'],'Human Related')
top_20_destructive['incident_cause'] = top_20_destructive['incident_cause'].replace(['Powerline', 'Power Lines'],'Powerlines')

In [None]:
labels = top_20_destructive['incident_cause']
destructive_labels = list(top_20_destructive['incident_cause'].unique())
destructive_values = list(Counter(labels).values())

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# One colour per cause slice, in label order.
colors = ['gold', 'mediumturquoise', 'darkorange', 'OrangeRed', 'MediumOrchid', 'LightSkyBlue', 'Pink']

# pull is a fraction of the pie radius; only the fourth slice is pulled out.
destructive_pie = go.Pie(
    labels=destructive_labels,
    values=destructive_values,
    pull=[0, 0, 0, 0.2, 0, 0, 0],
    name='Wildfire Cause',
    sort=False,
)
outline_style = dict(color='#000000', width=2)

fig = make_subplots(rows=1, cols=1, specs=[[{'type': 'domain'}]])
fig.add_trace(destructive_pie, row=1, col=1)
fig.update_traces(
    hoverinfo="label+percent+name",
    textfont_size=16,
    marker=dict(colors=colors, line=outline_style),
)

fig.update_layout(title='Causes of the Top 20 Most Destructive California Wildfires')

fig.show()

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Three pies side by side: causes of the largest, deadliest and most
# destructive top-20 lists. Each `pull` list has one entry per slice and pulls
# a single slice out by 0.2 of the radius.
fig = make_subplots(rows=1, cols=3, specs=[[{'type':'domain'}, {'type':'domain'}, {'type':'domain'}]])

fig.add_trace(go.Pie(labels=largest_labels, values=largest_values, pull=[0, 0, 0.2, 0, 0],
                     name = 'Cause', sort = False), 1, 1)
fig.add_trace(go.Pie(labels=deadliest_labels, values=deadliest_values, pull=[0, 0, 0, 0, 0, 0.2, 0], 
                     name='Cause', sort = False), 1, 2)
# Only the third pie sets explicit slice colours.
fig.add_trace(go.Pie(labels=destructive_labels, values=destructive_values, pull=[0, 0, 0, 0.2, 0, 0, 0], 
                     name='Cause', sort = False,
                     marker = {'colors' : ['gold', 'DarkTurquoise', 'BurlyWood', 
                                          'OrangeRed', 'MediumOrchid', 'LightSkyBlue',
                                          'Pink']}), 1, 3)

# Shared hover text, font size and black slice outline for all three pies.
fig.update_traces(hoverinfo="label+percent+name", textfont_size = 14,
                  marker=dict(line=dict(color='#000000', width=2)))

# Title typo fixed ("Larget" -> "Largest"); the annotations caption each pie.
fig.update_layout(
    title = 'Causes Of The Largest, Deadliest, and Most Destructive CA Wildfires',
    annotations=[dict(text='Causes: Top 20 Largest Fires', x=-0.004, y=0.85, font_size=15, showarrow=False),
                 dict(text='Causes: Top 20 Deadliest Fires', x=0.465, y=0.85, font_size=15, showarrow=False),
                 dict(text='Causes: Top 20 Most Destructive', x=0.925, y=0.85, font_size=15, showarrow=False)])

fig.write_html("fire_piechart.html")

fig.show()

In [None]:
i1 = np.intersect1d(top_20_largest.incident_name, np.intersect1d(top_20_deadliest.incident_name, top_20_destructive.incident_name))

In [None]:
i1

In [None]:
i2 = np.intersect1d(top_20_largest.incident_name, top_20_deadliest.incident_name)

In [None]:
i2

In [None]:
i3 = np.intersect1d(top_20_largest.incident_name, top_20_destructive.incident_name)

In [None]:
i3

In [None]:
i4 = np.intersect1d(top_20_deadliest.incident_name, top_20_destructive.incident_name)

In [None]:
i4

In [None]:
i5 = list(i1) + list(i2) + list(i3) + list(i4)

In [None]:
i6 = list(set(i5))

In [None]:
i6

In [None]:
# Empty membership table: one row per fire name in i6, one column per top-20
# list. No data is passed, so every cell starts as NaN; the rows are filled in
# by a later cell.
data = pd.DataFrame(columns = ['TOP 20 LARGEST', 'TOP 20 DEADLIEST', 'TOP 20 MOST DESTRUCTIVE'], index = i6)

In [None]:
data

In [None]:
# Hand-entered membership flags per fire, in column order
# [TOP 20 LARGEST, TOP 20 DEADLIEST, TOP 20 MOST DESTRUCTIVE]; 1 = on that list.
# NOTE(review): these are typed in rather than computed from the top-20 frames,
# so they need revisiting whenever the spreadsheets change.
membership_flags = {
    'OLD': [0,1,1],
    'TUNNEL': [0,1,1],
    'NORTH COMPLEX': [1,1,1],
    'LNU LIGHTNING COMPLEX': [1,0,1],
    'CARR': [1,1,1],
    'AUGUST COMPLEX': [1,0,1],
    'THOMAS': [1,0,1],
    'ATLAS': [0,1,1],
    'CREEK FIRE': [1,0,1],
    'CEDAR': [1,1,1],
    'TUBBS': [0,1,1],
    'CAMP FIRE': [0,1,1],
    'WITCH': [1,0,1],
}
for fire_name, flags in membership_flags.items():
    data.loc[fire_name] = flags

In [None]:
# Row labels for the graph: names that already contain 'COMPLEX' or 'FIRE' are
# kept as they are, every other name gets ' FIRE' appended.
new_index = [
    label if ('COMPLEX' in label or 'FIRE' in label) else str(label) + ' FIRE'
    for label in data.index
]
        

In [None]:
data.index = new_index

In [None]:
data

In [None]:
# `nx` is the conventional alias for the top-level package. Importing it from
# networkx.algorithms only worked through an incidental re-export and fails on
# NetworkX versions that do not expose it there.
import networkx as nx
from networkx.algorithms import bipartite

# Bipartite graph: fires on one side (bipartite=0), top-20 lists on the other
# (bipartite=1).
B = nx.Graph()
B.add_nodes_from(data.index, bipartite=0)
B.add_nodes_from(data.columns, bipartite=1)

# stack() gives a Series indexed by (fire, list); each entry equal to 1 becomes
# an edge between that fire and that list.
s = data.stack()
B.add_edges_from(s[s==1].index)


In [None]:
# Colour key: 0 = fire node, 1 = top-20 list node, 2 = fire flagged on all
# three lists.
color_dict = {0:'b',1:'r',2:'m'}

# (node, key) pairs in graph node order; the three fires flagged [1,1,1] in
# `data` are re-keyed to 2.
on_all_three_lists = ('NORTH COMPLEX', 'CARR FIRE', 'CEDAR FIRE')
t = [
    (node, 2) if node in on_all_three_lists else (node, side)
    for node, side in B.nodes.data('bipartite')
]

color_list = [color_dict[key] for _, key in t]

In [None]:
from matplotlib.pyplot import figure, text
import matplotlib.pyplot as plt

figure(figsize=(12,12))

# Take the fire side from the 'bipartite' node attribute set when the graph was
# built. bipartite.sets(B) raises AmbiguousSolution when the graph is not
# connected, whereas the attribute is always available.
top = [node for node, side in B.nodes(data='bipartite') if side == 0]
pos = nx.bipartite_layout(B, top)


nx.draw(B, pos=pos, with_labels=False, 
        node_size=1000, node_color = color_list)

# Label placement for the three list nodes: (x offset, y offset, rotation).
list_label_layout = {
    'TOP 20 MOST DESTRUCTIVE': (0.01, -0.062, 13),
    'TOP 20 LARGEST': (0.01, -0.055, 15),
    'TOP 20 DEADLIEST': (0.02, -0.058, 17),
}
# Fires on all three lists get a magenta label box; other fires get blue.
on_all_three_lists = {'CEDAR FIRE', 'NORTH COMPLEX', 'CARR FIRE'}

for node, (x, y) in pos.items():
    if node in list_label_layout:
        dx, dy, angle = list_label_layout[node]
        text(x+dx, y+dy, s=node, bbox=dict(facecolor='r', alpha=0.3), fontsize = 12,
             horizontalalignment='center', rotation = angle)
    else:
        box_color = 'm' if node in on_all_three_lists else 'b'
        text(x,y+0.03,s=node, bbox=dict(facecolor=box_color,alpha=0.3),fontsize = 12,horizontalalignment='center')
plt.title("Bipartite Plot Mapping Fire Incidents to Top 20 Subsets", fontsize = 20);

In [None]:
top_20_largest.head()

In [None]:
top_20_deadliest.head()

In [None]:
top_20_destructive.head()