In [1]:
# Import dependencies.
import pandas as pd

# Import Meteostat library and dependencies.
from datetime import datetime
from datetime import time
import matplotlib.pyplot as plt
from meteostat import Point, Daily

In [2]:
# Load the California fire incidents CSV into a DataFrame and display it.
fire_incidents_df = pd.read_csv(filepath_or_buffer="../Resources/California_Fire_Incidents.csv")
fire_incidents_df

Unnamed: 0,AcresBurned,Active,AdminUnit,AirTankers,ArchiveYear,CalFireIncident,CanonicalUrl,ConditionStatement,ControlStatement,Counties,...,SearchKeywords,Started,Status,StructuresDamaged,StructuresDestroyed,StructuresEvacuated,StructuresThreatened,UniqueId,Updated,WaterTenders
0,257314.0,False,Stanislaus National Forest/Yosemite National Park,,2013,True,/incidents/2013/8/17/rim-fire/,,,Tuolumne,...,"Rim Fire, Stanislaus National Forest, Yosemite...",2013-08-17T15:25:00Z,Finalized,,,,,5fb18d4d-213f-4d83-a179-daaf11939e78,2013-09-06T18:30:00Z,
1,30274.0,False,USFS Angeles National Forest/Los Angeles Count...,,2013,True,/incidents/2013/5/30/powerhouse-fire/,,,Los Angeles,...,"Powerhouse Fire, May 2013, June 2013, Angeles ...",2013-05-30T15:28:00Z,Finalized,,,,,bf37805e-1cc2-4208-9972-753e47874c87,2013-06-08T18:30:00Z,
2,27531.0,False,CAL FIRE Riverside Unit / San Bernardino Natio...,,2013,True,/incidents/2013/7/15/mountain-fire/,,,Riverside,...,"Mountain Fire, July 2013, Highway 243, Highway...",2013-07-15T13:43:00Z,Finalized,,,,,a3149fec-4d48-427c-8b2c-59e8b79d59db,2013-07-30T18:00:00Z,
3,27440.0,False,Tahoe National Forest,,2013,False,/incidents/2013/8/10/american-fire/,,,Placer,...,"American Fire, August 2013, Deadwood Ridge, Fo...",2013-08-10T16:30:00Z,Finalized,,,,,8213f5c7-34fa-403b-a4bc-da2ace6e6625,2013-08-30T08:00:00Z,
4,24251.0,False,Ventura County Fire/CAL FIRE,,2013,True,/incidents/2013/5/2/springs-fire/,Acreage has been reduced based upon more accur...,,Ventura,...,"Springs Fire, May 2013, Highway 101, Camarillo...",2013-05-02T07:01:00Z,Finalized,6.0,10.0,,,46731fb8-3350-4920-bdf7-910ac0eb715c,2013-05-11T06:30:00Z,11.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1631,9.0,False,CAL FIRE / Riverside County Fire,,2019,True,/incidents/2019/10/10/eagle-fire/,,,Riverside,...,,2019-10-10T12:08:00Z,Finalized,,,,,6e93b252-99a3-4214-9921-238373b17535,2019-10-10T18:11:59.733Z,
1632,2.0,False,CAL FIRE Nevada-Yuba-Placer Unit,,2019,True,/incidents/2019/6/28/long-fire/,,,Nevada,...,,2019-06-28T15:03:04Z,Finalized,,,,,b38c0563-b321-431b-9174-6336c5a0d449,2019-06-30T15:52:01.023Z,
1633,,False,Yolo County Fire Protection District,,2019,False,/incidents/2019/11/25/cashe-fire/,,,Yolo,...,,2019-11-25T12:02:02Z,Finalized,,,,,9c26f915-1b33-422d-b30a-9eb4da6fd729,2019-12-03T16:35:20.93Z,
1634,,False,Camp Pendleton Marine Corps Base,,2019,False,/incidents/2019/10/22/oak-fire/,,,San Diego,...,,2019-10-22T19:20:44Z,Finalized,,,,,7264a106-e0f4-41de-8fd0-3f9110431e28,2019-11-21T12:21:28.58Z,


In [3]:
# Inspect the dtype pandas inferred for every column of the fire incidents DataFrame.
fire_incidents_df.dtypes

AcresBurned             float64
Active                     bool
AdminUnit                object
AirTankers              float64
ArchiveYear               int64
CalFireIncident            bool
CanonicalUrl             object
ConditionStatement       object
ControlStatement         object
Counties                 object
CountyIds                object
CrewsInvolved           float64
Dozers                  float64
Engines                 float64
Extinguished             object
Fatalities              float64
Featured                   bool
Final                      bool
FuelType                 object
Helicopters             float64
Injuries                float64
Latitude                float64
Location                 object
Longitude               float64
MajorIncident              bool
Name                     object
PercentContained        float64
PersonnelInvolved       float64
Public                     bool
SearchDescription        object
SearchKeywords           object
Started 

In [4]:
# Parse the "Started" and "Extinguished" columns into pandas datetimes.
for timestamp_column in ("Started", "Extinguished"):
    fire_incidents_df[timestamp_column] = pd.to_datetime(fire_incidents_df[timestamp_column])

In [5]:
# Keep only the calendar date of each timestamp; the time of day is discarded.
fire_incidents_df["Start_Date"] = fire_incidents_df["Started"].dt.date
fire_incidents_df["End_Date"] = fire_incidents_df["Extinguished"].dt.date

In [6]:
# Narrow the incidents DataFrame to the columns used in the rest of the notebook.
fire_incidents_df = fire_incidents_df[
    [
        "AcresBurned",
        "ArchiveYear",
        "Counties",
        "CountyIds",
        "Start_Date",
        "End_Date",
        "Injuries",
        "Latitude",
        "Longitude",
        "MajorIncident",
        "Location",
        "Name",
        "PercentContained",
        "StructuresDamaged",
        "StructuresDestroyed",
        "StructuresEvacuated",
        "StructuresThreatened",
    ]
]
fire_incidents_df.head()

Unnamed: 0,AcresBurned,ArchiveYear,Counties,CountyIds,Start_Date,End_Date,Injuries,Latitude,Longitude,MajorIncident,Location,Name,PercentContained,StructuresDamaged,StructuresDestroyed,StructuresEvacuated,StructuresThreatened
0,257314.0,2013,Tuolumne,55,2013-08-17,2013-09-06,,37.857,-120.086,False,3 miles east of Groveland along Hwy 120,Rim Fire,100.0,,,,
1,30274.0,2013,Los Angeles,19,2013-05-30,2013-06-08,,34.585595,-118.423176,False,Angeles National Forest,Powerhouse Fire,100.0,,,,
2,27531.0,2013,Riverside,33,2013-07-15,2013-07-30,,33.7095,-116.72885,False,Hwy 243 & Hwy 74 near Mountain Center,Mountain Fire,100.0,,,,
3,27440.0,2013,Placer,31,2013-08-10,2013-08-30,,39.12,-120.65,False,"Deadwood Ridge, northeast of Foresthill",American Fire,100.0,,,,
4,24251.0,2013,Ventura,56,2013-05-02,2013-05-11,10.0,0.0,0.0,True,Southbound Highway 101 at Camarillo Springs Ro...,Springs Fire,100.0,6.0,10.0,,


In [7]:
# Load the California fire perimeters CSV into a DataFrame and preview the first rows.
fire_perimeters_df = pd.read_csv(filepath_or_buffer="../Resources/California_Fire_Perimeters.csv")
fire_perimeters_df.head()

Unnamed: 0,OBJECTID,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,COMMENTS,REPORT_AC,GIS_ACRES,C_METHOD,OBJECTIVE,FIRE_NUM,SHAPE_Length,SHAPE_Area
0,21440,2020.0,CA,CDF,NEU,NELSON,13212,2020/06/18 00:00:00+00,2020/06/23 00:00:00+00,11.0,,110.0,109.6025,1.0,1.0,,4179.743142,733134.7
1,21441,2020.0,CA,CDF,NEU,AMORUSO,11799,2020/06/01 00:00:00+00,2020/06/04 00:00:00+00,2.0,,670.0,685.58502,1.0,1.0,,12399.375391,4578172.0
2,21442,2020.0,CA,CDF,NEU,ATHENS,18493,2020/08/10 00:00:00+00,2020/03/01 00:00:00+00,14.0,,26.0,27.30048,1.0,1.0,,2119.19412,182387.6
3,21443,2020.0,CA,CDF,NEU,FLEMING,7619,2020/03/31 00:00:00+00,2020/04/01 00:00:00+00,9.0,,13.0,12.93155,1.0,1.0,,2029.524881,86679.42
4,21444,2020.0,CA,CDF,NEU,MELANESE,8471,2020/04/14 00:00:00+00,2020/04/19 00:00:00+00,18.0,,10.3,10.31596,1.0,1.0,,1342.742903,70179.12


In [8]:
# Inspect the dtype pandas inferred for every column of the fire perimeters DataFrame.
fire_perimeters_df.dtypes

OBJECTID          int64
YEAR_           float64
STATE            object
AGENCY           object
UNIT_ID          object
FIRE_NAME        object
INC_NUM          object
ALARM_DATE       object
CONT_DATE        object
CAUSE           float64
COMMENTS         object
REPORT_AC       float64
GIS_ACRES       float64
C_METHOD        float64
OBJECTIVE       float64
FIRE_NUM         object
SHAPE_Length    float64
SHAPE_Area      float64
dtype: object

In [9]:
# Parse "ALARM_DATE" and "CONT_DATE" into datetimes; errors='coerce' turns unparseable values into NaT.
for date_column in ('ALARM_DATE', 'CONT_DATE'):
    fire_perimeters_df[date_column] = pd.to_datetime(fire_perimeters_df[date_column], errors='coerce')

In [10]:
# Keep only the calendar date of each timestamp; the time of day is discarded.
fire_perimeters_df['Start_Date'] = fire_perimeters_df['ALARM_DATE'].dt.date
fire_perimeters_df['End_Date'] = fire_perimeters_df['CONT_DATE'].dt.date

In [11]:
# Narrow the perimeters DataFrame to the columns used in the rest of the notebook.
fire_perimeters_df = fire_perimeters_df[
    [
        'YEAR_',
        'STATE',
        'FIRE_NAME',
        'Start_Date',
        'End_Date',
        'CAUSE',
        'SHAPE_Length',
        'SHAPE_Area',
        'GIS_ACRES',
    ]
]
fire_perimeters_df.head()

Unnamed: 0,YEAR_,STATE,FIRE_NAME,Start_Date,End_Date,CAUSE,SHAPE_Length,SHAPE_Area,GIS_ACRES
0,2020.0,CA,NELSON,2020-06-18,2020-06-23,11.0,4179.743142,733134.7,109.6025
1,2020.0,CA,AMORUSO,2020-06-01,2020-06-04,2.0,12399.375391,4578172.0,685.58502
2,2020.0,CA,ATHENS,2020-08-10,2020-03-01,14.0,2119.19412,182387.6,27.30048
3,2020.0,CA,FLEMING,2020-03-31,2020-04-01,9.0,2029.524881,86679.42,12.93155
4,2020.0,CA,MELANESE,2020-04-14,2020-04-19,18.0,1342.742903,70179.12,10.31596


In [12]:
# Left-join perimeter attributes onto the incidents, keeping every incident row.
# NOTE(review): the join key is only (Start_Date, End_Date), so any perimeter sharing both
# dates with an incident is paired with it regardless of fire name, and an incident can be
# repeated once per matching perimeter — confirm this is intended.
california_fire = pd.merge(
    fire_incidents_df,
    fire_perimeters_df,
    how="left",
    on=["Start_Date", "End_Date"],
)
california_fire.columns

Index(['AcresBurned', 'ArchiveYear', 'Counties', 'CountyIds', 'Start_Date',
       'End_Date', 'Injuries', 'Latitude', 'Longitude', 'MajorIncident',
       'Location', 'Name', 'PercentContained', 'StructuresDamaged',
       'StructuresDestroyed', 'StructuresEvacuated', 'StructuresThreatened',
       'YEAR_', 'STATE', 'FIRE_NAME', 'CAUSE', 'SHAPE_Length', 'SHAPE_Area',
       'GIS_ACRES'],
      dtype='object')

In [13]:
# Build a lookup table mapping the numeric CAUSE code to a human-readable description.
# NOTE(review): codes 14 and 17 both map to 'Unidentified' — confirm against the
# perimeter dataset's data dictionary.
d = {
    'CAUSE': list(range(1, 20)),
    'Cause_Description': [
        'Lightning',
        'Equipment Use',
        'Smoking',
        'Campfire',
        'Debris',
        'Railroad',
        'Arson',
        'Playing with fire',
        'Miscellaneous',
        'Vehicle',
        'Powerline',
        'Firefighter Training',  # was misspelled 'Figherfighter Training'
        'Non-Firefighter Training',
        'Unidentified',
        'Structure',
        'Aircraft',
        'Unidentified',
        'Escaped Prescribed Burn',
        'Illegal Alien Campfire',
    ],
}

fire_cause = pd.DataFrame(data=d)
fire_cause

Unnamed: 0,CAUSE,Cause_Description
0,1,Lightning
1,2,Equipment Use
2,3,Smoking
3,4,Campfire
4,5,Debris
5,6,Railroad
6,7,Arson
7,8,Playing with fire
8,9,Miscellaneous
9,10,Vehicle


In [14]:
# Attach the cause description to each row by matching on the numeric CAUSE code.
california_fire = pd.merge(california_fire, fire_cause, how='left', on="CAUSE")
california_fire.head()

Unnamed: 0,AcresBurned,ArchiveYear,Counties,CountyIds,Start_Date,End_Date,Injuries,Latitude,Longitude,MajorIncident,...,StructuresEvacuated,StructuresThreatened,YEAR_,STATE,FIRE_NAME,CAUSE,SHAPE_Length,SHAPE_Area,GIS_ACRES,Cause_Description
0,257314.0,2013,Tuolumne,55,2013-08-17,2013-09-06,,37.857,-120.086,False,...,,,,,,,,,,
1,30274.0,2013,Los Angeles,19,2013-05-30,2013-06-08,,34.585595,-118.423176,False,...,,,,,,,,,,
2,27531.0,2013,Riverside,33,2013-07-15,2013-07-30,,33.7095,-116.72885,False,...,,,2013.0,CA,MOUNTAIN,9.0,95986.473341,161435500.0,27523.666,Miscellaneous
3,27440.0,2013,Placer,31,2013-08-10,2013-08-30,,39.12,-120.65,False,...,,,,,,,,,,
4,24251.0,2013,Ventura,56,2013-05-02,2013-05-11,10.0,0.0,0.0,True,...,,,,,,,,,,


In [15]:
# The numeric CAUSE code is no longer needed once Cause_Description has been merged in.
california_fire = california_fire.drop(columns='CAUSE')
california_fire.head()

Unnamed: 0,AcresBurned,ArchiveYear,Counties,CountyIds,Start_Date,End_Date,Injuries,Latitude,Longitude,MajorIncident,...,StructuresDestroyed,StructuresEvacuated,StructuresThreatened,YEAR_,STATE,FIRE_NAME,SHAPE_Length,SHAPE_Area,GIS_ACRES,Cause_Description
0,257314.0,2013,Tuolumne,55,2013-08-17,2013-09-06,,37.857,-120.086,False,...,,,,,,,,,,
1,30274.0,2013,Los Angeles,19,2013-05-30,2013-06-08,,34.585595,-118.423176,False,...,,,,,,,,,,
2,27531.0,2013,Riverside,33,2013-07-15,2013-07-30,,33.7095,-116.72885,False,...,,,,2013.0,CA,MOUNTAIN,95986.473341,161435500.0,27523.666,Miscellaneous
3,27440.0,2013,Placer,31,2013-08-10,2013-08-30,,39.12,-120.65,False,...,,,,,,,,,,
4,24251.0,2013,Ventura,56,2013-05-02,2013-05-11,10.0,0.0,0.0,True,...,10.0,,,,,,,,,


In [16]:
# List the column names of the merged DataFrame to confirm CAUSE was removed.
california_fire.columns

Index(['AcresBurned', 'ArchiveYear', 'Counties', 'CountyIds', 'Start_Date',
       'End_Date', 'Injuries', 'Latitude', 'Longitude', 'MajorIncident',
       'Location', 'Name', 'PercentContained', 'StructuresDamaged',
       'StructuresDestroyed', 'StructuresEvacuated', 'StructuresThreatened',
       'YEAR_', 'STATE', 'FIRE_NAME', 'SHAPE_Length', 'SHAPE_Area',
       'GIS_ACRES', 'Cause_Description'],
      dtype='object')

In [17]:
# Count missing values per column of the merged DataFrame.
california_fire.isna().sum()

AcresBurned                3
ArchiveYear                0
Counties                   0
CountyIds                  0
Start_Date                 0
End_Date                  59
Injuries                1607
Latitude                   0
Longitude                  0
MajorIncident              0
Location                   0
Name                       0
PercentContained           3
StructuresDamaged       1667
StructuresDestroyed     1552
StructuresEvacuated     1734
StructuresThreatened    1704
YEAR_                   1331
STATE                   1331
FIRE_NAME               1333
SHAPE_Length            1331
SHAPE_Area              1331
GIS_ACRES               1331
Cause_Description       1335
dtype: int64

In [18]:
# Drop the fire-perimeter helper columns and StructuresEvacuated, then display the result.
california_fire.drop(
    columns=['FIRE_NAME', 'GIS_ACRES', 'StructuresEvacuated', 'YEAR_', 'SHAPE_Length', 'SHAPE_Area'],
    inplace=True,
)
california_fire

Unnamed: 0,AcresBurned,ArchiveYear,Counties,CountyIds,Start_Date,End_Date,Injuries,Latitude,Longitude,MajorIncident,Location,Name,PercentContained,StructuresDamaged,StructuresDestroyed,StructuresThreatened,STATE,Cause_Description
0,257314.0,2013,Tuolumne,55,2013-08-17,2013-09-06,,37.857000,-120.086000,False,3 miles east of Groveland along Hwy 120,Rim Fire,100.0,,,,,
1,30274.0,2013,Los Angeles,19,2013-05-30,2013-06-08,,34.585595,-118.423176,False,Angeles National Forest,Powerhouse Fire,100.0,,,,,
2,27531.0,2013,Riverside,33,2013-07-15,2013-07-30,,33.709500,-116.728850,False,Hwy 243 & Hwy 74 near Mountain Center,Mountain Fire,100.0,,,,CA,Miscellaneous
3,27440.0,2013,Placer,31,2013-08-10,2013-08-30,,39.120000,-120.650000,False,"Deadwood Ridge, northeast of Foresthill",American Fire,100.0,,,,,
4,24251.0,2013,Ventura,56,2013-05-02,2013-05-11,10.0,0.000000,0.000000,True,Southbound Highway 101 at Camarillo Springs Ro...,Springs Fire,100.0,6.0,10.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1729,9.0,2019,Riverside,33,2019-10-10,2019-10-10,,33.827979,-117.499619,False,"Eagle Canyon Rd. and Cajalco Rd., southwest of...",Eagle Fire,100.0,,,,CA,Vehicle
1730,2.0,2019,Nevada,29,2019-06-28,2019-06-28,,39.409722,-121.000556,False,"Off of Long Point Road and Old Mill Road, Sou...",Long Fire,100.0,,,,CA,Playing with fire
1731,,2019,Yolo,57,2019-11-25,NaT,,38.734634,-121.729691,False,"County Road 102 and County Road 17, North of W...",Cashe Fire,,,,,,
1732,,2019,San Diego,37,2019-10-22,NaT,,33.351145,-117.403719,False,"Near Basilone Road and Las Pulgas Road, near C...",Oak Fire,,,,,,


In [19]:
# Rename columns to a consistent Title_Snake_Case scheme.
california_fire = california_fire.rename(
    columns=dict(
        Name='Fire_Name',
        AcresBurned='Total_Acres_Burned',
        ArchiveYear='Archive_Year',
        Start_Date='Fire_Started_Date',
        End_Date='Fire_Extinguished_Date',
        PercentContained='Percent_Contained',
        Counties='County_Name',
        CountyIds='County_ID',
        MajorIncident='Major_Incident',
        StructuresDestroyed='Structures_Destroyed',
        StructuresDamaged='Structures_Damaged',
        # No-op when StructuresEvacuated has already been dropped; rename ignores missing keys.
        StructuresEvacuated='Structures_Evacuated',
        StructuresThreatened='Structures_Threatened',
        STATE='State',
    )
)
california_fire.head()

Unnamed: 0,Total_Acres_Burned,Archive_Year,County_Name,County_ID,Fire_Started_Date,Fire_Extinguished_Date,Injuries,Latitude,Longitude,Major_Incident,Location,Fire_Name,Percent_Contained,Structures_Damaged,Structures_Destroyed,Structures_Threatened,State,Cause_Description
0,257314.0,2013,Tuolumne,55,2013-08-17,2013-09-06,,37.857,-120.086,False,3 miles east of Groveland along Hwy 120,Rim Fire,100.0,,,,,
1,30274.0,2013,Los Angeles,19,2013-05-30,2013-06-08,,34.585595,-118.423176,False,Angeles National Forest,Powerhouse Fire,100.0,,,,,
2,27531.0,2013,Riverside,33,2013-07-15,2013-07-30,,33.7095,-116.72885,False,Hwy 243 & Hwy 74 near Mountain Center,Mountain Fire,100.0,,,,CA,Miscellaneous
3,27440.0,2013,Placer,31,2013-08-10,2013-08-30,,39.12,-120.65,False,"Deadwood Ridge, northeast of Foresthill",American Fire,100.0,,,,,
4,24251.0,2013,Ventura,56,2013-05-02,2013-05-11,10.0,0.0,0.0,True,Southbound Highway 101 at Camarillo Springs Ro...,Springs Fire,100.0,6.0,10.0,,,


In [20]:
# Count missing values per column after renaming.
california_fire.isna().sum()

Total_Acres_Burned           3
Archive_Year                 0
County_Name                  0
County_ID                    0
Fire_Started_Date            0
Fire_Extinguished_Date      59
Injuries                  1607
Latitude                     0
Longitude                    0
Major_Incident               0
Location                     0
Fire_Name                    0
Percent_Contained            3
Structures_Damaged        1667
Structures_Destroyed      1552
Structures_Threatened     1704
State                     1331
Cause_Description         1335
dtype: int64

In [21]:
# Treat missing structure and injury counts as zero.
for count_column in ("Structures_Damaged", "Structures_Destroyed", "Structures_Threatened", "Injuries"):
    california_fire[count_column] = california_fire[count_column].fillna(0)

In [22]:
# Count missing values per column after zero-filling the structure and injury counts.
california_fire.isna().sum()

Total_Acres_Burned           3
Archive_Year                 0
County_Name                  0
County_ID                    0
Fire_Started_Date            0
Fire_Extinguished_Date      59
Injuries                     0
Latitude                     0
Longitude                    0
Major_Incident               0
Location                     0
Fire_Name                    0
Percent_Contained            3
Structures_Damaged           0
Structures_Destroyed         0
Structures_Threatened        0
State                     1331
Cause_Description         1335
dtype: int64

In [23]:
# Total structures affected per fire: damaged + destroyed + threatened.
california_fire["Structure_Impacted"] = (
    california_fire["Structures_Damaged"]
    .add(california_fire["Structures_Destroyed"])
    .add(california_fire["Structures_Threatened"])
)

In [24]:
# Confirm the combined Structure_Impacted column has no missing values.
california_fire["Structure_Impacted"].isna().sum()

0

In [25]:
# Keep the first row for each (fire name, start date, extinguished date, county) combination.
california_fire = california_fire.drop_duplicates(
    subset=['Fire_Name', 'Fire_Started_Date', 'Fire_Extinguished_Date', 'County_Name']
)

In [26]:
# Keep only the first row for each fire name.
# NOTE(review): this de-duplicates on Fire_Name alone, so distinct fires that share a name
# (different dates or counties) are collapsed into one row — confirm this is intended.
california_fire = california_fire.drop_duplicates(subset='Fire_Name')

In [27]:
# Count rows that are exact duplicates of an earlier row across all columns.
california_fire.duplicated().sum()

0

In [28]:
# Remove the three structure-count columns from the DataFrame.
california_fire = california_fire.drop(
    columns=['Structures_Damaged', 'Structures_Destroyed', 'Structures_Threatened']
)

In [29]:
# Count missing values per column after de-duplication and column removal.
california_fire.isna().sum()

Total_Acres_Burned          2
Archive_Year                0
County_Name                 0
County_ID                   0
Fire_Started_Date           0
Fire_Extinguished_Date     43
Injuries                    0
Latitude                    0
Longitude                   0
Major_Incident              0
Location                    0
Fire_Name                   0
Percent_Contained           2
State                     953
Cause_Description         954
Structure_Impacted          0
dtype: int64

In [30]:
# Label fires with no cause description as 'Unidentified', then re-check null counts.
cause_filled = california_fire["Cause_Description"].fillna('Unidentified')
california_fire["Cause_Description"] = cause_filled
california_fire.isna().sum()

Total_Acres_Burned          2
Archive_Year                0
County_Name                 0
County_ID                   0
Fire_Started_Date           0
Fire_Extinguished_Date     43
Injuries                    0
Latitude                    0
Longitude                   0
Major_Incident              0
Location                    0
Fire_Name                   0
Percent_Contained           2
State                     953
Cause_Description           0
Structure_Impacted          0
dtype: int64

In [31]:
# Fill missing State values with 'CA', then re-check null counts.
state_filled = california_fire["State"].fillna('CA')
california_fire["State"] = state_filled
california_fire.isna().sum()

Total_Acres_Burned         2
Archive_Year               0
County_Name                0
County_ID                  0
Fire_Started_Date          0
Fire_Extinguished_Date    43
Injuries                   0
Latitude                   0
Longitude                  0
Major_Incident             0
Location                   0
Fire_Name                  0
Percent_Contained          2
State                      0
Cause_Description          0
Structure_Impacted         0
dtype: int64

In [32]:
# Same 'Unidentified' fill as two cells earlier; it has no further effect once that cell has run.
cause_filled = california_fire["Cause_Description"].fillna('Unidentified')
california_fire["Cause_Description"] = cause_filled
california_fire.isna().sum()

Total_Acres_Burned         2
Archive_Year               0
County_Name                0
County_ID                  0
Fire_Started_Date          0
Fire_Extinguished_Date    43
Injuries                   0
Latitude                   0
Longitude                  0
Major_Incident             0
Location                   0
Fire_Name                  0
Percent_Contained          2
State                      0
Cause_Description          0
Structure_Impacted         0
dtype: int64

In [33]:
# Discard rows lacking a containment percentage or an extinguished date, then re-check nulls.
california_fire = california_fire.dropna(subset=['Percent_Contained', 'Fire_Extinguished_Date'])
california_fire.isna().sum()

Total_Acres_Burned        0
Archive_Year              0
County_Name               0
County_ID                 0
Fire_Started_Date         0
Fire_Extinguished_Date    0
Injuries                  0
Latitude                  0
Longitude                 0
Major_Incident            0
Location                  0
Fire_Name                 0
Percent_Contained         0
State                     0
Cause_Description         0
Structure_Impacted        0
dtype: int64

In [34]:
# Per-fire facts table with lower-cased column names.
fire_facts = california_fire[
    ["Fire_Name", "Archive_Year", "Fire_Started_Date", "Fire_Extinguished_Date", "Cause_Description"]
].rename(columns=str.lower)

In [35]:
# Per-fire impact table with lower-cased column names.
fire_impacts = california_fire[
    ["Fire_Name", "Total_Acres_Burned", "County_ID", "Injuries", "Structure_Impacted"]
].rename(columns=str.lower)

In [36]:
# Rename CAUSE to cause_id and lower-case the remaining column names of the lookup table.
fire_cause = fire_cause.rename(columns={'CAUSE': 'cause_id'}).rename(columns=str.lower)

In [37]:
# Incident table with lower-cased column names; display the resulting column index.
fire_incidents = california_fire[
    [
        "Fire_Name",
        "Fire_Started_Date",
        "Fire_Extinguished_Date",
        "Archive_Year",
        "County_ID",
        "State",
        "Cause_Description",
        "Percent_Contained",
        "Total_Acres_Burned",
        "Major_Incident",
    ]
].rename(columns=str.lower)
fire_incidents.columns

Index(['fire_name', 'fire_started_date', 'fire_extinguished_date',
       'archive_year', 'county_id', 'state', 'cause_description',
       'percent_contained', 'total_acres_burned', 'major_incident'],
      dtype='object')

In [38]:
# Extract weather data: for every fire, look up the daily Meteostat record for the
# fire's start date at the fire's coordinates.

# Placeholder values recorded when Meteostat returns no row for a location/date.
# NOTE(review): these are real numbers, not NaN, so a later dropna() will not remove
# them — confirm downstream consumers filter -273 / -1 as "missing".
MISSING_TEMP = -273
MISSING_WIND = -1


def _first_or_default(series, default):
    """Return the first value of ``series`` by position, or ``default`` if it is empty."""
    # .iloc is explicitly positional. Plain ``series[0]`` on a non-integer index relies on
    # pandas' deprecated integer-as-position fallback.
    return series.iloc[0] if len(series) > 0 else default


tmax = []
tmin = []
tavg = []
wspd = []

for index, row in california_fire[["Latitude", "Longitude", "Fire_Started_Date"]].iterrows():

    # Query a single day: midnight of the start date is used as both start and end.
    start = datetime.combine(row["Fire_Started_Date"], time.min)

    # Meteostat point built from this fire's coordinates.
    # NOTE(review): some rows carry Latitude/Longitude of 0.0, which is unlikely to be a
    # real location for these fires — consider excluding them before the lookup.
    location = Point(row["Latitude"], row["Longitude"])

    # Get weather data for the date.
    data = Daily(location, start, start).fetch()

    # Temperatures are in °C and wind speed in km/h (per the original cell's notes).
    tmax.append(_first_or_default(data.tmax, MISSING_TEMP))
    tmin.append(_first_or_default(data.tmin, MISSING_TEMP))
    tavg.append(_first_or_default(data.tavg, MISSING_TEMP))
    wspd.append(_first_or_default(data.wspd, MISSING_WIND))



In [39]:
# Attach the collected weather readings as new columns; the lists are in row order.
california_fire = california_fire.assign(
    Minimum_Temp=tmin,
    Maximum_Temp=tmax,
    Average_Temp=tavg,
    Wind_Speed=wspd,
)

In [40]:
# Discard rows where any of the four weather readings is NaN.
california_fire = california_fire.dropna(
    subset=['Minimum_Temp', 'Maximum_Temp', 'Average_Temp', 'Wind_Speed']
)

In [41]:
# Per-fire location and weather table with lower-cased column names.
fire_locations = california_fire[
    [
        "Fire_Name",
        "County_ID",
        "County_Name",
        "Location",
        "Latitude",
        "Longitude",
        "Minimum_Temp",
        "Maximum_Temp",
        "Average_Temp",
        "Wind_Speed",
    ]
].rename(columns=str.lower)

In [42]:
# Count missing values per column after adding the weather columns.
california_fire.isna().sum()

Total_Acres_Burned        0
Archive_Year              0
County_Name               0
County_ID                 0
Fire_Started_Date         0
Fire_Extinguished_Date    0
Injuries                  0
Latitude                  0
Longitude                 0
Major_Incident            0
Location                  0
Fire_Name                 0
Percent_Contained         0
State                     0
Cause_Description         0
Structure_Impacted        0
Minimum_Temp              0
Maximum_Temp              0
Average_Temp              0
Wind_Speed                0
dtype: int64

In [43]:
# Count missing values per column (same check as the previous cell).
california_fire.isna().sum()

Total_Acres_Burned        0
Archive_Year              0
County_Name               0
County_ID                 0
Fire_Started_Date         0
Fire_Extinguished_Date    0
Injuries                  0
Latitude                  0
Longitude                 0
Major_Incident            0
Location                  0
Fire_Name                 0
Percent_Contained         0
State                     0
Cause_Description         0
Structure_Impacted        0
Minimum_Temp              0
Maximum_Temp              0
Average_Temp              0
Wind_Speed                0
dtype: int64

In [44]:
# Display the cleaned DataFrame, including the weather columns.
california_fire

Unnamed: 0,Total_Acres_Burned,Archive_Year,County_Name,County_ID,Fire_Started_Date,Fire_Extinguished_Date,Injuries,Latitude,Longitude,Major_Incident,Location,Fire_Name,Percent_Contained,State,Cause_Description,Structure_Impacted,Minimum_Temp,Maximum_Temp,Average_Temp,Wind_Speed
0,257314.0,2013,Tuolumne,55,2013-08-17,2013-09-06,0.0,37.857000,-120.086000,False,3 miles east of Groveland along Hwy 120,Rim Fire,100.0,CA,Unidentified,0.0,17.0,36.0,25.8,6.0
1,30274.0,2013,Los Angeles,19,2013-05-30,2013-06-08,0.0,34.585595,-118.423176,False,Angeles National Forest,Powerhouse Fire,100.0,CA,Unidentified,0.0,16.7,32.2,24.0,36.9
2,27531.0,2013,Riverside,33,2013-07-15,2013-07-30,0.0,33.709500,-116.728850,False,Hwy 243 & Hwy 74 near Mountain Center,Mountain Fire,100.0,CA,Miscellaneous,0.0,28.3,43.3,36.3,14.0
3,27440.0,2013,Placer,31,2013-08-10,2013-08-30,0.0,39.120000,-120.650000,False,"Deadwood Ridge, northeast of Foresthill",American Fire,100.0,CA,Unidentified,0.0,-273.0,-273.0,-273.0,-1.0
4,24251.0,2013,Ventura,56,2013-05-02,2013-05-11,10.0,0.000000,0.000000,True,Southbound Highway 101 at Camarillo Springs Ro...,Springs Fire,100.0,CA,Unidentified,16.0,-273.0,-273.0,-273.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1714,11.0,2019,San Bernardino,36,2019-10-21,2019-10-22,0.0,34.033117,-117.215450,False,Beaumont and San Timorese Canyon,Beaumont Fire,100.0,CA,Unidentified,0.0,9.0,31.9,20.6,4.1
1715,11.0,2019,Tehama,52,2019-10-07,2019-10-07,0.0,40.314187,-122.070055,False,Highway 36E east of Dales Station and Manton Road,Dales Fire,100.0,CA,Unidentified,0.0,10.0,31.7,20.2,9.0
1720,10.0,2019,San Diego,37,2019-10-15,2019-10-16,0.0,32.762706,-117.097441,False,"Off Fairmont Avenue and Aldine Drive, Northwes...",Fairmont Fire,100.0,CA,Unidentified,0.0,13.9,24.4,18.9,7.9
1722,10.0,2019,Amador,3,2019-09-25,2019-09-25,1.0,38.332083,-120.671310,True,"Electra Road and Highway 49, east of Jackson",Electra Fire,100.0,CA,Unidentified,0.0,16.6,34.0,25.4,5.6


In [45]:
# Final check of missing values per column before writing to the database.
california_fire.isna().sum()

Total_Acres_Burned        0
Archive_Year              0
County_Name               0
County_ID                 0
Fire_Started_Date         0
Fire_Extinguished_Date    0
Injuries                  0
Latitude                  0
Longitude                 0
Major_Incident            0
Location                  0
Fire_Name                 0
Percent_Contained         0
State                     0
Cause_Description         0
Structure_Impacted        0
Minimum_Temp              0
Maximum_Temp              0
Average_Temp              0
Wind_Speed                0
dtype: int64

In [46]:
# Connect Pandas to SQL and write the prepared tables to PostgreSQL.
from urllib.parse import quote_plus

from sqlalchemy import create_engine
from config import db_password

# URL-encode the password so characters such as '@', ':' or '/' cannot corrupt the
# connection URL.
db_string = f"postgresql://postgres:{quote_plus(db_password)}@127.0.0.1:5432/California_WildFire"
engine = create_engine(db_string)

# Table name -> DataFrame, written in this order.
# NOTE(review): if_exists='append' adds rows on every execution, so re-running this cell
# inserts the same data again — confirm the target tables are emptied beforehand.
# NOTE(review): fire_facts is built earlier in the notebook but is not written here.
tables_to_write = {
    'fire_cause': fire_cause,
    'fire_incidents': fire_incidents,
    'fire_impacts': fire_impacts,
    'fire_locations': fire_locations,
}
for table_name, table_frame in tables_to_write.items():
    table_frame.to_sql(name=table_name, con=engine, if_exists='append', index=False)