# Merging wildfires, land cover and meteorology data together
This is the final data-preprocessing step before the data is fed into machine learning models.

In [1]:
# Libraries imports
import pandas as pd
import numpy as np

In [12]:
# Read the four input tables: the cleaned wildfire and land-cover records
# plus the processed precipitation and weather extracts.
wildfires, landcover, historical_precip, weather = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../../data/cleaned/wildfires_all.csv',                               # -> wildfires
        '../../data/cleaned/landcover.csv',                                   # -> landcover
        '../../data/processed/meteorology_historical_rain_snow_sum_6mo.csv',  # -> historical_precip
        '../../data/processed/wildfire_weather.csv',                          # -> weather
    )
)

In [13]:
# Sanity check: (rows, columns) of the wildfires table
wildfires.shape

(21517, 21)

In [14]:
# Sanity check: (rows, columns) of the land-cover table
landcover.shape

(21519, 2)

In [15]:
# Sanity check: (rows, columns) of the historical precipitation table
historical_precip.shape

(2128, 4)

In [16]:
# Sanity check: (rows, columns) of the weather table
weather.shape

(21518, 37)

Master dataset for filtered data (fires larger than 1 acre that lasted longer than 24 hours)

In [25]:
# Pair each historical-precipitation record with its wildfire attributes.
# merge() defaults to an inner join, so only fires whose UniqueFireIdentifier
# appears in both tables are kept.
data = pd.merge(historical_precip, wildfires, on='UniqueFireIdentifier')

In [26]:
# Add the first-day-of-fire meteorological conditions, matched on the
# 'id' column that both tables share (inner join).
data1 = pd.merge(data, weather, on='id')

In [27]:
# Add the vegetation (land-cover) type of each fire.
# The key is called 'id' on the left and 'fire_id' in landcover, so both
# key columns remain in the result.
data2 = pd.merge(data1, landcover, left_on='id', right_on='fire_id')

In [38]:
# Columns that do not take part in fire-acreage prediction.
# The list keeps its original order; `cols` is reused further down for the
# all-fires dataset.
cols = [
    # Capitalised incident attributes
    'UniqueFireIdentifier', 'X', 'Y',
    'ContainmentDateTime', 'ControlDateTime', 'DailyAcres',
    'FireCause', 'FireDiscoveryDateTime',
    'IncidentTypeCategory', 'IncidentTypeKind',
    'InitialLatitude', 'InitialLongitude',
    'IrwinID', 'LocalIncidentIdentifier',
    'POOCounty', 'POODispatchCenterID', 'POOFips', 'POOState',
    # Lower-case attribute names, the '_x'/'_y'-suffixed fire_id keys
    # (presumably merge suffixes - verify), and rain_sum
    'fire_id_x', 'x', 'y',
    'firecause', 'firediscoverydatetime', 'incidenttypecategory',
    'containmentdatetime', 'controldatetime',
    'dailyacres', 'discoveryacres', 'incidenttypekind',
    'initiallatitude', 'initiallongitude',
    'rain_sum', 'fire_id_y',
    'irwinid', 'localincidentidentifier',
    'poocounty', 'poodispatchcenterid', 'poofips', 'poostate',
    'uniquefireidentifier',
    # Mean wind, humidity and temperature columns
    'wind_speed_2m_mean', 'wind_speed_2m_max_mean',
    'wind_speed_10m_mean', 'wind_speed_10m_max_mean',
    'humidity_mean', 'temp_2m_mean',
]

# drop() raises KeyError if any listed column is missing from data2.
master_dataset = data2.drop(labels=cols, axis='columns')

In [11]:
# Use the fire 'id' as the row index.
# The guard keeps this cell safe to re-execute: once 'id' is the index it is
# no longer a column, so calling set_index('id') a second time would raise
# KeyError. If 'id' is neither the index nor a column, set_index still raises.
if master_dataset.index.name != 'id':
    master_dataset = master_dataset.set_index('id')

In [12]:
# Write the final, filtered dataset to disk for use in modeling.
# NOTE(review): 'id' was moved into the index in the previous cell and
# index=False leaves the index out of the file, so the exported CSV has no
# 'id' column - confirm that discarding the fire id here is intended.
master_dataset.to_csv(
    path_or_buf='../../data/processed/master_dataset.csv',
    index=False,
)

Repeating the same sequence for all fires (this time without the historical-precipitation merge)

In [28]:
# Join every wildfire record to its weather observations (inner join).
# The key is 'fire_id' in wildfires and 'id' in weather.
data_1 = pd.merge(wildfires, weather, left_on='fire_id', right_on='id')

In [35]:
# Add the land-cover type of each fire: 'id' on the left matches
# 'fire_id' in landcover (inner join).
data_2 = pd.merge(data_1, landcover, left_on='id', right_on='fire_id')

In [36]:
# Sanity check: (rows, columns) of the merged all-fires table
data_2.shape

(20276, 60)

In [39]:
# Remove the columns that do not take part in fire-acreage prediction.
# `cols` is the list defined in the filtered-data section above; drop()
# raises KeyError if any of those columns is missing from data_2.
master_dataset_allfires = data_2.drop(labels=cols, axis='columns')

In [40]:
# Use the fire 'id' as the row index.
# The guard keeps this cell safe to re-execute: once 'id' is the index it is
# no longer a column, so calling set_index('id') a second time would raise
# KeyError. If 'id' is neither the index nor a column, set_index still raises.
if master_dataset_allfires.index.name != 'id':
    master_dataset_allfires = master_dataset_allfires.set_index('id')

In [42]:
# Write the final all-fires dataset to disk for use in modeling.
# NOTE(review): 'id' was moved into the index in the previous cell and
# index=False leaves the index out of the file, so the exported CSV has no
# 'id' column - confirm that discarding the fire id here is intended.
master_dataset_allfires.to_csv(
    path_or_buf='../../data/processed/master_dataset_allfires.csv',
    index=False,
)