# Merging wildfires, land cover and meteorology data together
This is the final step of data preprocessing before the data is fed into machine learning models.

In [1]:
# Libraries imports
import pandas as pd
import numpy as np

In [2]:
# Load the four input tables used by the merges below.
# Inputs read from the `cleaned` data directory:
wildfires = pd.read_csv(filepath_or_buffer='../../data/cleaned/wildfires_all.csv')
landcover = pd.read_csv(filepath_or_buffer='../../data/cleaned/landcover.csv')
# Inputs read from the `processed` data directory:
historical_precip = pd.read_csv(
    filepath_or_buffer='../../data/processed/meteorology_historical_rain_snow_sum_6mo.csv'
)
weather = pd.read_csv(filepath_or_buffer='../../data/processed/wildfire_weather.csv')

In [3]:
# Join historical precipitation (left) with the wildfire records (right).
# Inner join: only fires whose 'UniqueFireIdentifier' appears in both tables are kept.
data = pd.merge(
    historical_precip,
    wildfires,
    how='inner',
    on='UniqueFireIdentifier',
)

In [4]:
# Attach the meteorological conditions for the first day of each fire.
# Both frames share the key column name, so a single `on` is enough;
# inner join keeps only ids present on both sides.
data1 = pd.merge(data, weather, how='inner', on='id')

In [5]:
# Attach vegetation / land cover types to each fire.
# The key is named 'id' on the left and 'fire_id' in the landcover table;
# inner join keeps only fires that have a landcover row.
data2 = pd.merge(
    data1,
    landcover,
    how='inner',
    left_on='id',
    right_on='fire_id',
)

In [7]:
# Columns that do not participate in fire acreage prediction.
# NOTE: DataFrame.drop raises KeyError if any of these labels is missing from data2.
columns_to_drop = [
    # CamelCase-named columns
    'UniqueFireIdentifier', 'X', 'Y',
    'ContainmentDateTime', 'ControlDateTime', 'DailyAcres',
    'FireCause', 'FireDiscoveryDateTime',
    'IncidentTypeCategory', 'IncidentTypeKind',
    'InitialLatitude', 'InitialLongitude',
    'IrwinID', 'LocalIncidentIdentifier',
    'POOCounty', 'POODispatchCenterID', 'POOFips', 'POOState',
    # '_x' / '_y' suffixed fire_id columns (presumably created by the merges
    # because both sides carried a 'fire_id' column -- TODO confirm)
    'fire_id_x', 'fire_id_y',
    # lower-case-named columns
    'x', 'y',
    'firecause', 'firediscoverydatetime', 'incidenttypecategory',
    'containmentdatetime', 'controldatetime',
    'dailyacres', 'discoveryacres',
    'incidenttypekind', 'initiallatitude', 'initiallongitude',
    'irwinid', 'localincidentidentifier',
    'poocounty', 'poodispatchcenterid', 'poofips', 'poostate',
    'uniquefireidentifier',
    # aggregated precipitation / weather columns
    'rain_sum',
    'wind_speed_2m_mean', 'wind_speed_2m_max_mean',
    'wind_speed_10m_mean', 'wind_speed_10m_max_mean',
    'humidity_mean', 'temp_2m_mean',
]

master_dataset = data2.drop(columns=columns_to_drop)

In [11]:
# Use the fire `id` as the row index.
# Gotcha: after this cell runs, 'id' is no longer a regular column, so running
# the cell a second time on the same frame raises KeyError.
master_dataset = master_dataset.set_index('id')

In [12]:
# Export of the final dataset to be used in modeling.
# The preceding cell moved `id` into the index, so the index has to be written
# out: with index=False the fire identifier was silently dropped from the CSV
# and the set_index step had no effect on the exported file.
# Readers can restore it with pd.read_csv(..., index_col='id').
master_dataset.to_csv('../../data/processed/master_dataset.csv', index=True)