In [138]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from datetime import date as datetime_date
import warnings
warnings.filterwarnings('ignore')

In [None]:
#importing historical wildfires dataset and adding missing dates (days with no fires)

In [139]:
# Load the historical wildfire records and parse the Date column as datetimes.
wildFiresDF = pd.read_csv('Historical_Wildfires.csv')
wildFiresDF["Date"] = pd.to_datetime(wildFiresDF.Date)

# Insert missing date for each territory.
# Daily calendar from the first observed date through 2021-01-29 (inclusive).
# pd.date_range makes the inclusive end explicit and avoids the
# np.arange(...).astype(datetime) round-trip.
t = pd.date_range(start=wildFiresDF.Date.min(), end='2021-01-29', freq='D')
df_date = pd.DataFrame({'Date': t})
Region = wildFiresDF.Region.unique()

# Left-join every region onto the calendar: dates without a record become
# rows whose measurement columns are NaN. The region label is restored on
# those rows so each per-region block stays self-describing.
region_frames = []
for i in Region:
    df = df_date.merge(wildFiresDF[wildFiresDF.Region == i], how='left', on='Date')
    df['Region'] = df['Region'].fillna(i)
    region_frames.append(df)

# Concatenate once instead of growing the frame inside the loop. Each region
# keeps its own 0..n-1 index, so index labels repeat across regions.
wildFiresDF_complete = pd.concat(region_frames) if region_frames else pd.DataFrame()
    

    


In [1]:
#checking time range in the dataset

In [140]:
# Display (latest date, earliest date) to confirm the calendar covered by the completed frame.
wildFiresDF_complete.Date.max(), wildFiresDF_complete.Date.min()

(Timestamp('2021-01-29 00:00:00'), Timestamp('2005-01-01 00:00:00'))

In [141]:
# Compute year / month / day variables from the Date column.
wildFiresDF_complete['year'] = wildFiresDF_complete['Date'].dt.year
wildFiresDF_complete['month'] = wildFiresDF_complete['Date'].dt.month
wildFiresDF_complete['day'] = wildFiresDF_complete['Date'].dt.day

# count_nan (used for the visualization) is 1 where the fire area is missing
# and 0 otherwise. It must be computed BEFORE the NaNs are filled below,
# otherwise every row would be flagged 0.
wildFiresDF_complete['count_nan'] = (
    wildFiresDF_complete['Estimated_fire_area'].isna().astype('int64')
)

# Fill the NaN values of the variables: numeric measurements become 0 and the
# 'Replaced' flag becomes 'N'. Columns not listed here (e.g. Std_confidence,
# Var_confidence) are intentionally left untouched.
nan_fill_values = {
    'Estimated_fire_area': 0,
    'Mean_estimated_fire_brightness': 0,
    'Mean_estimated_fire_radiative_power': 0,
    'Mean_confidence': 0,
    'Count': 0,
    'Replaced': 'N',
}
wildFiresDF_complete = wildFiresDF_complete.fillna(value=nan_fill_values)

In [2]:
#checking the shape of the dataset

In [143]:
# (rows, columns) of the completed frame, including the derived year/month/day/count_nan columns.
wildFiresDF_complete.shape

(41111, 14)

In [3]:
#checking the dataset

In [144]:
# Preview the first rows; dates that had no record show zero-filled measurements and count_nan = 1.
wildFiresDF_complete.head()

Unnamed: 0,Date,Region,Estimated_fire_area,Mean_estimated_fire_brightness,Mean_estimated_fire_radiative_power,Mean_confidence,Std_confidence,Var_confidence,Count,Replaced,year,month,day,count_nan
0,2005-01-01,NSW,0.0,0.0,0.0,0.0,,,0.0,N,2005,1,1,1
1,2005-01-02,NSW,0.0,0.0,0.0,0.0,,,0.0,N,2005,1,2,1
2,2005-01-03,NSW,0.0,0.0,0.0,0.0,,,0.0,N,2005,1,3,1
3,2005-01-04,NSW,8.68,312.266667,42.4,78.666667,2.886751,8.333333,3.0,R,2005,1,4,0
4,2005-01-05,NSW,16.61125,322.475,62.3625,85.5,8.088793,65.428571,8.0,R,2005,1,5,0


In [4]:
#adding predictions dates to the dataset (30th January, 31st January, 1st February)

In [145]:
# Extend the calendar through 2021-02-01 (inclusive) so each territory gains
# rows for the three prediction dates (2021-01-30, 2021-01-31, 2021-02-01).
# pd.date_range makes the inclusive end explicit and avoids the
# np.arange(...).astype(datetime) round-trip.
tdate_pred = pd.date_range(start=wildFiresDF_complete.Date.min(), end='2021-02-01', freq='D')
df_date_pred = pd.DataFrame({'Date': tdate_pred})
Region = wildFiresDF_complete.Region.unique()

# Left-join every region onto the extended calendar. The appended prediction
# rows have NaN in every column except Date and Region; because of those NaNs
# the integer columns (year, month, day, count_nan) come back as floats.
pred_frames = []
for i in Region:
    df_pred = df_date_pred.merge(
        wildFiresDF_complete[wildFiresDF_complete.Region == i], how='left', on='Date'
    )
    df_pred['Region'] = df_pred['Region'].fillna(i)
    pred_frames.append(df_pred)

# Concatenate once instead of growing the frame inside the loop.
wildFiresDF_complete_pred = pd.concat(pred_frames) if pred_frames else pd.DataFrame()

In [5]:
#checking the new shape (21 rows have been added: 7 rows, one per region, for each of the three days)

In [146]:
# Expect 21 more rows than the completed frame: 3 prediction dates x 7 regions.
wildFiresDF_complete_pred.shape

(41132, 14)

In [147]:
# The last rows should be the appended prediction dates, with NaN in every column except Date and Region.
wildFiresDF_complete_pred.tail()

Unnamed: 0,Date,Region,Estimated_fire_area,Mean_estimated_fire_brightness,Mean_estimated_fire_radiative_power,Mean_confidence,Std_confidence,Var_confidence,Count,Replaced,year,month,day,count_nan
5871,2021-01-28,WA,13.824,330.595,61.65,90.0,8.11035,65.777778,10.0,N,2021.0,1.0,28.0,0.0
5872,2021-01-29,WA,0.0,0.0,0.0,0.0,,,0.0,N,2021.0,1.0,29.0,1.0
5873,2021-01-30,WA,,,,,,,,,,,,
5874,2021-01-31,WA,,,,,,,,,,,,
5875,2021-02-01,WA,,,,,,,,,,,,


In [6]:
#checking that there are 7 rows (one per region) for each prediction day (30th January, 31st January, 1st February)

In [148]:
# Number of rows dated 2021-02-01; one per region is expected (7).
len(wildFiresDF_complete_pred.loc[wildFiresDF_complete_pred.Date == "2021-02-01"])

7

In [149]:
# Number of rows dated 2021-01-30; one per region is expected (7).
len(wildFiresDF_complete_pred.loc[wildFiresDF_complete_pred.Date == "2021-01-30"])

7

In [150]:
# Number of rows dated 2021-01-31; one per region is expected (7).
len(wildFiresDF_complete_pred.loc[wildFiresDF_complete_pred.Date == "2021-01-31"])

7

In [7]:
#adding columns for year, month, day

In [175]:
# Re-derive the calendar parts from Date for every row: the merge that added
# the prediction dates left year/month/day as NaN on the new rows.
pred_date_accessor = wildFiresDF_complete_pred['Date'].dt
for part_name in ('year', 'month', 'day'):
    wildFiresDF_complete_pred[part_name] = getattr(pred_date_accessor, part_name)

In [8]:
#creating a dataframe for each region (7 dataframes overall)

In [176]:
# Region codes, in the order the per-region frames are stored in `lista`.
region_codes = ['NSW', 'NT', 'QL', 'SA', 'TA', 'VI', 'WA']

# One DataFrame per region, in the same order as region_codes. The explicit
# .copy() makes each frame independent of wildFiresDF_complete_pred, so adding
# feature columns to it later is a plain assignment and not a write to a
# filtered slice (which raises SettingWithCopyWarning).
lista = [
    wildFiresDF_complete_pred.loc[wildFiresDF_complete_pred.Region == code].copy()
    for code in region_codes
]

In [177]:
# Bind each per-region frame to a name matching its region code;
# `lista` holds the seven frames in exactly this order.
NSW, NT, QL, SA, TA, VI, WA = lista

In [178]:
# Re-check the combined frame: recomputing year/month/day overwrites existing columns, so the shape should be unchanged.
wildFiresDF_complete_pred.shape

(41132, 14)

In [10]:
#checking the shape of each dataframe 
#adding lag and roll variables (for fire area, brightness, radiative power and count)
#saving the dataframe

In [179]:
# (rows, columns) of the NSW frame before the lag and rolling features are added.
NSW.shape

(5876, 14)

In [180]:
# Lag and rolling-mean features for NSW.
# Every source series is shifted down by 3 rows, so the three prediction rows
# at the end of the frame receive values from three rows earlier; the rolling
# means (windows of 5, 10 and 15 rows) are taken over that shifted series.
# Column names are reproduced exactly as before, including the extra
# underscore in the fire_area_mean_* names, because they end up in the CSV.
feature_specs = (
    # (source column, lag column, rolling-mean column prefix)
    ("Estimated_fire_area", "fire_area_lag3", "fire_area_mean_"),
    ("Mean_estimated_fire_brightness", "brightness_lag3", "brightness_mean"),
    ("Mean_estimated_fire_radiative_power", "radiative_power_lag3", "radiative_power_mean"),
    ("Count", "count_lag3", "count_mean"),
)
for source_col, lag_col, mean_prefix in feature_specs:
    shifted = NSW[source_col].shift(3)
    NSW[lag_col] = shifted
    for window in (5, 10, 15):
        NSW[mean_prefix + str(window)] = shifted.rolling(window).mean()

In [181]:
# Save the NSW feature table to NSW.csv; index=False keeps the row index out of the file.
NSW.to_csv("NSW.csv", index=False)

In [182]:
# (rows, columns) of the QL frame before the lag and rolling features are added.
QL.shape

(5876, 14)

In [183]:
# Lag and rolling-mean features for QL.
# Every source series is shifted down by 3 rows, so the three prediction rows
# at the end of the frame receive values from three rows earlier; the rolling
# means (windows of 5, 10 and 15 rows) are taken over that shifted series.
# Column names are reproduced exactly as before, including the extra
# underscore in the fire_area_mean_* names, because they end up in the CSV.
feature_specs = (
    # (source column, lag column, rolling-mean column prefix)
    ("Estimated_fire_area", "fire_area_lag3", "fire_area_mean_"),
    ("Mean_estimated_fire_brightness", "brightness_lag3", "brightness_mean"),
    ("Mean_estimated_fire_radiative_power", "radiative_power_lag3", "radiative_power_mean"),
    ("Count", "count_lag3", "count_mean"),
)
for source_col, lag_col, mean_prefix in feature_specs:
    shifted = QL[source_col].shift(3)
    QL[lag_col] = shifted
    for window in (5, 10, 15):
        QL[mean_prefix + str(window)] = shifted.rolling(window).mean()

In [184]:
# Save the QL feature table to QL.csv; index=False keeps the row index out of the file.
QL.to_csv("QL.csv", index=False)

In [185]:
# (rows, columns) of the SA frame before the lag and rolling features are added.
SA.shape

(5876, 14)

In [186]:
# Lag and rolling-mean features for SA.
# Every source series is shifted down by 3 rows, so the three prediction rows
# at the end of the frame receive values from three rows earlier; the rolling
# means (windows of 5, 10 and 15 rows) are taken over that shifted series.
# Column names are reproduced exactly as before, including the extra
# underscore in the fire_area_mean_* names, because they end up in the CSV.
feature_specs = (
    # (source column, lag column, rolling-mean column prefix)
    ("Estimated_fire_area", "fire_area_lag3", "fire_area_mean_"),
    ("Mean_estimated_fire_brightness", "brightness_lag3", "brightness_mean"),
    ("Mean_estimated_fire_radiative_power", "radiative_power_lag3", "radiative_power_mean"),
    ("Count", "count_lag3", "count_mean"),
)
for source_col, lag_col, mean_prefix in feature_specs:
    shifted = SA[source_col].shift(3)
    SA[lag_col] = shifted
    for window in (5, 10, 15):
        SA[mean_prefix + str(window)] = shifted.rolling(window).mean()

In [187]:
# Save the SA feature table to SA.csv; index=False keeps the row index out of the file.
SA.to_csv("SA.csv", index=False)

In [188]:
# (rows, columns) of the TA frame before the lag and rolling features are added.
TA.shape

(5876, 14)

In [189]:
# Lag and rolling-mean features for TA.
# Every source series is shifted down by 3 rows, so the three prediction rows
# at the end of the frame receive values from three rows earlier; the rolling
# means (windows of 5, 10 and 15 rows) are taken over that shifted series.
# Column names are reproduced exactly as before, including the extra
# underscore in the fire_area_mean_* names, because they end up in the CSV.
feature_specs = (
    # (source column, lag column, rolling-mean column prefix)
    ("Estimated_fire_area", "fire_area_lag3", "fire_area_mean_"),
    ("Mean_estimated_fire_brightness", "brightness_lag3", "brightness_mean"),
    ("Mean_estimated_fire_radiative_power", "radiative_power_lag3", "radiative_power_mean"),
    ("Count", "count_lag3", "count_mean"),
)
for source_col, lag_col, mean_prefix in feature_specs:
    shifted = TA[source_col].shift(3)
    TA[lag_col] = shifted
    for window in (5, 10, 15):
        TA[mean_prefix + str(window)] = shifted.rolling(window).mean()

In [190]:
# Save the TA feature table to TA.csv; index=False keeps the row index out of the file.
TA.to_csv("TA.csv", index=False)

In [191]:
# (rows, columns) of the VI frame before the lag and rolling features are added.
VI.shape

(5876, 14)

In [192]:
# Lag and rolling-mean features for VI.
# Every source series is shifted down by 3 rows, so the three prediction rows
# at the end of the frame receive values from three rows earlier; the rolling
# means (windows of 5, 10 and 15 rows) are taken over that shifted series.
# Column names are reproduced exactly as before, including the extra
# underscore in the fire_area_mean_* names, because they end up in the CSV.
feature_specs = (
    # (source column, lag column, rolling-mean column prefix)
    ("Estimated_fire_area", "fire_area_lag3", "fire_area_mean_"),
    ("Mean_estimated_fire_brightness", "brightness_lag3", "brightness_mean"),
    ("Mean_estimated_fire_radiative_power", "radiative_power_lag3", "radiative_power_mean"),
    ("Count", "count_lag3", "count_mean"),
)
for source_col, lag_col, mean_prefix in feature_specs:
    shifted = VI[source_col].shift(3)
    VI[lag_col] = shifted
    for window in (5, 10, 15):
        VI[mean_prefix + str(window)] = shifted.rolling(window).mean()

In [193]:
# Save the VI feature table to VI.csv; index=False keeps the row index out of the file.
VI.to_csv("VI.csv", index=False)

In [194]:
# (rows, columns) of the WA frame before the lag and rolling features are added.
WA.shape

(5876, 14)

In [195]:
# Lag and rolling-mean features for WA.
# Every source series is shifted down by 3 rows, so the three prediction rows
# at the end of the frame receive values from three rows earlier; the rolling
# means (windows of 5, 10 and 15 rows) are taken over that shifted series.
# Column names are reproduced exactly as before, including the extra
# underscore in the fire_area_mean_* names, because they end up in the CSV.
feature_specs = (
    # (source column, lag column, rolling-mean column prefix)
    ("Estimated_fire_area", "fire_area_lag3", "fire_area_mean_"),
    ("Mean_estimated_fire_brightness", "brightness_lag3", "brightness_mean"),
    ("Mean_estimated_fire_radiative_power", "radiative_power_lag3", "radiative_power_mean"),
    ("Count", "count_lag3", "count_mean"),
)
for source_col, lag_col, mean_prefix in feature_specs:
    shifted = WA[source_col].shift(3)
    WA[lag_col] = shifted
    for window in (5, 10, 15):
        WA[mean_prefix + str(window)] = shifted.rolling(window).mean()

In [196]:
# Save the WA feature table to WA.csv; index=False keeps the row index out of the file.
WA.to_csv("WA.csv", index=False)

In [197]:
# List the final columns to confirm the 16 lag / rolling-mean features were appended after the original 14.
WA.columns

Index(['Date', 'Region', 'Estimated_fire_area',
       'Mean_estimated_fire_brightness', 'Mean_estimated_fire_radiative_power',
       'Mean_confidence', 'Std_confidence', 'Var_confidence', 'Count',
       'Replaced', 'year', 'month', 'day', 'count_nan', 'fire_area_lag3',
       'fire_area_mean_5', 'fire_area_mean_10', 'fire_area_mean_15',
       'brightness_lag3', 'brightness_mean5', 'brightness_mean10',
       'brightness_mean15', 'radiative_power_lag3', 'radiative_power_mean5',
       'radiative_power_mean10', 'radiative_power_mean15', 'count_lag3',
       'count_mean5', 'count_mean10', 'count_mean15'],
      dtype='object')

In [198]:
# (rows, columns) of the NT frame before the lag and rolling features are added.
NT.shape

(5876, 14)

In [199]:
# Lag and rolling-mean features for NT.
# Every source series is shifted down by 3 rows, so the three prediction rows
# at the end of the frame receive values from three rows earlier; the rolling
# means (windows of 5, 10 and 15 rows) are taken over that shifted series.
# Column names are reproduced exactly as before, including the extra
# underscore in the fire_area_mean_* names, because they end up in the CSV.
feature_specs = (
    # (source column, lag column, rolling-mean column prefix)
    ("Estimated_fire_area", "fire_area_lag3", "fire_area_mean_"),
    ("Mean_estimated_fire_brightness", "brightness_lag3", "brightness_mean"),
    ("Mean_estimated_fire_radiative_power", "radiative_power_lag3", "radiative_power_mean"),
    ("Count", "count_lag3", "count_mean"),
)
for source_col, lag_col, mean_prefix in feature_specs:
    shifted = NT[source_col].shift(3)
    NT[lag_col] = shifted
    for window in (5, 10, 15):
        NT[mean_prefix + str(window)] = shifted.rolling(window).mean()

In [200]:
# Save the NT feature table to NT.csv; index=False keeps the row index out of the file.
NT.to_csv("NT.csv", index=False)

1. Each dataframe has been saved and will be imported into the main Jupyter notebook to add the remaining features:
 - soilwater 
 - score feature
2. The same preprocessing that was applied when creating the model will be applied to these dataframes.
3. Predictions will be made for the three days for each region.