In [1]:
import numpy as np
import pandas as pd
from datetime import date, timedelta, datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib inline

# Create table

In [None]:
fire = pd.read_csv("../data/Historical_Wildfires.csv")
weather = pd.read_csv("../data/HistoricalWeather.csv")
fire["Date"] = pd.to_datetime(fire["Date"])
weather["Date"] = pd.to_datetime(weather["Date"])

fire = fire[["Date", "Region", "Estimated_fire_area"]]

In [None]:
def format_weather(weather):
    weather = weather.rename(columns = {"count()[unit: km^2]" : "count", 
                                        "min()": "min", 
                                        "max()": "max",
                                        "mean()": "mean"})
    weather = weather.pivot_table(values = ["max", "mean"], 
                                  index = ["Date", "Region"],
                                  columns = ["Parameter"])
    weather.reset_index(inplace = True)
    weather.columns = [col[0] if not(col[1]) else '{1}_{0}'.format(*col) for col in weather.columns.values]
    return weather

weather = format_weather(weather)

In [None]:
regions = list(set(fire["Region"]))

for region in regions:
    fire_reg = fire[fire["Region"] == region].drop("Region", axis = 1)
    weather_reg = weather[weather["Region"] == region].drop("Region", axis = 1)
    reg = weather_reg.merge(fire_reg, on = "Date")
    reg.to_csv(f"../data/regions/{region}.csv", index = False)

# Predicting NSW

In [3]:
nsw = pd.read_csv("../data/regions/NSW.csv").dropna()
nsw["Month"] = [date.month for date in pd.to_datetime(nsw["Date"])]

nsw.head()

Unnamed: 0,Date,Precipitation_max,RelativeHumidity_max,SoilWaterContent_max,SolarRadiation_max,Temperature_max,WindSpeed_max,Precipitation_mean,RelativeHumidity_mean,SoilWaterContent_mean,SolarRadiation_mean,Temperature_mean,WindSpeed_mean,Estimated_fire_area,Month
0,2005-01-04,22.842566,90.332771,0.444927,33.214062,28.945488,7.364222,2.80862,57.095628,0.214293,22.617291,23.055527,4.841764,8.68,1
1,2005-01-05,7.657155,88.623436,0.442955,33.554867,29.51012,7.091141,0.157935,47.170735,0.203951,28.076835,22.425765,4.01408,16.61125,1
2,2005-01-06,27.616505,92.85096,0.431879,34.075787,26.982698,9.905821,0.434833,39.874638,0.193668,30.561703,20.621405,4.477879,5.52,1
3,2005-01-07,3.839235,79.103134,0.418513,34.019218,24.752069,10.446799,0.064224,39.27941,0.183778,32.143718,19.632722,4.017578,6.264,1
4,2005-01-08,2.866673,83.001541,0.41337,33.265091,26.472469,6.671862,0.203352,42.400824,0.175935,29.347715,21.477315,3.88455,5.4,1


In [76]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.base import TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR
from scipy import stats

In [68]:
X, y = nsw.drop(["Estimated_fire_area", "Date"], axis = 1), nsw["Estimated_fire_area"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 101)

In [69]:
class FireTransform(TransformerMixin):
    def fit(self, df, y = None):
        self.months = list(set(df["Month"]))
        return self
    def transform(self, ori):
        df = ori.copy()
        for month in self.months:
            df[f"Month_{month}"] = (ori["Month"] == month).astype(int)
        df = df.drop("Month", axis = 1)
        return df

pipeline = Pipeline([
    ("transform", FireTransform()),
    ("model", xgb.XGBRegressor())
])

In [70]:
pipeline.fit(X_train, y_train)
preds = pipeline.predict(X_test)

rmse = np.sqrt(mean_squared_error(y_test, preds))
print("RMSE: %f" % (rmse))
rsq = r2_score(y_test, preds)
print("RSQ: %f" % (rsq))

RMSE: 189.238899
RSQ: 0.395606


In [77]:
pipeline = Pipeline([
    ("transform", FireTransform()),
    ("model", SVR())
])

In [78]:
pipeline.fit(X_train, y_train)
preds = pipeline.predict(X_test)

rmse = np.sqrt(mean_squared_error(y_test, preds))
print("RMSE: %f" % (rmse))
rsq = r2_score(y_test, preds)
print("RSQ: %f" % (rsq))

RMSE: 245.662690
RSQ: -0.018539
