In [1]:
# Load data analysis libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Load machine learning libraries
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [3]:
# Columns to read from the original csv into the DataFrame.
# The order below is the order the names were originally listed in.
all_cols = [
    # ID, acquisition and FRP fields
    'Polygon_ID', 'acq_date', 'frp', 'acq_time',
    # *_sum / *_mean pairs
    'ELEV_sum', 'ELEV_mean',
    'SLP_sum', 'SLP_mean',
    'EVT_sum', 'EVT_mean',
    'EVH_sum', 'EVH_mean',
    'EVC_sum', 'EVC_mean',
    'CBD_sum', 'CBD_mean',
    'CBH_sum', 'CBH_mean',
    'CC_sum', 'CC_mean',
    'CH_sum', 'CH_mean',
    # c_* coordinate fields
    'c_latitude', 'c_longitude',
    # remaining fields
    'TEMP_min', 'TEMP_max',
    'PRCP', 'SNOW',
    'WDIR_ave', 'WSPD_ave', 'PRES_ave',
    'WCOMP',
]

# Subset of the columns above that the models are trained on
cols = [
    'ELEV_mean', 'SLP_mean', 'EVT_mean', 'EVH_mean',
    'TEMP_min', 'TEMP_max',
    'PRCP', 'WSPD_ave', 'PRES_ave',
]

In [None]:
# Load data, downloaded from https://zenodo.org/doi/10.5281/zenodo.5636428
# Read `all_cols` rather than only the training features in `cols`: the cells
# below filter on 'acq_date' and model 'frp', and neither is listed in `cols`,
# so loading with usecols=cols would make those lookups fail with a KeyError.
df = pd.read_csv("features_array.csv", sep="\t", index_col=False, usecols=all_cols)

In [None]:
# Preview the first five rows of the loaded DataFrame
df.head(n=5)

In [None]:
# Select all rows where the fire was from 2016.
# - regex=False matches the literal text '2016'.
# - na=False treats a missing date as "no match"; a NaN inside the mask would
#   otherwise make the boolean indexing fail.
# - .copy() gives `fires` its own data, so later column assignments are not
#   writes into a slice of `df` (avoids SettingWithCopyWarning).
is_2016 = df['acq_date'].str.contains('2016', regex=False, na=False)
fires = df[is_2016].copy()

In [None]:
# Some cells in the FRP column have two comma-separated values because the
# satellite may collect two values at a given time for the same location.
# Keep just the first value in the cell and convert it to a number: the split
# yields strings, and the histogram, log transform and models below all need
# numeric FRP.
# astype(str) makes this cell safe to re-run after 'frp' is already numeric
# (a missing value becomes the text 'nan', which to_numeric reads back as NaN).
first_frp = fires['frp'].astype(str).str.split(',').str[0].str.strip()

# assign() returns a new DataFrame instead of writing into `fires` in place,
# so this never triggers a SettingWithCopyWarning.
# errors='coerce' turns unparseable text into NaN, which is dropped below.
fires = fires.assign(frp=pd.to_numeric(first_frp, errors='coerce'))

# Drop all rows containing NaN values
fires = fires.dropna()

In [None]:
# Summary statistics (pandas `describe`) for the parameters in `fires`
fires_summary = fires.describe()
fires_summary

In [None]:
# Plot frequencies of Fire Radiative Power.
# Uses the explicit fig/ax interface and labels both axes so the figure can be
# read on its own.
fig, ax = plt.subplots()
ax.hist(fires['frp'], bins=100)
ax.set_title("Fire Radiative Power, Not Scaled")
ax.set_xlabel("FRP")
ax.set_ylabel("Count")
plt.show()

In [None]:
# Because FRP values are skewed, create a new column with a log transform.
# Adding 1 before taking the log keeps zero FRP finite: log10(0 + 1) = 0.
# Method from https://aosmith.rbind.io/2018/09/19/the-log-0-problem/
#
# astype(float) guards against 'frp' still being text after the comma split;
# a value that cannot be converted raises here rather than propagating.
# assign() returns a new DataFrame instead of writing a column into `fires`
# in place, which avoids SettingWithCopyWarning and keeps the cell re-runnable.
fires = fires.assign(frp_scaled=np.log10(fires['frp'].astype(float) + 1))

In [None]:
# Plot frequencies of Fire Radiative Power with log transform.
# Uses the explicit fig/ax interface and labels both axes so the figure can be
# read on its own.
fig, ax = plt.subplots()
ax.hist(fires['frp_scaled'], bins=100)
ax.set_title("Fire Radiative Power, Scaled")
ax.set_xlabel("log10(FRP + 1)")
ax.set_ylabel("Count")
plt.show()

## Predict with K-Nearest Neighbors Algorithm

In [5]:
# Independent variables: the training feature columns
X = fires.loc[:, cols]
# Dependent variable: the log-transformed FRP
Y = fires.loc[:, 'frp_scaled']

# Hold out 20% of the rows as the test set and train on the other 80%.
# A fixed random_state makes the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=0,
)

In [6]:
# Fit a k-nearest neighbors regressor (k = 12) on the training split
# NOTE(review): the features are passed in unscaled; KNN is distance-based, so
# confirm whether the columns in `cols` should be standardized first.
KNN_model = KNeighborsRegressor(n_neighbors=12)
KNN_model.fit(X_train, Y_train)

# Predictions on the testing data
KNN_predict = KNN_model.predict(X_test)

In [7]:
# Mean squared error between the test targets and the KNN predictions
error = mean_squared_error(Y_test, KNN_predict)

# Absolute difference between each prediction and its true value
errors = (KNN_predict - Y_test).abs()

# Report MSE first, then the mean absolute error (MAE)
print("Mean Squared Error (Scaled):", error)
print('Mean Absolute Error (Scaled):', np.mean(errors))

Mean Squared Error (Scaled): 0.05622150334352182
Mean Absolute Error (Scaled): 0.16534501308119112


## Predict with Decision Tree Algorithm

- The code references the tutorial at https://www.askpython.com/python/examples/python-predict-function

In [8]:
# Rebuild the split using the FRP values without the log transform

# Independent variables: the training feature columns
X = fires.loc[:, cols]
# Dependent variable: FRP, not log-transformed
Y = fires.loc[:, 'frp']

# Hold out 20% of the rows as the test set and train on the other 80%
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=0,
)

In [9]:
# Fit a depth-limited decision tree on the training split.
# random_state is fixed so the fitted tree is the same on every run: when
# several candidate splits are equally good, scikit-learn picks between them
# using its random state.
DT_model = DecisionTreeRegressor(max_depth=5, random_state=0).fit(X_train, Y_train)
DT_predict = DT_model.predict(X_test)  # Predictions on testing data

In [15]:
# Mean squared error of the decision tree predictions; its square root
# (RMSE) is what gets printed
error = mean_squared_error(Y_test, DT_predict)

# Absolute difference between each prediction and its true value
errors = (DT_predict - Y_test).abs()

# Report RMSE first, then the mean absolute error (MAE)
print("Root Mean Squared Error:", error ** 0.5)
print('Mean Absolute Error:', np.mean(errors))

Root Mean Squared Error: 25.457777153918286
Mean Absolute Error: 9.413833581960157


## Predict with Random Forest

- The code references the article at https://medium.com/@theclickreader/random-forest-regression-explained-with-implementation-in-python-3dad88caf165
- Parameters are based on those used by Singla et al., the team behind WildfireDB: https://openreview.net/pdf?id=6nblryHxVbO

In [12]:
# Fit a random forest on the training split.
# - random_state is fixed because the forest draws random bootstrap samples
#   and feature subsets; without it the scores below change on every run.
# - max_samples is capped at the training-set size: an integer max_samples
#   larger than the number of training rows makes scikit-learn raise.
RF_model = RandomForestRegressor(
    n_estimators=50,
    max_depth=30,
    max_samples=min(5000, len(X_train)),
    random_state=0,
).fit(X_train, Y_train)
RF_predict = RF_model.predict(X_test)  # Predictions on testing data

In [14]:
# Mean squared error of the random forest predictions; its square root
# (RMSE) is what gets printed
error = mean_squared_error(Y_test, RF_predict)

# Absolute difference between each prediction and its true value
errors = (RF_predict - Y_test).abs()

# Report RMSE first, then the mean absolute error (MAE)
print("Root Mean Squared Error:", error ** 0.5)
print('Mean Absolute Error:', np.mean(errors))

Root Mean Squared Error: 24.235273829661573
Mean Absolute Error: 9.322914812024603
