# Compare TF Models Trained on Different Feature Sets

Trains 12 different models with the same architecture on variations of the same data. These variations include:
- Variations on how NaN values were treated in the dataset:
  - dropped entirely,
  - imputed using mean for some and median for highly skewed features (and mode for non-numeric features), or
  - imputed using mean for all features (and mode for non-numeric features).
- Variations of which features were included to train the model:
  - baseline data (location, time, immediate weather data),
  - baseline data plus long-term weather data,
  - baseline data plus fire history data, or
  - baseline data plus long-term weather and fire history data.

Each model is trained with a different pairing of data imputation and feature set.

The train-test split is the same for each variation of features within one data imputation approach: each imputed dataset is split once, and every feature set is sliced from that single split.

Compares the performances of the models.

## Setup
Imports, read in data

In [None]:
import numpy as np  # linear algebra
import pandas as pd  # data processing
import matplotlib.pyplot as plt  # plotting

import math
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import warnings

import tensorflow as tf  # model

In [None]:
# Colab only: mount Google Drive at /content/drive, where the dataset CSVs are read from.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# Folder on the mounted Drive that holds the three pre-processed dataset variants.
DATA_DIR = "/content/drive/My Drive/ML6140  - Project/Data"

# One CSV per NaN-handling strategy (df1 / df2 / df3).
DATASET_FILE_DROPPED_NANS = f"{DATA_DIR}/enhanced_data_df1.csv"
DATASET_FILE_IMPUTED_MEAN_MEDIAN = f"{DATA_DIR}/enhanced_data_df2.csv"
DATASET_FILE_IMPUTED_MEAN_ONLY = f"{DATA_DIR}/enhanced_data_df3.csv"

In [None]:
# Columns removed from every dataset right after loading.
drop_cols = [
  # Identifiers
  "incident_name",
  "GHCN_station_id",
  "LCD_station_id",
  # Sky-condition mode columns
  "mode_functionSkyConditions",
  "10_mode_functionSkyConditions",
  "30_mode_functionSkyConditions",
  "60_mode_functionSkyConditions",
  "incident_geohash",
  # Weather-station metadata
  "LCD_station_elevation",
  "LCD_station_distance",
  "GHCN_station_elevation",
  "GHCN_station_distance",
  "dist_mode_functionSkyConditions",
]

In [None]:
# Load each NaN-handling variant and discard the columns listed in drop_cols.
data_dropped_nans = pd.read_csv(DATASET_FILE_DROPPED_NANS).drop(columns=drop_cols)
data_imputed_mean_only = pd.read_csv(DATASET_FILE_IMPUTED_MEAN_ONLY).drop(columns=drop_cols)
data_imputed_mean_median = pd.read_csv(DATASET_FILE_IMPUTED_MEAN_MEDIAN).drop(columns=drop_cols)

In [None]:
# Baseline inputs: incident creation time, incident location and immediate weather.
baseline_features = [
  # Incident creation timestamp, split into components.
  "incident_created_year",
  "incident_created_month",
  "incident_created_day",
  "incident_created_hour",
  "incident_created_minute",
  # Incident location.
  "incident_latitude",
  "incident_longitude",
  # Each weather statistic is immediately followed by its "dist_"-prefixed companion column.
  *(
    column
    for statistic in (
      "sumPrecipitation",
      "maxDryBulbTemperature",
      "minDryBulbTemperature",
      "meanDryBulbTemperature",
      "meanDewPointTemperature",
      "meanWetBulbTemperature",
      "meanWindSpeed",
      "meanRelativeHumidity",
      "minRelativeHumidity",
      "maxRelativeHumidity",
      "maxWindSpeed",
      "calculate_circular_meanWindDirection",
      "mode_functionWindDirection",
    )
    for column in (statistic, f"dist_{statistic}")
  ),
]

In [None]:
# Long-term weather inputs: the same 13 statistics repeated under the "10_", "30_" and "60_" prefixes.
# NOTE(review): the prefixes presumably denote look-back windows; confirm the unit with the data pipeline.
weather_history_features = [
  f"{window}_{statistic}"
  for window in (10, 30, 60)
  for statistic in (
    "sumPrecipitation",
    "maxDryBulbTemperature",
    "minDryBulbTemperature",
    "meanDryBulbTemperature",
    "meanDewPointTemperature",
    "meanWetBulbTemperature",
    "meanWindSpeed",
    "meanRelativeHumidity",
    "minRelativeHumidity",
    "maxRelativeHumidity",
    "maxWindSpeed",
    "calculate_circular_meanWindDirection",
    "mode_functionWindDirection",
  )
]

In [None]:
# Fire-history inputs: the "far" and "near" historical average-acres-burned columns.
fire_history_features = [
  f"{scope}_hist_avg_acres_burned" for scope in ("far", "near")
]

In [None]:
def get_experiment_data(dataframe, random_state=42):
  """Split a dataset once and slice every experimental feature set from that split.

  Args:
    dataframe: pandas DataFrame containing a "class_label" column plus every
      column named in baseline_features, weather_history_features and
      fire_history_features.
    random_state: seed forwarded to train_test_split so the split can be
      reproduced across notebook runs. Pass None for an unseeded split.

  Returns:
    dict with the keys
      "train_y" / "validate_y": label arrays, and
      "train_x" / "validate_x": dicts mapping the feature-set names
        "baseline", "weather_hist", "fire_hist" and "all" to feature arrays.

  Raises:
    KeyError: if "class_label" or any listed feature column is missing.
  """
  # Split once so every feature set is evaluated on exactly the same rows.
  train, validate = train_test_split(dataframe, random_state=random_state)

  feature_sets = {
      "baseline": baseline_features,
      "weather_hist": baseline_features + weather_history_features,
      "fire_hist": baseline_features + fire_history_features,
  }

  data_dict = {
      "train_y": train["class_label"].values,
      "validate_y": validate["class_label"].values,
      "train_x": {},
      "validate_x": {},
  }

  for split_name, split_frame in (("train", train), ("validate", validate)):
    features = split_frame.drop(["class_label"], axis=1)
    arrays = data_dict[f"{split_name}_x"]
    for set_name, columns in feature_sets.items():
      arrays[set_name] = features[columns].values
    # NOTE(review): "all" takes every remaining column, not just the union of
    # the three feature lists. Confirm the CSVs carry no extra columns (for
    # example a saved index) that should be excluded.
    arrays["all"] = features.values

  return data_dict

In [None]:
class Metric:
  """Binary-classification metrics derived from confusion-matrix counts.

  Labels are compared against the literal values 1 (positive) and 0
  (negative). Any other value is counted in `total` but in none of the four
  confusion-matrix cells.

  Attributes:
    y_array: ground-truth labels, stored exactly as passed in.
    pred_y_array: predicted labels, stored exactly as passed in.
    tp, tn, fp, fn: confusion-matrix counts (plain ints).
    total: number of (label, prediction) pairs.
  """

  def __init__(self, y_array, pred_y_array):
    """Count true/false positives/negatives.

    Args:
      y_array: array-like of ground-truth labels.
      pred_y_array: array-like of predicted labels with the same number of
        elements; an (n, 1) column (as returned by a one-unit model) is
        accepted alongside an (n,) label vector.

    Raises:
      ValueError: if the two inputs hold a different number of elements.
    """
    self.y_array = y_array
    self.pred_y_array = pred_y_array

    # Flatten both so an (n,) label vector lines up with an (n, 1) prediction column.
    y = np.asarray(y_array).ravel()
    pred_y = np.asarray(pred_y_array).ravel()
    if y.shape != pred_y.shape:
      raise ValueError(
          f"y_array has {y.size} elements but pred_y_array has {pred_y.size}")

    # Plain Python ints keep the ratio arithmetic below free of numpy warnings.
    self.tp = int(np.sum((y == 1) & (pred_y == 1)))
    self.tn = int(np.sum((y == 0) & (pred_y == 0)))
    self.fp = int(np.sum((y == 0) & (pred_y == 1)))
    self.fn = int(np.sum((y == 1) & (pred_y == 0)))
    self.total = int(y.size)

  @staticmethod
  def _ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    if denominator == 0:
      return np.nan
    return numerator / denominator

  def true_positive_rate(self):
    """TP / (TP + FN): share of actual positives predicted positive."""
    return self._ratio(self.tp, self.tp + self.fn)

  def true_negative_rate(self):
    """TN / (TN + FP): share of actual negatives predicted negative."""
    return self._ratio(self.tn, self.tn + self.fp)

  def false_positive_rate(self):
    """FP / (FP + TN): share of actual negatives predicted positive."""
    return self._ratio(self.fp, self.fp + self.tn)

  def false_negative_rate(self):
    """FN / (FN + TP): share of actual positives predicted negative."""
    return self._ratio(self.fn, self.fn + self.tp)

  def confusion_matrix(self, rates=False):
    """Return the 2x2 confusion matrix as a DataFrame.

    Args:
      rates: when False (default) the cells hold raw counts; when True they
        hold the four rates, so each actual-class row sums to 1.

    Returns:
      DataFrame with rows "Positive"/"Negative" (actual class) and columns
      "Predicted Positive"/"Predicted Negative".
    """
    if rates:
      return self._confusion_matrix(self.true_positive_rate(),
                                    self.true_negative_rate(),
                                    self.false_positive_rate(),
                                    self.false_negative_rate())
    return self._confusion_matrix(self.tp, self.tn, self.fp, self.fn)

  def _confusion_matrix(self, tp, tn, fp, fn):
    """Lay the four cell values out with actual classes as rows."""
    return pd.DataFrame([[tp, fn], [fp, tn]],
                        columns=["Predicted Positive", "Predicted Negative"],
                        index=["Positive", "Negative"])

  def precision(self):
    """TP / (TP + FP); NaN when nothing was predicted positive."""
    return self._ratio(self.tp, self.tp + self.fp)

  def recall(self):
    """TP / (TP + FN); NaN when there are no actual positives."""
    return self._ratio(self.tp, self.tp + self.fn)

  def accuracy(self):
    """(TP + TN) / total; NaN when there are no samples."""
    return self._ratio(self.tp + self.tn, self.total)

  def f1_score(self):
    """2*TP / (2*TP + FP + FN); NaN when that denominator is zero."""
    return self._ratio(2 * self.tp, 2 * self.tp + self.fp + self.fn)

## Model

### DF 1: All NaNs dropped

In [None]:
# Split the NaN-dropped dataset once; the df1 models below all reuse this split.
df1_data = get_experiment_data(data_dropped_nans)

#### Baseline

In [None]:
# Seven ReLU hidden layers tapering from 512 to 32 units, then a single sigmoid output unit.
df1_baseline_model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(units, activation=tf.keras.activations.relu, use_bias=True)
        for units in (512, 512, 256, 256, 128, 64, 32)
    ]
    + [tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid, use_bias=True)]
)

In [None]:
# Binary cross-entropy loss, Adam optimizer with a 0.001 learning rate.
df1_baseline_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.binary_crossentropy,
)

In [None]:
# Train for 300 epochs on the baseline feature set.
df1_baseline_history = df1_baseline_model.fit(
    x=df1_data["train_x"]["baseline"],
    y=df1_data["train_y"],
    epochs=300,
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [None]:
# Print each layer's output shape and parameter count.
df1_baseline_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 512)               17920     
                                                                 
 dense_1 (Dense)             (None, 512)               262656    
                                                                 
 dense_2 (Dense)             (None, 256)               131328    
                                                                 
 dense_3 (Dense)             (None, 256)               65792     
                                                                 
 dense_4 (Dense)             (None, 128)               32896     
                                                                 
 dense_5 (Dense)             (None, 64)                8256      
                                                                 
 dense_6 (Dense)             (None, 32)                2

In [None]:
# Threshold the sigmoid outputs at 0.5 to get hard 0/1 labels for the held-out rows.
df1_baseline_pred_y = (
    df1_baseline_model.predict(df1_data["validate_x"]["baseline"]) > .5
).astype(int)



In [None]:
# Score the held-out predictions and show the count-based confusion matrix.
df1_baseline_model_metric = Metric(df1_data["validate_y"], df1_baseline_pred_y)
print(
    f"Precision: {df1_baseline_model_metric.precision()}",
    f"Recall: {df1_baseline_model_metric.recall()}",
    f"Accuracy: {df1_baseline_model_metric.accuracy()}",
    f"F1 Score: {df1_baseline_model_metric.f1_score()}",
    sep="\n",
)
display(df1_baseline_model_metric.confusion_matrix())

Precision: 0.6299435028248588
Recall: 0.5631313131313131
Accuracy: 0.7512274959083469
F1 Score: 0.5946666666666667


Unnamed: 0,Predicted Positive,Predicted Negative
Positive,223,173
Negative,131,695


#### With Weather History

In [None]:
# Seven ReLU hidden layers tapering from 512 to 32 units, then a single sigmoid output unit.
df1_weather_model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(units, activation=tf.keras.activations.relu, use_bias=True)
        for units in (512, 512, 256, 256, 128, 64, 32)
    ]
    + [tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid, use_bias=True)]
)

In [None]:
# Binary cross-entropy loss, Adam optimizer with a 0.001 learning rate.
df1_weather_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.binary_crossentropy,
)

In [None]:
# Train for 300 epochs on the baseline + weather-history feature set.
df1_weather_history = df1_weather_model.fit(
    x=df1_data["train_x"]["weather_hist"],
    y=df1_data["train_y"],
    epochs=300,
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [None]:
# Print each layer's output shape and parameter count.
df1_weather_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 512)               37888     
                                                                 
 dense_9 (Dense)             (None, 512)               262656    
                                                                 
 dense_10 (Dense)            (None, 256)               131328    
                                                                 
 dense_11 (Dense)            (None, 256)               65792     
                                                                 
 dense_12 (Dense)            (None, 128)               32896     
                                                                 
 dense_13 (Dense)            (None, 64)                8256      
                                                                 
 dense_14 (Dense)            (None, 32)               

In [None]:
# Threshold the sigmoid outputs at 0.5 to get hard 0/1 labels for the held-out rows.
df1_weather_pred_y = (
    df1_weather_model.predict(df1_data["validate_x"]["weather_hist"]) > .5
).astype(int)



In [None]:
# Score the held-out predictions and show the count-based confusion matrix.
df1_weather_model_metric = Metric(df1_data["validate_y"], df1_weather_pred_y)
print(
    f"Precision: {df1_weather_model_metric.precision()}",
    f"Recall: {df1_weather_model_metric.recall()}",
    f"Accuracy: {df1_weather_model_metric.accuracy()}",
    f"F1 Score: {df1_weather_model_metric.f1_score()}",
    sep="\n",
)
display(df1_weather_model_metric.confusion_matrix())

Precision: 0.6418269230769231
Recall: 0.6742424242424242
Accuracy: 0.7725040916530278
F1 Score: 0.6576354679802956


Unnamed: 0,Predicted Positive,Predicted Negative
Positive,267,129
Negative,149,677


#### With Fire History

In [None]:
# Seven ReLU hidden layers tapering from 512 to 32 units, then a single sigmoid output unit.
df1_fire_hist_model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(units, activation=tf.keras.activations.relu, use_bias=True)
        for units in (512, 512, 256, 256, 128, 64, 32)
    ]
    + [tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid, use_bias=True)]
)

In [None]:
# Binary cross-entropy loss, Adam optimizer with a 0.001 learning rate.
df1_fire_hist_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.binary_crossentropy,
)

In [None]:
# Train for 300 epochs on the baseline + fire-history feature set.
df1_fire_hist_history = df1_fire_hist_model.fit(
    x=df1_data["train_x"]["fire_hist"],
    y=df1_data["train_y"],
    epochs=300,
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [None]:
# Print each layer's output shape and parameter count.
df1_fire_hist_model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_16 (Dense)            (None, 512)               18944     
                                                                 
 dense_17 (Dense)            (None, 512)               262656    
                                                                 
 dense_18 (Dense)            (None, 256)               131328    
                                                                 
 dense_19 (Dense)            (None, 256)               65792     
                                                                 
 dense_20 (Dense)            (None, 128)               32896     
                                                                 
 dense_21 (Dense)            (None, 64)                8256      
                                                                 
 dense_22 (Dense)            (None, 32)               

In [None]:
# Threshold the sigmoid outputs at 0.5 to get hard 0/1 labels for the held-out rows.
df1_fire_hist_pred_y = (
    df1_fire_hist_model.predict(df1_data["validate_x"]["fire_hist"]) > .5
).astype(int)



In [None]:
# Score the held-out predictions and show the count-based confusion matrix.
df1_fire_hist_model_metric = Metric(df1_data["validate_y"], df1_fire_hist_pred_y)
print(
    f"Precision: {df1_fire_hist_model_metric.precision()}",
    f"Recall: {df1_fire_hist_model_metric.recall()}",
    f"Accuracy: {df1_fire_hist_model_metric.accuracy()}",
    f"F1 Score: {df1_fire_hist_model_metric.f1_score()}",
    sep="\n",
)
display(df1_fire_hist_model_metric.confusion_matrix())

Precision: 0.7623318385650224
Recall: 0.8585858585858586
Accuracy: 0.867430441898527
F1 Score: 0.8076009501187649


Unnamed: 0,Predicted Positive,Predicted Negative
Positive,340,56
Negative,106,720


#### With All Features

In [None]:
# Seven ReLU hidden layers tapering from 512 to 32 units, then a single sigmoid output unit.
df1_all_features_model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(units, activation=tf.keras.activations.relu, use_bias=True)
        for units in (512, 512, 256, 256, 128, 64, 32)
    ]
    + [tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid, use_bias=True)]
)

In [None]:
# Binary cross-entropy loss, Adam optimizer with a 0.001 learning rate.
df1_all_features_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.binary_crossentropy,
)

In [None]:
# Train for 300 epochs on the "all" feature set.
df1_all_features_history = df1_all_features_model.fit(
    x=df1_data["train_x"]["all"],
    y=df1_data["train_y"],
    epochs=300,
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [None]:
# Print each layer's output shape and parameter count.
df1_all_features_model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_24 (Dense)            (None, 512)               40960     
                                                                 
 dense_25 (Dense)            (None, 512)               262656    
                                                                 
 dense_26 (Dense)            (None, 256)               131328    
                                                                 
 dense_27 (Dense)            (None, 256)               65792     
                                                                 
 dense_28 (Dense)            (None, 128)               32896     
                                                                 
 dense_29 (Dense)            (None, 64)                8256      
                                                                 
 dense_30 (Dense)            (None, 32)               

In [None]:
# Threshold the sigmoid outputs at 0.5 to get hard 0/1 labels for the held-out rows.
df1_all_features_pred_y = (
    df1_all_features_model.predict(df1_data["validate_x"]["all"]) > .5
).astype(int)



In [None]:
# Score the held-out predictions and show the count-based confusion matrix.
df1_all_features_model_metric = Metric(df1_data["validate_y"], df1_all_features_pred_y)
print(
    f"Precision: {df1_all_features_model_metric.precision()}",
    f"Recall: {df1_all_features_model_metric.recall()}",
    f"Accuracy: {df1_all_features_model_metric.accuracy()}",
    f"F1 Score: {df1_all_features_model_metric.f1_score()}",
    sep="\n",
)
display(df1_all_features_model_metric.confusion_matrix())

Precision: 0.7655860349127181
Recall: 0.7752525252525253
Accuracy: 0.8502454991816694
F1 Score: 0.7703889585947302


Unnamed: 0,Predicted Positive,Predicted Negative
Positive,307,89
Negative,94,732


### DF 2: Impute with mean and median

In [None]:
# Split the mean/median-imputed dataset once; the df2 models below share this split.
df2_data = get_experiment_data(data_imputed_mean_median)

#### Baseline

In [None]:
# Seven ReLU hidden layers tapering from 512 to 32 units, then a single sigmoid output unit.
df2_baseline_model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(units, activation=tf.keras.activations.relu, use_bias=True)
        for units in (512, 512, 256, 256, 128, 64, 32)
    ]
    + [tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid, use_bias=True)]
)

In [None]:
# Binary cross-entropy loss, Adam optimizer with a 0.001 learning rate.
df2_baseline_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.binary_crossentropy,
)

In [None]:
# Train for 300 epochs on the baseline feature set.
df2_baseline_history = df2_baseline_model.fit(
    x=df2_data["train_x"]["baseline"],
    y=df2_data["train_y"],
    epochs=300,
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [None]:
# Print each layer's output shape and parameter count.
df2_baseline_model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_32 (Dense)            (None, 512)               17920     
                                                                 
 dense_33 (Dense)            (None, 512)               262656    
                                                                 
 dense_34 (Dense)            (None, 256)               131328    
                                                                 
 dense_35 (Dense)            (None, 256)               65792     
                                                                 
 dense_36 (Dense)            (None, 128)               32896     
                                                                 
 dense_37 (Dense)            (None, 64)                8256      
                                                                 
 dense_38 (Dense)            (None, 32)               

In [None]:
# Threshold the sigmoid outputs at 0.5 to get hard 0/1 labels for the held-out rows.
df2_baseline_pred_y = (
    df2_baseline_model.predict(df2_data["validate_x"]["baseline"]) > .5
).astype(int)



In [None]:
# Score the held-out predictions and show the count-based confusion matrix.
df2_baseline_model_metric = Metric(df2_data["validate_y"], df2_baseline_pred_y)
print(
    f"Precision: {df2_baseline_model_metric.precision()}",
    f"Recall: {df2_baseline_model_metric.recall()}",
    f"Accuracy: {df2_baseline_model_metric.accuracy()}",
    f"F1 Score: {df2_baseline_model_metric.f1_score()}",
    sep="\n",
)
display(df2_baseline_model_metric.confusion_matrix())

Precision: 0.6300813008130082
Recall: 0.6638115631691649
Accuracy: 0.7670103092783506
F1 Score: 0.6465067778936392


Unnamed: 0,Predicted Positive,Predicted Negative
Positive,310,157
Negative,182,806


#### With Weather History

In [None]:
# Seven ReLU hidden layers tapering from 512 to 32 units, then a single sigmoid output unit.
df2_weather_model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(units, activation=tf.keras.activations.relu, use_bias=True)
        for units in (512, 512, 256, 256, 128, 64, 32)
    ]
    + [tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid, use_bias=True)]
)

In [None]:
# Binary cross-entropy loss, Adam optimizer with a 0.001 learning rate.
df2_weather_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.binary_crossentropy,
)

In [None]:
# Train for 300 epochs on the baseline + weather-history feature set.
df2_weather_history = df2_weather_model.fit(
    x=df2_data["train_x"]["weather_hist"],
    y=df2_data["train_y"],
    epochs=300,
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [None]:
# Print each layer's output shape and parameter count.
df2_weather_model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_40 (Dense)            (None, 512)               37888     
                                                                 
 dense_41 (Dense)            (None, 512)               262656    
                                                                 
 dense_42 (Dense)            (None, 256)               131328    
                                                                 
 dense_43 (Dense)            (None, 256)               65792     
                                                                 
 dense_44 (Dense)            (None, 128)               32896     
                                                                 
 dense_45 (Dense)            (None, 64)                8256      
                                                                 
 dense_46 (Dense)            (None, 32)               

In [None]:
# Threshold the sigmoid outputs at 0.5 to get hard 0/1 labels for the held-out rows.
df2_weather_pred_y = (
    df2_weather_model.predict(df2_data["validate_x"]["weather_hist"]) > .5
).astype(int)



In [None]:
# Score the held-out predictions and show the count-based confusion matrix.
df2_weather_model_metric = Metric(df2_data["validate_y"], df2_weather_pred_y)
print(
    f"Precision: {df2_weather_model_metric.precision()}",
    f"Recall: {df2_weather_model_metric.recall()}",
    f"Accuracy: {df2_weather_model_metric.accuracy()}",
    f"F1 Score: {df2_weather_model_metric.f1_score()}",
    sep="\n",
)
display(df2_weather_model_metric.confusion_matrix())

Precision: 0.6377118644067796
Recall: 0.6445396145610278
Accuracy: 0.768384879725086
F1 Score: 0.6411075612353567


Unnamed: 0,Predicted Positive,Predicted Negative
Positive,301,166
Negative,171,817


#### With Fire History

In [None]:
# Seven ReLU hidden layers tapering from 512 to 32 units, then a single sigmoid output unit.
df2_fire_hist_model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(units, activation=tf.keras.activations.relu, use_bias=True)
        for units in (512, 512, 256, 256, 128, 64, 32)
    ]
    + [tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid, use_bias=True)]
)

In [None]:
# Binary cross-entropy loss for the single sigmoid output, optimised with Adam
# at a learning rate of 1e-3. The loss is referenced from tf.keras.losses, the
# namespace intended for loss functions.
df2_fire_hist_model.compile(
    loss=tf.keras.losses.binary_crossentropy,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
)

In [None]:
# Train for 300 epochs on the fire-history feature set; no batch size or
# validation data is passed, so Keras defaults apply. The return value of
# fit() is kept for later inspection.
df2_fire_hist_history = df2_fire_hist_model.fit(
    x=df2_data["train_x"]["fire_hist"],
    y=df2_data["train_y"],
    epochs=300,
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [None]:
# Print the layer-by-layer architecture and parameter counts of the trained model.
df2_fire_hist_model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_48 (Dense)            (None, 512)               18944     
                                                                 
 dense_49 (Dense)            (None, 512)               262656    
                                                                 
 dense_50 (Dense)            (None, 256)               131328    
                                                                 
 dense_51 (Dense)            (None, 256)               65792     
                                                                 
 dense_52 (Dense)            (None, 128)               32896     
                                                                 
 dense_53 (Dense)            (None, 64)                8256      
                                                                 
 dense_54 (Dense)            (None, 32)               

In [None]:
# Predict on the validation split and binarise the sigmoid outputs:
# 1 where the predicted probability exceeds 0.5, otherwise 0.
df2_fire_hist_pred_y = np.where(
    df2_fire_hist_model.predict(df2_data["validate_x"]["fire_hist"]) > .5, 1, 0
)



In [None]:
# Evaluate the thresholded validation predictions with the notebook's Metric helper.
df2_fire_hist_model_metric = Metric(df2_data["validate_y"], df2_fire_hist_pred_y)
# One print call; sep="\n" keeps each score on its own line.
print(
    f"Precision: {df2_fire_hist_model_metric.precision()}",
    f"Recall: {df2_fire_hist_model_metric.recall()}",
    f"Accuracy: {df2_fire_hist_model_metric.accuracy()}",
    f"F1 Score: {df2_fire_hist_model_metric.f1_score()}",
    sep="\n",
)
display(df2_fire_hist_model_metric.confusion_matrix())

Precision: 0.76
Recall: 0.8137044967880086
Accuracy: 0.8577319587628865
F1 Score: 0.7859358841778697


Unnamed: 0,Predicted Positive,Predicted Negative
Positive,380,87
Negative,120,868


#### With All Features

In [None]:
# Fully connected binary classifier: seven ReLU hidden layers tapering from
# 512 to 32 units, followed by a single sigmoid output unit.
df2_all_features_model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(width, activation=tf.keras.activations.relu, use_bias=True)
        for width in (512, 512, 256, 256, 128, 64, 32)
    ]
    + [tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid, use_bias=True)]
)

In [None]:
# Binary cross-entropy loss for the single sigmoid output, optimised with Adam
# at a learning rate of 1e-3. The loss is referenced from tf.keras.losses, the
# namespace intended for loss functions.
df2_all_features_model.compile(
    loss=tf.keras.losses.binary_crossentropy,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
)

In [None]:
# Train for 300 epochs on the full feature set; no batch size or validation
# data is passed, so Keras defaults apply. The return value of fit() is kept
# for later inspection.
df2_all_features_history = df2_all_features_model.fit(
    x=df2_data["train_x"]["all"],
    y=df2_data["train_y"],
    epochs=300,
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [None]:
# Print the layer-by-layer architecture and parameter counts of the trained model.
df2_all_features_model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_56 (Dense)            (None, 512)               40960     
                                                                 
 dense_57 (Dense)            (None, 512)               262656    
                                                                 
 dense_58 (Dense)            (None, 256)               131328    
                                                                 
 dense_59 (Dense)            (None, 256)               65792     
                                                                 
 dense_60 (Dense)            (None, 128)               32896     
                                                                 
 dense_61 (Dense)            (None, 64)                8256      
                                                                 
 dense_62 (Dense)            (None, 32)               

In [None]:
# Predict on the validation split and binarise the sigmoid outputs:
# 1 where the predicted probability exceeds 0.5, otherwise 0.
df2_all_features_pred_y = np.where(
    df2_all_features_model.predict(df2_data["validate_x"]["all"]) > .5, 1, 0
)



In [None]:
# Evaluate the thresholded validation predictions with the notebook's Metric helper.
df2_all_features_model_metric = Metric(df2_data["validate_y"], df2_all_features_pred_y)
# One print call; sep="\n" keeps each score on its own line.
print(
    f"Precision: {df2_all_features_model_metric.precision()}",
    f"Recall: {df2_all_features_model_metric.recall()}",
    f"Accuracy: {df2_all_features_model_metric.accuracy()}",
    f"F1 Score: {df2_all_features_model_metric.f1_score()}",
    sep="\n",
)
display(df2_all_features_model_metric.confusion_matrix())

Precision: 0.7721774193548387
Recall: 0.8201284796573876
Accuracy: 0.8646048109965636
F1 Score: 0.7954309449636553


Unnamed: 0,Predicted Positive,Predicted Negative
Positive,383,84
Negative,113,875


### DF 3: Impute with mean only

In [None]:
# Build the experiment data for the mean-only imputation dataset. The result is
# indexed below as df3_data["train_x" | "validate_x"][<feature set>] and
# df3_data["train_y" | "validate_y"].
df3_data = get_experiment_data(data_imputed_mean_only)

#### Baseline

In [None]:
# Fully connected binary classifier: seven ReLU hidden layers tapering from
# 512 to 32 units, followed by a single sigmoid output unit.
df3_baseline_model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(width, activation=tf.keras.activations.relu, use_bias=True)
        for width in (512, 512, 256, 256, 128, 64, 32)
    ]
    + [tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid, use_bias=True)]
)

In [None]:
# Binary cross-entropy loss for the single sigmoid output, optimised with Adam
# at a learning rate of 1e-3. The loss is referenced from tf.keras.losses, the
# namespace intended for loss functions.
df3_baseline_model.compile(
    loss=tf.keras.losses.binary_crossentropy,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
)

In [None]:
# Train for 300 epochs on the baseline feature set; no batch size or validation
# data is passed, so Keras defaults apply. The return value of fit() is kept
# for later inspection.
df3_baseline_history = df3_baseline_model.fit(
    x=df3_data["train_x"]["baseline"],
    y=df3_data["train_y"],
    epochs=300,
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [None]:
# Print the layer-by-layer architecture and parameter counts of the trained model.
df3_baseline_model.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_64 (Dense)            (None, 512)               17920     
                                                                 
 dense_65 (Dense)            (None, 512)               262656    
                                                                 
 dense_66 (Dense)            (None, 256)               131328    
                                                                 
 dense_67 (Dense)            (None, 256)               65792     
                                                                 
 dense_68 (Dense)            (None, 128)               32896     
                                                                 
 dense_69 (Dense)            (None, 64)                8256      
                                                                 
 dense_70 (Dense)            (None, 32)               

In [None]:
# Predict on the validation split and binarise the sigmoid outputs:
# 1 where the predicted probability exceeds 0.5, otherwise 0.
df3_baseline_pred_y = np.where(
    df3_baseline_model.predict(df3_data["validate_x"]["baseline"]) > .5, 1, 0
)



In [None]:
# Evaluate the thresholded validation predictions with the notebook's Metric helper.
df3_baseline_model_metric = Metric(df3_data["validate_y"], df3_baseline_pred_y)
# One print call; sep="\n" keeps each score on its own line.
print(
    f"Precision: {df3_baseline_model_metric.precision()}",
    f"Recall: {df3_baseline_model_metric.recall()}",
    f"Accuracy: {df3_baseline_model_metric.accuracy()}",
    f"F1 Score: {df3_baseline_model_metric.f1_score()}",
    sep="\n",
)
display(df3_baseline_model_metric.confusion_matrix())

Precision: 0.642578125
Recall: 0.676954732510288
Accuracy: 0.7663230240549829
F1 Score: 0.6593186372745491


Unnamed: 0,Predicted Positive,Predicted Negative
Positive,329,157
Negative,183,786


#### With Weather History

In [None]:
# Fully connected binary classifier: seven ReLU hidden layers tapering from
# 512 to 32 units, followed by a single sigmoid output unit.
df3_weather_model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(width, activation=tf.keras.activations.relu, use_bias=True)
        for width in (512, 512, 256, 256, 128, 64, 32)
    ]
    + [tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid, use_bias=True)]
)

In [None]:
# Binary cross-entropy loss for the single sigmoid output, optimised with Adam
# at a learning rate of 1e-3. The loss is referenced from tf.keras.losses, the
# namespace intended for loss functions.
df3_weather_model.compile(
    loss=tf.keras.losses.binary_crossentropy,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
)

In [None]:
# Train for 300 epochs on the weather-history feature set; no batch size or
# validation data is passed, so Keras defaults apply. The return value of
# fit() is kept for later inspection.
df3_weather_history = df3_weather_model.fit(
    x=df3_data["train_x"]["weather_hist"],
    y=df3_data["train_y"],
    epochs=300,
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [None]:
# Print the layer-by-layer architecture and parameter counts of the trained model.
df3_weather_model.summary()

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_72 (Dense)            (None, 512)               37888     
                                                                 
 dense_73 (Dense)            (None, 512)               262656    
                                                                 
 dense_74 (Dense)            (None, 256)               131328    
                                                                 
 dense_75 (Dense)            (None, 256)               65792     
                                                                 
 dense_76 (Dense)            (None, 128)               32896     
                                                                 
 dense_77 (Dense)            (None, 64)                8256      
                                                                 
 dense_78 (Dense)            (None, 32)               

In [None]:
# Predict on the validation split and binarise the sigmoid outputs:
# 1 where the predicted probability exceeds 0.5, otherwise 0.
df3_weather_pred_y = np.where(
    df3_weather_model.predict(df3_data["validate_x"]["weather_hist"]) > .5, 1, 0
)



In [None]:
# Evaluate the thresholded validation predictions with the notebook's Metric helper.
df3_weather_model_metric = Metric(df3_data["validate_y"], df3_weather_pred_y)
# One print call; sep="\n" keeps each score on its own line.
print(
    f"Precision: {df3_weather_model_metric.precision()}",
    f"Recall: {df3_weather_model_metric.recall()}",
    f"Accuracy: {df3_weather_model_metric.accuracy()}",
    f"F1 Score: {df3_weather_model_metric.f1_score()}",
    sep="\n",
)
display(df3_weather_model_metric.confusion_matrix())

Precision: 0.6320939334637965
Recall: 0.6646090534979424
Accuracy: 0.7587628865979381
F1 Score: 0.6479438314944834


Unnamed: 0,Predicted Positive,Predicted Negative
Positive,323,163
Negative,188,781


#### With Fire History

In [None]:
# Fully connected binary classifier: seven ReLU hidden layers tapering from
# 512 to 32 units, followed by a single sigmoid output unit.
df3_fire_hist_model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(width, activation=tf.keras.activations.relu, use_bias=True)
        for width in (512, 512, 256, 256, 128, 64, 32)
    ]
    + [tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid, use_bias=True)]
)

In [None]:
# Binary cross-entropy loss for the single sigmoid output, optimised with Adam
# at a learning rate of 1e-3. The loss is referenced from tf.keras.losses, the
# namespace intended for loss functions.
df3_fire_hist_model.compile(
    loss=tf.keras.losses.binary_crossentropy,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
)

In [None]:
# Train for 300 epochs on the fire-history feature set; no batch size or
# validation data is passed, so Keras defaults apply. The return value of
# fit() is kept for later inspection.
df3_fire_hist_history = df3_fire_hist_model.fit(
    x=df3_data["train_x"]["fire_hist"],
    y=df3_data["train_y"],
    epochs=300,
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [None]:
# Print the layer-by-layer architecture and parameter counts of the trained model.
df3_fire_hist_model.summary()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_80 (Dense)            (None, 512)               18944     
                                                                 
 dense_81 (Dense)            (None, 512)               262656    
                                                                 
 dense_82 (Dense)            (None, 256)               131328    
                                                                 
 dense_83 (Dense)            (None, 256)               65792     
                                                                 
 dense_84 (Dense)            (None, 128)               32896     
                                                                 
 dense_85 (Dense)            (None, 64)                8256      
                                                                 
 dense_86 (Dense)            (None, 32)              

In [None]:
# Predict on the validation split and binarise the sigmoid outputs:
# 1 where the predicted probability exceeds 0.5, otherwise 0.
df3_fire_hist_pred_y = np.where(
    df3_fire_hist_model.predict(df3_data["validate_x"]["fire_hist"]) > .5, 1, 0
)



In [None]:
# Evaluate the thresholded validation predictions with the notebook's Metric helper.
df3_fire_hist_model_metric = Metric(df3_data["validate_y"], df3_fire_hist_pred_y)
# One print call; sep="\n" keeps each score on its own line.
print(
    f"Precision: {df3_fire_hist_model_metric.precision()}",
    f"Recall: {df3_fire_hist_model_metric.recall()}",
    f"Accuracy: {df3_fire_hist_model_metric.accuracy()}",
    f"F1 Score: {df3_fire_hist_model_metric.f1_score()}",
    sep="\n",
)
display(df3_fire_hist_model_metric.confusion_matrix())

Precision: 0.7852882703777336
Recall: 0.8127572016460906
Accuracy: 0.8632302405498282
F1 Score: 0.7987866531850354


Unnamed: 0,Predicted Positive,Predicted Negative
Positive,395,91
Negative,108,861


#### With All Features

In [None]:
# Fully connected binary classifier: seven ReLU hidden layers tapering from
# 512 to 32 units, followed by a single sigmoid output unit.
df3_all_features_model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(width, activation=tf.keras.activations.relu, use_bias=True)
        for width in (512, 512, 256, 256, 128, 64, 32)
    ]
    + [tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid, use_bias=True)]
)

In [None]:
# Binary cross-entropy loss for the single sigmoid output, optimised with Adam
# at a learning rate of 1e-3. The loss is referenced from tf.keras.losses, the
# namespace intended for loss functions.
df3_all_features_model.compile(
    loss=tf.keras.losses.binary_crossentropy,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
)

In [None]:
# Train for 300 epochs on the full feature set; no batch size or validation
# data is passed, so Keras defaults apply. The return value of fit() is kept
# for later inspection.
df3_all_features_history = df3_all_features_model.fit(
    x=df3_data["train_x"]["all"],
    y=df3_data["train_y"],
    epochs=300,
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [None]:
# Print the layer-by-layer architecture and parameter counts of the trained model.
df3_all_features_model.summary()

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_88 (Dense)            (None, 512)               40960     
                                                                 
 dense_89 (Dense)            (None, 512)               262656    
                                                                 
 dense_90 (Dense)            (None, 256)               131328    
                                                                 
 dense_91 (Dense)            (None, 256)               65792     
                                                                 
 dense_92 (Dense)            (None, 128)               32896     
                                                                 
 dense_93 (Dense)            (None, 64)                8256      
                                                                 
 dense_94 (Dense)            (None, 32)              

In [None]:
# Predict on the validation split and binarise the sigmoid outputs:
# 1 where the predicted probability exceeds 0.5, otherwise 0.
df3_all_features_pred_y = np.where(
    df3_all_features_model.predict(df3_data["validate_x"]["all"]) > .5, 1, 0
)



In [None]:
# Evaluate the thresholded validation predictions with the notebook's Metric helper.
df3_all_features_model_metric = Metric(df3_data["validate_y"], df3_all_features_pred_y)
# One print call; sep="\n" keeps each score on its own line.
print(
    f"Precision: {df3_all_features_model_metric.precision()}",
    f"Recall: {df3_all_features_model_metric.recall()}",
    f"Accuracy: {df3_all_features_model_metric.accuracy()}",
    f"F1 Score: {df3_all_features_model_metric.f1_score()}",
    sep="\n",
)
display(df3_all_features_model_metric.confusion_matrix())

Precision: 0.7728194726166329
Recall: 0.7839506172839507
Accuracy: 0.8508591065292096
F1 Score: 0.7783452502553626


Unnamed: 0,Predicted Positive,Predicted Negative
Positive,381,105
Negative,112,857


### Compare

In [None]:
# Precision of all twelve models: one row per imputation variant (df1-df3),
# one column per feature set.
compare_precision = pd.DataFrame(
    [
        [metric.precision() for metric in variant_metrics]
        for variant_metrics in (
            (df1_baseline_model_metric, df1_weather_model_metric,
             df1_fire_hist_model_metric, df1_all_features_model_metric),
            (df2_baseline_model_metric, df2_weather_model_metric,
             df2_fire_hist_model_metric, df2_all_features_model_metric),
            (df3_baseline_model_metric, df3_weather_model_metric,
             df3_fire_hist_model_metric, df3_all_features_model_metric),
        )
    ],
    columns=["baseline", "weather_hist", "fire_hist", "all"],
    index=["df1", "df2", "df3"],
)
compare_precision

Unnamed: 0,baseline,weather_hist,fire_hist,all
df1,0.629944,0.641827,0.762332,0.765586
df2,0.630081,0.637712,0.76,0.772177
df3,0.642578,0.632094,0.785288,0.772819


In [None]:
# Recall of all twelve models: one row per imputation variant (df1-df3),
# one column per feature set.
compare_recall = pd.DataFrame(
    [
        [metric.recall() for metric in variant_metrics]
        for variant_metrics in (
            (df1_baseline_model_metric, df1_weather_model_metric,
             df1_fire_hist_model_metric, df1_all_features_model_metric),
            (df2_baseline_model_metric, df2_weather_model_metric,
             df2_fire_hist_model_metric, df2_all_features_model_metric),
            (df3_baseline_model_metric, df3_weather_model_metric,
             df3_fire_hist_model_metric, df3_all_features_model_metric),
        )
    ],
    columns=["baseline", "weather_hist", "fire_hist", "all"],
    index=["df1", "df2", "df3"],
)
compare_recall

Unnamed: 0,baseline,weather_hist,fire_hist,all
df1,0.563131,0.674242,0.858586,0.775253
df2,0.663812,0.64454,0.813704,0.820128
df3,0.676955,0.664609,0.812757,0.783951


In [None]:
# Accuracy of all twelve models: one row per imputation variant (df1-df3),
# one column per feature set.
compare_accuracy = pd.DataFrame(
    [
        [metric.accuracy() for metric in variant_metrics]
        for variant_metrics in (
            (df1_baseline_model_metric, df1_weather_model_metric,
             df1_fire_hist_model_metric, df1_all_features_model_metric),
            (df2_baseline_model_metric, df2_weather_model_metric,
             df2_fire_hist_model_metric, df2_all_features_model_metric),
            (df3_baseline_model_metric, df3_weather_model_metric,
             df3_fire_hist_model_metric, df3_all_features_model_metric),
        )
    ],
    columns=["baseline", "weather_hist", "fire_hist", "all"],
    index=["df1", "df2", "df3"],
)
compare_accuracy

Unnamed: 0,baseline,weather_hist,fire_hist,all
df1,0.751227,0.772504,0.86743,0.850245
df2,0.76701,0.768385,0.857732,0.864605
df3,0.766323,0.758763,0.86323,0.850859


In [None]:
# F1 score of all twelve models: one row per imputation variant (df1-df3),
# one column per feature set.
compare_f1_score = pd.DataFrame(
    [
        [metric.f1_score() for metric in variant_metrics]
        for variant_metrics in (
            (df1_baseline_model_metric, df1_weather_model_metric,
             df1_fire_hist_model_metric, df1_all_features_model_metric),
            (df2_baseline_model_metric, df2_weather_model_metric,
             df2_fire_hist_model_metric, df2_all_features_model_metric),
            (df3_baseline_model_metric, df3_weather_model_metric,
             df3_fire_hist_model_metric, df3_all_features_model_metric),
        )
    ],
    columns=["baseline", "weather_hist", "fire_hist", "all"],
    index=["df1", "df2", "df3"],
)
compare_f1_score

Unnamed: 0,baseline,weather_hist,fire_hist,all
df1,0.594667,0.657635,0.807601,0.770389
df2,0.646507,0.641108,0.785936,0.795431
df3,0.659319,0.647944,0.798787,0.778345


### Save datasets to use again

In [None]:
from datetime import datetime

'2023-12-10 09:34:55.423826'

In [None]:
# Output path template under the mounted Google Drive. The {date} and
# {identifier} placeholders are filled with str.format when the splits are saved.
OUTFILE_NAME_TEMPLATE = "/content/drive/My Drive/ML6140  - Project/Model/train_test_split_data/{date}_{identifier}.csv"

In [None]:
# Today's date as an ISO "YYYY-MM-DD" string, used as the prefix of the saved file names.
date = datetime.now().date().isoformat()

In [None]:
def data_dict_to_csv(data_dict, name):
  """Write the train/validation splits of one experiment to CSV files.

  Four files are written, one each for ``train_x``, ``train_y``,
  ``validate_x`` and ``validate_y``. For the two feature splits only the
  ``"all"`` feature set is saved. Each path is built by filling the
  module-level ``OUTFILE_NAME_TEMPLATE`` with the module-level ``date`` and an
  identifier of the form ``{name}_{split}``.

  Args:
    data_dict: Mapping with keys ``"train_x"``, ``"train_y"``,
      ``"validate_x"`` and ``"validate_y"``; the two ``*_x`` entries must
      themselves be indexable by ``"all"``. Every saved value must be
      accepted by ``pd.DataFrame``.
    name: Label of the dataset variant (e.g. ``"df1"``), used as the prefix of
      each file identifier.

  Raises:
    KeyError: If ``data_dict`` is a dict missing one of the expected keys. All
      keys are resolved before the first file is written, so a malformed
      ``data_dict`` does not leave a partial set of files behind.
  """
  # Resolve every split up front; dict insertion order fixes the write order.
  splits = {
      "train_x": data_dict["train_x"]["all"],
      "train_y": data_dict["train_y"],
      "validate_x": data_dict["validate_x"]["all"],
      "validate_y": data_dict["validate_y"],
  }
  for split_name, split_data in splits.items():
    out_path = OUTFILE_NAME_TEMPLATE.format(
        date=date,
        identifier=f"{name}_{split_name}")
    pd.DataFrame(split_data).to_csv(out_path, header=True, index=False)

In [None]:
# Save the train/validation splits of each imputation variant under its own file prefix.
data_dict_to_csv(df1_data, "df1")
data_dict_to_csv(df2_data, "df2")
data_dict_to_csv(df3_data, "df3")