In [18]:
# ===============================================================
# 1. IMPORTS
# ===============================================================
# %%
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Models
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier


In [19]:
# ===============================================================
# 2. LOAD CLEANED DATASET
# ===============================================================
# %%
# Read the cleaned dataset; the path is relative to the notebook's working
# directory, so the notebook must be run from its own folder.
df = pd.read_csv("../data/processed/Fire/fnf_cleaned.csv")

# Target vector first, then every remaining column as the feature matrix.
# Both are converted to plain NumPy arrays, so column names are not carried
# into the models. `df` itself is left untouched (drop returns a new frame).
y = df["fire"].to_numpy()
X = df.drop(columns="fire").to_numpy()


In [20]:
# Sanity check: (n_rows, n_columns) of the loaded frame. The column count
# still includes the "fire" target, since `df` was not modified when X was built.
df.shape

(94738, 16)

In [21]:
# NOTE: disabled experiment -- SMOTE oversampling of the training split.
#
# This code used to be parked inside a bare triple-quoted string. A string
# literal is still evaluated as an expression, so the cell echoed the whole
# snippet back as its output. Real comments keep it inert and silent.
#
# Before re-enabling:
#   * it requires the third-party `imbalanced-learn` package (`imblearn`);
#   * it rebinds X / y to a DataFrame / Series (no `.values`) and performs its
#     own train/test split with the same parameters as the split cell below,
#     so it would replace that cell rather than complement it;
#   * only the training split is resampled -- the test split stays untouched;
#   * move both imports to the imports cell at the top of the notebook.
#
# from sklearn.model_selection import train_test_split
#
# X = df.drop(columns=["fire"])     # features
# y = df["fire"]                    # target
#
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.2, random_state=42, stratify=y
# )
#
# from imblearn.over_sampling import SMOTE
#
# sm = SMOTE(k_neighbors=5, random_state=42)
#
# X_train, y_train = sm.fit_resample(X_train, y_train)

In [22]:
# ===============================================================
# 3. TRAIN/TEST SPLIT
# ===============================================================
# %%
# Hold out 20% of the rows for evaluation. The split is stratified on the
# target and uses a fixed seed so it is repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)


In [23]:
# ===============================================================
# 4. FEATURE SCALING
# Scaling is VERY important for KNN, optional for tree models
# ===============================================================
# %%
# Fit the scaler on the training rows only, then apply that same fitted
# mapping to both splits so no test-set statistics enter the fit.
scaler = MinMaxScaler().fit(X_train)

# NOTE: this rebinds X_train / X_test to their scaled versions; the unscaled
# splits are no longer reachable under these names after this cell.
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


In [24]:
# %%
# Section banner: rule, title, rule -- one item per output line.
print(
    "===========================================",
    "K-NEAREST NEIGHBORS (SKLEARN)",
    "===========================================",
    sep="\n",
)

# Fit a 5-neighbour classifier on the scaled training split (fit returns the
# estimator itself), then predict labels for the held-out rows.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
knn_preds = knn.predict(X_test)

# Report overall accuracy, then the per-class breakdown and raw counts.
print("Accuracy:", accuracy_score(y_test, knn_preds))
print("\nClassification Report:", classification_report(y_test, knn_preds), sep="\n")
print("\nConfusion Matrix:", confusion_matrix(y_test, knn_preds), sep="\n")


K-NEAREST NEIGHBORS (SKLEARN)
Accuracy: 0.9641123073675322

Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.98      0.98     18032
           1       0.64      0.60      0.62       916

    accuracy                           0.96     18948
   macro avg       0.81      0.79      0.80     18948
weighted avg       0.96      0.96      0.96     18948


Confusion Matrix:
[[17721   311]
 [  369   547]]


In [25]:
# %%
# Section banner: rule, title, rule -- one item per output line.
print(
    "===========================================",
    "DECISION TREE (SKLEARN)",
    "===========================================",
    sep="\n",
)

# Depth-limited tree with a fixed seed; fit returns the estimator itself.
dt = DecisionTreeClassifier(
    random_state=42,
    max_depth=10,
    min_samples_split=4,
).fit(X_train, y_train)
dt_preds = dt.predict(X_test)

# Report overall accuracy, then the per-class breakdown and raw counts.
print("Accuracy:", accuracy_score(y_test, dt_preds))
print("\nClassification Report:", classification_report(y_test, dt_preds), sep="\n")
print("\nConfusion Matrix:", confusion_matrix(y_test, dt_preds), sep="\n")


DECISION TREE (SKLEARN)
Accuracy: 0.9617901625501373

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98     18032
           1       0.69      0.39      0.50       916

    accuracy                           0.96     18948
   macro avg       0.83      0.69      0.74     18948
weighted avg       0.96      0.96      0.96     18948


Confusion Matrix:
[[17869   163]
 [  561   355]]


In [26]:
# %%
# Section banner: rule, title, rule -- one item per output line.
print(
    "===========================================",
    "RANDOM FOREST (SKLEARN)",
    "===========================================",
    sep="\n",
)

# 100 depth-limited trees with a fixed seed; n_jobs=-1 asks scikit-learn to
# use all available cores. fit returns the estimator itself.
rf = RandomForestClassifier(
    n_jobs=-1,
    random_state=42,
    n_estimators=100,
    max_depth=10,
    min_samples_split=4,
).fit(X_train, y_train)
rf_preds = rf.predict(X_test)

# Report overall accuracy, then the per-class breakdown and raw counts.
print("Accuracy:", accuracy_score(y_test, rf_preds))
print("\nClassification Report:", classification_report(y_test, rf_preds), sep="\n")
print("\nConfusion Matrix:", confusion_matrix(y_test, rf_preds), sep="\n")


RANDOM FOREST (SKLEARN)
Accuracy: 0.9605763141228626

Classification Report:
              precision    recall  f1-score   support

           0       0.96      1.00      0.98     18032
           1       0.79      0.25      0.38       916

    accuracy                           0.96     18948
   macro avg       0.88      0.62      0.68     18948
weighted avg       0.95      0.96      0.95     18948


Confusion Matrix:
[[17972    60]
 [  687   229]]
