In [1]:
# Initial imports
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [2]:
# Loading data
file_path = Path("data/WF_Incident.xlsx")
df = pd.read_excel(file_path)
df.head()

Unnamed: 0,OBJECTID,SourceOID,CreatedBySystem,IncidentSize,DiscoveryAcres,EstimatedCostToDate,FireCause,FireDiscoveryDateTime,GACC,InitialLatitude,InitialLongitude,ModifiedBySystem,POOCounty,EstimatedFinalCost,CreatedOnDateTime_dt,ModifiedOnDateTime_dt,IncidentComplexityLevel,x,y
0,427774,33042061,wildcade,26.0,38.0,,0,2024-12-09 15:33:07,NWCC,42.944486,-122.859618,wildcade,Douglas,,2024-12-09 15:33:31.977,2025-01-22 16:49:11.757,,-122.859618,42.944486
1,428074,33042361,wildcade,2225.0,,,0,2024-12-11 15:06:00,SACC,33.761389,-95.951944,wildcade,Fannin,,2024-12-11 15:07:21.483,2025-01-25 23:22:47.320,,-95.951944,33.761389
2,428555,33042842,wildcade,8346.0,0.1,11840618.08,1,2024-12-15 01:31:00,SWCC,34.384474,-111.455141,ics209,Gila,20000000.0,2024-12-15 18:59:55.810,2025-01-27 22:33:09.730,Type 4 Incident,-111.059,34.371
3,429179,33043466,wildcade,153.0,153.0,,0,2024-12-20 15:05:00,SACC,30.644185,-98.096282,wildcade,Burnet,,2024-12-20 15:07:03.760,2025-01-18 14:35:32.413,,-98.096282,30.644185
4,429719,33044007,wildcade,0.5,0.5,,1,2024-12-22 22:43:00,SWCC,34.963102,-106.277343,INFORM_Inspector,Bernalillo,,2024-12-26 14:15:09.293,2025-01-28 21:29:23.300,,-106.277343,34.963102


In [3]:
# Define features set
X = df.copy()
X = df.drop(["CreatedBySystem", "FireDiscoveryDateTime", "ModifiedBySystem", "DiscoveryAcres",
             "CreatedOnDateTime_dt", "ModifiedOnDateTime_dt", "IncidentComplexityLevel", "EstimatedCostToDate", "EstimatedFinalCost",  
             "POOCounty", "GACC", "x", "y"], axis=1, errors='ignore')
X

Unnamed: 0,OBJECTID,SourceOID,IncidentSize,FireCause,InitialLatitude,InitialLongitude
0,427774,33042061,26.0,0,42.944486,-122.859618
1,428074,33042361,2225.0,0,33.761389,-95.951944
2,428555,33042842,8346.0,1,34.384474,-111.455141
3,429179,33043466,153.0,0,30.644185,-98.096282
4,429719,33044007,0.5,1,34.963102,-106.277343
...,...,...,...,...,...,...
253,437854,33052159,26.0,0,36.713820,-83.961680
254,437844,33052146,,0,34.129930,-117.933600
255,437864,33052166,30.0,1,36.042132,-87.871429
256,437846,33052150,,0,34.719390,-117.951850


In [4]:
# Define target vector
y = df["FireCause"].values
y

array([0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0])

In [5]:
# Splitting into Train and Test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [6]:
# Creating StandardScaler instance
scaler = StandardScaler()

In [7]:
# Fitting Standard Scaller
X_scaler = scaler.fit(X_train)

In [8]:
# Scaling data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [9]:
# Creating the decision tree classifier instance
model = tree.DecisionTreeClassifier(random_state=42)

In [10]:
# Fitting the model
model = model.fit(X_train_scaled, y_train)

In [11]:
# Making predictions using the testing data
predictions = model.predict(X_test_scaled)

In [12]:
# Create the confusion matrix DataFrame
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(
    cm, index=["Actual 0 (Natural/Undetermined)", "Actual 1 (Human)"], columns=["Predicted 0 (Natural/Undetermined)", "Predicted 1 (Human)"]
)

print(cm_df)

                                 Predicted 0 (Natural/Undetermined)  \
Actual 0 (Natural/Undetermined)                                  59   
Actual 1 (Human)                                                  0   

                                 Predicted 1 (Human)  
Actual 0 (Natural/Undetermined)                    0  
Actual 1 (Human)                                   6  


In [13]:
acc_score = accuracy_score(y_test, predictions)

In [14]:
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Predicted 0 (Natural/Undetermined),Predicted 1 (Human)
Actual 0 (Natural/Undetermined),59,0
Actual 1 (Human),0,6


Accuracy Score : 1.0
Classification Report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        59
           1       1.00      1.00      1.00         6

    accuracy                           1.00        65
   macro avg       1.00      1.00      1.00        65
weighted avg       1.00      1.00      1.00        65

