In [None]:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [None]:

# Read the processed fire / no-fire table from disk.
df = pd.read_csv("../data/processed/Fire/fnf_cleaned.csv")

# "fire" is the label column; every remaining column is used as a feature.
y = df["fire"].values
X = df.drop(columns=["fire"]).values


In [None]:

# Hold out 20% of the rows for evaluation. stratify=y asks the splitter to
# keep the label proportions of y in both partitions, and the fixed
# random_state makes the partition repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)


In [None]:

# Fit the scaler on the training rows only, then apply that same fitted
# mapping to both partitions so no test-set statistics reach training.
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:

class Node:
    """One node of the decision tree.

    An internal node stores a split rule (``feature`` index and ``threshold``)
    together with its ``left`` and ``right`` children. A leaf stores only
    ``value``, which must be passed by keyword.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, *, value=None):
        # Leaf payload: the class label held by this node (None if unset).
        self.value = value
        # Split rule: column index and the threshold it is compared against.
        self.feature, self.threshold = feature, threshold
        # Child nodes reached on either side of the split.
        self.left, self.right = left, right

    def is_leaf_node(self):
        """Return True when this node carries a value (``value`` is not None)."""
        if self.value is None:
            return False
        return True


In [None]:
# Gini Impurity
def gini(y):
    """Return the Gini impurity of a collection of class labels.

    The impurity is ``1 - sum(p_c ** 2)``, where ``p_c`` is the fraction of
    entries equal to class ``c``.

    Parameters
    ----------
    y : array-like
        Class labels. Converted with ``np.asarray`` so plain lists are
        handled the same way as arrays.

    Returns
    -------
    float
        0.0 when every label is identical (or when ``y`` is empty); larger
        values mean the labels are spread over more classes.
    """
    labels = np.asarray(y)
    if labels.size == 0:
        # An empty collection has no class mixture; report it as pure.
        return 0.0

    # A single call yields every class count, so y is not rescanned per class.
    _, counts = np.unique(labels, return_counts=True)
    proportions = counts / labels.size
    return 1.0 - np.sum(proportions ** 2)


In [None]:
# Split Dataset
def split_dataset(X, y, feature, threshold):
    """Partition samples by comparing one feature column against a threshold.

    Rows whose value in column ``feature`` is ``<= threshold`` form the left
    part; rows whose value is ``> threshold`` form the right part. A row whose
    value satisfies neither comparison (e.g. NaN) lands in neither part.

    Returns
    -------
    tuple
        ``(X_left, y_left, X_right, y_right)``.
    """
    column = X[:, feature]
    goes_left = np.flatnonzero(column <= threshold)
    goes_right = np.flatnonzero(column > threshold)
    return X[goes_left], y[goes_left], X[goes_right], y[goes_right]


In [None]:
# Best Split Search
def best_split(X, y):
    """Find the single-feature threshold split with the largest Gini gain.

    A candidate split sends rows with ``X[:, feature] <= threshold`` left and
    rows with ``X[:, feature] > threshold`` right. Candidate thresholds are
    the distinct values observed in each column; candidates that would leave
    one side empty are not considered. The gain of a candidate is the parent
    Gini impurity minus the size-weighted Gini impurity of the two sides
    (an impurity decrease, not an entropy-based information gain).

    Each column is sorted once and class counts are accumulated along the
    sorted order, so all thresholds of a feature are scored in one pass
    instead of re-partitioning the data for every threshold.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
    y : array-like of shape (n_samples,)
        Class labels.

    Returns
    -------
    tuple
        ``(best_feature, best_threshold, best_gain)``. When no candidate
        exists (fewer than two rows, or every column is constant) the result
        is ``(None, None, -1)``. Ties keep the earliest feature and, within a
        feature, the smallest threshold.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    n_samples, n_features = X.shape

    best_feature, best_threshold = None, None
    best_gain = -1

    # With fewer than two rows no split can leave both sides non-empty.
    if n_samples < 2:
        return best_feature, best_threshold, best_gain

    # Encode labels as 0..k-1 and one-hot them once, so that a running sum
    # over rows gives per-class counts for every prefix of a sorted column.
    _, codes = np.unique(y, return_inverse=True)
    codes = codes.ravel()
    onehot = np.zeros((n_samples, codes.max() + 1))
    onehot[np.arange(n_samples), codes] = 1.0

    class_totals = onehot.sum(axis=0)
    parent_impurity = 1.0 - np.sum((class_totals / n_samples) ** 2)

    for feature in range(n_features):
        column = X[:, feature]

        # NaN satisfies neither "<=" nor ">", so such rows belong to neither
        # side of any split on this column; x == x is False only for NaN.
        usable = np.flatnonzero(column == column)
        order = usable[np.argsort(column[usable], kind="stable")]
        values = column[order]
        if values.size < 2:
            continue

        # A cut at position i puts sorted rows 0..i on the left. Only the last
        # occurrence of each distinct value (except the maximum) is a valid
        # cut: it matches "<= value" and keeps the right side non-empty.
        cuts = np.flatnonzero(values[1:] != values[:-1])
        if cuts.size == 0:
            continue

        running = np.cumsum(onehot[order], axis=0)
        left_counts = running[cuts]
        right_counts = running[-1] - left_counts
        n_left = (cuts + 1).astype(float)
        n_right = values.size - n_left

        left_gini = 1.0 - np.sum((left_counts / n_left[:, None]) ** 2, axis=1)
        right_gini = 1.0 - np.sum((right_counts / n_right[:, None]) ** 2, axis=1)

        # Weights use the full parent size, including rows skipped as NaN.
        child_impurity = (n_left / n_samples) * left_gini + (n_right / n_samples) * right_gini
        gains = parent_impurity - child_impurity

        # argmax returns the first maximum, i.e. the smallest tied threshold.
        top = int(np.argmax(gains))
        if gains[top] > best_gain:
            best_gain = gains[top]
            best_feature = feature
            best_threshold = values[cuts[top]]

    return best_feature, best_threshold, best_gain


In [None]:
# Build the Tree (Recursive)
from collections import Counter


class DecisionTree:
    """Binary decision-tree classifier grown by greedy impurity-reducing splits.

    The tree is built recursively: at each node the module-level helper
    ``best_split`` proposes a ``(feature, threshold, gain)`` triple,
    ``split_dataset`` partitions the rows accordingly, and the two parts are
    grown as children. Growth stops at ``max_depth``, at nodes with a single
    class, at nodes with fewer than ``min_samples_split`` rows, or when the
    best available gain is below ``min_gain``.

    Parameters
    ----------
    max_depth : int, default 10
        Depth at which nodes are turned into leaves.
    min_samples_split : int, default 2
        Nodes holding fewer rows than this become leaves.
    min_gain : float, default 1e-7
        Smallest gain for which a split is still performed.
    """

    def __init__(self, max_depth=10, min_samples_split=2, min_gain=1e-7):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_gain = min_gain
        self.root = None  # set by fit()

    def fit(self, X, y):
        """Grow the tree from training data and return ``self``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, ``y`` is not 1-D, their lengths differ, or
            there are no samples.
        """
        # Coerce up front so lists and pandas objects are indexed positionally.
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (n_samples, n_features); got ndim={X.ndim}")
        if y.ndim != 1:
            raise ValueError(f"y must be 1-D (n_samples,); got ndim={y.ndim}")
        if len(X) != len(y):
            raise ValueError(f"X and y disagree on sample count: {len(X)} vs {len(y)}")
        if len(y) == 0:
            # A leaf needs at least one label to take a majority vote over.
            raise ValueError("cannot fit a decision tree on zero samples")

        self.root = self._grow_tree(X, y)
        return self

    def _grow_tree(self, X, y, depth=0):
        """Recursively build and return the subtree for the rows in ``X``/``y``."""
        num_samples = len(y)
        num_labels = len(np.unique(y))

        # Stopping conditions: depth budget spent, node already pure, or too
        # few rows to be worth splitting.
        if (depth >= self.max_depth
                or num_labels == 1
                or num_samples < self.min_samples_split):
            return Node(value=self._leaf_value(y))

        feature, threshold, gain = best_split(X, y)

        # No candidate split exists, or the best one does not help enough.
        if feature is None or gain < self.min_gain:
            return Node(value=self._leaf_value(y))

        X_left, y_left, X_right, y_right = split_dataset(X, y, feature, threshold)

        left_child = self._grow_tree(X_left, y_left, depth + 1)
        right_child = self._grow_tree(X_right, y_right, depth + 1)

        return Node(feature, threshold, left_child, right_child)

    def _leaf_value(self, y):
        """Return the most frequent label in ``y`` (first seen wins a tie)."""
        return Counter(y).most_common(1)[0][0]


In [None]:
# Prediction Logic
def _predict_single(x, node):
    if node.is_leaf_node():
        return node.value
    if x[node.feature] <= node.threshold:
        return _predict_single(x, node.left)
    return _predict_single(x, node.right)


class DecisionTree(DecisionTree):
    """Rebinds ``DecisionTree`` to a subclass of the class previously bound to
    that name, adding a ``predict`` method on top of what it inherits."""

    def predict(self, X):
        """Return a NumPy array holding one predicted label per row of ``X``."""
        labels = []
        for row in X:
            labels.append(_predict_single(row, self.root))
        return np.array(labels)


In [33]:
# Train the Decision Tree

# Grow a tree limited to depth 10 in which nodes holding fewer than 4 rows
# are not split further, then label the held-out rows with it.
tree = DecisionTree(min_samples_split=4, max_depth=10)
tree.fit(X_train, y_train)

preds = tree.predict(X_test)


In [None]:

# Report accuracy, the per-class classification report and the confusion
# matrix for the held-out predictions; each heading is followed by its
# body on the next line.
print("Accuracy:", accuracy_score(y_test, preds))
print("\nClassification Report:", classification_report(y_test, preds), sep="\n")
print("\nConfusion Matrix:", confusion_matrix(y_test, preds), sep="\n")


Accuracy: 0.9618429385687144

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98     18032
           1       0.68      0.40      0.50       916

    accuracy                           0.96     18948
   macro avg       0.83      0.69      0.74     18948
weighted avg       0.96      0.96      0.96     18948


Confusion Matrix:
[[17863   169]
 [  554   362]]


In [None]:
# Different Depths
# Refit the tree at several depth limits and print the test accuracy of each.
# NOTE(review): this sweep leaves min_samples_split at the class default,
# whereas the model trained earlier passes min_samples_split=4, so the
# Depth=10 row is not the same configuration as that model; confirm intent.
# NOTE: the loop rebinds `tree`, `preds` and `acc`; after it finishes they
# refer to the last depth tried, not to the model trained earlier.
for depth in [3, 5, 7, 10, 15]:
    tree = DecisionTree(max_depth=depth)
    tree.fit(X_train, y_train)
    preds = tree.predict(X_test)
    acc = accuracy_score(y_test, preds)
    print(f"Depth={depth} → Accuracy={acc:.4f}")
