In [1]:

import pandas as pd
import numpy as np
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [2]:

# Load the cleaned dataset, then separate the "fire" label column from the
# remaining feature columns.
df = pd.read_csv("../data/processed/Fire/fnf_cleaned.csv")

y = df["fire"]
X = df.drop("fire", axis=1)


In [3]:
# Sanity check: (rows, columns) of the loaded frame, label column included.
df.shape

(94738, 16)

In [4]:
# Train/Test Split
# Hold out 20% of the rows for testing. Stratifying on the label keeps the
# class proportions the same in both parts; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X.values,
    y.values,
    stratify=y,
    test_size=0.2,
    random_state=42,
)


In [5]:
# Normalize Features
# Fit the min-max scaler on the training split only, then apply the same
# fitted transform to both splits so no test-set statistics leak into training.
scaler = MinMaxScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


In [6]:
# KNN From Scratch


def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between ``x1`` and ``x2``.

    The element-wise difference is squared, summed over every element, and
    the square root of that total is returned as a single scalar.
    """
    delta = x1 - x2
    squared_total = np.sum(np.square(delta))
    return np.sqrt(squared_total)


In [7]:

class KNN:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    ``fit`` only stores the training data; all computation happens at
    prediction time.

    Parameters
    ----------
    k : int, default 5
        Number of nearest training samples that vote on each prediction.
        If ``k`` is larger than the training set, every training sample votes.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """

    def __init__(self, k=5):
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    def fit(self, X, y):
        """Store the training samples ``X`` and their labels ``y``.

        Both inputs are converted to NumPy arrays so that neighbours are
        always selected by position, whatever container was passed in.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) == 0:
            raise ValueError("cannot fit KNN on an empty training set")
        if len(X) != len(y):
            raise ValueError(
                f"X and y have inconsistent lengths: {len(X)} != {len(y)}"
            )
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Return an array with one predicted label per sample in ``X``."""
        predictions = [self._predict_single(x) for x in np.asarray(X)]
        return np.array(predictions)

    def _predict_single(self, x):
        """Return the majority label among the ``k`` training samples nearest to ``x``."""
        # Distances to every training sample in a single vectorised pass.
        # A Python-level loop with one distance call per training row is far
        # too slow once the training set has tens of thousands of rows.
        diff = (self.X_train - x).reshape(len(self.X_train), -1)
        distances = np.sqrt(np.sum(diff ** 2, axis=1))

        # Stable sort: equidistant samples keep their training order, so the
        # selected neighbours (and therefore the vote) are deterministic.
        k_indices = np.argsort(distances, kind="stable")[:self.k]
        k_neighbor_labels = self.y_train[k_indices]

        # Majority vote; on a tied count the label encountered first
        # (i.e. belonging to the nearer neighbour) wins.
        most_common = Counter(k_neighbor_labels).most_common(1)
        return most_common[0][0]


In [8]:
# training
# Fit a 5-neighbour model on the training split and predict a label for
# every test row; `preds` is what the evaluation below is computed from.
knn = KNN(k=5)
knn.fit(X=X_train, y=y_train)
preds = knn.predict(X=X_test)


KeyboardInterrupt: 

In [None]:
# %%
# Headline accuracy first, then each detailed report under its own heading.
print("Accuracy:", accuracy_score(y_test, preds))
for heading, metric in (
    ("\nClassification Report:", classification_report),
    ("\nConfusion Matrix:", confusion_matrix),
):
    print(heading)
    print(metric(y_test, preds))


In [None]:
# test Different Values of K
# Re-fit and score the classifier for each odd k from 1 to 11, recording the
# test accuracy per k so the results can be compared afterwards.

accuracies = {}

for k in range(1, 12, 2):
    knn = KNN(k=k)
    knn.fit(X_train, y_train)
    preds = knn.predict(X_test)
    accuracies[k] = acc = accuracy_score(y_test, preds)
    print(f"K = {k} --> Accuracy = {acc:.4f}")

accuracies


In [None]:

import matplotlib.pyplot as plt

# Test accuracy as a function of k, read from the `accuracies` mapping
# (k -> accuracy).
fig, ax = plt.subplots()
ax.plot(list(accuracies.keys()), list(accuracies.values()), marker="o")
ax.set_xlabel("K")
ax.set_ylabel("Accuracy")
# The title uses a real em dash; the earlier literal was a mis-decoded
# UTF-8 sequence that rendered as garbage characters in the figure.
ax.set_title("KNN From Scratch — K vs Accuracy")
ax.grid()
plt.show()
