In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')

In [2]:
# Read the forest-fire dataset from a CSV located next to the notebook
# (the path has no directory component), then preview its first ten rows.
data = pd.read_csv("forest_fire_realistic_balanced_v2.csv")
data.head(n=10)

Unnamed: 0,temperature,humidity,smoke,temp_max,temp_min,pressure,clouds_all,wind_speed,wind_deg,wind_gust,temp_local,fire_risk
0,56.181018,18.513293,523.411367,59.544533,53.321038,978.727104,0.648257,0.775989,259.296453,22.83945,22.418449,0
1,142.607146,54.190095,493.957598,146.590553,138.579984,994.687132,0.172386,3.735451,247.421882,13.134012,19.974726,1
2,109.799091,87.294584,1812.509161,111.051431,105.998287,1070.909479,0.872395,16.624916,34.471512,18.122757,10.569235,1
3,89.798773,73.222489,499.0924,92.923143,89.029273,968.000877,0.613116,15.335367,332.126066,10.901207,36.436,0
4,23.402796,80.656115,543.899452,26.261526,22.656549,1073.929937,0.157204,7.012854,204.649993,15.750864,28.59745,0
5,23.399178,65.878337,1518.796525,27.56333,22.058306,917.626886,0.962338,7.536213,130.941188,8.514398,51.94206,0
6,8.712542,69.227656,899.479685,13.242977,6.907168,1055.359687,0.518365,10.671089,272.35389,7.528975,1.926575,0
7,129.926422,84.919565,1553.421114,129.987206,127.884144,1069.509527,0.072898,0.00482,92.651565,14.455745,38.632076,1
8,90.167252,24.966801,130.732315,93.537351,86.768766,936.363531,0.626833,4.824887,249.663541,12.453242,45.776933,0
9,106.210887,48.942496,975.142387,106.470066,105.927485,986.069306,0.253199,4.164636,14.296035,14.488409,45.569194,1


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

In [4]:
# Split the frame into predictors (X) and the binary target (y).
#
# 'Unnamed: 0' is the name pandas gives to a header-less first CSV column; in
# the preview it holds 0, 1, 2, ... and so appears to be a row index that was
# written into the file. It carries no physical signal, so it must not be fed
# to the models as a feature. errors='ignore' keeps this cell working if the
# CSV is later re-exported without that column (a missing 'fire_risk' still
# fails loudly on the next line).
# NOTE: this reduces the feature count seen by the scaler and every model by
# one, so any consumer of the saved artefacts must pass the same columns.
X = data.drop(columns=['fire_risk', 'Unnamed: 0'], errors='ignore')
y = data['fire_risk']

In [5]:
# Hold out 20% of the rows for evaluation. The split is seeded for
# reproducibility and stratified on the label so both partitions keep the
# same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training partition only, then apply
# that same fitted transform to both partitions so no test-set information
# leaks into preprocessing.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


In [6]:
# Candidate classifiers, keyed by the label used in the printed reports.
# Insertion order is the order in which they are trained and reported.
models = dict([
    ("LogisticRegression", LogisticRegression(max_iter=1000)),
    ("DecisionTree", DecisionTreeClassifier(random_state=42)),
    ("RandomForest", RandomForestClassifier(n_estimators=300, random_state=42)),
    ("GradientBoosting", GradientBoostingClassifier(random_state=42)),
    ("AdaBoost", AdaBoostClassifier(random_state=42)),
    ("SVM", SVC(probability=True, random_state=42)),
    ("KNN", KNeighborsClassifier()),
    ("Naive Bayes", GaussianNB()),
])

# Maps each model label to its accuracy on the held-out test partition.
results = {}

for label, clf in models.items():
    # Fit on the scaled training data, then score on the scaled test data.
    clf.fit(X_train, y_train)
    predictions = clf.predict(X_test)
    score = accuracy_score(y_test, predictions)
    results[label] = score

    # Per-model report: headline accuracy plus per-class precision/recall/F1.
    print(f"\n=== {label} ===")
    print("Accuracy:", round(score, 4))
    print(classification_report(y_test, predictions))



=== LogisticRegression ===
Accuracy: 0.96
              precision    recall  f1-score   support

           0       0.96      0.96      0.96       100
           1       0.96      0.96      0.96       100

    accuracy                           0.96       200
   macro avg       0.96      0.96      0.96       200
weighted avg       0.96      0.96      0.96       200


=== DecisionTree ===
Accuracy: 0.89
              precision    recall  f1-score   support

           0       0.89      0.89      0.89       100
           1       0.89      0.89      0.89       100

    accuracy                           0.89       200
   macro avg       0.89      0.89      0.89       200
weighted avg       0.89      0.89      0.89       200


=== RandomForest ===
Accuracy: 0.94
              precision    recall  f1-score   support

           0       0.95      0.93      0.94       100
           1       0.93      0.95      0.94       100

    accuracy                           0.94       200
   macro av

In [7]:
# Leaderboard: list every model's test accuracy, best first. sorted() is
# stable, so models with equal accuracy keep the order in which they were
# trained.
print("\n📈 Model Accuracy Comparison:")
for name, acc in sorted(results.items(), key=lambda item: item[1], reverse=True):
    print(f"{name}: {acc:.4f}")



📈 Model Accuracy Comparison:
LogisticRegression: 0.9600
AdaBoost: 0.9550
RandomForest: 0.9400
GradientBoosting: 0.9300
SVM: 0.9300
KNN: 0.8950
DecisionTree: 0.8900
Naive Bayes: 0.8550


In [8]:
# Count the rows in each fire_risk class to check how balanced the target is.
data.value_counts('fire_risk')

fire_risk
0    500
1    500
Name: count, dtype: int64

In [9]:
from sklearn.linear_model import LogisticRegression
import joblib

# Refit a fresh logistic regression (the top scorer in the accuracy
# comparison) on the scaled training partition so that a standalone
# estimator can be exported.
best_model = LogisticRegression(max_iter=1000)
best_model.fit(X_train, y_train)

# Persist the classifier and the fitted scaler as two separate files.
# The model was trained on scaler-transformed features, so inference code
# must load both and apply the scaler before calling predict.
joblib.dump(best_model, "forest_fire_model.pkl")
joblib.dump(scaler, "scaler.pkl")
print("✅ Model and scaler saved successfully.")


✅ Model and scaler saved successfully.
