In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import joblib

In [3]:
df= pd.read_csv("uttarakhand_forest_fire_dataset (2).csv")
df.head(10)

Unnamed: 0,temperature,humidity,smoke,temp_max,temp_min,wind_speed,wind_gust,fire_risk
0,26.0,44.0,28.2,27.8,18.6,14.4,14.8,1
1,18.0,31.0,89.4,23.5,11.6,7.1,,1
2,39.3,47.7,55.5,40.8,38.6,8.8,28.2,1
3,38.7,45.3,52.2,42.3,33.4,12.3,35.8,1
4,20.0,-2.2,163.7,21.3,18.5,7.4,14.2,1
5,12.0,52.0,17.0,2.9,17.5,15.9,18.1,0
6,15.0,60.0,26.6,15.7,8.8,10.7,19.0,0
7,18.0,41.0,0.6,23.2,13.6,2.8,9.2,0
8,10.0,63.0,112.9,13.9,3.6,8.3,18.4,1
9,18.0,38.0,7.5,18.7,13.2,23.5,28.1,1


In [4]:
df = df.dropna()

In [5]:
corr = df.corr()
print("\nðŸ“Š Feature Correlation with fire_risk:")
print(corr["fire_risk"].sort_values(ascending=False))


ðŸ“Š Feature Correlation with fire_risk:
fire_risk      1.000000
smoke          0.613433
wind_gust      0.279736
temperature    0.194852
wind_speed     0.192132
temp_min       0.190389
temp_max       0.174644
humidity      -0.182384
Name: fire_risk, dtype: float64


In [6]:
useful_features = [
    'temperature',
    'humidity',
    'smoke',
    'temp_max',
    'temp_min',
    'wind_speed',
    'wind_gust'
]

In [7]:
X = df[useful_features]
y = df['fire_risk']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

In [9]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=200, random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=200, random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "SVM": SVC(probability=True, random_state=42)
}

In [11]:
results = {}
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    cv_acc = cross_val_score(model, X_train_scaled, y_train, cv=5).mean()
    results[name] = round(cv_acc, 4)
    print(f"{name}: {cv_acc:.4f}")

Logistic Regression: 0.8585
Random Forest: 0.9874
Gradient Boosting: 0.9945
Decision Tree: 0.9835
SVM: 0.9489


In [12]:
best_model_name = max(results, key=results.get)
best_model = models[best_model_name]

print("\nðŸ”¥ Best Model Selected:", best_model_name)
print("âœ… Cross-validated Accuracy:", results[best_model_name])


ðŸ”¥ Best Model Selected: Gradient Boosting
âœ… Cross-validated Accuracy: 0.9945


In [13]:
joblib.dump(best_model, "forest_fire_model.pkl")
joblib.dump(scaler, "scaler.pkl")
print("\nâœ… Model and Scaler Saved Successfully!")


âœ… Model and Scaler Saved Successfully!
