In [None]:
# Import Libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import pickle

In [None]:
# Load the forest fire dataset
# The path is a raw string on purpose: in a normal string literal the "\f" in
# "\forest fire project" and "\forestfires.csv" is the form-feed escape, which
# silently corrupts the Windows path and makes read_csv fail.
# Adjust DATA_PATH if the CSV lives somewhere else on your machine.
DATA_PATH = r"D:\forest fire project\forestfires.csv"
forest_data = pd.read_csv(DATA_PATH)

In [None]:
# Show the first rows of the dataset as a quick sanity check
preview_rows = forest_data.head()
print(preview_rows)

In [None]:
# Report the dataset dimensions as a (rows, columns) tuple
dataset_shape = forest_data.shape
print("\nDataset shape:", dataset_shape)

In [None]:
# Print a heading followed by the descriptive statistics of the dataset
print("Statistical measures of the data:")
summary_stats = forest_data.describe()
print(summary_stats)

In [None]:
# Create a new column 'status': 'Fire' where 'area' is greater than 0, else 'No Fire'
# The comparison is vectorised over the whole column instead of calling a
# Python lambda once per row. Missing 'area' values compare as False and so
# become 'No Fire', which is the same label the per-row version produced.
forest_data['status'] = np.where(forest_data['area'] > 0, 'Fire', 'No Fire')

# Display the count of outcomes
print("Status counts:")
print(forest_data['status'].value_counts())


In [None]:
# Preliminary separation of features and target.
# NOTE(review): 'month' and 'day' are only encoded in a later cell, so X
# presumably still contains text columns here, and y holds the 'Fire' /
# 'No Fire' strings. Both X and y are reassigned further down before the
# train/test split, so this cell only provides an early look at the split.
y = forest_data['status']                          # target labels (strings)
X = forest_data.drop(['area', 'status'], axis=1)   # every column except 'area' and 'status'

In [None]:
# Encode month/day as numeric codes
# The categorical form is held in a variable first so the code -> label lookup
# can be kept: the integer codes alone do not say which month or day they
# stand for. pandas assigns codes in the sorted order of the labels
# (alphabetical for text labels), so code 0 is not necessarily the first
# calendar month or weekday.
# Run this on a freshly loaded frame; re-running it on already-encoded columns
# leaves the codes unchanged but makes the lookups map each code to itself.
month_categories = forest_data['month'].astype('category')
day_categories = forest_data['day'].astype('category')

# Lookup tables: integer code -> original label
month_code_labels = dict(enumerate(month_categories.cat.categories))
day_code_labels = dict(enumerate(day_categories.cat.categories))
print("Month codes:", month_code_labels)
print("Day codes:", day_code_labels)

# Same codes as before: the columns are replaced by their category codes
forest_data['month'] = month_categories.cat.codes
forest_data['day'] = day_categories.cat.codes

In [None]:
# Model inputs: every column except 'area' (the source of the target) and the
# derived 'status' label
X = forest_data.drop(['area', 'status'], axis=1)

# Binary target: 1 where 'area' is greater than 0 (Fire), otherwise 0 (No Fire)
y = forest_data['area'].gt(0).astype(int)

In [None]:
# Print a heading and the first five rows for the inputs, then for the labels
for heading, preview in (
    ("\nInput features (first 5 rows):", X.head()),
    ("\nLabels (first 5 rows):", y.head()),
):
    print(heading)
    print(preview)

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# the same on every run
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Scale Features
# The scaler is fitted on the training rows only; the same fitted transform is
# then applied to both the training and the test split.

scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Train Random Forest Classifier
# 100 trees, with a fixed random_state so repeated runs build the same forest

forest_params = {'n_estimators': 100, 'random_state': 42}
model = RandomForestClassifier(**forest_params)
model.fit(X_train_scaled, y_train)

In [None]:
# Evaluate the model

# Predict on both splits first, then compute and report the scores
y_train_pred = model.predict(X_train_scaled)
y_test_pred = model.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)
test_report = classification_report(y_test, y_test_pred)

# Training accuracy
print("\nTraining Accuracy:", train_accuracy)

# Testing accuracy and per-class report
print("\nTesting Accuracy:", test_accuracy)
print("\nClassification Report:\n", test_report)

In [None]:
# Save the model and scaler
# Each fitted object is pickled to its own file, using paths relative to the
# current working directory.
artifacts_to_save = (
    ('forest_fire_model.pkl', model),
    ('scaler.pkl', scaler),
)
for pickle_path, fitted_object in artifacts_to_save:
    with open(pickle_path, 'wb') as file:
        pickle.dump(fitted_object, file)

print("\nModel and scaler saved successfully!")


In [None]:
# Load model and make a prediction
# NOTE: pickle.load can execute arbitrary code stored in the file; only load
# pickles that you created yourself or otherwise trust.

# Load model
with open('forest_fire_model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)

# Load scaler
with open('scaler.pkl', 'rb') as file:
    loaded_scaler = pickle.load(file)

# Sample input for prediction
# Replace the values with actual data. 'month' and 'day' must be the integer
# codes produced by the encoding step (pandas category codes), not calendar
# numbers.
# NOTE(review): the feature names are taken from the column list that
# previously documented this sample; confirm they match the CSV header. A
# mismatch raises a KeyError below instead of silently feeding a value into
# the wrong column.
sample_values = {
    'month': 7, 'day': 5, 'X': 7, 'Y': 5,
    'FFMC': 85.0, 'DMC': 30.0, 'DC': 400.0, 'ISI': 5.0,
    'temp': 30.0, 'RH': 40.0, 'wind': 10.0, 'rain': 0.0,
}

# The scaler was fitted on a DataFrame, so it remembers its column names.
# Building the sample by name and reordering it to that exact column order
# removes the dependence on a hand-maintained positional list and avoids the
# "X does not have valid feature names" warning a bare array would trigger.
feature_order = list(loaded_scaler.feature_names_in_)
sample_input = pd.DataFrame([sample_values])[feature_order]

# Scale input
sample_input_scaled = loaded_scaler.transform(sample_input)

# Predict fire risk (the model was trained on the labels 1 = Fire, 0 = No Fire)
prediction = loaded_model.predict(sample_input_scaled)
print("\nPredicted Fire Risk:", prediction[0])

In [None]:
# Pie chart for Fire vs No Fire
fire_counts = forest_data['status'].value_counts()

# value_counts() orders the labels by frequency, so colours passed as a plain
# positional list would follow whichever class happens to be larger. Look the
# colour up per label instead, so 'Fire' is always red and 'No Fire' is always
# green regardless of the class balance.
status_colors = {'Fire': 'red', 'No Fire': 'green'}
slice_colors = [status_colors[label] for label in fire_counts.index]

plt.figure(figsize=(6,6))
plt.pie(fire_counts, labels=fire_counts.index, autopct='%1.1f%%', colors=slice_colors, startangle=90)
plt.title('Forest Fire Occurrences')
plt.show()


In [None]:

# Feature names and importance values, taken from the training frame's columns
# and the fitted forest respectively
features = X.columns
importances = model.feature_importances_

# Reorder both from the least to the most important feature
sorted_idx = importances.argsort()
sorted_features = features[sorted_idx]
sorted_importances = importances[sorted_idx]

# Line plot, drawn through the explicit figure/axes objects
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(sorted_features, sorted_importances, marker='o', linestyle='-', color='blue')
ax.set_title('Feature Importance for Forest Fire Prediction', fontsize=16)
ax.set_xlabel('Features', fontsize=12)
ax.set_ylabel('Importance', fontsize=12)
ax.tick_params(axis='x', labelrotation=45)  # tilt the feature names so they do not overlap
ax.grid(True)
fig.tight_layout()
plt.show()


In [None]:
# Prepare data: number of rows for each (month, status) pair, with one column
# per status and 0 where a combination never occurs
monthly_counts = forest_data.groupby(['month','status']).size().unstack(fill_value=0)

# Heatmap
plt.figure(figsize=(8,6))
sns.heatmap(monthly_counts, annot=True, fmt='d', cmap='YlOrRd')
plt.title('Monthly Fire Occurrences Heatmap')
plt.xlabel('Status')
# 'month' holds pandas category codes, which follow the sorted order of the
# original labels rather than the calendar, so the axis is labelled as a code
# instead of claiming that 0 is January and 11 is December.
plt.ylabel('Month (category code)')
plt.show()