In [1]:
# 1. Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
import pickle
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import os

 Simple predictive model for district-wise fire risk using climate variables

In [3]:
# Relative path to the input CSV; resolved against the notebook's working directory.
file_path = "../data/processed/combined_fire_climate.csv"
df = pd.read_csv(file_path)

In [4]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, r2_score, classification_report
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import warnings

# Silence the scikit-learn UserWarning emitted when a frame with column names
# is passed to an estimator that was fitted on a plain array.
warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    message="X has feature names, but .* was fitted without feature names",
)

# Encode the month as a point on the unit circle so that the 12 -> 1
# transition is as close as any other pair of neighbouring months.
month_angle = 2 * np.pi * df['MONTH'] / 12
df['Month_sin'] = np.sin(month_angle)
df['Month_cos'] = np.cos(month_angle)

# Hand-crafted fire-risk index: a fixed linear combination of the climate columns.
df['FireRisk'] = (0.4*df['MaxTemp']) + (0.3*(100 - df['Humidity'])) + \
                 (0.2*df['WindSpeed']) - (0.1*df['Prep'])

# Binary label: 1 when at least one fire was recorded in the row, else 0.
df['Fire_Occurred'] = (df['Fire_Count'] > 0).astype(int)

# One-hot encode districts. The fitted encoder is reused at prediction time.
district_encoder = OneHotEncoder(sparse_output=False)
district_encoded = district_encoder.fit_transform(df[['DISTRICT']])
district_df = pd.DataFrame(
    district_encoded,
    columns=[f"DISTRICT_{d}" for d in district_encoder.categories_[0]],
    # Reuse df's index: concat(axis=1) aligns on index labels, so a fresh
    # RangeIndex here would misalign rows (and introduce NaNs) whenever df
    # does not carry the default 0..n-1 index.
    index=df.index,
)

# Original columns plus the one-hot district indicators, row-aligned.
processed_data = pd.concat([df, district_df], axis=1)

# Define features and targets
district_cols = [col for col in processed_data.columns if col.startswith('DISTRICT_')]
features = ['Prep', 'AvgTemp', 'MaxTemp', 'Humidity', 'WindSpeed',
            'Month_sin', 'Month_cos', 'LAT', 'LON'] + district_cols

X = processed_data[features]

# NOTE(review): FireRisk is computed earlier in this cell as a fixed linear
# combination of MaxTemp, Humidity, WindSpeed and Prep, all of which are also
# predictors here, so a near-perfect R² for the risk model is expected.
y_risk = processed_data['FireRisk']
y_fire_occurred = processed_data['Fire_Occurred']

# Historical fire occurrence per (district, month), built in a single groupby
# pass. Only calendar months 1..12 are kept; keys use a plain int month so
# lookups like (district, 4) work.
# NOTE(review): these statistics are computed on all rows, including those
# that end up in the test split below.
district_month_fire_stats = {}
for (district_name, month_value), month_data in df.groupby(['DISTRICT', 'MONTH']):
    if month_value not in range(1, 13):
        continue
    total_records = len(month_data)
    fire_records = int((month_data['Fire_Count'] > 0).sum())
    district_month_fire_stats[(district_name, int(month_value))] = {
        'total_records': total_records,
        'fire_records': fire_records,
        'fire_probability': fire_records / total_records,
        'max_count': month_data['Fire_Count'].max(),
    }

# Split BEFORE scaling so the scaler never sees the held-out rows.
X_train_raw, X_test_raw, y_risk_train, y_risk_test, y_fire_train, y_fire_test = train_test_split(
    X, y_risk, y_fire_occurred, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only. Fitting on the full data set would
# leak test-set means and variances into training and bias the test metrics.
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train_raw),
                       columns=features, index=X_train_raw.index)
X_test = pd.DataFrame(scaler.transform(X_test_raw),
                      columns=features, index=X_test_raw.index)

# Full scaled matrix, kept under its original name for any later cell that
# reads it. It is not used for training or evaluation.
X_scaled = pd.DataFrame(scaler.transform(X), columns=features, index=X.index)

# Train risk model
risk_model = RandomForestRegressor(n_estimators=100, random_state=42)
risk_model.fit(X_train, y_risk_train)

# Train fire occurrence model
fire_model = RandomForestClassifier(n_estimators=100, random_state=42)
fire_model.fit(X_train, y_fire_train)

# Risk regressor: predictions and errors on both splits.
risk_train_predictions = risk_model.predict(X_train)
risk_test_predictions = risk_model.predict(X_test)

risk_train_mse = mean_squared_error(y_risk_train, risk_train_predictions)
risk_test_mse = mean_squared_error(y_risk_test, risk_test_predictions)
risk_r2 = r2_score(y_risk_test, risk_test_predictions)

# Fire-occurrence classifier: hard labels on both splits, plus the predicted
# probability of class 1 (fire) on the test split.
fire_train_predictions = fire_model.predict(X_train)
fire_test_predictions = fire_model.predict(X_test)
fire_test_probabilities = fire_model.predict_proba(X_test)[:, 1]

# Accuracy is the share of rows whose predicted label matches the true one.
fire_train_accuracy = np.mean(fire_train_predictions == y_fire_train)
fire_test_accuracy = np.mean(fire_test_predictions == y_fire_test)

# Report both models.
risk_summary = [
    "Fire Risk Model:",
    f"  Training MSE: {risk_train_mse:.2f}",
    f"  Test MSE: {risk_test_mse:.2f}",
    f"  R² Score: {risk_r2:.4f}",
]
print("\n".join(risk_summary))

fire_summary = [
    "\nFire Occurrence Model:",
    f"  Training Accuracy: {fire_train_accuracy:.4f}",
    f"  Test Accuracy: {fire_test_accuracy:.4f}",
    "  Classification Report:",
]
print("\n".join(fire_summary))
print(classification_report(y_fire_test, fire_test_predictions))

# Function to make predictions for a specific district and month with confidence
def predict_for_district_month(district, month, climate_data):
    """Predict fire risk and a blended fire-occurrence confidence for one district/month.

    The fire-occurrence confidence combines the classifier's predicted
    probability with the historical fire frequency stored in
    ``district_month_fire_stats``, then scales the result slightly by the
    predicted risk.

    Args:
        district: District name (string). It is passed to the fitted encoder
            as given when the encoder knows that spelling, otherwise
            lowercased. The historical lookup always uses the lowercase name.
        month: Month number, used for the cyclical features and the
            historical lookup.
        climate_data: Mapping with the keys 'LAT', 'LON', 'Prep', 'AvgTemp',
            'MaxTemp', 'Humidity' and 'WindSpeed'.

    Returns:
        dict with the keys 'risk', 'confidence' (percent),
        'model_fire_probability' (percent), 'historical_fire_probability'
        (percent), 'historical_records', 'historical_fire_records' and
        'risk_category'.

    Raises:
        KeyError: if ``climate_data`` lacks one of the required keys.
        ValueError: presumably raised by the encoder for a district it was not
            fitted on (it is constructed with default unknown-category
            handling) -- confirm against the scikit-learn version in use.
    """
    # Use one spelling for the encoder. The historical lookup below lowercases
    # the name, so accept the same inputs here instead of failing on case alone.
    known_districts = set(district_encoder.categories_[0])
    encoder_district = district if district in known_districts else district.lower()

    # Create input data
    input_data = pd.DataFrame({
        'DISTRICT': [encoder_district],
        'MONTH': [month],
        'LAT': [climate_data['LAT']],
        'LON': [climate_data['LON']],
        'Prep': [climate_data['Prep']],
        'AvgTemp': [climate_data['AvgTemp']],
        'MaxTemp': [climate_data['MaxTemp']],
        'Humidity': [climate_data['Humidity']],
        'WindSpeed': [climate_data['WindSpeed']]
    })

    # Add cyclical month features (same encoding as at training time)
    month_angle = 2 * np.pi * input_data['MONTH'] / 12
    input_data['Month_sin'] = np.sin(month_angle)
    input_data['Month_cos'] = np.cos(month_angle)

    # One-hot encode the district and attach all indicator columns at once,
    # rather than inserting them one by one.
    onehot_columns = [f"DISTRICT_{d}" for d in district_encoder.categories_[0]]
    district_onehot = pd.DataFrame(
        district_encoder.transform(input_data[['DISTRICT']]),
        columns=onehot_columns,
        index=input_data.index,
    )
    input_data = pd.concat([input_data, district_onehot], axis=1)

    # Select features in training order and scale them
    X_pred = input_data[features]
    X_pred_scaled = scaler.transform(X_pred)
    # The forests are fitted on a named-column frame, so pass one back to the
    # forest-level call to avoid a feature-name mismatch warning.
    X_pred_frame = pd.DataFrame(X_pred_scaled, columns=features)

    # Fire risk: average of the individual trees' predictions. The trees are
    # queried with the plain array.
    risk_predictions = np.array([tree.predict(X_pred_scaled)[0] for tree in risk_model.estimators_])
    risk_prediction = risk_predictions.mean()

    # Probability of class 1 (fire) from the occurrence classifier
    fire_probability = fire_model.predict_proba(X_pred_frame)[0][1]

    # Historical fire statistics for this district and month, if any
    recorded_stats = district_month_fire_stats.get((district.lower(), month))

    if recorded_stats is None:
        # No history at all: missing data is not evidence of "0% fires",
        # so rely on the model alone instead of pulling the estimate down.
        historical_stats = {'fire_probability': 0, 'total_records': 0}
        model_weight, history_weight = 1.0, 0.0
    else:
        historical_stats = recorded_stats
        if historical_stats['total_records'] > 10:
            # Substantial history: weight it equally with the model
            model_weight, history_weight = 0.5, 0.5
        else:
            model_weight, history_weight = 0.7, 0.3

    # Weighted blend of model probability and historical frequency
    weighted_confidence = (model_weight * fire_probability) + \
                          (history_weight * historical_stats['fire_probability'])

    # Higher fire risk nudges the confidence up (factor between 0.8 and 1.0).
    # The risk index can be negative when precipitation dominates, so clamp to
    # [0, 1]; otherwise the factor could drop below 0.8 or even turn negative.
    risk_factor = min(max(risk_prediction / 40, 0.0), 1.0)
    adjusted_confidence = weighted_confidence * (0.8 + 0.2 * risk_factor)

    # Express as percentage
    fire_confidence_percent = adjusted_confidence * 100

    # Return additional information for analysis
    return {
        'risk': risk_prediction,
        'confidence': fire_confidence_percent,
        'model_fire_probability': fire_probability * 100,
        'historical_fire_probability': historical_stats['fire_probability'] * 100,
        'historical_records': historical_stats['total_records'],
        'historical_fire_records': historical_stats.get('fire_records', 0),
        'risk_category': get_risk_category(risk_prediction)
    }

# Categorize fire risk levels
def get_risk_category(risk_value):
    """Map a numeric fire-risk value to a named band.

    Bands are half-open on the right: values below 15 are "Low", below 25
    "Moderate", below 35 "High", and everything else is "Extreme".
    """
    upper_bounds = ((15, "Low"), (25, "Moderate"), (35, "High"))
    for upper_bound, label in upper_bounds:
        if risk_value < upper_bound:
            return label
    return "Extreme"





Fire Risk Model:
  Training MSE: 0.06
  Test MSE: 0.48
  R² Score: 0.9983

Fire Occurrence Model:
  Training Accuracy: 1.0000
  Test Accuracy: 0.8667
  Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.94      0.92       679
           1       0.78      0.62      0.69       214

    accuracy                           0.87       893
   macro avg       0.83      0.78      0.80       893
weighted avg       0.86      0.87      0.86       893



In [5]:
# Example predictions
def _print_prediction_summary(district_name, month_number, prediction):
    """Print the risk, confidence, category and historical summary of one prediction."""
    print(f"\nPredicted fire risk for {district_name} in month {month_number}: {prediction['risk']:.2f}")
    print(f"Fire occurrence confidence: {prediction['confidence']:.2f}%")
    print(f"Risk category: {prediction['risk_category']}")
    print(f"Historical data: {prediction['historical_records']} records, "
          f"{prediction['historical_fire_records']} with fires "
          f"({prediction['historical_fire_probability']:.2f}% historical probability)")


district = "bardiya"  # Replace with actual district name
month = 4  # April

# Example climate data
climate_data = {
    'LAT': 28.45,
    'LON': 81.3,
    'Prep': 26.7,  # mm
    'AvgTemp': 27.43,  # °C
    'MaxTemp': 34.93,  # °C
    'Humidity': 30.7,  # %
    'WindSpeed': 2.86   # km/h
}

bardiya_prediction = predict_for_district_month(district, month, climate_data)
_print_prediction_summary(district, month, bardiya_prediction)

# Example with zero historical fires from your data
example_zero_fire = {
    'LAT': 27.98,
    'LON': 83.23,
    'Prep': 36.46,  # mm
    'AvgTemp': 12.78,  # °C
    'MaxTemp': 19.09,  # °C
    'Humidity': 51.21,  # %
    'WindSpeed': 2.23   # km/h
}

argha_prediction = predict_for_district_month("arghakhanchi", 1, example_zero_fire)
_print_prediction_summary("arghakhanchi", 1, argha_prediction)



Predicted fire risk for bardiya in month 4: 32.65
Fire occurrence confidence: 94.98%
Risk category: High
Historical data: 6 records, 6 with fires (100.00% historical probability)

Predicted fire risk for arghakhanchi in month 1: 18.77
Fire occurrence confidence: 6.35%
Risk category: Moderate
Historical data: 6 records, 1 with fires (16.67% historical probability)




In [6]:
# Function to provide a more detailed interpretation
def interpret_prediction(prediction):
    """Turn a prediction dict into a one-sentence, human-readable interpretation.

    Args:
        prediction: Mapping with at least the keys 'confidence' (a percentage)
            and 'risk_category' ("Low", "Moderate", "High" or "Extreme").
            Other keys are ignored.

    Returns:
        A sentence combining the risk category, a confidence band and advice,
        or None when 'risk_category' is not one of the four known values.

    Raises:
        KeyError: if 'confidence' or 'risk_category' is missing.
    """
    confidence = prediction['confidence']
    risk_category = prediction['risk_category']

    # Confidence bands are half-open on the right: [0,20), [20,40), [40,60), [60,80), rest.
    confidence_level = "Very High"
    for upper_bound, label in ((20, "Very Low"), (40, "Low"), (60, "Moderate"), (80, "High")):
        if confidence < upper_bound:
            confidence_level = label
            break

    prefix = f"{risk_category} risk with {confidence_level} confidence: "

    # Extreme risk gets the same advice regardless of confidence.
    if risk_category == "Extreme":
        return prefix + "Highest alert level, immediate prevention and preparedness measures required."

    # For the remaining categories the advice depends only on whether the
    # confidence is in the two lowest bands or not.
    low_confidence = confidence_level in ("Very Low", "Low")
    advice_by_case = {
        ("Low", True): "Fire is unlikely.",
        ("Low", False): "Though risk is low, historical patterns suggest possible fire events.",
        ("Moderate", True): "Some caution advised despite uncertain prediction.",
        ("Moderate", False): "Prepare for potential fire events.",
        ("High", True): "Take precautions despite uncertain prediction.",
        ("High", False): "High likelihood of fire events, preventive measures strongly recommended.",
    }
    advice = advice_by_case.get((risk_category, low_confidence))
    if advice is None:
        # Unknown risk category: no interpretation available.
        return None
    return prefix + advice

# Print the interpretation of each example prediction under its own heading.
for heading, example_prediction in (
    ("Bardiya", bardiya_prediction),
    ("Arghakhanchi", argha_prediction),
):
    print(f"\nDetailed interpretation for {heading}:")
    print(interpret_prediction(example_prediction))





Detailed interpretation for Bardiya:
High risk with Very High confidence: High likelihood of fire events, preventive measures strongly recommended.

Detailed interpretation for Arghakhanchi:
Moderate risk with Very Low confidence: Some caution advised despite uncertain prediction.


In [7]:
import joblib
import os

# Target directory for the serialized artifacts; created if it does not exist yet.
model_dir = os.path.join("..", "models")
os.makedirs(model_dir, exist_ok=True)

# File name -> fitted object. The order matters only for which path list the
# cell displays at the end (the last one written).
artifacts = (
    ('risk_model.pkl', risk_model),
    ('fire_model.pkl', fire_model),
    ('scaler.pkl', scaler),
    ('district_encoder.pkl', district_encoder),
)
for filename, artifact in artifacts:
    saved_files = joblib.dump(artifact, os.path.join(model_dir, filename))

# Show the file list returned by the final dump as the cell output.
saved_files


['..\\models\\district_encoder.pkl']