In [4]:

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from xgboost import XGBClassifier, XGBRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
import joblib

# Load dataset
df = pd.read_csv("../data/11.csv")

# Drop 'Year' column if present
df.drop(columns=["Year"], inplace=True, errors="ignore")

# Feature Engineering (Interaction Terms)
df["Rainfall × River Level"] = df["Rainfall (mm)"] * df["River Level"]

# Encode categorical features
label_encoders = {}
for col in ["River", "District"]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le  # Save encoders for later use

# Define feature columns
X = df[["River", "River Level", "Rainfall (mm)", "District", "Rainfall × River Level"]]

# Define target variables and handle missing values
y_flood_risk = df["Flood Risk"].fillna(df["Flood Risk"].mode()[0]).astype(int)
y_area_affected = df["Area affected in (m.ha)"].fillna(df["Area affected in (m.ha)"].median())
y_population_affected = df["Population affected in (million)"].fillna(df["Population affected in (million)"].median())
y_damage_crops = df["Damage to Crops"].fillna(df["Damage to Crops"].median())
y_damage_houses = df["Damage to Houses"].fillna(df["Damage to Houses"].median())

# Apply Standard Scaler separately for each damage-related feature
scaler_area = StandardScaler()
scaler_population = StandardScaler()
scaler_crops = StandardScaler()
scaler_houses = StandardScaler()

y_area_affected_scaled = scaler_area.fit_transform(y_area_affected.values.reshape(-1, 1))
y_population_affected_scaled = scaler_population.fit_transform(y_population_affected.values.reshape(-1, 1))
y_damage_crops_scaled = scaler_crops.fit_transform(y_damage_crops.values.reshape(-1, 1))
y_damage_houses_scaled = scaler_houses.fit_transform(y_damage_houses.values.reshape(-1, 1))

# Save scalers for later use
joblib.dump(scaler_area, "scaler_area.pkl")
joblib.dump(scaler_population, "scaler_population.pkl")
joblib.dump(scaler_crops, "scaler_crops.pkl")
joblib.dump(scaler_houses, "scaler_houses.pkl")

# Split data for training/testing
X_train, X_test, y_train_risk, y_test_risk, y_train_area, y_test_area, \
y_train_pop, y_test_pop, y_train_crops, y_test_crops, y_train_houses, y_test_houses = \
    train_test_split(X, y_flood_risk, y_area_affected_scaled, y_population_affected_scaled, 
                     y_damage_crops_scaled, y_damage_houses_scaled, test_size=0.2, random_state=42)

# Train classification model (Flood Risk)
clf_risk = XGBClassifier(n_estimators=200, learning_rate=0.07, max_depth=6, random_state=42)
clf_risk.fit(X_train, y_train_risk)

# Train regression models
reg_area = XGBRegressor(n_estimators=200, learning_rate=0.07, max_depth=6, random_state=42)
reg_area.fit(X_train, y_train_area)

reg_pop = XGBRegressor(n_estimators=200, learning_rate=0.07, max_depth=6, random_state=42)
reg_pop.fit(X_train, y_train_pop)

reg_crops = XGBRegressor(n_estimators=200, learning_rate=0.07, max_depth=6, random_state=42)
reg_crops.fit(X_train, y_train_crops)

reg_houses = XGBRegressor(n_estimators=200, learning_rate=0.07, max_depth=6, random_state=42)
reg_houses.fit(X_train, y_train_houses)

# Model evaluation (convert predictions back to original values)
y_pred_area = scaler_area.inverse_transform(reg_area.predict(X_test).reshape(-1, 1))
y_pred_pop = scaler_population.inverse_transform(reg_pop.predict(X_test).reshape(-1, 1))
y_pred_crops = scaler_crops.inverse_transform(reg_crops.predict(X_test).reshape(-1, 1))
y_pred_houses = scaler_houses.inverse_transform(reg_houses.predict(X_test).reshape(-1, 1))

# Print Evaluation Metrics
print("Flood Risk Accuracy:", accuracy_score(y_test_risk, clf_risk.predict(X_test)))
print("Area Affected RMSE:", np.sqrt(mean_squared_error(y_test_area, y_pred_area)))
print("Population Affected RMSE:", np.sqrt(mean_squared_error(y_test_pop, y_pred_pop)))
print("Damage to Crops RMSE:", np.sqrt(mean_squared_error(y_test_crops, y_pred_crops)))
print("Damage to Houses RMSE:", np.sqrt(mean_squared_error(y_test_houses, y_pred_houses)))

# Save models and encoders
joblib.dump(clf_risk, "xgb_flood_risk_model.pkl")
joblib.dump(reg_area, "xgb_area_affected_model.pkl")
joblib.dump(reg_pop, "xgb_population_affected_model.pkl")
joblib.dump(reg_crops, "xgb_damage_crops_model.pkl")
joblib.dump(reg_houses, "xgb_damage_houses_model.pkl")
joblib.dump(label_encoders, "label_encoders.pkl")



Flood Risk Accuracy: 0.8752475247524752
Area Affected RMSE: 0.039891280029942265
Population Affected RMSE: 0.03913647506017264
Damage to Crops RMSE: 0.038343386437035464
Damage to Houses RMSE: 0.04029611896392284


['label_encoders.pkl']

In [5]:
import pandas as pd
import joblib
import numpy as np
import os

# Load models and scalers
MODEL_DIR = "."  # Update path if needed

try:
    models = {
        'flood_risk': joblib.load(os.path.join(MODEL_DIR, "xgb_flood_risk_model.pkl")),
        'area': joblib.load(os.path.join(MODEL_DIR, "xgb_area_affected_model.pkl")),
        'population': joblib.load(os.path.join(MODEL_DIR, "xgb_population_affected_model.pkl")),
        'crops': joblib.load(os.path.join(MODEL_DIR, "xgb_damage_crops_model.pkl")),
        'houses': joblib.load(os.path.join(MODEL_DIR, "xgb_damage_houses_model.pkl"))
    }

    scalers = {
        'area': joblib.load(os.path.join(MODEL_DIR, "scaler_area.pkl")),
        'population': joblib.load(os.path.join(MODEL_DIR, "scaler_population.pkl")),
        'crops': joblib.load(os.path.join(MODEL_DIR, "scaler_crops.pkl")),
        'houses': joblib.load(os.path.join(MODEL_DIR, "scaler_houses.pkl"))
    }

except FileNotFoundError as e:
    print(f"Error loading files: {e}")
    exit()

# Sample input (with already encoded district and river values)
data_input = {
    'District': 5,              # Already encoded district value
    'Rainfall (mm)': 0.731480275,
    'River': 2,                 # Already encoded river value
    'River Level': 2.358254062
}

# Interaction term
data_input["Rainfall × River Level"] = data_input["Rainfall (mm)"] * data_input["River Level"]

# Convert to DataFrame with correct feature order
X_new = pd.DataFrame([data_input])[['River', 'River Level', 'Rainfall (mm)', 'District', 'Rainfall × River Level']]

# Make predictions
try:
    # Flood risk prediction (classification)
    flood_risk_pred = models['flood_risk'].predict(X_new)[0]
    
    # Regression predictions (returns numpy arrays)
    area_pred = models['area'].predict(X_new)
    population_pred = models['population'].predict(X_new)
    crops_pred = models['crops'].predict(X_new)
    houses_pred = models['houses'].predict(X_new)
    
    # Reshape and inverse transform to get original values
    area_original = scalers['area'].inverse_transform(area_pred.reshape(-1, 1))[0][0]
    population_original = scalers['population'].inverse_transform(population_pred.reshape(-1, 1))[0][0]
    crops_original = scalers['crops'].inverse_transform(crops_pred.reshape(-1, 1))[0][0]
    houses_original = scalers['houses'].inverse_transform(houses_pred.reshape(-1, 1))[0][0]

    predictions = {
        'Flood Risk': int(flood_risk_pred),  # Convert to Python int
        'Area affected (m.ha)': float(area_original),
        'Population affected (million)': float(population_original),
        'Damage to Crops': float(crops_original),
        'Damage to Houses': float(houses_original)
    }

    # Print results
    print("\nPrediction Results:")
    print(f"Flood Risk: {predictions['Flood Risk']} (0=No, 1=Yes, 2=Severe)")
    print(f"Area affected (m.ha): {predictions['Area affected (m.ha)']:.4f}")
    print(f"Population affected (million): {predictions['Population affected (million)']:.4f}")
    print(f"Damage to Crops: {predictions['Damage to Crops']:.4f}")
    print(f"Damage to Houses: {predictions['Damage to Houses']:.4f}")

except Exception as e:
    print(f"Error during prediction: {e}")


Prediction Results:
Flood Risk: 0 (0=No, 1=Yes, 2=Severe)
Area affected (m.ha): -0.2181
Population affected (million): -0.2196
Damage to Crops: -0.2226
Damage to Houses: -0.2169


In [6]:
import pandas as pd
import joblib
import numpy as np
import os

# Load models and scalers
MODEL_DIR = "."  # Update path if needed

try:
    models = {
        'flood_risk': joblib.load(os.path.join(MODEL_DIR, "xgb_flood_risk_model.pkl")),
        'area': joblib.load(os.path.join(MODEL_DIR, "xgb_area_affected_model.pkl")),
        'population': joblib.load(os.path.join(MODEL_DIR, "xgb_population_affected_model.pkl")),
        'crops': joblib.load(os.path.join(MODEL_DIR, "xgb_damage_crops_model.pkl")),
        'houses': joblib.load(os.path.join(MODEL_DIR, "xgb_damage_houses_model.pkl"))
    }

    scalers = {
        'area': joblib.load(os.path.join(MODEL_DIR, "scaler_area.pkl")),
        'population': joblib.load(os.path.join(MODEL_DIR, "scaler_population.pkl")),
        'crops': joblib.load(os.path.join(MODEL_DIR, "scaler_crops.pkl")),
        'houses': joblib.load(os.path.join(MODEL_DIR, "scaler_houses.pkl"))
    }

except FileNotFoundError as e:
    print(f"Error loading files: {e}")
    exit()

# Sample input (with already encoded district and river values)
data_input = {
    'District': 5,              # Already encoded district value
    'Rainfall (mm)': 0.731480275,
    'River': 2,                 # Already encoded river value
    'River Level': 2.358254062
}

# Interaction term
data_input["Rainfall × River Level"] = data_input["Rainfall (mm)"] * data_input["River Level"]

# Convert to DataFrame with correct feature order
X_new = pd.DataFrame([data_input])[['River', 'River Level', 'Rainfall (mm)', 'District', 'Rainfall × River Level']]

# Make predictions
try:
    # Flood risk prediction (classification)
    flood_risk_pred = models['flood_risk'].predict(X_new)[0]
    
    # Regression predictions (returns numpy arrays)
    area_pred = models['area'].predict(X_new)
    population_pred = models['population'].predict(X_new)
    crops_pred = models['crops'].predict(X_new)
    houses_pred = models['houses'].predict(X_new)
    
    # Reshape and inverse transform to get original values
    area_original = scalers['area'].inverse_transform(area_pred.reshape(-1, 1))[0][0]
    population_original = scalers['population'].inverse_transform(population_pred.reshape(-1, 1))[0][0]
    crops_original = scalers['crops'].inverse_transform(crops_pred.reshape(-1, 1))[0][0]
    houses_original = scalers['houses'].inverse_transform(houses_pred.reshape(-1, 1))[0][0]

    predictions = {
        'Flood Risk': int(flood_risk_pred),  # Convert to Python int
        'Area affected (m.ha)': float(area_original),
        'Population affected (million)': float(population_original),
        'Damage to Crops': float(crops_original),
        'Damage to Houses': float(houses_original)
    }

    # Print results
    print("\nPrediction Results:")
    print(f"Flood Risk: {predictions['Flood Risk']} (0=No, 1=Yes, 2=Severe)")
    print(f"Area affected (m.ha): {predictions['Area affected (m.ha)']:.4f}")
    print(f"Population affected (million): {predictions['Population affected (million)']:.4f}")
    print(f"Damage to Crops: {predictions['Damage to Crops']:.4f}")
    print(f"Damage to Houses: {predictions['Damage to Houses']:.4f}")

except Exception as e:
    print(f"Error during prediction: {e}")


Prediction Results:
Flood Risk: 0 (0=No, 1=Yes, 2=Severe)
Area affected (m.ha): -0.2181
Population affected (million): -0.2196
Damage to Crops: -0.2226
Damage to Houses: -0.2169
