In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay, classification_report
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from catboost import CatBoostClassifier
import random
import math
import sys

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [5]:
# DSWD ECT Payout Criteria (Simulated based on common DSWD guidelines)
# The CatBoost model will learn to classify the payout based on these inputs.
PAYOUT_CRITERIA = {
   'Totally Damaged': 10000,  # Max Payout for totally damaged homes
   'Partially Damaged': 5000, # Mid Payout for partially damaged homes
   'Not Damaged': 0,          # No Payout
}

In [12]:
NUM_HOUSEHOLDS = 5000
random.seed(42)
np.random.seed(42)
noise = np.random.choice([0, 1], size=NUM_HOUSEHOLDS, p=[0.9, 0.1])

In [None]:
# Generate base data components
barangays = [f'BGY-{i:03d}' for i in range(20)]
data = {
    # GIS/Location Data
    'Barangay_ID': np.random.choice(barangays, NUM_HOUSEHOLDS),
    'Latitude': np.random.uniform(14.5, 14.7, NUM_HOUSEHOLDS),
    'Longitude': np.random.uniform(120.9, 121.1, NUM_HOUSEHOLDS),

    # Quantifiable Flood and House Dimensions
    'Flood_Depth_Meters': np.random.uniform(0.0, 4.0, NUM_HOUSEHOLDS),
    'House_Height_Meters': np.random.uniform(3.0, 8.0, NUM_HOUSEHOLDS),
    'House_Width_Meters': np.random.uniform(5.0, 15.0, NUM_HOUSEHOLDS),

    # Simulated Output from ML Damage Classification Model
    'Damage_Classification': np.random.choice(
        ['Not Damaged', 'Partially Damaged', 'Totally Damaged'],
        NUM_HOUSEHOLDS,
        p=[0.70, 0.20, 0.10]
    ),

    # Simulated DSWD Vulnerability Data
    'Is_4Ps_Recipient': np.random.choice([0, 1], NUM_HOUSEHOLDS, p=[0.8, 0.2])
}

In [14]:
df_train = pd.DataFrame(data)

In [15]:
# Calculate Flood Height Ratio (strong numerical feature)
df_train['Flood_Height_Ratio'] = np.minimum(df_train['Flood_Depth_Meters'] / df_train['House_Height_Meters'], 1.0)

In [16]:
# Generate the TARGET Variable (Target_ECT_Amount) using vectorized operations
# This replaces the complex assign_payout function and df.apply

# Start with the default: Not Damaged (0)
df_train['Target_ECT_Amount'] = PAYOUT_CRITERIA['Not Damaged']

# 1. Severe Flooding (Ratio >= 0.8) -> Totally Damaged (10000)
mask_severe_flood = df_train['Flood_Height_Ratio'] >= 0.8
df_train.loc[mask_severe_flood, 'Target_ECT_Amount'] = PAYOUT_CRITERIA['Totally Damaged']

# 2. Moderate Flooding (0.4 <= Ratio < 0.8) AND ML confirms damage -> Partially Damaged (5000)
mask_moderate_flood = (df_train['Flood_Height_Ratio'] >= 0.4) & (df_train['Flood_Height_Ratio'] < 0.8)
mask_damage_confirmed = df_train['Damage_Classification'].isin(['Partially Damaged', 'Totally Damaged'])
mask_moderate_payout = mask_moderate_flood & mask_damage_confirmed
df_train.loc[mask_moderate_payout, 'Target_ECT_Amount'] = PAYOUT_CRITERIA['Partially Damaged']

# 3. Low Flooding (Ratio < 0.4) but ML confirms Totally Damaged -> Totally Damaged (10000)
mask_low_flood_total_damage = (df_train['Flood_Height_Ratio'] < 0.4) & (df_train['Damage_Classification'] == 'Totally Damaged')
# Use | (OR) with the existing 'Totally Damaged' assignments to ensure no overwrite
df_train.loc[mask_low_flood_total_damage, 'Target_ECT_Amount'] = PAYOUT_CRITERIA['Totally Damaged']

# 4. Low Flooding (Ratio < 0.4) but ML confirms Partially Damaged -> Partially Damaged (5000)
mask_low_flood_partial_damage = (df_train['Flood_Height_Ratio'] < 0.4) & (df_train['Damage_Classification'] == 'Partially Damaged')
df_train.loc[mask_low_flood_partial_damage, 'Target_ECT_Amount'] = PAYOUT_CRITERIA['Partially Damaged']

print(f"Data generation complete. Target distribution:\n{df_train['Target_ECT_Amount'].value_counts()}")

Data generation complete. Target distribution:
Target_ECT_Amount
0        3252
5000     1060
10000     688
Name: count, dtype: int64


In [27]:
# Define features and target
X = df_train.drop('Target_ECT_Amount', axis=1)
y = df_train['Target_ECT_Amount']

# Identify categorical features for CatBoost
categorical_features_indices = np.where(X.dtypes == 'object')[0].tolist() + [X.columns.get_loc('Is_4Ps_Recipient')]

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Initialize and Train CatBoost Classifier
ect_model = CatBoostClassifier(
    iterations=1000,
    depth=6,
    learning_rate=0.1,
    loss_function='MultiClass',
    random_seed=42,
    verbose=500,
    eval_metric='TotalF1',
    cat_features=categorical_features_indices
)

ect_model.fit(X_train, y_train, eval_set=(X_test, y_test))

0:	learn: 0.9751991	test: 0.9742700	best: 0.9742700 (0)	total: 3.17ms	remaining: 3.17s
500:	learn: 1.0000000	test: 1.0000000	best: 1.0000000 (11)	total: 1.2s	remaining: 1.2s
999:	learn: 1.0000000	test: 1.0000000	best: 1.0000000 (11)	total: 2.41s	remaining: 0us

bestTest = 1
bestIteration = 11

Shrink model to first 12 iterations.


<catboost.core.CatBoostClassifier at 0x12c715910>

In [28]:
from sklearn.metrics import accuracy_score, classification_report, f1_score, precision_score, recall_score

# --- Evaluate performance ---
# Assuming 'ect_model' is trained and 'X_test', 'y_test' are available
y_pred = ect_model.predict(X_test).flatten()

# --- Print Diagnostics ---
print(f'Predictions (first 100): {y_pred[:100]}')

# --- Evaluation Metrics ---
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy on Test Data: {accuracy * 100:.2f}%")

# Use 'weighted' average for multi-class metrics
average_method = 'weighted' 

print(f'\nAccuracy: {accuracy_score(y_test, y_pred):.4f}')
print(f'F1 ({average_method}): {f1_score(y_test, y_pred, average=average_method, zero_division=0):.4f}')
print(f'Precision ({average_method}): {precision_score(y_test, y_pred, average=average_method, zero_division=0):.4f}')
print(f'Recall ({average_method}): {recall_score(y_test, y_pred, average=average_method, zero_division=0):.4f}')

print("\nClassification Report:")
# The classification_report handles multi-class automatically
print(classification_report(y_test, y_pred, zero_division=0))

Predictions (first 100): [    0 10000     0     0     0     0     0     0     0     0     0  5000
  5000  5000     0     0  5000  5000     0     0  5000     0  5000     0
     0     0     0     0 10000  5000 10000 10000     0     0     0  5000
     0  5000  5000     0     0     0     0  5000     0     0     0     0
     0     0     0     0  5000     0     0     0     0     0 10000     0
  5000 10000     0     0     0     0  5000     0     0     0     0     0
     0     0  5000     0     0  5000  5000  5000 10000     0     0     0
  5000 10000     0     0     0     0     0     0     0     0     0 10000
     0     0     0     0]
Model Accuracy on Test Data: 100.00%

Accuracy: 1.0000
F1 (weighted): 1.0000
Precision (weighted): 1.0000
Recall (weighted): 1.0000

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       650
        5000       1.00      1.00      1.00       212
       10000       1.00      1.00      1.00   

In [32]:
# Simulate a new batch of real-time data for recommendation
df_new = df_train.sample(n=10, random_state=10).reset_index(drop=True)
df_new['Household_ID'] = [f'HH-{i:05d}' for i in range(10)]
# Drop target and calculated feature before feeding to prediction
df_new = df_new.drop(['Target_ECT_Amount', 'Flood_Height_Ratio'], axis=1, errors='ignore')

# Calculate Flood_Height_Ratio for the new data
df_new['Flood_Height_Ratio'] = np.minimum(df_new['Flood_Depth_Meters'] / df_new['House_Height_Meters'], 1.0)

# Make prediction
predictions = ect_model.predict(df_new.drop('Household_ID', axis=1, errors='ignore')).flatten().astype(int)
recommendations_df = df_new.copy()
recommendations_df['Recommended_ECT_Amount_PHP'] = predictions

In [33]:
# --- LLM-Powered SMS Notification (Inline Logic replacing _simulate_llm_response and df.apply) ---

amount = recommendations_df['Recommended_ECT_Amount_PHP']
household_id = recommendations_df['Household_ID']
barangay = recommendations_df['Barangay_ID']
damage_class = recommendations_df['Damage_Classification']

# Define messages (using string concatenation for array-like column access)
msg_10k = (
    "DSWD-ECT Official: Aprubado ang PHP10,000 (Emergency Cash Transfer) para sa inyo (" + household_id + ") sa " + barangay + " dahil sa lubos na nasira (totally damaged). "
    "Antayin ang abiso ng LGU para sa iskedyul ng payout. #DSWDMayMalasakit"
)
msg_5k = (
    "DSWD-ECT Official: Aprubado ang PHP5,000 (Emergency Cash Transfer) para sa inyo (" + household_id + ") sa " + barangay + " dahil sa bahagyang nasira (partially damaged). "
    "Antayin ang abiso ng LGU para sa iskedyul ng payout. #DSWDMayMalasakit"
)
msg_0 = (
    "DSWD-ECT Official: Na-assess ang inyong tahanan (" + household_id + ") sa " + barangay + " bilang " + damage_class + ". "
    "Wala pong ECT assistance na maibibigay sa ngayon. Para sa apela, makipag-ugnayan sa inyong MSWDO. #DSWDUpdate"
)

# Use np.select to assign the correct SMS string based on the predicted amount
conditions = [
    amount == 10000,
    amount == 5000,
]
choices = [msg_10k, msg_5k]
# If amount is not 10000 or 5000 (i.e., it's 0), default to the msg_0
recommendations_df['SMS_Notification'] = np.select(conditions, choices, default=msg_0)

In [35]:
# Output Summary
allocation_summary = recommendations_df.groupby('Recommended_ECT_Amount_PHP').size().reset_index(name='Households_Count')
total_budget_needed = allocation_summary.apply(lambda row: row['Recommended_ECT_Amount_PHP'] * row['Households_Count'], axis=1).sum()

print(f"\nTotal Budget Required for these {len(recommendations_df)} Households: PHP{total_budget_needed:,.0f}")
print("\nAllocation Summary:")
print(allocation_summary.to_markdown(index=False))

print("\n--- Sample Household SMS Notifications (LLM Generated in Filipino) ---")
print(recommendations_df[['Household_ID', 'Recommended_ECT_Amount_PHP', 'Flood_Depth_Meters', 'House_Height_Meters', 'Flood_Height_Ratio', 'SMS_Notification']].head(5).to_markdown(index=False))


Total Budget Required for these 10 Households: PHP20,000

Allocation Summary:
|   Recommended_ECT_Amount_PHP |   Households_Count |
|-----------------------------:|-------------------:|
|                            0 |                  7 |
|                         5000 |                  2 |
|                        10000 |                  1 |

--- Sample Household SMS Notifications (LLM Generated in Filipino) ---
| Household_ID   |   Recommended_ECT_Amount_PHP |   Flood_Depth_Meters |   House_Height_Meters |   Flood_Height_Ratio | SMS_Notification                                                                                                                                                                                                            |
|:---------------|-----------------------------:|---------------------:|----------------------:|---------------------:|:---------------------------------------------------------------------------------------------------------------------

In [36]:
ect_model.save_model("ect_allocation_model_v1.bin")
print("Model saved as ect_allocation_model_v1.bin")

Model saved as ect_allocation_model_v1.bin


In [38]:
# Example Absolute Path (Replace this path with the actual location on your system)
full_path = "/Users/gyalm/Desktop/ect_allocation_model/models/ect_allocation_model_v1.bin"

def load_model(file_path):
    loaded_model = CatBoostClassifier()
    loaded_model.load_model(file_path)
    print(f"Model loaded from {file_path}")
    return loaded_model

loaded_model = load_model(full_path)

Model loaded from /Users/gyalm/Desktop/ect_allocation_model/models/ect_allocation_model_v1.bin
