# Model Training and Hybrid KRR + ML Engine

This notebook demonstrates the complete pipeline for the Community Service Request Priority System:
1. Load and prepare data
2. Generate synthetic data if needed
3. Train Machine Learning model
4. Create Hybrid Engine combining ML and Knowledge Representation & Reasoning

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import joblib
import os

# Load the dataset
df = pd.read_csv('../data/Brgy_Bagumbayan_Synthetic_Community_Service_Requests_500.csv')
print("Dataset shape:", df.shape)
df.head()

Dataset shape: (500, 15)


Unnamed: 0,request_id,barangay,municipality,province,sitio,request_type,urgency_level,severity_level,impact_scope,location_type,time_reported,weather_condition,past_similar_reports,ml_priority,final_priority
0,1,Bagumbayan,Santa Cruz,Laguna,Sitio 3,Garbage Collection,Low,Severe,29,School Zone,Morning,Storm,1,Low,High
1,2,Bagumbayan,Santa Cruz,Laguna,Sitio 4,Streetlight Repair,Low,Severe,140,Residential Area,Morning,Normal,3,High,High
2,3,Bagumbayan,Santa Cruz,Laguna,Sitio 5,Garbage Collection,Urgent,Minor,130,Public Facility,Afternoon,Normal,7,High,High
3,4,Bagumbayan,Santa Cruz,Laguna,Sitio 6,Tree Obstruction,Low,Minor,72,Commercial Area,Afternoon,Normal,3,High,High
4,5,Bagumbayan,Santa Cruz,Laguna,Sitio 4,Road Damage,Low,Minor,87,Commercial Area,Afternoon,Storm,4,High,High


In [4]:
# Generate additional synthetic data
np.random.seed(42)
additional_rows = 100
synthetic_data = {
    'request_id': range(501, 501 + additional_rows),
    'barangay': ['Bagumbayan'] * additional_rows,
    'municipality': ['Santa Cruz'] * additional_rows,
    'province': ['Laguna'] * additional_rows,
    'sitio': np.random.choice(['Sitio 1', 'Sitio 2', 'Sitio 3', 'Sitio 4', 'Sitio 5'], additional_rows),
    'request_type': np.random.choice(df['request_type'].unique(), additional_rows),
    'urgency_level': np.random.choice(['Low', 'Normal', 'Urgent'], additional_rows),
    'severity_level': np.random.choice(['Minor', 'Moderate', 'Severe'], additional_rows),
    'impact_scope': np.random.randint(1, 201, additional_rows),
    'location_type': np.random.choice(df['location_type'].unique(), additional_rows),
    'time_reported': np.random.choice(['Morning', 'Afternoon', 'Night'], additional_rows),
    'weather_condition': np.random.choice(['Normal', 'Rainy', 'Storm'], additional_rows),
    'past_similar_reports': np.random.randint(0, 11, additional_rows),
    'ml_priority': np.random.choice(['Low', 'Moderate', 'High'], additional_rows),
    'final_priority': np.random.choice(['Low', 'Moderate', 'High'], additional_rows)
}

synthetic_df = pd.DataFrame(synthetic_data)
df_augmented = pd.concat([df, synthetic_df], ignore_index=True)
print("Augmented dataset shape:", df_augmented.shape)

Augmented dataset shape: (600, 15)


In [5]:
# Preprocess data
le_request_type = LabelEncoder()
le_urgency = LabelEncoder()
le_severity = LabelEncoder()
le_location = LabelEncoder()
le_time = LabelEncoder()
le_weather = LabelEncoder()
le_priority = LabelEncoder()

df_augmented['request_type_encoded'] = le_request_type.fit_transform(df_augmented['request_type'])
df_augmented['urgency_encoded'] = le_urgency.fit_transform(df_augmented['urgency_level'])
df_augmented['severity_encoded'] = le_severity.fit_transform(df_augmented['severity_level'])
df_augmented['location_encoded'] = le_location.fit_transform(df_augmented['location_type'])
df_augmented['time_encoded'] = le_time.fit_transform(df_augmented['time_reported'])
df_augmented['weather_encoded'] = le_weather.fit_transform(df_augmented['weather_condition'])
df_augmented['priority_encoded'] = le_priority.fit_transform(df_augmented['ml_priority'])

X = df_augmented[['request_type_encoded', 'urgency_encoded', 'severity_encoded', 'impact_scope', 'location_encoded', 'time_encoded', 'weather_encoded', 'past_similar_reports']]
y = df_augmented['priority_encoded']

# Train ML model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=le_priority.classes_))

Classification Report:
              precision    recall  f1-score   support

        High       0.81      0.98      0.89        81
         Low       0.71      0.26      0.38        19
    Moderate       0.69      0.55      0.61        20

    accuracy                           0.79       120
   macro avg       0.74      0.60      0.63       120
weighted avg       0.78      0.79      0.76       120



In [6]:
# Hybrid KRR + ML Engine
def apply_rules(ml_priority, features):
    final_priority = ml_priority
    reasons = []

    if features['urgency_level'] == 'Urgent':
        final_priority = 'High'
        reasons.append("Urgency level is Urgent, setting priority to High.")

    if features['severity_level'] == 'Severe':
        final_priority = 'High'
        reasons.append("Severity level is Severe, setting priority to High.")

    if features['impact_scope'] > 100:
        if final_priority == 'Low':
            final_priority = 'Moderate'
        elif final_priority == 'Moderate':
            final_priority = 'High'
        reasons.append("Impact scope > 100, increasing priority.")

    if features['location_type'] == 'Highway' and features['time_reported'] == 'Night':
        final_priority = 'High'
        reasons.append("Highway issue at night, setting to High priority.")

    if features['weather_condition'] == 'Storm' and features['request_type'] == 'Road Damage':
        final_priority = 'High'
        reasons.append("Road damage during storm, setting to High priority.")

    return final_priority, reasons

def hybrid_prediction(features):
    # Encode features for ML
    encoded = {
        'request_type_encoded': le_request_type.transform([features['request_type']])[0],
        'urgency_encoded': le_urgency.transform([features['urgency_level']])[0],
        'severity_encoded': le_severity.transform([features['severity_level']])[0],
        'impact_scope': features['impact_scope'],
        'location_encoded': le_location.transform([features['location_type']])[0],
        'time_encoded': le_time.transform([features['time_reported']])[0],
        'weather_encoded': le_weather.transform([features['weather_condition']])[0],
        'past_similar_reports': features['past_similar_reports']
    }
    df_encoded = pd.DataFrame([encoded])
    ml_pred_encoded = model.predict(df_encoded)[0]
    ml_priority = le_priority.inverse_transform([ml_pred_encoded])[0]
    
    # Apply rules
    final_priority, reasons = apply_rules(ml_priority, features)
    
    return ml_priority, final_priority, reasons

# Test the hybrid engine
test_features = {
    'request_type': 'Garbage Collection',
    'urgency_level': 'Urgent',
    'severity_level': 'Severe',
    'impact_scope': 130,
    'location_type': 'Public Facility',
    'time_reported': 'Afternoon',
    'weather_condition': 'Normal',
    'past_similar_reports': 7
}

ml_pred, final_pred, reasons = hybrid_prediction(test_features)
print(f"ML Prediction: {ml_pred}")
print(f"Final Priority: {final_pred}")
print("Reasons:", reasons)

ML Prediction: High
Final Priority: High
Reasons: ['Urgency level is Urgent, setting priority to High.', 'Severity level is Severe, setting priority to High.', 'Impact scope > 100, increasing priority.']


In [7]:
# Save the trained model and encoders
os.makedirs('../models', exist_ok=True)
joblib.dump(model, '../models/priority_model.pkl')
joblib.dump(le_request_type, '../models/le_request_type.pkl')
joblib.dump(le_urgency, '../models/le_urgency.pkl')
joblib.dump(le_severity, '../models/le_severity.pkl')
joblib.dump(le_location, '../models/le_location.pkl')
joblib.dump(le_time, '../models/le_time.pkl')
joblib.dump(le_weather, '../models/le_weather.pkl')
joblib.dump(le_priority, '../models/le_priority.pkl')
print("Model and encoders saved to models/ folder.")

Model and encoders saved to models/ folder.
