# Service Completion Time Prediction - Testing Notebook

This notebook helps you test and understand your service completion time prediction model.

## 1. Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import joblib
import sys
from datetime import datetime

# Put the parent directory on the import path so that modules living there
# (such as model_predictor.py) can be imported from this notebook
parent_dir = '..'
sys.path.append(parent_dir)

## 2. Load Model and Preprocessing Components

In [None]:
# Artifacts live in <parent of the current working directory>/Model/Tast1
# NOTE(review): 'Tast1' looks like it could be a typo for 'Task1' - confirm
# against the directory name on disk before changing it.
MODEL_DIR = os.path.join(os.path.dirname(os.getcwd()), 'Model', 'Tast1')

# Load the estimator, the numeric scaler and both label encoders, in that order.
# joblib.load unpickles the files, so only point this at trusted artifacts.
model, scaler, le_task, le_section = (
    joblib.load(os.path.join(MODEL_DIR, artifact_name))
    for artifact_name in (
        'model.pkl',
        'scaler.pkl',
        'task_label_encoder.pkl',
        'section_label_encoder.pkl',
    )
)

print("Model and preprocessing components loaded successfully!")

## 3. Examine the Model

In [None]:
# Report the class of the loaded estimator
print(f"Model type: {type(model).__name__}")

# Any estimator exposing get_params() (the scikit-learn estimator API, which
# XGBoost's sklearn wrappers also implement) can list its hyperparameters
if hasattr(model, 'get_params'):
    print("\nModel parameters:")
    hyperparams = model.get_params()
    for name in hyperparams:
        print(f"  {name}: {hyperparams[name]}")

## 4. Examine the Preprocessing Components

In [None]:
# List every label each encoder knows, next to the integer code it maps to
for heading, encoder in (("Task IDs:", le_task), ("\nSection IDs:", le_section)):
    print(heading)
    for code, label in enumerate(encoder.classes_):
        print(f"  {code}: {label}")

## 5. Create a Prediction Function

In [None]:
def predict_completion_time(date, time_str, task_id, *,
                            employees_on_duty=5, staff_load_ratio=1.0):
    """Predict the service completion time, in whole minutes, for one appointment.

    Builds a single-row feature frame, scales the numeric columns with the
    module-level ``scaler`` and feeds the row to the module-level ``model``.

    Args:
        date: Appointment date; anything ``pd.to_datetime`` accepts.
        time_str: Appointment time formatted as ``"HH:MM"``. Only the hour is
            used as a feature; the minute is parsed solely so that malformed
            input is rejected.
        task_id: Task identifier. It is converted to ``str`` and encoded with
            ``le_task``; an identifier the encoder does not know prints a
            warning and is encoded as ``-1``.
        employees_on_duty: Staff count fed to the model (default ``5``).
        staff_load_ratio: Staff load ratio fed to the model (default ``1.0``).

    Returns:
        int: The model's prediction rounded to the nearest minute.

    Raises:
        IndexError: If ``time_str`` contains no ``':'`` separator.
        ValueError: If the hour or minute part of ``time_str`` is not an integer.
    """
    # Convert date to datetime
    date = pd.to_datetime(date)

    # Extract calendar/time features
    time_parts = time_str.split(':')
    hour = int(time_parts[0])
    int(time_parts[1])  # not a feature; parsed only to reject malformed times
    weekday = date.weekday()
    is_weekend = 1 if weekday >= 5 else 0
    month = date.month

    # Encode task_id. LabelEncoder.transform raises ValueError for labels it
    # was not fitted on; anything else (e.g. a missing encoder) should surface.
    try:
        task_id_encoded = le_task.transform([str(task_id)])[0]
    except ValueError:
        # If task_id not in training data, use a default value
        print(f"Warning: Unknown task_id: {task_id}")
        task_id_encoded = -1

    # The section is not an input of this helper; encoded value 0 is always used
    section_id_encoded = 0

    # Build feature vector
    features_dict = {
        'appointment_hour': hour,
        'appointment_weekday': weekday,
        'is_weekend': is_weekend,
        'month': month,
        'staff_load_ratio': staff_load_ratio,
        'employees_on_duty': employees_on_duty,
        'task_id_encoded': task_id_encoded,
        'section_id_encoded': section_id_encoded
    }

    input_df = pd.DataFrame([features_dict])

    # Scale numeric features (is_weekend and the encoded IDs are left as-is)
    num_features = ['appointment_hour', 'appointment_weekday', 'month',
                    'staff_load_ratio', 'employees_on_duty']

    input_df[num_features] = scaler.transform(input_df[num_features])

    # Present the columns to the model in a fixed, explicit order
    features = [
        'appointment_hour', 'appointment_weekday', 'is_weekend', 'month',
        'staff_load_ratio', 'employees_on_duty', 'task_id_encoded', 'section_id_encoded'
    ]
    X = input_df[features]

    # Predict; convert the (possibly NumPy) scalar so callers always get a
    # plain, JSON-serialisable int
    predicted_minutes = model.predict(X)[0]
    return int(round(float(predicted_minutes)))

## 6. Test the Model with Example Data

In [None]:
# Sample appointments to run through predict_completion_time
test_cases = [
    dict(date="2025-08-29", time="10:30", task_id="PASSPORT_RENEWAL"),
    dict(date="2025-09-15", time="14:15", task_id="VISA_APPLICATION"),
    dict(date="2025-08-25", time="09:00", task_id="ID_CARD"),
    dict(date="2025-08-30", time="11:45", task_id="DRIVING_LICENSE"),
]

# Successful cases are collected (input fields plus "prediction") for later use;
# a failing case is reported and skipped so the remaining cases still run
results = []
for number, sample in enumerate(test_cases, start=1):
    try:
        minutes = predict_completion_time(sample["date"], sample["time"], sample["task_id"])
        print(f"Test {number}:")
        print(f"  Input: Date={sample['date']}, Time={sample['time']}, Task={sample['task_id']}")
        print(f"  Predicted completion time: {minutes} minutes")
        results.append(dict(sample, prediction=minutes))
    except Exception as err:
        print(f"Test {number} failed: {err}")

## 7. Visualize the Results

In [None]:
# One row per collected result, with the input fields and the prediction as columns
results_df = pd.DataFrame(results)

# Bar chart of predicted minutes per task type, drawn on an explicit Axes
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=results_df, x="task_id", y="prediction", ax=ax)
ax.set_title("Predicted Completion Time by Task Type")
ax.set_xlabel("Task Type")
ax.set_ylabel("Completion Time (minutes)")
plt.xticks(rotation=45)  # slant the task names
plt.tight_layout()
plt.show()

## 8. Analyze Factors Influencing Predictions

In [None]:
# Hourly slots from 08:00 through 18:00, all predicted for the same date and task
times = [f"{hour:02d}:00" for hour in range(8, 19)]
predictions = [
    {
        "time": slot,
        "prediction": predict_completion_time("2025-08-29", slot, "PASSPORT_RENEWAL"),
    }
    for slot in times
]

time_df = pd.DataFrame(predictions)

# Line chart of the prediction against the hour of the appointment
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(data=time_df, x="time", y="prediction", marker='o', ax=ax)
ax.set_title("Predicted Completion Time by Time of Day")
ax.set_xlabel("Time of Day")
ax.set_ylabel("Completion Time (minutes)")
plt.xticks(rotation=45)
ax.grid(True, linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

## 9. Test the Service Using the API

In [None]:
import requests

def test_api(url="http://localhost:5000", test_cases=None, timeout=10):
    """Smoke-test the prediction API: one health check, then one POST per case.

    Everything is reported with ``print``; nothing is returned or raised for
    network-level failures.

    Args:
        url: Base URL of the API server, without a trailing slash.
        test_cases: List of JSON-serialisable dicts to POST to ``/predict``.
            When ``None``, three built-in sample cases are used.
        timeout: Seconds to wait for each HTTP request. Without a timeout
            ``requests`` can block indefinitely on an unresponsive server.

    Returns:
        None. The function returns early when the health check fails.
    """
    if test_cases is None:
        test_cases = [
            {"date": "2025-08-29", "time": "10:30", "task_id": "PASSPORT_RENEWAL"},
            {"date": "2025-09-15", "time": "14:15", "task_id": "VISA_APPLICATION"},
            {"date": "2025-08-25", "time": "09:00", "task_id": "ID_CARD"}
        ]

    # Health check: a transport-level failure means the server is not reachable
    try:
        response = requests.get(f"{url}/health", timeout=timeout)
    except requests.RequestException as e:
        print(f"Error connecting to API: {e}")
        print("Make sure the API server is running using .\\run_api.bat")
        return

    # Something answered, but it may not be the prediction API; a body that
    # is not JSON is reported as such rather than as a connection error
    try:
        print(f"Health check: {response.json()}")
    except ValueError:
        print(f"Health check returned a non-JSON response "
              f"(status {response.status_code}): {response.text[:200]!r}")
        return

    # Run tests; a failing case is reported and the remaining cases still run
    for i, test_case in enumerate(test_cases, 1):
        print(f"\nTest Case {i}:")
        print(f"Input: {test_case}")

        try:
            # json= serialises the payload and sets the JSON Content-Type header
            response = requests.post(
                f"{url}/predict",
                json=test_case,
                timeout=timeout
            )
        except (requests.RequestException, TypeError, ValueError) as e:
            # TypeError/ValueError: the test case could not be JSON-encoded
            print(f"Error in test case {i}: {e}")
            continue

        print(f"Status Code: {response.status_code}")
        # Error pages are often HTML; show the raw text instead of failing
        try:
            body = response.json()
        except ValueError:
            body = response.text
        print(f"Response: {body}")

# Run the test (uncomment to run)
# test_api()

## 10. Export Model Manually (If Needed)

In [None]:
# If you need to manually export the model again
def export_model_manually():
    """Train a throwaway XGBoost regressor on synthetic data and save it.

    WARNING: this overwrites ``model.pkl`` inside ``MODEL_DIR``. The saved
    model is fitted on randomly generated data and is only suitable for
    exercising the surrounding code, not for real predictions.

    The synthetic frame has the same eight columns, in the same order, that
    ``predict_completion_time`` passes to the model. The target is a linear
    combination of some of those columns plus Gaussian noise.

    NOTE(review): ``predict_completion_time`` scales the numeric columns with
    ``scaler`` before predicting, whereas this model is fitted on unscaled
    values - confirm that mismatch is acceptable for a smoke-test model.

    Returns:
        None. Prints the hold-out mean absolute error and the export path.
    """
    from xgboost import XGBRegressor
    from sklearn.model_selection import train_test_split

    # Create dummy data with a local generator, so the notebook's global
    # NumPy random state is left untouched
    n_samples = 1000
    rng = np.random.default_rng(42)

    # is_weekend is derived from the weekday (same rule as the predictor:
    # weekday >= 5) instead of being drawn independently
    weekday = rng.integers(0, 7, n_samples)

    # Generate dummy features
    df = pd.DataFrame({
        'appointment_hour': rng.integers(8, 18, n_samples),
        'appointment_weekday': weekday,
        'is_weekend': (weekday >= 5).astype(int),
        'month': rng.integers(1, 13, n_samples),
        'staff_load_ratio': rng.uniform(0.5, 3.0, n_samples),
        'employees_on_duty': rng.integers(2, 10, n_samples),
        'task_id_encoded': rng.integers(0, 5, n_samples),
        'section_id_encoded': rng.integers(0, 4, n_samples),
    })

    # Generate target
    y = 30 + 5 * df['appointment_hour'] - 2 * df['appointment_weekday'] + \
        15 * df['is_weekend'] + 3 * df['month'] - 8 * df['staff_load_ratio'] + \
        4 * df['employees_on_duty'] + rng.normal(0, 5, n_samples)

    # Split train/test
    X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.2, random_state=42)

    # Train XGBoost model
    regressor = XGBRegressor(
        n_estimators=100,
        learning_rate=0.1,
        max_depth=6,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42
    )

    regressor.fit(X_train, y_train)

    # Use the held-out rows for a quick sanity check of the fit
    holdout_mae = float(np.mean(np.abs(regressor.predict(X_test) - y_test.to_numpy())))
    print(f"Hold-out MAE: {holdout_mae:.2f} minutes")

    # Save model (create the target directory first if it is missing)
    os.makedirs(MODEL_DIR, exist_ok=True)
    model_path = os.path.join(MODEL_DIR, 'model.pkl')
    joblib.dump(regressor, model_path)
    print(f"Model exported to {model_path}")

# Uncomment to run
# export_model_manually()