Deployment of Classification and Regression Models Using Gradio

In [14]:
import gradio as gr
import joblib
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [15]:
# --- Feature columns, in the order each model expects (MUST match training order) ---
# Classification (churn) model inputs.
feature_cols_class = [
    'country',
    'city',
    'gender',
    'age',
    'membership_tier',
    'tenure_months',
    'avg_session_length',
    'sessions_per_month',
    'support_tickets',
    'last_payment_method',
    'is_mobile_user',
    'num_devices',
    'email_click_rate',
    'referral_count',
    'discount_rate',
    'satisfaction_score',
    'monthly_spend',
]

# Regression (spend) model inputs.
feature_cols_reg = [
    'avg_session_length',
    'sessions_per_month',
    'engagement_index',
    'total_session_time',
]

# --- Reference dataset: source of the category lists and slider ranges used below ---
df = pd.read_csv('cleaned_data.csv')

# --- Persisted artifacts: classifier, regressor, and the feature scaler ---
model_RF_class = joblib.load("results/model_RF_class.joblib")
model_RF_reg = joblib.load("results/model_RF_reg.joblib")
norm_x = joblib.load("results/scaler.joblib")

# --- Build encoders for CATEGORICAL INPUT features from df ---
# One LabelEncoder per categorical column, keyed by column name. The fitted
# classes are also what the UI offers as dropdown choices.
# NOTE(review): these encoders are re-fitted from the CSV at startup rather than
# loaded from the training run; the integer codes only match what the model saw
# if training encoded the same set of labels the same way — confirm, or persist
# and load the training-time encoders instead.
categorical_input_cols = ['country', 'city', 'gender', 'membership_tier', 'last_payment_method']
encoders = {}
for col in categorical_input_cols:
    # Drop missing values BEFORE casting to str. Casting first turns NaN into
    # the literal string "nan", after which dropna() has nothing to remove and
    # "nan" would be learned as a genuine category.
    known_categories = df[col].dropna().astype(str).unique()
    le = LabelEncoder()
    le.fit(known_categories)
    encoders[col] = le

# --- Helper for min/max ---
def get_min_max(col, fallback_min=0.0, fallback_max=100.0):
    """Return the ``(min, max)`` of column *col* in ``df`` for use as slider bounds.

    Args:
        col: Name of the column to look up in the module-level ``df``.
        fallback_min: Lower bound returned when no usable minimum exists.
        fallback_max: Upper bound returned when no usable maximum exists.

    Returns:
        ``(float(min), float(max))`` of the column when it exists and has at
        least one non-missing value; otherwise ``(fallback_min, fallback_max)``
        exactly as passed in.
    """
    if col not in df.columns:
        return fallback_min, fallback_max

    lowest, highest = df[col].min(), df[col].max()
    # An empty or all-NaN column yields NaN extremes; NaN is not a usable
    # slider bound, so treat it the same as a missing column.
    if pd.isna(lowest) or pd.isna(highest):
        return fallback_min, fallback_max
    return float(lowest), float(highest)

# --- Churn Prediction Function ---
def predict_churn(
    country, city, gender, age, membership_tier, tenure_months,
    avg_session_length, sessions_per_month, support_tickets, last_payment_method,
    is_mobile_user, num_devices, email_click_rate, referral_count, discount_rate,
    satisfaction_score, monthly_spend
):
    """Classify a single customer as churn / no churn.

    The arguments are the raw form values, one per entry of
    ``feature_cols_class``. They are assembled into a one-row DataFrame, the
    categorical columns are label-encoded with ``encoders``, the row is scaled
    with ``norm_x`` and passed to ``model_RF_class``.

    Returns:
        tuple: ``(label, probability)`` where ``probability`` is column 1 of
        ``predict_proba`` for the row and ``label`` is ``"Yes"`` when that
        probability is at least 0.5, otherwise ``"No"``.
    """
    data = {
        'country': [country],
        'city': [city],
        'gender': [gender],
        'age': [age],
        'membership_tier': [membership_tier],
        'tenure_months': [tenure_months],
        'avg_session_length': [avg_session_length],
        'sessions_per_month': [sessions_per_month],
        'support_tickets': [support_tickets],
        'last_payment_method': [last_payment_method],
        'is_mobile_user': [int(is_mobile_user)],  # checkbox bool -> 0/1
        'num_devices': [num_devices],
        'email_click_rate': [email_click_rate],
        'referral_count': [referral_count],
        'discount_rate': [discount_rate],
        'satisfaction_score': [satisfaction_score],
        'monthly_spend': [monthly_spend]
    }
    X_new = pd.DataFrame(data)

    # Encode categorical inputs
    for col in categorical_input_cols:
        # The encoders were fitted on str-cast values, so cast the same way
        # here; otherwise a non-string value can never match a known class.
        raw_values = X_new[col].astype(str)
        try:
            encoded = encoders[col].transform(raw_values)
        except ValueError:
            # Best-effort fallback for a category the encoder has never seen:
            # use code 0 so a prediction can still be produced.
            encoded = 0
        X_new[col] = encoded

    # Scale (ensure column order matches training)
    X_input = X_new[feature_cols_class]
    X_scaled = norm_x.transform(X_input)

    proba = model_RF_class.predict_proba(X_scaled)[0, 1]
    label = "Yes" if proba >= 0.5 else "No"
    return label, float(proba)

# --- Spend Prediction ---
def predict_spend(avg_session_length, sessions_per_month, engagement_index, total_session_time):
    """Predict spend from four engagement metrics and return a display string.

    Each argument is converted to ``float``, placed in a one-row DataFrame
    whose columns are arranged according to ``feature_cols_reg`` (any column
    missing from the row is filled with 0.0), and passed to ``model_RF_reg``.

    Returns:
        str: The first prediction formatted to two decimals with an "RM" prefix.
    """
    form_values = {
        'avg_session_length': avg_session_length,
        'sessions_per_month': sessions_per_month,
        'engagement_index': engagement_index,
        'total_session_time': total_session_time,
    }
    row = {name: float(value) for name, value in form_values.items()}

    features = pd.DataFrame([row]).reindex(columns=feature_cols_reg, fill_value=0.0)
    predictions = model_RF_reg.predict(features)
    pred = predictions[0]
    return f"Predicted Average Monthly Spend: RM{pred:.2f}"

# --- Gradio UI ---
# Two-tab app: one tab drives predict_churn, the other drives predict_spend.
# Components are laid out in the order they are created inside each context.
with gr.Blocks() as demo:
    gr.Markdown("# Customer Churn & Spend Predictor")

    with gr.Tab("📉 Churn Prediction"):
        gr.Markdown("Enter customer details to predict if they will churn.")

        with gr.Row():
            # LEFT COLUMN: Inputs
            # Dropdown choices are the classes the encoders were fitted on.
            # Slider bounds come from get_min_max(); the extra positional
            # arguments are the fallback bounds used when df lacks the column.
            with gr.Column():
                country = gr.Dropdown(choices=encoders['country'].classes_.tolist(), label="Country")
                city = gr.Dropdown(choices=encoders['city'].classes_.tolist(), label="City")
                gender = gr.Dropdown(choices=encoders['gender'].classes_.tolist(), label="Gender")
                age = gr.Slider(*get_min_max('age', 18, 90), step=1, label="Age")
                membership_tier = gr.Dropdown(choices=encoders['membership_tier'].classes_.tolist(), label="Membership Tier")
                tenure_months = gr.Slider(*get_min_max('tenure_months'), step=1, label="Tenure (Months)")
                avg_session_length = gr.Slider(*get_min_max('avg_session_length'), step=0.01, label="Avg Session Length")
                sessions_per_month = gr.Slider(*get_min_max('sessions_per_month'), step=1, label="Sessions/Month")
                support_tickets = gr.Slider(*get_min_max('support_tickets', 0, 20), step=1, label="Support Tickets")
                last_payment_method = gr.Dropdown(choices=encoders['last_payment_method'].classes_.tolist(), label="Payment Method")
                is_mobile_user = gr.Checkbox(label="Is Mobile User?")
                num_devices = gr.Slider(*get_min_max('num_devices', 1, 5), step=1, label="Num Devices")
                email_click_rate = gr.Slider(*get_min_max('email_click_rate', 0.0, 1.0), step=0.001, label="Email Click Rate")
                referral_count = gr.Slider(*get_min_max('referral_count', 0, 10), step=1, label="Referral Count")
                discount_rate = gr.Slider(*get_min_max('discount_rate', 0.0, 0.5), step=0.001, label="Discount Rate")
                satisfaction_score = gr.Slider(*get_min_max('satisfaction_score', 1, 5), step=0.01, label="Satisfaction Score")
                monthly_spend = gr.Slider(*get_min_max('monthly_spend'), step=0.01, label="Monthly Spend (RM)")

                btn_cls = gr.Button("Predict Churn", variant="primary")

            # RIGHT COLUMN: Outputs
            # Receives the (label, probability) pair returned by predict_churn.
            with gr.Column():
                out_label = gr.Textbox(label="Churn Prediction", interactive=False)
                out_proba = gr.Number(label="Churn Probability", interactive=False)

        # Connect button
        # The inputs list is in the same order as predict_churn's parameters.
        btn_cls.click(
            fn=predict_churn,
            inputs=[
                country, city, gender, age, membership_tier, tenure_months,
                avg_session_length, sessions_per_month, support_tickets, last_payment_method,
                is_mobile_user, num_devices, email_click_rate, referral_count, discount_rate,
                satisfaction_score, monthly_spend
            ],
            outputs=[out_label, out_proba]
        )

    with gr.Tab("💰 Spend Prediction"):
        gr.Markdown("Predict average monthly spend based on engagement metrics.")

        with gr.Row():
            # Inputs for the regression model; separate components from the
            # churn tab, hence the _reg suffix on the two shared metrics.
            # NOTE(review): Sessions/Month uses step=0.01 here but step=1 on the
            # churn tab — confirm the difference is intended.
            with gr.Column():
                avg_session_length_reg = gr.Slider(*get_min_max('avg_session_length'), step=0.01, label="Avg Session Length")
                sessions_per_month_reg = gr.Slider(*get_min_max('sessions_per_month'), step=0.01, label="Sessions/Month")
                engagement_index = gr.Slider(*get_min_max('engagement_index', 0, 100), step=0.01, label="Engagement Index")
                total_session_time = gr.Slider(*get_min_max('total_session_time'), step=0.01, label="Total Session Time")

                btn_reg = gr.Button("Predict Spend", variant="primary")

            # Shows the formatted string returned by predict_spend.
            with gr.Column():
                out_spend = gr.Textbox(label="Prediction Result", interactive=False)

        # The inputs list is in the same order as predict_spend's parameters.
        btn_reg.click(
            fn=predict_spend,
            inputs=[avg_session_length_reg, sessions_per_month_reg, engagement_index, total_session_time],
            outputs=out_spend
        )

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()

* Running on local URL:  http://127.0.0.1:7865
* To create a public link, set `share=True` in `launch()`.
