Deployment for Classification and Regression Model using Gradio

In [25]:
import gradio as gr
import joblib
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# --- Load data ---
df = pd.read_csv('cleaned_data.csv')

# --- Load models and scaler ---
model_RF_class = joblib.load("results/model_RF_class.joblib")
model_RF_reg = joblib.load("results/model_RF_reg.joblib")
norm_x = joblib.load("results/scaler.joblib")

# --- Define feature columns: ALL except target & metadata ---
non_feature_cols = {'churn', 'signup_date', 'customer_id'}  # ← add 'customer_id'
feature_cols_class = [col for col in df.columns if col not in non_feature_cols]
feature_cols_reg = ['avg_session_length', 'sessions_per_month', 'engagement_index', 'total_session_time']


# --- Categorical columns (update if needed) ---
categorical_input_cols = ['country', 'city', 'gender', 'membership_tier', 'last_payment_method']

# --- Build encoders from df ---
encoders = {}
for col in categorical_input_cols:
    if col in df.columns:
        le = LabelEncoder()
        le.fit(df[col].astype(str).dropna().unique())
        encoders[col] = le
    else:
        print(f"[WARNING] Categorical column '{col}' not found in data!")

# --- Helper for min/max ---
def get_min_max(col, fallback_min=0.0, fallback_max=100.0):
    if col in df.columns:
        return float(df[col].min()), float(df[col].max())
    else:
        print(f"[WARNING] Column '{col}' not found. Using fallback range.")
        return fallback_min, fallback_max

# --- Churn Prediction (Dynamic Inputs) ---
def predict_churn(*inputs):
    try:
        # Build input dict from feature list
        data = {}
        for col, val in zip(feature_cols_class, inputs):
            if col in categorical_input_cols:
                data[col] = [str(val)]
            elif col == 'is_mobile_user':
                data[col] = [int(val)]
            else:
                data[col] = [float(val)]
        
        X_new = pd.DataFrame(data)

        # Encode categorical features
        for col in categorical_input_cols:
            if col in X_new.columns and col in encoders:
                known = set(encoders[col].classes_)
                X_new[col] = X_new[col].apply(lambda x: x if x in known else encoders[col].classes_[0])
                X_new[col] = encoders[col].transform(X_new[col])

        # Ensure correct order and type
        X_input = X_new[feature_cols_class].astype(float)
        X_scaled = norm_x.transform(X_input)

        # Predict
        proba = model_RF_class.predict_proba(X_scaled)[0, 1]
        label = "Yes" if proba >= 0.5 else "No"
        return label, float(proba)

    except Exception as e:
        print(f"[PREDICTION ERROR] {e}")
        return "Error", 0.0

# --- Spend Prediction ---
def predict_spend(avg_session_length, sessions_per_month, engagement_index, total_session_time):
    try:
        x = pd.DataFrame([{
            'avg_session_length': float(avg_session_length),
            'sessions_per_month': float(sessions_per_month),
            'engagement_index': float(engagement_index),
            'total_session_time': float(total_session_time),
        }])
        x = x.reindex(columns=feature_cols_reg, fill_value=0.0)
        pred = model_RF_reg.predict(x)[0]
        return f"Predicted Average Monthly Spend: RM{pred:.2f}"
    except Exception as e:
        print(f"[SPEND PREDICTION ERROR] {e}")
        return "Error in spend prediction"

# --- Build Gradio UI ---
with gr.Blocks(title="Customer Churn & Spend Predictor") as demo:
    gr.Markdown("# 📊 Customer Churn & Spend Predictor")

    with gr.Tabs():
        # === CHURN PREDICTION TAB ===
        with gr.Tab("📉 Churn Prediction"):
            gr.Markdown("Enter customer details to predict if they will churn.")

            with gr.Row():
                with gr.Column():
                    input_components = []
                    for col in feature_cols_class:
                        if col in categorical_input_cols and col in encoders:
                            comp = gr.Dropdown(
                                choices=sorted(encoders[col].classes_.tolist()),
                                label=col
                            )
                        elif col == 'is_mobile_user':
                            comp = gr.Checkbox(label=col)
                        else:
                            min_val, max_val = get_min_max(col, 0, 100)
                            step = 0.01 if max_val > 10 else 1
                            comp = gr.Slider(min_val, max_val, step=step, label=col)
                        input_components.append(comp)

                    btn_cls = gr.Button("Predict Churn", variant="primary")

                with gr.Column():
                    out_label = gr.Textbox(label="Churn Prediction", interactive=False)
                    out_proba = gr.Number(label="Churn Probability", interactive=False)

            btn_cls.click(
                fn=predict_churn,
                inputs=input_components,
                outputs=[out_label, out_proba]
            )

        # === SPEND PREDICTION TAB ===
        with gr.Tab("💰 Spend Prediction"):
            gr.Markdown("Predict average monthly spend based on engagement metrics.")

            with gr.Row():
                with gr.Column():
                    avg_session_length_reg = gr.Slider(*get_min_max('avg_session_length', 5, 60), step=0.01, label="Avg Session Length")
                    sessions_per_month_reg = gr.Slider(*get_min_max('sessions_per_month', 1, 50), step=0.01, label="Sessions/Month")
                    engagement_index = gr.Slider(*get_min_max('engagement_index', 0, 100), step=0.01, label="Engagement Index")
                    total_session_time = gr.Slider(*get_min_max('total_session_time', 0, 5000), step=0.01, label="Total Session Time")

                    btn_reg = gr.Button("Predict Spend", variant="primary")

                with gr.Column():
                    out_spend = gr.Textbox(label="Prediction Result", interactive=False)

            btn_reg.click(
                fn=predict_spend,
                inputs=[avg_session_length_reg, sessions_per_month_reg, engagement_index, total_session_time],
                outputs=out_spend
            )

# Launch
if __name__ == "__main__":
    demo.launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.


[PREDICTION ERROR] This RandomForestClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.


In [None]:
print("Features expected by model:", model_RF_class.feature_names_in_)

AttributeError: 'RandomForestClassifier' object has no attribute 'feature_names_in_'

[PREDICTION ERROR] The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- customer_id

[PREDICTION ERROR] The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- customer_id

[PREDICTION ERROR] The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- customer_id

