In [1]:
import mlrun
import pandas as pd
from langchain_openai.chat_models import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from dotenv import load_dotenv

# Load environment variables from a local .env file, if one is present.
# NOTE(review): presumably this supplies the OpenAI credentials used by ChatOpenAI — confirm.
load_dotenv()

# Columns sent to the churn model. The list order is the order in which the
# values are placed in the prediction request payload.
# NOTE(review): presumably this has to match the column order used at training time — confirm.
CHURN_FEATURES = [
    # Plan flags and sentiment class
    "international_plan",
    "voice_mail_plan",
    "sentiment_label",
    # Account and voicemail
    "account_length",
    "number_vmail_messages",
    # Daytime usage
    "total_day_minutes",
    "total_day_calls",
    "total_day_charge",
    # Evening usage
    "total_eve_minutes",
    "total_eve_calls",
    "total_eve_charge",
    # Night usage
    "total_night_minutes",
    "total_night_calls",
    "total_night_charge",
    # International usage
    "total_intl_minutes",
    "total_intl_calls",
    "total_intl_charge",
    # Support contact and sentiment score
    "number_customer_service_calls",
    "sentiment_score",
]

# Columns copied verbatim into the customer insight report.
USER_INFO_FEATURES = [
    "account_length",
    "total_charge_usage",
    "total_minutes_usage",
    "number_customer_service_calls",
    "sentiment_label",
]

# Numeric sentiment class -> human-readable label.
SENTIMENT_MAPPING = {
    0: "negative",
    1: "neutral",
    2: "positive",
}

In [2]:
# Values stored as project parameters.
# NOTE(review): presumably these are read by the project's setup script to
# configure the image build and the project source — confirm.
project_parameters = {
    "base_image": "mlrun/mlrun",
    "requirements_file": "requirements.txt",
    "force_build": False,
    "source": "v3io:///bigdata/churn.zip",
}

# Load the project if it already exists, otherwise create it.
project = mlrun.get_or_create_project(
    "churn-agents-webinar",
    parameters=project_parameters,
)

Project Source: v3io:///bigdata/churn.zip
Exporting project as zip archive to v3io:///bigdata/churn.zip...
> 2025-01-08 19:29:36,836 [info] Project loaded successfully: {"project_name":"churn-agents-webinar"}


### Load data

In [3]:
# Full churn dataset; later cells read its usage columns and `chat_log` column.
all_data = mlrun.get_dataitem(
    "store://datasets/churn-agents-webinar/churn#0:latest"
).as_df()

# Test split used for the per-customer reports.
# NOTE(review): presumably produced by the project's data-processing step — confirm.
test_data = mlrun.get_dataitem(
    "store://datasets/churn-agents-webinar/data-process-data_test#0:latest"
).as_df()

In [4]:
# Bucket names for the five equal-frequency (quintile) bins, lowest to highest.
usage_labels = ["low", "medium-low", "medium", "medium-high", "high"]

# Total charge across day, evening, night and international usage.
all_data["total_charge"] = (
    all_data["total_day_charge"]
    + all_data["total_eve_charge"]
    + all_data["total_night_charge"]
    + all_data["total_intl_charge"]
)
all_data["total_charge_usage"] = pd.qcut(all_data["total_charge"], q=5, labels=usage_labels)

# Total minutes across the same four usage periods.
all_data["total_minutes"] = (
    all_data["total_day_minutes"]
    + all_data["total_eve_minutes"]
    + all_data["total_night_minutes"]
    + all_data["total_intl_minutes"]
)
all_data["total_minutes_usage"] = pd.qcut(all_data["total_minutes"], q=5, labels=usage_labels)

In [5]:
# Attach the usage buckets and chat logs to the test rows (left join on the index).
# NOTE(review): assumes test_data's index labels refer to the same customers as
# all_data's index — confirm.
extra_columns = ["total_charge_usage", "total_minutes_usage", "chat_log"]

# Drop copies left behind by an earlier run of this cell before joining;
# otherwise re-running the cell fails because join() rejects overlapping
# column names when no suffix is given.
test_data = test_data.drop(columns=extra_columns, errors="ignore").join(all_data[extra_columns])

In [6]:
# Inspect the first row of the test set after the join.
test_data.iloc[0]

international_plan                                                             0.0
voice_mail_plan                                                                0.0
sentiment_label                                                                1.0
account_length                                                                 117
number_vmail_messages                                                            0
total_day_minutes                                                            181.5
total_day_calls                                                                 95
total_day_charge                                                             30.86
total_eve_minutes                                                            205.1
total_eve_calls                                                                 88
total_eve_charge                                                             17.43
total_night_minutes                                                          204.0
tota

### Initialize ML and LLM models

In [7]:
# The project's "serving" function; its `invoke` method is used later to call
# the churn model's prediction endpoint.
serving_fn = project.get_function("serving")

# `model=` selects the OpenAI chat model. The `name=` keyword only labels the
# runnable, so passing the model id there leaves the client on its default model.
# temperature=0 asks for the least random output.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Prompt with a single `text` placeholder for the support conversation.
prompt = PromptTemplate(
    input_variables=["text"],
    template="Summarize the following support ticket into 1 or 2 sentences:\n\n{text}\n",
)

# prompt -> chat model -> plain string output.
chain = prompt | llm | StrOutputParser()

### Calculate report

In [10]:
def get_customer_insight_report(user_id: int) -> dict:
    """Build a report for one customer: profile fields, churn score and ticket summary.

    Args:
        user_id: Index label of the customer's row in ``test_data``.

    Returns:
        A dict holding the ``USER_INFO_FEATURES`` values, with
        ``account_length`` rendered as ``"<y> years, <m> months"`` and
        ``sentiment_label`` replaced by its text name, plus:

        * ``account_length_months``: the original numeric account length.
        * ``churn_likelihood_percentage``: the churn model's output rounded
          to 3 decimals.
        * ``support_ticket_summary``: LLM summary of the customer's chat log.

    Raises:
        KeyError: If ``user_id`` is not a label in ``test_data``'s index, or
            the row's sentiment label is not a key of ``SENTIMENT_MAPPING``.
    """

    def format_months(months):
        """Render a month count as "<years> years, <months> months"."""
        years, remaining_months = divmod(months, 12)
        return f"{years} years, {remaining_months} months"

    def churn_inference(user_id):
        """Send the customer's feature vector to the churn model and return its rounded output."""
        # Feature values in CHURN_FEATURES order, as a plain list for the JSON body.
        churn_features = test_data[CHURN_FEATURES].loc[user_id].tolist()
        resp = serving_fn.invoke(
            "/v2/models/churn_model/predict",
            body={
                "inputs": [churn_features]
            }
        )
        # One input row was sent, so only the first output is read.
        churn_pct = round(resp["outputs"][0], 3)
        return churn_pct

    def summarize_support_ticket(user_id):
        """Summarize the customer's chat log with the LLM chain."""
        return chain.invoke({"text": test_data.loc[user_id, "chat_log"]})

    # Take an explicit copy of the row so the assignments below can never write
    # through to, or trigger chained-assignment warnings about, test_data.
    user_info = test_data.loc[user_id, USER_INFO_FEATURES].copy()

    # Keep the numeric value before replacing it with the formatted text.
    user_info["account_length_months"] = user_info["account_length"]
    user_info["account_length"] = format_months(user_info["account_length"])

    # NOTE(review): despite the key name, the recorded outputs are fractions
    # between 0 and 1 rather than 0-100 percentages — confirm intended scale.
    user_info["churn_likelihood_percentage"] = churn_inference(user_id)
    user_info["sentiment_label"] = SENTIMENT_MAPPING[user_info["sentiment_label"]]
    user_info["support_ticket_summary"] = summarize_support_ticket(user_id)

    return user_info.to_dict()

In [11]:
# Example report for customer 2296 (recorded run: churn score 0.01, neutral sentiment).
get_customer_insight_report(2296)

> 2025-01-08 19:30:02,640 [info] Invoking function: {"method":"POST","path":"http://nuclio-churn-agents-webinar-serving.default-tenant.svc.cluster.local:8080/v2/models/churn_model/predict"}


{'account_length': '9 years, 9 months',
 'total_charge_usage': 'medium',
 'total_minutes_usage': 'medium',
 'number_customer_service_calls': 2,
 'sentiment_label': 'neutral',
 'account_length_months': 117,
 'churn_likelihood_percentage': 0.01,
 'support_ticket_summary': 'The customer has voice, text, and data services and is looking to upgrade their phone due to it being slow for a while.'}

In [12]:
# Example report for customer 460 (recorded run: churn score 0.464, neutral sentiment).
get_customer_insight_report(460)

> 2025-01-08 19:30:04,360 [info] Invoking function: {"method":"POST","path":"http://nuclio-churn-agents-webinar-serving.default-tenant.svc.cluster.local:8080/v2/models/churn_model/predict"}


{'account_length': '6 years, 7 months',
 'total_charge_usage': 'medium-low',
 'total_minutes_usage': 'medium',
 'number_customer_service_calls': 5,
 'sentiment_label': 'neutral',
 'account_length_months': 79,
 'churn_likelihood_percentage': 0.464,
 'support_ticket_summary': 'The customer is concerned about the escalating cost of voice services impacting turnover and is looking for alternative options, but hangs up before the TelCom agent can provide further assistance.'}

In [13]:
# Example report for customer 1902 (recorded run: churn score 0.792, negative sentiment).
get_customer_insight_report(1902)

> 2025-01-08 19:30:06,877 [info] Invoking function: {"method":"POST","path":"http://nuclio-churn-agents-webinar-serving.default-tenant.svc.cluster.local:8080/v2/models/churn_model/predict"}


{'account_length': '10 years, 5 months',
 'total_charge_usage': 'high',
 'total_minutes_usage': 'high',
 'number_customer_service_calls': 1,
 'sentiment_label': 'negative',
 'account_length_months': 125,
 'churn_likelihood_percentage': 0.792,
 'support_ticket_summary': 'Customer is frustrated with the poor customer service they have been receiving for their phone, despite not having any issues with the phone itself for the past three years.'}