In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Input CSV files loaded by run_demo().
RAW_EVENTS_PATH = "raw_events.csv"                   # per-customer raw event log
DAILY_RISK_PATH = "daily_risk_grades.csv"            # daily churn probability / risk grade rows
SMOOTHED_PROBS_PATH = "smoothed_probabilities.csv"   # per-n-gram churn statistics

# Customer showcased by the demo; chosen by the author as one known to have churned.
DEMO_CUSTOMER_ID = 100

def generate_risk_chart(df, customer_id, prior_mean=0.15):
    """Generates a chart showing the customer's risk probability over time.

    The chart is written to ``risk_chart_cust_<customer_id>.png`` in the
    current working directory. The figure is intentionally left open so that
    notebook backends can still render it inline after the call.

    Args:
        df: DataFrame with the columns ``customer_id``, ``event_date``,
            ``churn_probability``, ``actual_churn_in_90_days`` and
            ``risk_grade``.
        customer_id: Value matched against ``df['customer_id']``.
        prior_mean: Height of the dashed reference line (default 0.15).

    Returns:
        The name of the saved PNG file, or ``None`` when ``df`` holds no rows
        for ``customer_id`` (nothing is plotted or written in that case).
    """
    # filter data for the specific customer
    customer_data = df[df['customer_id'] == customer_id].copy()
    if customer_data.empty:
        # Bail out before any figure is created; idxmax() below would
        # otherwise raise on an empty selection.
        print(f"Error: Customer ID {customer_id} not found in risk data.")
        return None

    customer_data['event_date'] = pd.to_datetime(customer_data['event_date'])
    # Plot in chronological order so the line does not zig-zag when the input
    # rows are unsorted; the fresh index keeps the .loc lookup below unambiguous.
    customer_data = (
        customer_data.sort_values('event_date', kind='stable')
        .reset_index(drop=True)
    )
    probabilities = customer_data['churn_probability']

    # set up the plot
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(customer_data['event_date'], probabilities,
            label='Smoothed Churn Probability', color='blue', linewidth=2)

    # add a reference line for the prior mean
    ax.axhline(y=prior_mean, color='gray', linestyle='--',
               label=f'Prior Mean ({prior_mean:.0%} Churn Rate)')

    # highlight the span between the first and last row flagged as churn
    churn_events = customer_data[customer_data['actual_churn_in_90_days'] == 1]
    if not churn_events.empty:
        churn_start_date = churn_events['event_date'].min()
        churn_end_date = churn_events['event_date'].max()
        ax.axvspan(churn_start_date, churn_end_date, color='red', alpha=0.1,
                   label='90-Day Churn Window')

    # annotate the highest risk point (skipped when every probability is NaN)
    y_top = 0.5
    if probabilities.notna().any():
        max_prob_row = customer_data.loc[probabilities.idxmax()]
        peak = max_prob_row['churn_probability']
        ax.scatter(max_prob_row['event_date'], peak, color='red', zorder=5)
        ax.annotate(f"Max Risk: {max_prob_row['risk_grade']} ({peak:.2f})",
                    (max_prob_row['event_date'], peak),
                    textcoords="offset points", xytext=(0, 10), ha='center',
                    color='red')
        # Keep the default 0-0.5 view, but grow it when the peak would
        # otherwise be clipped off the top of the axes.
        y_top = max(y_top, peak * 1.1)

    # formatting
    ax.set_title(f"Customer {customer_id} Daily Churn Risk Over Time")
    ax.set_xlabel("Date")
    ax.set_ylabel("Churn Probability (0-1)")
    ax.set_ylim(0, y_top)
    ax.legend()
    ax.grid(True, linestyle=':', alpha=0.6)
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()

    # Save the chart
    output_file = f'risk_chart_cust_{customer_id}.png'
    fig.savefig(output_file)
    print(f"Risk chart saved to {output_file}")
    return output_file

def run_demo():
    """Runs the full demo for a single customer.

    Loads the three CSV inputs (``RAW_EVENTS_PATH``, ``DAILY_RISK_PATH``,
    ``SMOOTHED_PROBS_PATH``), then for ``DEMO_CUSTOMER_ID``:

    1. prints the row with the highest ``churn_probability``,
    2. prints the raw events from the 30 days up to and including that date,
    3. prints the statistics stored for that row's driving n-gram,
    4. saves the risk chart via ``generate_risk_chart``.

    Prints an error and returns early when a data file is missing, when the
    customer has no rows in the daily risk data, or when none of the
    customer's rows carries a churn probability.

    Returns:
        None.
    """
    print("\n--- Running Demo for Single Customer ---")

    try:
        raw_events_df = pd.read_csv(RAW_EVENTS_PATH)
        daily_risk_df = pd.read_csv(DAILY_RISK_PATH)
        smoothed_df = pd.read_csv(SMOOTHED_PROBS_PATH)
    except FileNotFoundError:
        print("Error: Required data files not found.")
        return

    # 1. Find a good event to showcase (e.g., the highest risk event for the demo customer)
    demo_customer_data = daily_risk_df[daily_risk_df['customer_id'] == DEMO_CUSTOMER_ID]
    if demo_customer_data.empty:
        print(f"Error: Customer ID {DEMO_CUSTOMER_ID} not found in daily risk data.")
        return

    probabilities = demo_customer_data['churn_probability']
    if probabilities.isna().all():
        # idxmax() has no meaningful answer when every value is missing.
        print(f"Error: Customer ID {DEMO_CUSTOMER_ID} has no churn probabilities.")
        return

    # Find the day with the highest risk
    highest_risk_row = demo_customer_data.loc[probabilities.idxmax()]
    demo_date = highest_risk_row['event_date']
    driving_ngram = highest_risk_row['driving_ngram']

    print(f"Showcasing highest risk event for Customer {DEMO_CUSTOMER_ID} on {demo_date}:")
    print(f"Driving N-gram: {driving_ngram}")
    print(f"Final Smoothed Probability: {highest_risk_row['churn_probability']:.4f}")
    print(f"Assigned Risk Grade: {highest_risk_row['risk_grade']}")

    # 2. Show the raw event sequence leading up to that date
    # .copy() gives an independent frame, so the column assignment below does
    # not write into a slice of raw_events_df (SettingWithCopyWarning).
    raw_cust_events = raw_events_df[raw_events_df['customer_id'] == DEMO_CUSTOMER_ID].copy()
    raw_cust_events['event_date'] = pd.to_datetime(raw_cust_events['event_date'])

    # Look back 30 days from the demo date (window is exclusive at the start,
    # inclusive at the demo date)
    demo_date_dt = pd.to_datetime(demo_date)
    lookback_date = demo_date_dt - pd.Timedelta(days=30)

    in_window = (
        (raw_cust_events['event_date'] > lookback_date)
        & (raw_cust_events['event_date'] <= demo_date_dt)
    )
    sequence_df = raw_cust_events[in_window].sort_values(by='event_date')

    # Print in a markdown format so that it is readable.
    print("\nRaw Event Sequence (Last 30 Days)")
    sequence_view = sequence_df[['event_date', 'event_code']]
    try:
        print(sequence_view.to_markdown(index=False))
    except ImportError:
        # to_markdown needs the optional 'tabulate' package; fall back to a
        # plain-text table instead of aborting the demo.
        print(sequence_view.to_string(index=False))

    # 3. show the raw and smoothed probability for the driving n-gram
    ngram_matches = smoothed_df[smoothed_df['ngram'] == driving_ngram]
    if ngram_matches.empty:
        # Keep going so the chart is still produced even without the stats.
        print(f"\nWarning: N-gram {driving_ngram!r} not found in smoothed probabilities; "
              "skipping N-gram statistics.")
    else:
        ngram_stats = ngram_matches.iloc[0]

        print("\n--- N-gram Statistics for Driving N-gram ---")
        print(f"N-gram: {ngram_stats['ngram']}")
        print(f"Total Occurrences: {ngram_stats['count']}")
        print(f"Churn Occurrences: {ngram_stats['churn_count']}")
        print(f"Raw Churn Rate: {ngram_stats['raw_churn_rate']:.4f}")
        print(f"Smoothed Churn Probability: {ngram_stats['smoothed_probability']:.4f}")

    # 4. Generate the risk chart
    generate_risk_chart(daily_risk_df, DEMO_CUSTOMER_ID)

# Script entry point: run the demo only when executed directly, not on import.
if __name__ == "__main__":
    run_demo()


In [3]:
pip install tabulate

Defaulting to user installation because normal site-packages is not writeable
Collecting tabulate
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)
Installing collected packages: tabulate
Successfully installed tabulate-0.9.0
Note: you may need to restart the kernel to use updated packages.
