In [4]:
# Import required libraries
import argparse
import os
import sys
import numpy as np
import pandas as pd
import boto3
import sagemaker
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
import joblib

# Load the training set from its project-relative CSV
train_data = pd.read_csv('project_data/train_resource_allocation.csv')

# Customer value: monthly charges weighted by half the tenure, minus a
# flat $50 for every support ticket raised
monthly_revenue = train_data['monthly_charges']
tenure_value = 0.5 * train_data['tenure_months']
support_cost = 50 * train_data['support_tickets']
customer_value = monthly_revenue * tenure_value - support_cost

# Model inputs (three numeric columns) and the churn label
features = ['monthly_charges', 'tenure_months', 'support_tickets']
X, y = train_data[features], train_data['is_churn']

# Standardise the features; the fitted scaler is reused at prediction time
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Fit the random forest (fit() returns the estimator itself)
clf = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    random_state=42,
).fit(X_scaled, y)

# Persist classifier and scaler together so they are always loaded as a pair
os.makedirs('./model_output', exist_ok=True)
model = dict(classifier=clf, scaler=scaler)
joblib.dump(model, './model_output/model.joblib')

# Summarise the training data (these are data statistics, not model scores)
print(
    "\nTraining Data Summary:",
    f"Total customers: {len(train_data)}",
    f"Average customer value: ${customer_value.mean():.2f}",
    f"Churn rate: {(y.mean() * 100):.1f}%",
    sep="\n",
)

# Create test data: three hand-written customers used to demo the pipeline
test_data = pd.DataFrame({
    'monthly_charges': [99.99, 45.00, 150.00],
    'tenure_months': [12, 36, 2],
    'support_tickets': [2, 0, 5]
})

# Scale test data with the scaler that was fitted on the training set
X_test = test_data[features]
X_test_scaled = scaler.transform(X_test)

# Get churn probabilities: second column of predict_proba, i.e. the second
# entry of clf.classes_ (presumably is_churn == 1 -- confirm against the data)
churn_probs = clf.predict_proba(X_test_scaled)[:, 1]

# Calculate customer value for test data (same formula as for the training
# set: charges * tenure * 0.5 minus $50 per support ticket)
test_data['customer_value'] = (test_data['monthly_charges'] *
                              test_data['tenure_months'] * 0.5 -
                              test_data['support_tickets'] * 50)

# Add predictions and segments
test_data['churn_probability'] = churn_probs

# Split each score into terciles. The scores are ranked first because
# pd.qcut raises "Bin edges must be unique" as soon as enough values tie
# (e.g. two customers both predicted at 100% churn), and duplicates='drop'
# is not an option when a fixed list of three labels is supplied.
# rank(method='first') breaks ties by row order; for tie-free data the
# resulting segments are the same as binning the raw values.
# NOTE(review): the segments are relative to this batch, not absolute, so a
# customer with a very high churn probability can still be labelled 'Medium'
# when another customer in the batch scores higher. Consider fixed thresholds
# (pd.cut) if absolute risk levels are intended.
segment_labels = ['Low', 'Medium', 'High']
test_data['value_segment'] = pd.qcut(
    test_data['customer_value'].rank(method='first'),
    q=3,
    labels=segment_labels,
)
test_data['risk_segment'] = pd.qcut(
    test_data['churn_probability'].rank(method='first'),
    q=3,
    labels=segment_labels,
)

# Resource allocation matrix, built from one row per (risk, value) segment
# pair that receives more than standard care. Any pair not listed here is
# handled by the fallback in get_allocation().
_ALLOCATION_ROWS = [
    # risk,    value,    action,             resources, strategy
    ('High', 'High', 'Immediate Action', 100,
     'Personal account manager, premium retention offers'),
    ('High', 'Medium', 'Priority Action', 75,
     'Proactive outreach, customized offers'),
    ('Medium', 'High', 'Proactive Action', 80,
     'Regular check-ins, loyalty rewards'),
]
allocation_matrix = {
    (risk, value): {
        'action': action,
        'resources': resources,
        'strategy': strategy,
    }
    for risk, value, action, resources, strategy in _ALLOCATION_ROWS
}


# Allocate resources based on segments
def get_allocation(row):
    """Return the allocation plan for one customer.

    ``row`` must support ``row['risk_segment']`` and ``row['value_segment']``
    (a DataFrame row or a plain dict both work). The pair, in that order, is
    looked up in ``allocation_matrix``; pairs without an entry receive a
    freshly built 'Standard Care' plan.
    """
    segment_pair = (row['risk_segment'], row['value_segment'])
    try:
        return allocation_matrix[segment_pair]
    except KeyError:
        return {
            'action': 'Standard Care',
            'resources': 20,
            'strategy': 'Basic customer service'
        }

# Report, for every test customer, the raw inputs, the model outputs and
# the allocation chosen for their (risk, value) segment pair
print("\nResource Allocation Results:")
for idx, row in test_data.iterrows():
    allocation = get_allocation(row)
    # Collect the whole report first, then emit it with a single print
    report_lines = [
        f"\nCustomer {idx+1}:",
        f"Monthly Charges: ${row['monthly_charges']}",
        f"Tenure: {row['tenure_months']} months",
        f"Support Tickets: {row['support_tickets']}",
        f"Churn Probability: {row['churn_probability']:.2%}",
        f"Customer Value: ${row['customer_value']:.2f}",
        f"Risk Level: {row['risk_segment']}",
        f"Value Segment: {row['value_segment']}",
        f"Recommended Action: {allocation['action']}",
        f"Strategy: {allocation['strategy']}",
        f"Resource Intensity: {allocation['resources']}%",
    ]
    print("\n".join(report_lines))


Training Data Summary:
Total customers: 25
Average customer value: $721.00
Churn rate: 52.0%

Resource Allocation Results:

Customer 1:
Monthly Charges: $99.99
Tenure: 12 months
Support Tickets: 2
Churn Probability: 96.00%
Customer Value: $499.94
Risk Level: Medium
Value Segment: Medium
Recommended Action: Standard Care
Strategy: Basic customer service
Resource Intensity: 20%

Customer 2:
Monthly Charges: $45.0
Tenure: 36 months
Support Tickets: 0
Churn Probability: 0.00%
Customer Value: $810.00
Risk Level: Low
Value Segment: High
Recommended Action: Standard Care
Strategy: Basic customer service
Resource Intensity: 20%

Customer 3:
Monthly Charges: $150.0
Tenure: 2 months
Support Tickets: 5
Churn Probability: 100.00%
Customer Value: $-100.00
Risk Level: High
Value Segment: Low
Recommended Action: Standard Care
Strategy: Basic customer service
Resource Intensity: 20%
