# Chapter 9: Business Alignment

**Purpose:** Align data exploration with business objectives and constraints.

**Outputs:**
- Business context documentation
- Success metrics definition
- Constraints and requirements

---

## 9.1 Setup

In [1]:
from customer_retention.analysis.auto_explorer import ExplorationFindings
from customer_retention.analysis.visualization import display_table
import pandas as pd
from customer_retention.core.config.experiments import FINDINGS_DIR, EXPERIMENTS_DIR, OUTPUT_DIR, setup_experiments_structure
from customer_retention.stages.temporal import TEMPORAL_METADATA_COLS


In [2]:
# === CONFIGURATION ===
from pathlib import Path

# FINDINGS_DIR is provided by customer_retention.core.config.experiments (imported in the setup cell).

# Every per-dataset findings file; anything with "multi_dataset" in its name is left out.
findings_files = [
    path
    for path in FINDINGS_DIR.glob("*_findings.yaml")
    if "multi_dataset" not in path.name
]
if not findings_files:
    raise FileNotFoundError(f"No findings files found in {FINDINGS_DIR}. Run notebook 01 first.")

# Split on the "_aggregated" filename marker: aggregated findings (from 01d)
# take priority over event-level findings.
aggregated_files = [path for path in findings_files if "_aggregated" in path.name]
non_aggregated_files = [path for path in findings_files if "_aggregated" not in path.name]

# Choose the preferred group, then order it newest-first by modification time.
# The sort is in place, so the chosen list itself ends up ordered.
use_aggregated = bool(aggregated_files)
candidates = aggregated_files if use_aggregated else non_aggregated_files
candidates.sort(key=lambda path: path.stat().st_mtime, reverse=True)
FINDINGS_PATH = str(candidates[0])

if use_aggregated:
    print(f"Found {len(aggregated_files)} aggregated findings file(s)")
else:
    print(f"Found {len(findings_files)} findings file(s)")
print(f"Using: {FINDINGS_PATH}")
if use_aggregated and non_aggregated_files:
    print(f"   (Skipping {len(non_aggregated_files)} event-level findings)")

findings = ExplorationFindings.load(FINDINGS_PATH)

print(f"\nLoaded findings for {findings.column_count} columns")

Found 1 aggregated findings file(s)
Using: /Users/Vital/python/CustomerRetention/experiments/findings/customer_emails_408768_aggregated_846212_findings.yaml
   (Skipping 1 event-level findings)

Loaded findings for 72 columns


## 9.2 Business Context

Define the business context for this project.

In [3]:
# Project-level business framing; every key is a plain identifier, so the
# keyword form of dict() yields the same mapping in the same order.
BUSINESS_CONTEXT = dict(
    project_name="Customer Churn Prediction",
    business_objective="Reduce customer churn by 20% through proactive retention campaigns",
    stakeholders=["Marketing Team", "Customer Success", "Data Science"],
    timeline="Q1 2025",
    budget_constraints="$50k for retention campaigns per month",
)

print("Business Context:")
for field in BUSINESS_CONTEXT:
    print(f"  {field}: {BUSINESS_CONTEXT[field]}")

Business Context:
  project_name: Customer Churn Prediction
  business_objective: Reduce customer churn by 20% through proactive retention campaigns
  stakeholders: ['Marketing Team', 'Customer Success', 'Data Science']
  timeline: Q1 2025
  budget_constraints: $50k for retention campaigns per month


## 9.3 Success Metrics

In [4]:
# Success criteria for the project, one record per metric.
# Rows are written as tuples and zipped against the shared column names so the
# result is the same list of dicts (same keys, same order) in a compact table form.
SUCCESS_METRICS = [
    dict(zip(("Metric", "Target", "Priority", "Rationale"), row))
    for row in (
        ("Model AUC", ">= 0.80", "High",
         "Need strong discrimination to prioritize high-risk customers"),
        ("Precision at 20%", ">= 0.60", "High",
         "Limited budget means we can only target top 20% of predictions"),
        ("Churn Rate Reduction", "20%", "High",
         "Primary business objective"),
        ("Model Latency", "< 100ms", "Medium",
         "Required for real-time scoring"),
        ("Fairness (Demographic Parity)", "Ratio >= 0.8", "Medium",
         "Ensure equitable treatment across segments"),
    )
]

# Tabular view for display in the notebook.
metrics_df = pd.DataFrame(SUCCESS_METRICS)
print("Success Metrics:")
display(metrics_df)

Success Metrics:


Unnamed: 0,Metric,Target,Priority,Rationale
0,Model AUC,>= 0.80,High,Need strong discrimination to prioritize high-...
1,Precision at 20%,>= 0.60,High,Limited budget means we can only target top 20...
2,Churn Rate Reduction,20%,High,Primary business objective
3,Model Latency,< 100ms,Medium,Required for real-time scoring
4,Fairness (Demographic Parity),Ratio >= 0.8,Medium,Ensure equitable treatment across segments


## 9.4 Deployment Requirements

In [5]:
# Serving and operations requirements for the model, keyed by requirement name.
DEPLOYMENT_REQUIREMENTS = dict(
    scoring_mode="Both batch and real-time",
    batch_frequency="Daily",
    real_time_latency="< 100ms p99",
    infrastructure="Databricks",
    model_registry="MLflow",
    monitoring="Required - drift detection and performance tracking",
    retraining="Monthly or on significant drift",
)

print("Deployment Requirements:")
for requirement in DEPLOYMENT_REQUIREMENTS:
    print(f"  {requirement}: {DEPLOYMENT_REQUIREMENTS[requirement]}")

Deployment Requirements:
  scoring_mode: Both batch and real-time
  batch_frequency: Daily
  real_time_latency: < 100ms p99
  infrastructure: Databricks
  model_registry: MLflow
  monitoring: Required - drift detection and performance tracking
  retraining: Monthly or on significant drift


## 9.5 Data Constraints

In [6]:
# Data-governance constraints, one record per constraint.
# Each row tuple is zipped against the shared column names, giving the same
# list of dicts (same keys, same order) as spelling every dict out by hand.
DATA_CONSTRAINTS = [
    dict(zip(("Constraint", "Requirement", "Status"), row))
    for row in (
        ("PII Handling", "No direct PII in features (names, SSN, etc.)", "To verify"),
        ("Data Freshness", "Features must be available within 24 hours", "To verify"),
        ("Historical Depth", "Minimum 12 months of history for training", "To verify"),
        ("Protected Attributes", "Age, gender, race should not be direct features", "To verify"),
    )
]

# Tabular view for display in the notebook.
constraints_df = pd.DataFrame(DATA_CONSTRAINTS)
print("Data Constraints:")
display(constraints_df)

Data Constraints:


Unnamed: 0,Constraint,Requirement,Status
0,PII Handling,"No direct PII in features (names, SSN, etc.)",To verify
1,Data Freshness,Features must be available within 24 hours,To verify
2,Historical Depth,Minimum 12 months of history for training,To verify
3,Protected Attributes,"Age, gender, race should not be direct features",To verify


## 9.6 Intervention Strategy

In [7]:
# Retention playbook, one record per risk tier.
# Each row tuple is zipped against the shared column names, giving the same
# list of dicts (same keys, same order) as spelling every dict out by hand.
INTERVENTIONS = [
    dict(zip(("Risk Level", "Intervention", "Cost", "Expected Effectiveness"), row))
    for row in (
        ("High (>0.8)", "Personal call from account manager", "$50/customer", "40% retention"),
        ("Medium (0.5-0.8)", "Personalized email + discount offer", "$10/customer", "20% retention"),
        ("Low (<0.5)", "Automated engagement email", "$0.50/customer", "5% retention"),
    )
]

# Tabular view for display in the notebook.
interventions_df = pd.DataFrame(INTERVENTIONS)
print("Intervention Strategy:")
display(interventions_df)

Intervention Strategy:


Unnamed: 0,Risk Level,Intervention,Cost,Expected Effectiveness
0,High (>0.8),Personal call from account manager,$50/customer,40% retention
1,Medium (0.5-0.8),Personalized email + discount offer,$10/customer,20% retention
2,Low (<0.5),Automated engagement email,$0.50/customer,5% retention


## 9.7 Save Business Context to Findings

In [8]:
# Persist every business-alignment artifact defined in this notebook.
# The data constraints (9.5) and intervention strategy (9.6) were previously
# only displayed and never written back, so they were lost with the kernel.
# They are stored under new keys; the three existing keys are unchanged.
# NOTE(review): confirm downstream consumers of the findings metadata tolerate
# the two additional keys.

# Fall back to a fresh dict when the loaded findings carry no metadata.
findings.metadata = findings.metadata or {}
findings.metadata.update(
    {
        "business_context": BUSINESS_CONTEXT,
        "success_metrics": SUCCESS_METRICS,
        "deployment_requirements": DEPLOYMENT_REQUIREMENTS,
        "data_constraints": DATA_CONSTRAINTS,
        "intervention_strategy": INTERVENTIONS,
    }
)

# Write back to the same file the findings were loaded from.
findings.save(FINDINGS_PATH)
print(f"Business context saved to: {FINDINGS_PATH}")

Business context saved to: /Users/Vital/python/CustomerRetention/experiments/findings/customer_emails_408768_aggregated_846212_findings.yaml


---

## Next Steps

Continue to **10_spec_generation.ipynb** to generate production specifications.