# **Customer Churn Prediction Notebook**
This notebook demonstrates how to load the trained Random Forest model and make predictions on new customer data.

In [None]:
import pandas as pd
import joblib
import os

# Locations of the persisted model and the processed dataset. Both are
# relative paths, so they resolve against the current working directory.
MODEL_PATH = '../models/rf_model.pkl'
DATA_PATH = '../data/processed_churn_data.parquet'

# Deserialize the trained model if its file is present; otherwise only report
# the missing path. NOTE: in the missing-file case `model` is never assigned,
# so any later cell that uses it will fail with a NameError.
if not os.path.exists(MODEL_PATH):
    print(f"Error: Model not found at {MODEL_PATH}. Please run the training notebook first.")
else:
    model = joblib.load(MODEL_PATH)
    print("Model loaded successfully!")

## 1. Prediction using Prepared Data
First, we'll demonstrate a prediction using the processed data generated in `preprocessing.ipynb`.

In [None]:
# Nothing to demonstrate unless the processed dataset is on disk.
if not os.path.exists(DATA_PATH):
    print(f"Processed data file not found at {DATA_PATH}.")
else:
    df = pd.read_parquet(DATA_PATH)

    # Take the first row as a one-row DataFrame of features (every column
    # except 'Churn'); the double brackets in iloc keep it two-dimensional.
    sample_row = df.drop(columns='Churn').iloc[[0]]
    # Ground-truth label of that same first row, for comparison below.
    actual_label = df.iloc[0]['Churn']

    prediction = model.predict(sample_row)[0]
    # Second column of predict_proba for the single sample.
    # NOTE(review): presumably this is the "churn" class - confirm against model.classes_.
    probability = model.predict_proba(sample_row)[0][1]

    print("Sample Customer Data (Processed):")
    display(sample_row)
    print(f"Actual Churn: {actual_label}")
    print(f"Predicted Churn: {prediction}")
    print(f"Churn Probability: {probability:.2%}")

## 2. Prediction on Raw Data Sample
In a real-world scenario, you will have raw customer data that needs to be encoded the same way as the training data. This section shows how to handle a raw sample.

In [None]:
def preprocess_raw_sample(raw_data, training_columns):
    """
    Encode a single raw customer record into the feature layout the model expects.

    The steps are: map the Yes/No columns and ``gender`` to 1/0, convert
    ``TotalCharges`` to a number, one-hot encode the multi-category columns,
    and finally align the result to ``training_columns``.

    Args:
        raw_data: Mapping of raw column name to value for one customer.
            Any column that is absent is simply skipped by the encoding steps.
        training_columns: Feature names, in order, that the output must have
            (for example ``model.feature_names_in_``).

    Returns:
        A one-row DataFrame whose columns are exactly ``training_columns``.
        Features that were not produced from ``raw_data`` are filled with 0,
        and columns not listed in ``training_columns`` are dropped.
    """
    # 1. Create a one-row DataFrame from the record
    df_sample = pd.DataFrame([raw_data])

    # 2. Binary encoding
    yes_no = {'Yes': 1, 'No': 0}
    value_maps = {
        'Partner': yes_no,
        'Dependents': yes_no,
        'PhoneService': yes_no,
        'PaperlessBilling': yes_no,
        'gender': {'Female': 1, 'Male': 0},
    }
    for col, mapping in value_maps.items():
        if col in df_sample.columns:
            # A pass-through lookup encodes known labels and leaves any other
            # value untouched. Unlike Series.replace on an object column, it
            # does not depend on pandas' deprecated silent downcasting, so the
            # encoded column gets a numeric dtype.
            df_sample[col] = df_sample[col].map(lambda v, m=mapping: m.get(v, v))

    # 3. Handle TotalCharges numeric conversion.
    # errors='coerce' turns unparseable values (e.g. a blank string) into NaN.
    # NOTE(review): confirm the model tolerates NaN in this feature.
    if 'TotalCharges' in df_sample.columns:
        df_sample['TotalCharges'] = pd.to_numeric(df_sample['TotalCharges'], errors='coerce')

    # 4. One-Hot Encoding for multi-category columns
    multi_cols = [
        'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup',
        'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
        'Contract', 'PaymentMethod',
    ]
    present_multi_cols = [c for c in multi_cols if c in df_sample.columns]
    df_sample = pd.get_dummies(df_sample, columns=present_multi_cols, dtype=int)

    # 5. Align with the training layout in a single step: dummies that a
    # one-row sample cannot produce are added as 0, extra columns are dropped,
    # and the column order follows training_columns exactly.
    return df_sample.reindex(columns=training_columns, fill_value=0)

# Example raw input: one customer record in the raw, un-encoded schema.
# The 'Churn' target is intentionally absent - it is what we predict.
raw_input = {
    # Demographics
    'gender': 'Female',
    'SeniorCitizen': 0,
    'Partner': 'Yes',
    'Dependents': 'No',
    'tenure': 1,
    # Phone and internet services
    'PhoneService': 'No',
    'MultipleLines': 'No phone service',
    'InternetService': 'DSL',
    'OnlineSecurity': 'No',
    'OnlineBackup': 'Yes',
    'DeviceProtection': 'No',
    'TechSupport': 'No',
    'StreamingTV': 'No',
    'StreamingMovies': 'No',
    # Contract and billing
    'Contract': 'Month-to-month',
    'PaperlessBilling': 'Yes',
    'PaymentMethod': 'Electronic check',
    'MonthlyCharges': 29.85,
    'TotalCharges': "29.85",  # given as a string; converted during preprocessing
}

# The fitted model records the feature names it was trained with; use them
# as the target layout for the encoded sample.
training_features = model.feature_names_in_
processed_sample = preprocess_raw_sample(raw_input, training_features)

prediction = model.predict(processed_sample)[0]
probability = model.predict_proba(processed_sample)[0][1]

# NOTE(review): treats label 1 as "churn" - confirm against model.classes_.
churn_label = 'Yes' if prediction == 1 else 'No'

print("Raw Input Sample Prediction Result:")
print(f"Predicted Churn: {churn_label}")
print(f"Confidence Probability: {probability:.2%}")

## 3. App Integration Guide
To use this logic in a web application (like Flask or Streamlit), you can follow these steps:

### Step 1: Export the Logic
You can save the `preprocess_raw_sample` function in a `.py` file (e.g., `utils.py`) so your app can import it. Because the function uses pandas, that file must include `import pandas as pd` at the top.

### Step 2: Sample Integration (Flask Example)
```python
from flask import Flask, request, jsonify
import joblib
import pandas as pd
from utils import preprocess_raw_sample

app = Flask(__name__)
# Load the model once at startup so it is reused across requests.
model = joblib.load('models/rf_model.pkl')
training_features = model.feature_names_in_

@app.route('/predict', methods=['POST'])
def predict():
    """Return the churn prediction for one raw customer record sent as a JSON object."""
    # silent=True yields None instead of raising when the body is missing or
    # is not valid JSON, so every malformed request is handled in one place.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # null, lists and scalars are valid JSON but not a customer record.
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    processed_df = preprocess_raw_sample(data, training_features)

    prediction = model.predict(processed_df)[0]
    probability = model.predict_proba(processed_df)[0][1]

    # NOTE(review): bool(prediction) assumes the model's labels are 0/1 - confirm.
    return jsonify({
        'churn': bool(prediction),
        'probability': float(probability)
    })
```