# **Testing the Saved Pipeline**

Testing the pi

### **Method 1: Using the ChurnPredictor Wrapper Class**

In [35]:
import joblib
import pandas as pd
from churn_pipeline import ColumnNameCleaner
from churn_pipeline import NumericConverter
from churn_pipeline import CategoricalCleaner
from churn_pipeline import TotalServicesCreator
from churn_pipeline import FeatureDropper
from churn_pipeline import LowServicesFeature
from churn_pipeline import CustomEncoder
from churn_pipeline import CustomOneHotEncoder

from churn_pipeline import ChurnPredictor

# Load the saved pipeline
predictor = ChurnPredictor('churn_prediction_pipeline.pkl')

✓ Pipeline loaded successfully from churn_prediction_pipeline.pkl


### **Method 2: Direct Loading with Joblib**

In [37]:
loaded_pipeline = joblib.load('churn_prediction_pipeline.pkl')
print("✓ Pipeline loaded successfully")
print(f"Pipeline steps: {list(loaded_pipeline.named_steps.keys())}")

✓ Pipeline loaded successfully
Pipeline steps: ['preprocessing', 'model']


### **Create Sample Test Data**

In [38]:
# Create a small sample of new customers to test predictions
# This simulates real-world usage where new data arrives

sample_customers = pd.DataFrame({
    'customerID': ['TEST-001', 'TEST-002', 'TEST-003'],
    'gender': ['Female', 'Male', 'Female'],
    'SeniorCitizen': [0, 1, 0],
    'Partner': ['Yes', 'No', 'Yes'],
    'Dependents': ['No', 'No', 'Yes'],
    'tenure': [1, 34, 2],
    'PhoneService': ['Yes', 'Yes', 'Yes'],
    'MultipleLines': ['No phone service', 'Yes', 'No'],
    'InternetService': ['DSL', 'Fiber optic', 'DSL'],
    'OnlineSecurity': ['No', 'No', 'Yes'],
    'OnlineBackup': ['Yes', 'No', 'Yes'],
    'DeviceProtection': ['No', 'Yes', 'No'],
    'TechSupport': ['No', 'No', 'Yes'],
    'StreamingTV': ['No', 'No', 'Yes'],
    'StreamingMovies': ['No', 'Yes', 'No'],
    'Contract': ['Month-to-month', 'One year', 'Month-to-month'],
    'PaperlessBilling': ['Yes', 'No', 'Yes'],
    'PaymentMethod': ['Electronic check', 'Mailed check', 'Electronic check'],
    'MonthlyCharges': [29.85, 56.95, 53.85],
    'TotalCharges': ['29.85', '1889.5', '108.15']
})

sample_customers

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges
0,TEST-001,Female,0,Yes,No,1,Yes,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85
1,TEST-002,Male,1,No,No,34,Yes,Yes,Fiber optic,No,No,Yes,No,No,Yes,One year,No,Mailed check,56.95,1889.5
2,TEST-003,Female,0,Yes,Yes,2,Yes,No,DSL,Yes,Yes,No,Yes,Yes,No,Month-to-month,Yes,Electronic check,53.85,108.15


### **Make Predictions with Loaded Pipeline**

In [39]:
# Test predictions using the loaded pipeline

predictions = loaded_pipeline.predict(sample_customers)

probabilities = loaded_pipeline.predict_proba(sample_customers)

# Create results dataframe
results = pd.DataFrame({
    'CustomerID': sample_customers['customerID'],
    'Churn_Prediction': ['Yes' if p == 1 else 'No' for p in predictions],
    'Churn_Probability': probabilities[:, 1],
    'No_Churn_Probability': probabilities[:, 0]
})

results

Unnamed: 0,CustomerID,Churn_Prediction,Churn_Probability,No_Churn_Probability
0,TEST-001,Yes,0.75316,0.24684
1,TEST-002,No,0.382543,0.617457
2,TEST-003,Yes,0.732355,0.267645


### **Verify Pipeline On the Original Dataset**

In [40]:
# Verify that the loaded pipeline produces the same results as the original
df = pd.read_csv('data.csv')

X = df.drop('Churn', axis=1)
y = df['Churn'].map({'Yes': 1, 'No': 0})

# Make predictions with loaded pipeline
loaded_predictions = loaded_pipeline.predict(X)
loaded_probabilities = loaded_pipeline.predict_proba(X)[:, 1]

In [41]:
loaded_pipeline.fit(X, y)

# Pipeline
y_test_pred = loaded_pipeline.predict(X)
y_test_proba = loaded_pipeline.predict_proba(X)[:, 1]

In [42]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

test_metrics = {
    'Accuracy': accuracy_score(y, y_test_pred),
    'Precision': precision_score(y, y_test_pred),
    'Recall': recall_score(y, y_test_pred),
    'F1-Score': f1_score(y, y_test_pred),
    'ROC-AUC': roc_auc_score(y, y_test_proba)
}

test_results_df = pd.DataFrame([test_metrics]).T
test_results_df.columns = ['Score']
test_results_df

Unnamed: 0,Score
Accuracy,0.717876
Precision,0.482759
Recall,0.883895
F1-Score,0.624457
ROC-AUC,0.860863


### **Test ChurnPredictor Wrapper**

In [43]:
# Test the ChurnPredictor wrapper class

# Make predictions using wrapper
wrapper_predictions = predictor.predict(sample_customers)
wrapper_probabilities = predictor.predict_proba(sample_customers)

# Display results
results_wrapper = pd.DataFrame({
    'CustomerID': sample_customers['customerID'],
    'Prediction': ['Churn' if p == 1 else 'Stay' for p in wrapper_predictions],
    'Churn_Risk': [f"{prob[1]:.1%}" for prob in wrapper_probabilities],
    'Risk_Category': ['High Risk' if prob[1] > 0.5 else 'Low Risk' for prob in wrapper_probabilities]
})

results_wrapper

Unnamed: 0,CustomerID,Prediction,Churn_Risk,Risk_Category
0,TEST-001,Churn,75.3%,High Risk
1,TEST-002,Stay,38.3%,Low Risk
2,TEST-003,Churn,73.2%,High Risk
