In [15]:
# Step 1: Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Step 2: Load dataset
data = pd.read_csv("customer_churn_dataset-testing-master.csv")

# Step 3: Encode categorical columns
le = LabelEncoder()
for col in data.select_dtypes(include='object').columns:
    data[col] = le.fit_transform(data[col])

# Step 4: Split features and target
X = data.drop("Churn", axis=1)
y = data["Churn"]

# Step 5: Split into train/test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 6: Scale numeric features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 7: Train the model
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# Step 8: Evaluate model
y_pred = model.predict(X_test_scaled)
print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Step 9: User input (interactive in Jupyter)
print("\n🔍 Customer Churn Prediction\n")

age = float(input("Enter Age: "))
tenure = float(input("Enter Tenure: "))
usage = float(input("Enter Monthly Usage: "))
spend = float(input("Enter Total Spend: "))
contract_length = float(input("Enter Contract Length: "))
gender = input("Enter Gender (Male/Female): ")
last_interaction = float(input("Enter Last Interaction (days): "))
subscription_type = input("Enter Subscription Type (Basic/Premium/Other): ")

# Step 10: Match training feature structure
# Create a DataFrame with all required features
input_data = pd.DataFrame({
    "Age": [age],
    "Tenure": [tenure],
    "Monthly Usage": [usage],
    "Total Spend": [spend],
    "Contract Length": [contract_length],
    "Gender": [1 if gender.lower() == "male" else 0],
    "Last Interaction": [last_interaction],
    "Subscription Type": [1 if subscription_type.lower() == "premium" else 0]
})

# Step 11: Apply same scaling
input_scaled = scaler.transform(input_data)

# Step 12: Predict churn
prediction = model.predict(input_scaled)[0]
probability = model.predict_proba(input_scaled)[0][1]

# Step 13: Output result
if prediction == 1:
    print(f"\n⚠️ The customer is likely to CHURN. (Probability: {probability:.2f})")
else:
    print(f"\n✅ The customer is NOT likely to churn. (Probability: {probability:.2f})")


✅ Accuracy: 0.8653980582524272

Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.87      0.87      6793
           1       0.86      0.86      0.86      6082

    accuracy                           0.87     12875
   macro avg       0.86      0.86      0.86     12875
weighted avg       0.87      0.87      0.87     12875


🔍 Customer Churn Prediction



Enter Age:  22
Enter Tenure:  2003
Enter Monthly Usage:  23
Enter Total Spend:  20000
Enter Contract Length:  3232
Enter Gender (Male/Female):  male
Enter Last Interaction (days):  2
Enter Subscription Type (Basic/Premium/Other):  Basic


ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- Monthly Usage
Feature names seen at fit time, yet now missing:
- CustomerID
- Payment Delay
- Support Calls
- Usage Frequency
