In [7]:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Seed NumPy's global random number generator.
np.random.seed(30)

# Read the Telco customer-churn CSV and clean it in a single chain:
#   1. TotalCharges is converted to a numeric dtype; any entry that cannot be
#      parsed becomes NaN (errors='coerce').
#   2. dropna() then removes every row that contains a NaN in ANY column,
#      which includes the rows whose TotalCharges failed to parse.
# NOTE(review): the path is an absolute local Windows path, so this only runs
# on a machine that has the file at exactly this location.
data = (
    pd.read_csv(r'G:\churn prediction\WA_Fn-UseC_-Telco-Customer-Churn.csv')
    .assign(
        TotalCharges=lambda frame: pd.to_numeric(frame['TotalCharges'], errors='coerce')
    )
    .dropna()
)

# Encode categorical features.
# Every object-dtype column except the 'customerID' identifier is replaced, in
# place, by the integer codes produced by a LabelEncoder.
#
# Each column gets its OWN fitted encoder, kept in `label_encoders` under the
# column name. Re-fitting one shared encoder per column would throw away the
# category -> code mapping of every column but the last, leaving no way to
# encode raw values for inference or to decode predictions. With the dict, a
# mapping can be inspected via label_encoders[col].classes_ and applied to new
# raw values via label_encoders[col].transform(...).
label_encoders = {}

# Bound up-front so the name exists even when there is nothing to encode.
label_encoder = LabelEncoder()

categorical_cols = [
    col
    for col in data.select_dtypes(include=['object']).columns
    if col != 'customerID'
]
for col in categorical_cols:
    # Rebound on every pass: after the loop `label_encoder` refers to the
    # encoder fitted on the last column processed.
    label_encoder = LabelEncoder()
    data[col] = label_encoder.fit_transform(data[col])
    label_encoders[col] = label_encoder

# Features and target: everything except the label and the identifier is a
# model input; the (now integer-coded) 'Churn' column is the label.
X = data.drop(['Churn', 'customerID'], axis=1)
y = data['Churn']

# Hold out 20% of the rows as a test partition. stratify=y asks for the same
# class proportions of the target in both partitions; random_state pins the
# shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=30,
)

# Standardise the features. The scaler is fitted on the training partition
# only and then applied unchanged to both partitions, so no statistics from
# the test rows enter the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a logistic regression on the scaled training data, with
# class_weight='balanced' passed to the estimator.
model = LogisticRegression(class_weight='balanced').fit(X_train_scaled, y_train)

# Predict labels for the held-out partition.
y_pred = model.predict(X_test_scaled)

# Compute the three summary metrics against the true test labels. Each metric
# function is called as metric(y_true, y_pred), in the order listed.
accuracy, conf_matrix, class_report = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, confusion_matrix, classification_report)
)

# Report the results.
print("✅ Model Accuracy:", accuracy)
print("\n✅ Confusion Matrix:\n", conf_matrix)
print("\n✅ Classification Report:\n", class_report)

# -----------------------
# Business logic: Discount based on visits
# -----------------------
# A customer is sent a discount when BOTH conditions hold:
#   * website_visits is strictly greater than VISIT_THRESHOLD, and
#   * last_purchase_days is strictly greater than INACTIVITY_DAYS_THRESHOLD.
VISIT_THRESHOLD = 20
INACTIVITY_DAYS_THRESHOLD = 60

customer_visit_data = pd.DataFrame({
    'customerID': [1001, 1002, 1003],
    'website_visits': [25, 15, 22],
    'last_purchase_days': [120, 30, 90],
    'discount_sent': [0, 0, 0]
})

# Evaluate the rule for all customers at once. Working column-wise (instead of
# DataFrame.iterrows) keeps every column's own dtype: iterrows collapses each
# row to a single dtype, so adding any float column to this frame would make
# the integer IDs print as e.g. "1001.0".
qualifies_for_discount = (
    (customer_visit_data['website_visits'] > VISIT_THRESHOLD)
    & (customer_visit_data['last_purchase_days'] > INACTIVITY_DAYS_THRESHOLD)
)

# One notification line per qualifying customer, in frame order.
for customer_id in customer_visit_data.loc[qualifies_for_discount, 'customerID']:
    print(f"🎯 Discount sent to customer {customer_id} due to high site visits without recent purchases")

# Flag only the qualifying rows; flags already present on other rows are left
# untouched.
customer_visit_data.loc[qualifies_for_discount, 'discount_sent'] = 1

print("\n✅ Updated customer visit data with discount status:")
print(customer_visit_data)

# -----------------------
# Predict churn for a new customer
# -----------------------
# One hypothetical customer, described with already-numeric values.
# NOTE(review): the integer codes are presumably meant to match the encoding
# applied to the training features — verify against the fitted encoding.
new_customer_profile = {
    'gender': 1,
    'SeniorCitizen': 0,
    'Partner': 1,
    'Dependents': 0,
    'tenure': 12,
    'PhoneService': 1,
    'MultipleLines': 0,
    'InternetService': 1,
    'OnlineSecurity': 0,
    'OnlineBackup': 1,
    'DeviceProtection': 0,
    'TechSupport': 0,
    'StreamingTV': 1,
    'StreamingMovies': 0,
    'Contract': 2,
    'PaperlessBilling': 1,
    'PaymentMethod': 1,
    'MonthlyCharges': 80.5,
    'TotalCharges': 960.0,
}
new_customer_data = pd.DataFrame([new_customer_profile])

# Align the single-row frame with the training feature matrix: columns are put
# in the same order as X, and any feature column absent from the profile is
# created and filled with 0.
missing_cols = set(X.columns) - set(new_customer_data.columns)
new_customer_data = new_customer_data.reindex(columns=X.columns, fill_value=0)

# Apply the scaler that was fitted on the training partition.
new_customer_scaled = scaler.transform(new_customer_data)

# predict_proba returns one row per sample with one column per class; take the
# first (only) sample and the column at index 1 as the churn probability.
churn_probability = model.predict_proba(new_customer_scaled)[0, 1]
print(f"\n🔍 Probability of losing new customer: {churn_probability:.2f}")


✅ Model Accuracy: 0.7526652452025586

✅ Confusion Matrix:
 [[756 277]
 [ 71 303]]

✅ Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.73      0.81      1033
           1       0.52      0.81      0.64       374

    accuracy                           0.75      1407
   macro avg       0.72      0.77      0.72      1407
weighted avg       0.81      0.75      0.77      1407

🎯 Discount sent to customer 1001 due to high site visits without recent purchases
🎯 Discount sent to customer 1003 due to high site visits without recent purchases

✅ Updated customer visit data with discount status:
   customerID  website_visits  last_purchase_days  discount_sent
0        1001              25                 120              1
1        1002              15                  30              0
2        1003              22                  90              1

🔍 Probability of losing new customer: 0.45
