# Customer Churn Prediction (Telecom)
This project trains a logistic regression model to predict customer churn from Telco customer data.
The workflow covers data cleaning, one-hot encoding of categorical features, a train/test split, model fitting, and evaluation.

In [None]:
# Step 1: Load libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

In [None]:
# Step 2: Load data
# Read the Telco churn CSV straight from its raw GitHub URL into a DataFrame.
DATA_URL = 'https://raw.githubusercontent.com/blastchar/telco-customer-churn/master/WA_Fn-UseC_-Telco-Customer-Churn.csv'
df = pd.read_csv(DATA_URL)
# Trailing expression: the notebook displays the first rows as the cell output.
df.head()

In [None]:
# Step 3: Data Cleaning and Feature Engineering
# Parse TotalCharges as numbers; entries that cannot be parsed become NaN.
total_charges = pd.to_numeric(df['TotalCharges'], errors='coerce')
# Drop every row that contains a NaN, then discard the customerID column.
cleaned = (
    df.assign(TotalCharges=total_charges)
    .dropna()
    .drop(columns=['customerID'])
)
# One-hot encode the remaining non-numeric columns, dropping the first level
# of each so that every category set is represented by k-1 indicator columns.
df = pd.get_dummies(cleaned, drop_first=True)

In [None]:
# Step 4: Train/Test Split
# Separate the target indicator column from the predictor columns.
X = df.drop(columns='Churn_Yes')
y = df['Churn_Yes']
# Hold out 20% of the rows for testing. Stratifying on y keeps the proportion
# of each target class the same in the train and test sets, so the reported
# metrics are not skewed by an unlucky random split. random_state fixes the
# shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Step 5: Train Logistic Regression
# fit() returns the fitted estimator, so construction and training are chained.
# max_iter=1000 raises the solver's iteration cap.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
# Hard class predictions for the held-out rows, used by the evaluation step.
y_pred = model.predict(X_test)

In [None]:
# Step 6: Evaluation
# Label-based metrics computed from the hard predictions.
conf_mat = confusion_matrix(y_test, y_pred)
print(conf_mat)
report_text = classification_report(y_test, y_pred)
print(report_text)
# ROC AUC needs scores rather than labels: take column 1 of predict_proba,
# i.e. the probability of the second class in model.classes_
# (presumably Churn_Yes being true -- confirm against model.classes_).
positive_scores = model.predict_proba(X_test)[:, 1]
auc_value = roc_auc_score(y_test, positive_scores)
print('ROC AUC:', auc_value)