# Customer Churn Prediction
This notebook covers the task of predicting customer churn for a subscription-based service.
We will use the Telco Customer Churn dataset.
The notebook includes dataset download, preprocessing, model training, and evaluation.

In [None]:
# Install necessary libraries
!pip install -q pandas scikit-learn numpy


In [None]:
# Download dataset (skipped when a local copy is already present)
import os
import urllib.request

dataset_url = 'https://raw.githubusercontent.com/blastchar/telco-customer-churn/master/WA_Fn-UseC_-Telco-Customer-Churn.csv'
dataset_path = 'telco_customer_churn.csv'

if not os.path.exists(dataset_path):
    print('Downloading dataset...')
    # Fetch into a temporary name first and rename only on success, so an
    # interrupted download never leaves a truncated file at dataset_path
    # that a later run would mistake for a complete dataset.
    partial_path = dataset_path + '.part'
    try:
        urllib.request.urlretrieve(dataset_url, partial_path)
        os.replace(partial_path, dataset_path)
    finally:
        # After a successful rename the temporary file no longer exists;
        # this only cleans up after a failed or interrupted download.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print('Download complete.')
else:
    print('Dataset already exists.')


In [None]:
# Read the downloaded CSV file into a DataFrame
import pandas as pd

df = pd.read_csv(dataset_path)

# Preview the first five rows (shown as the cell output)
df.head(n=5)

## Data Preprocessing
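- Handle missing values: convert `TotalCharges` to numeric and fill entries that cannot be parsed with the column median
- Convert categorical variables to numeric columns using one-hot encoding
- Encode the target variable (`Churn`) as integer labels
- Split the data into training (80%) and test (20%) sets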


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Preprocess the loaded frame `df` into numeric features and an encoded
# target, then split it into training and test sets.

# Drop customerID column (presumably a per-row identifier with no predictive
# value; confirm against the dataset description)
df = df.drop(columns='customerID')

# Convert TotalCharges to numeric; values that cannot be parsed become NaN
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

# Fill missing TotalCharges with median.
# The filled column is assigned back rather than calling
# df['TotalCharges'].fillna(..., inplace=True): that chained in-place form
# operates on a temporary under pandas copy-on-write and would leave the NaNs
# in df.
# NOTE: the median is computed over all rows, i.e. before the train/test split.
df['TotalCharges'] = df['TotalCharges'].fillna(df['TotalCharges'].median())

# Encode target as integer class labels
label_encoder = LabelEncoder()
df['Churn'] = label_encoder.fit_transform(df['Churn'])

# One-hot encode categorical variables; drop_first=True omits the first level
# of each variable so the dummy columns are not redundant
df = pd.get_dummies(df, drop_first=True)

# Split data: 80% train / 20% test, with a fixed seed for reproducibility
X = df.drop('Churn', axis=1)
y = df['Churn']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


## Model Training
We will train a Gradient Boosting classifier to predict churn, then evaluate it on the held-out test set using accuracy and a classification report.

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score

# Fit the gradient boosting classifier on the training split; the fixed
# random_state keeps the run repeatable. fit() returns the fitted estimator.
model = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)

# Predict labels for the held-out test split
y_pred = model.predict(X_test)

# Compute the metrics first, then print them
test_accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print('Accuracy:', test_accuracy)
print('\nClassification Report:\n', report)