# 🔄 Customer Churn Prediction Project

In [None]:

# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import LabelEncoder


In [None]:

# Read the customer records from CSV into a DataFrame.
# NOTE: the file name below is a placeholder; point it at the real dataset.
df = pd.read_csv(filepath_or_buffer="Customer_data.csv")
# Preview the first five rows (the default for head()).
df.head(n=5)


In [None]:

# Clean up blank cells, then integer-encode the string-valued columns.
#
# Step 1: a cell holding exactly one space is treated as missing, and every
# row that contains a missing value is removed (both operations are in place).
df.replace(to_replace=" ", value=np.nan, inplace=True)
df.dropna(axis=0, how="any", inplace=True)

# Step 2: label-encode the first group of columns (this group includes the
# 'Churn' target). Each column is fitted independently, so the integer codes
# of one column have no relation to those of another.
binary_cols = [
    'gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
    'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
    'StreamingTV', 'StreamingMovies', 'PaperlessBilling', 'Churn',
]
df[binary_cols] = df[binary_cols].apply(
    lambda column: LabelEncoder().fit_transform(column)
)

# Step 3: same treatment for the second group of columns.
categorical_cols = ['InternetService', 'Contract', 'PaymentMethod']
df[categorical_cols] = df[categorical_cols].apply(
    lambda column: LabelEncoder().fit_transform(column)
)


In [None]:

# Make 'TotalCharges' numeric. Values that cannot be parsed become NaN
# (errors="coerce"), and rows containing any NaN are then dropped in place.
df["TotalCharges"] = pd.to_numeric(arg=df["TotalCharges"], errors="coerce")
df.dropna(axis=0, how="any", inplace=True)


In [None]:

# Feature selection
# 'Churn' is the prediction target, so it is removed from the feature matrix.
# 'customerID' is dropped as well (presumably a per-row identifier with no
# predictive value -- confirm against the dataset).
X = df.drop(columns=['customerID', 'Churn'])
y = df['Churn']

# Train-test split: 80% train / 20% test, with a fixed seed for reproducibility.
# stratify=y keeps the proportion of each 'Churn' class the same in both
# partitions. Without it the test-set class ratio varies by chance, which
# distorts precision/recall/F1 when the classes are imbalanced.
# NOTE: stratification requires at least two rows of every class.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


In [None]:

# Fit a random forest (100 trees, fixed seed) on the training partition.
# fit() returns the fitted estimator itself, so construction and training
# can be chained into a single expression.
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train, y_train
)

# Predicted labels for the held-out test partition.
y_pred = clf.predict(X_test)


In [None]:

# Report the test-set scores, one per line, each computed from the true and
# predicted labels with scikit-learn's default arguments.
for metric_label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred))


In [None]:

# Horizontal bar chart of the forest's feature importances, one bar per
# feature column, in the same order as the columns of X.
plt.figure(figsize=(10, 6))
importance_ax = sns.barplot(x=clf.feature_importances_, y=X.columns)
# barplot draws on the current Axes and returns it; set the title there.
importance_ax.set_title("Feature Importance")
plt.show()
