<a href="https://colab.research.google.com/github/farzad-mos/Churn_prediction/blob/main/Churn_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### 1: Import Necessary Libraries and Load Dataset
Read the dataset from a .csv file.

In [None]:
# Data handling
# (these two imports had been fused into the comment line, leaving `pd`
# and `np` undefined on a fresh kernel)
import numpy as np
import pandas as pd

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Preprocessing
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Model evaluation
from sklearn.metrics import classification_report, confusion_matrix

# Deep learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Load dataset (path is relative to the notebook's working directory)
df = pd.read_csv('Telco-Customer-Churn.csv')

# Display top rows
df.head()

### 2: Exploratory Data Analysis (EDA)

In [None]:
# Dataset structure (column dtypes and non-null counts)
df.info()

# Missing values per column. isnull() only flags NaN/None, so blank strings
# stored as text are NOT counted here. Only the last expression of a cell is
# rendered automatically, hence the explicit display().
display(df.isnull().sum())

# Convert TotalCharges to numeric; entries that cannot be parsed
# (e.g. blank strings) are coerced to NaN
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

# Inspect the rows whose TotalCharges could not be parsed. This must run
# after the conversion above, otherwise the blanks are invisible to isnull().
# (The original note reports 11 blank values.)
display(df[df['TotalCharges'].isnull()])

# Drop rows with missing TotalCharges
df.dropna(subset=['TotalCharges'], inplace=True)

# Check for class balance
sns.countplot(data=df, x='Churn')
plt.title("Class Distribution (Churn)")
plt.show()

# Quick correlation heatmap (numeric columns only)
plt.figure(figsize=(12, 6))
sns.heatmap(df.corr(numeric_only=True), annot=True)
plt.title("Correlation Heatmap")
plt.show()


### 3: Feature Engineering and Encoding

In [None]:
# Drop customerID: it is an identifier, not a predictive feature
df.drop('customerID', axis=1, inplace=True)

# Binary features: label encoding (each column is mapped to integer codes;
# LabelEncoder assigns codes in sorted order of the class labels)
binary_cols = ['gender', 'Partner', 'Dependents', 'PhoneService',
               'PaperlessBilling', 'Churn']

le = LabelEncoder()
for col in binary_cols:
    # The encoder is refit for every column, so `le` only keeps the mapping
    # of the last column processed
    df[col] = le.fit_transform(df[col])

# Multi-category columns: one-hot encoding
multi_cat_cols = ['MultipleLines', 'InternetService', 'OnlineSecurity',
                  'OnlineBackup', 'DeviceProtection', 'TechSupport',
                  'StreamingTV', 'StreamingMovies', 'Contract', 'PaymentMethod']

# Request integer dummies explicitly: recent pandas versions default to bool
# columns, and a frame mixing bool with numeric columns converts to an object
# array that Keras cannot turn into a tensor.
df = pd.get_dummies(df, columns=multi_cat_cols, dtype=int)

# Feature scaling (tenure, MonthlyCharges, TotalCharges)
# NOTE(review): the scaler is fit on the full dataset, before the train/test
# split performed in a later cell, so test-set statistics leak into the
# scaling. Consider fitting on the training rows only and applying
# scaler.transform to the validation/test rows.
numeric_cols = ['tenure', 'MonthlyCharges', 'TotalCharges']
scaler = StandardScaler()
df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

# Separate features and target
X = df.drop('Churn', axis=1)
y = df['Churn']


### 4: Train-Test Split and Validation Set

In [None]:
# Both splits share the same hold-out fraction and seed
split_options = dict(test_size=0.2, random_state=42)

# Step 1: hold out 20% of all rows as the test set, stratified on the target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, **split_options
)

# Step 2: hold out 20% of the remaining training rows as the validation set
# (overall proportions: 64% train / 16% validation / 20% test)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, stratify=y_train, **split_options
)





### 5: Build and Train ANN with Dropout Regularization

In [None]:
# Seed Python, NumPy and the backend RNGs so weight initialisation, dropout
# masks and batch shuffling are repeatable across Restart & Run All
keras.utils.set_random_seed(42)

# Define model. Each Dropout layer randomly drops 30% of the units coming from
# the previous layer during training, to reduce overfitting.
# The input width is declared with an explicit Input object rather than the
# deprecated `input_dim` argument on the first Dense layer.
model = Sequential([
    keras.Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),  # single sigmoid unit: binary output
])

# Compile model
model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])

# Train model; the validation set is only used for per-epoch monitoring
history = model.fit(X_train, y_train,
                    validation_data=(X_val, y_val),
                    epochs=50, batch_size=64, verbose=1)


### 6: Visualize Loss and Accuracy Curves

In [None]:
# Plot the train vs. validation curve for each tracked metric, one figure per
# metric. `history.history` maps metric names to per-epoch values; validation
# metrics use the same name with a 'val_' prefix.
for metric, label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    fig, ax = plt.subplots()
    ax.plot(history.history[metric], label=f'Train {label}')
    ax.plot(history.history[f'val_{metric}'], label=f'Val {label}')
    ax.set(title=f'Model {label}', xlabel='Epoch', ylabel=label)
    ax.legend()
    plt.show()


### 7: Evaluate on Test Data

In [None]:
# Predictions: the sigmoid output is a probability per row; threshold at 0.5
# and flatten the (n, 1) column to a 1-D label vector for the metrics below
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int).ravel()

# Confusion matrix (rows: actual class, columns: predicted class)
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

# Classification report
# NOTE(review): target_names assumes label 0 is "No Churn" and 1 is "Churn";
# confirm against the encoding applied to the Churn column.
print(classification_report(y_test, y_pred, target_names=['No Churn', 'Churn']))