<a href="https://colab.research.google.com/github/azamjon98/final_project/blob/main/Customer_churn_telecom_data_set.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report,roc_auc_score, accuracy_score, f1_score, precision_score, recall_score, ConfusionMatrixDisplay
from tensorflow.keras.layers import Normalization,Dense, InputLayer
from xgboost import XGBClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
!pip install bayesian-optimization
from bayes_opt import BayesianOptimization
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Location of the Telco customer churn workbook (an .xlsx file on GitHub).
url_link = 'https://github.com/myasmin/Teleco-Churn-Data-Analysis/raw/main/Telco_customer_churn.xlsx'

# Read the workbook into a DataFrame and preview the first rows.
df = pd.read_excel(io=url_link)
df.head()

### We should drop the columns `'CustomerID'`, `'Churn Score'`, `'CLTV'` and `'Churn Reason'`: the first one is just the customer's id, and the others express exit-interview results.




In [None]:
# Remove the customer identifier and the exit-interview columns; the result is
# rebound to ``df`` rather than mutating the frame in place.
df = df.drop(
    columns=[
        'CustomerID',
        'Churn Score',
        'CLTV',
        'Churn Reason',
    ]
)
df.head()

In [None]:
# Distinct values of the three columns being checked, shown as one tuple.
tuple(df[column].unique() for column in ('Count', 'State', 'Country'))

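### We can also drop the columns that contain a single value (`'Count'`, `'Country'`, `'State'`), together with `'Lat Long'`, which repeats the coordinates that are kept as separate numeric columns.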

In [None]:
# Drop 'Count', 'Country', 'State' and 'Lat Long'; rebinding ``df`` avoids
# mutating the frame in place.
df = df.drop(
    columns=[
        'Count',
        'Country',
        'State',
        'Lat Long',
    ]
)
df.head()

### Column `Churn Label` carries the same information as `Churn Value`, so we can drop it as well

In [None]:
# 'Churn Label' is removed; 'Churn Value' stays in the frame as the target.
df = df.drop(columns='Churn Label')
df.head()

### We extract numerical and categorical data

In [None]:
# Keep only the columns stored with the generic ``object`` dtype.
df_cat = df.select_dtypes(include=['object'])
df_cat.head()

### Column `Total Charges` must be numerical, so we convert it into numerical values.

In [None]:
# 'Total Charges' is an object column because some entries are blank strings
# (the previous version of this cell replaced " " with 0 before converting).
# ``errors='coerce'`` turns every non-numeric entry into NaN without relying on
# a regex over mixed str/float values; the NaNs are then filled with 0, the same
# value the blanks were mapped to before.
# NOTE: entries that were already missing (NaN) are filled with 0 as well.
total_charges = pd.to_numeric(df_cat['Total Charges'], errors='coerce').fillna(0)

# ``assign`` builds a new frame instead of writing into a frame derived from
# ``df``, which avoids chained-assignment pitfalls and keeps the column order.
df_cat = df_cat.assign(**{'Total Charges': total_charges})
df_cat.head()

### We already have the coordinates of each customer, so we do not need the `Zip Code` of their address

In [None]:
# Every non-object column except 'Zip Code', built in one chained expression.
df_num = (
    df.select_dtypes(exclude=['object'])
      .drop(columns=['Zip Code'])
)
df_num.head()

In [None]:
# Put the two column groups back side by side (categorical first, then numeric).
df = pd.concat(objs=[df_cat, df_num], axis='columns')
df.head()

**We should replace all free spaces `" "` with `'_'`**

In [None]:
# Rewrite every column header so that spaces become underscores.
df.columns = [header.replace(' ', '_') for header in df.columns]

In [None]:
# Replace spaces with underscores inside the values of these text columns.
# The headers themselves were already rewritten with underscores by the cell
# that renames ``df.columns``, so the columns must be addressed by their new
# names: 'Internet Service' / 'Payment Method' no longer exist and would raise
# a KeyError.
for column in ('City', 'Internet_Service', 'Payment_Method', 'Contract'):
    df[column] = df[column].str.replace(' ', '_')
df.head()

### Specify the features and the target values

In [None]:
# Target: the 'Churn_Value' column. Features: every other column.
y = df['Churn_Value']
X = df.drop(columns='Churn_Value')

**For categorical data we use `One-Hot Encoding`**

In [None]:
# One-hot encode the categorical features; ``drop_first=True`` leaves out the
# first level of each encoded column.
X = pd.get_dummies(data=X, drop_first=True)
X.head()

In [None]:
# Relative frequency of each target class.
class_share = y.value_counts(normalize=True)
class_share

**The data is imbalanced, so we should use stratified sampling**

**We are going to train a deep neural network and compare its results with a classical ML algorithm**

In [None]:
# Hold out 10% of the rows for testing; stratify on the target so both splits
# keep the same class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, stratify=y, random_state=42
)

# Convert to tensors. The feature frame is cast to float32 explicitly: the
# one-hot encoded frame can mix bool and numeric columns, which does not convert
# cleanly to a tensor without a common dtype.
X_train = tf.constant(np.asarray(X_train, dtype='float32'))
y_train = tf.constant(np.asarray(y_train))

# Standardize the features so they share the same range. The statistics are
# learned from the training rows only; ``normalizer`` is kept at module level so
# the evaluation cell can apply the same transformation to the test rows.
# (A previous warm-up ``adapt`` on a dummy 2 x 1177 array was removed: it was
# overwritten by the adapt below and hard-wired the feature count.)
normalizer = Normalization(axis=-1)
normalizer.adapt(X_train)
X_train = normalizer(X_train)

def create_model():
    """Build and compile the feed-forward churn classifier.

    The network is: input -> Normalization -> Dense(256) -> Dense(128) ->
    Dense(128) -> Dense(64) (all ReLU) -> Dense(1, sigmoid).

    Reads the module-level ``X_train`` tensor both to adapt the normalization
    layer and to size the input layer, so it must be defined before calling.

    Returns:
        A compiled ``tf.keras.Sequential`` model (Adam optimizer,
        binary cross-entropy loss, AUC metric).
    """
    # Derive the input width from the data instead of hard-coding 1177, so the
    # model keeps working if the encoded feature set changes.
    n_features = int(X_train.shape[-1])

    # NOTE: in this notebook X_train has already been standardized before this
    # function is called, so this layer is adapted on standardized data.
    normalizer = Normalization(axis=-1)
    normalizer.adapt(X_train)

    model = tf.keras.Sequential([
        InputLayer(input_shape=(n_features,)),
        normalizer,
        Dense(256, activation='relu'),
        Dense(128, activation='relu'),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid')  # single probability output
    ])

    # Binary target -> binary cross-entropy; AUC is tracked because the classes
    # are imbalanced.
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['AUC'])

    return model

# Build the (not yet trained) classifier; it is compiled with binary
# cross-entropy loss inside create_model().
model_clf = create_model()
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would emit an extra "None" line).
model_clf.summary()

In [None]:
# Train the model in 20 epochs
history = model_clf.fit(X_train, y_train,
                   epochs=20,
                   batch_size=32,
                   validation_split=0.2,
                   verbose=1) # verbose 1 means it shows all results in each epoch

**Check the model with test data**

In [None]:
# Evaluate the neural network on the held-out split.
#
# The test features must go through the SAME transformation as the training
# features, so the normalizer fitted on the training rows is reused as-is.
# Re-adapting it on the test rows would standardize them with different
# statistics than the ones the model was trained with.
# New names are used so X_test / y_test are not overwritten and the cell can be
# re-run safely.
X_test_nn = normalizer(tf.constant(np.asarray(X_test, dtype='float32')))
y_test_nn = np.asarray(y_test)

# Keep the predicted probabilities: ROC-AUC is a ranking metric and has to be
# computed from scores, not from labels already thresholded at 0.5.
proba2 = model_clf.predict(X_test_nn).ravel()
pred2 = (proba2 > 0.5).astype(int)  # same cut-off np.round applied before

cm = confusion_matrix(y_test_nn, pred2)
tn, fp, fn, tp = cm.ravel()

scores = {
        'ROC-AUC': roc_auc_score(y_test_nn, proba2),
        'Accuracy': accuracy_score(y_test_nn, pred2),
        'F1': f1_score(y_test_nn, pred2),
        'Precision': precision_score(y_test_nn, pred2),
        'Sensitivity (TPR / Recall)': recall_score(y_test_nn, pred2),
        'Specificity (TNR)': tn / (tn+fp)
    }

print('Testing Set Scores:')
for metric, score in scores.items():
    print(f'- {metric}: {score:.4f}')
print()  # one blank line after the list, matching get_results()

ConfusionMatrixDisplay(cm, display_labels=['Not Churned', 'Churned']).plot(cmap=plt.cm.Blues, colorbar=False)
plt.title('Confusion Matrix');

**we will use Classic ML algorithm**

**XGBOOST classifier without defining optimization parameters**

In [None]:
# Baseline: XGBoost with default hyperparameters.
#
# The model is fitted on a training split and scored on rows it has never seen.
# Fitting and predicting on the same full data set would report training
# scores (which are optimistic) under the "Testing Set Scores" heading.
# Dedicated names are used so the split variables of other cells are untouched.
X_base_train, X_base_test, y_base_train, y_base_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

clf = XGBClassifier()
clf.fit(X_base_train, y_base_train)

pred1 = clf.predict(X_base_test)
# Probability of the positive class, needed for a meaningful ROC-AUC.
score1 = clf.predict_proba(X_base_test)[:, 1]

cm = confusion_matrix(y_base_test, pred1)
tn, fp, fn, tp = cm.ravel()
scores = {
        'ROC-AUC': roc_auc_score(y_base_test, score1),
        'Accuracy': accuracy_score(y_base_test, pred1),
        'F1': f1_score(y_base_test, pred1),
        'Precision': precision_score(y_base_test, pred1),
        'Sensitivity (TPR / Recall)': recall_score(y_base_test, pred1),
        'Specificity (TNR)': tn / (tn+fp)
    }

print('Testing Set Scores:')
for metric, score in scores.items():
    print(f'- {metric}: {score:.4f}')
print()  # one blank line after the list, matching get_results()

ConfusionMatrixDisplay(cm, display_labels=['Not Churned', 'Churned']).plot(cmap=plt.cm.Blues, colorbar=False)
plt.title('Confusion Matrix');

In [None]:
# Fresh 80/20 stratified split of the encoded frame; this rebinds X_train,
# X_test, y_train and y_test for the cells that follow.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

**XGBoost classifier with tuned hyperparameters: instead of the default parameters, we are going to use Bayesian optimisation to find the best parameters, evaluated with StratifiedKFold cross-validation**

In [None]:
# Five stratified folds in the original row order (no shuffling), shared by
# every cross-validated evaluation below.
stratified_kfold = StratifiedKFold(n_splits=5, shuffle=False)

def xgb_cv(learning_rate, n_estimators, scale_pos_weight, max_depth, gamma, subsample, colsample_bytree, reg_lambda):
    """
    Score one XGBoost configuration by cross-validated ROC AUC.

    ``max_depth`` and ``n_estimators`` are truncated to integers before use;
    the other hyperparameters are forwarded unchanged. The candidate is
    evaluated on the module-level ``X_train`` / ``y_train`` with the
    module-level ``stratified_kfold`` splitter.

    Returns: Mean ROC AUC over the folds (float).
    """
    hyperparams = {
        'learning_rate': learning_rate,
        'n_estimators': int(n_estimators),
        'scale_pos_weight': scale_pos_weight,
        'max_depth': int(max_depth),
        'gamma': gamma,
        'subsample': subsample,
        'colsample_bytree': colsample_bytree,
        'reg_lambda': reg_lambda,
        'random_state': 42,
        'eval_metric': 'auc',
    }
    candidate = XGBClassifier(**hyperparams)
    fold_scores = cross_val_score(
        candidate,
        X_train,
        y_train,
        cv=stratified_kfold,
        scoring='roc_auc',
    )
    return np.mean(fold_scores)

def optimize_xgb(pbounds=None, init_points=None, n_iter=20):
    '''
    Search XGBoost hyperparameters with Bayesian Optimization, maximizing the
    cross-validated ROC AUC returned by ``xgb_cv``.

    Parameters:
    pbounds: Optional dict mapping each ``xgb_cv`` argument name to a
             ``(lower, upper)`` tuple. When omitted, the notebook's pinned
             values are used. NOTE: in those defaults lower == upper for every
             parameter, so the "search" evaluates a single fixed configuration;
             pass real ranges to explore.
    init_points: Optional number of initial random probes. When omitted, the
             optimizer's own default is used.
    n_iter: Number of optimization iterations (default 20).

    Returns: ``optimizer.max`` -- the best record found; its 'params' entry
             holds the hyperparameters.
    '''
    if pbounds is None:
        pbounds = {
            'scale_pos_weight': (3.6, 3.6),
            'max_depth': (3, 3),
            'gamma': (5.4, 5.4),
            'subsample': (1, 1),
            'colsample_bytree': (0.4, 0.4),
            'reg_lambda': (14, 14),
            'learning_rate': (0.07, 0.07),
            'n_estimators': (240, 240)
        }

    # xgb_cv already has the keyword signature the optimizer needs, so it is
    # passed directly instead of through a pass-through wrapper.
    optimizer = BayesianOptimization(
        f=xgb_cv,
        pbounds=pbounds,
        random_state=42,
    )

    # Only forward init_points when the caller set it, so the library default
    # applies otherwise (as it did when maximize(n_iter=20) was called).
    maximize_kwargs = {'n_iter': n_iter}
    if init_points is not None:
        maximize_kwargs['init_points'] = init_points
    optimizer.maximize(**maximize_kwargs)

    return optimizer.max

# Run the search and keep the winning hyperparameter set.
best_params = optimize_xgb()['params']
print('Best Hyperparameters found by Bayesian Optimization:\n', best_params, '\n')

# Build the final classifier from the winning set. The optimizer works with
# floats, so the two integer-valued hyperparameters are cast back to int.
tuned_kwargs = {
    name: best_params[name]
    for name in (
        'scale_pos_weight',
        'gamma',
        'subsample',
        'colsample_bytree',
        'reg_lambda',
        'learning_rate',
    )
}
for integer_name in ('max_depth', 'n_estimators'):
    tuned_kwargs[integer_name] = int(best_params[integer_name])

best_xgb = XGBClassifier(random_state=42, **tuned_kwargs)
best_xgb.fit(X_train, y_train)

In [None]:
def get_results(model):
    '''
    Report how a fitted classifier performs on the module-level test split.

    Prints ROC-AUC (from the positive-class probabilities), accuracy, F1,
    precision, recall and specificity, then draws the confusion matrix.

    Parameters:
    model: A fitted classifier exposing ``predict`` and ``predict_proba``

    Returns: None
    '''
    predicted_labels = model.predict(X_test)
    churn_probability = model.predict_proba(X_test)[:, 1]

    cm = confusion_matrix(y_test, predicted_labels)
    tn, fp, fn, tp = cm.ravel()
    specificity = tn / (tn+fp)

    scores = dict([
        ('ROC-AUC', roc_auc_score(y_test, churn_probability)),
        ('Accuracy', accuracy_score(y_test, predicted_labels)),
        ('F1', f1_score(y_test, predicted_labels)),
        ('Precision', precision_score(y_test, predicted_labels)),
        ('Sensitivity (TPR / Recall)', recall_score(y_test, predicted_labels)),
        ('Specificity (TNR)', specificity),
    ])

    # Header, one line per metric, then a trailing blank line.
    report = ['Testing Set Scores:']
    report.extend(f'- {metric}: {score:.4f}' for metric, score in scores.items())
    report.append('')
    print('\n'.join(report))

    matrix_plot = ConfusionMatrixDisplay(cm, display_labels=['Not Churned', 'Churned'])
    matrix_plot.plot(cmap=plt.cm.Blues, colorbar=False)
    plt.title('Confusion Matrix');

In [None]:
# Print the test-split metrics and draw the confusion matrix for the tuned model.
get_results(best_xgb)

**The data set is small, so the classical ML algorithm performs better than the deep neural network**