<a href="https://colab.research.google.com/github/azamjon98/final_project/blob/main/Customer_churn_telecom_data_set.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Enumerate every outcome of rolling three six-sided dice and keep the sum
# of each roll: 6 * 6 * 6 = 216 sums, each between 3 and 18.
stat_d = [
    first + second + third
    for first in range(1, 7)
    for second in range(1, 7)
    for third in range(1, 7)
]

In [None]:
# Count how often each sum occurs and plot the counts as a percentage of
# the 216 possible rolls.
x = np.array(stat_d)
unique, counts = np.unique(x, return_counts=True)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(unique, counts/216*100, '-o')
ax.grid()
ax.set_xlabel('Sum of dice')
ax.set_ylabel('Probability  %')
ax.set_title('Probability distribution of the sum of three dice')
ax.set_xticks(np.arange(3,19))
ax.set_yticks(np.arange(0,15,1))
plt.show()

In [None]:
# Summary statistics of the three-dice sum.
# Expected value = sum over outcomes of (value * probability), where the
# probability of a value is its count divided by the total number of rolls.
exp = np.sum(unique * counts / counts.sum())
# np.std defaults to the population standard deviation (ddof=0); compute it
# once and reuse it for the bounds below.
std = np.std(x)
print('Expected value', exp)
print('Standard deviation:', std)
# One-standard-deviation band around the expected value, rounded to the
# nearest integer.
print('Lower bound:', round(exp - std))
print('Upper bound:', round(exp + std))

In [None]:
# Running total of the counts, as a percentage of the 216 possible rolls,
# gives the cumulative distribution of the sum.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(unique, counts.cumsum()/216*100, '-o')
ax.grid()
ax.set_xlabel('Sum of dice')
ax.set_ylabel('Probability  %')
ax.set_title('Cumulative Probability distribution of the sum of three dice')
ax.set_xticks(np.arange(3,19))
ax.set_yticks(np.arange(0,101,5))
plt.show()

In [None]:
# Load the Telco customer churn Excel workbook straight from its GitHub URL
# into a DataFrame (requires network access when the cell runs).
url_link='https://github.com/myasmin/Teleco-Churn-Data-Analysis/raw/main/Telco_customer_churn.xlsx'
df=pd.read_excel(url_link)
# Preview the first rows.
df.head()

In [None]:
# Remove the customer identifier and the churn score / CLTV / churn reason
# columns, rebinding `df` to the reduced frame.
df = df.drop(
    columns=['CustomerID', 'Churn Score', 'CLTV', 'Churn Reason'],
)
df.head()

In [None]:
# Display the distinct values of the Count, State and Country columns as one tuple.
tuple(df[column].unique() for column in ('Count', 'State', 'Country'))

In [None]:
# Remove the Count, Country, State and combined 'Lat Long' columns,
# rebinding `df` to the reduced frame.
df = df.drop(
    columns=['Count', 'Country', 'State', 'Lat Long'],
)
df.head()

In [None]:
# Remove the 'Churn Label' column; 'Churn Value' is what the modelling
# cells later use as the target.
df = df.drop(columns='Churn Label')
df.head()

In [None]:
# Object-dtype (non-numeric) columns only. Take an explicit copy because a
# later cell assigns into this frame; without it pandas may treat that
# assignment as a write to a slice of `df` (SettingWithCopyWarning).
df_cat = df.select_dtypes(include='object').copy()
df_cat.head()

In [None]:
# 'Total Charges' is object-dtype; convert it to a numeric column.
# Only entries that are empty or whitespace-only are replaced with 0. The
# pattern is anchored (^\s*$) so a value that merely *contains* a space is not
# silently zeroed; any other non-numeric entry still raises in pd.to_numeric.
total_charges = df_cat['Total Charges'].replace(r'^\s*$', 0, regex=True)
# .assign returns a new frame (column position is kept), which avoids
# column-wise assignment into a frame obtained from select_dtypes.
df_cat = df_cat.assign(**{'Total Charges': pd.to_numeric(total_charges)})
df_cat.head()

In [None]:
# Non-object columns of `df`, minus 'Zip Code'. Chaining .drop() yields a
# fresh frame instead of mutating the select_dtypes result in place, which
# pandas may flag as a write to a slice of `df`.
df_num = df.select_dtypes(exclude='object').drop(columns=['Zip Code'])
df_num.head()

In [None]:
# Put the two column groups back together side by side (aligned on the row
# index), with the df_cat columns first.
df = pd.concat((df_cat, df_num), axis='columns')
df.head()

In [None]:
# Replace spaces with underscores in the values of the four text columns
# listed below.
for spaced_col in ('City', 'Internet Service', 'Payment Method', 'Contract'):
    df[spaced_col] = df[spaced_col].str.replace(' ', '_')
df.head()

In [None]:
# Replace spaces with underscores in every column name.
df = df.rename(columns=lambda name: name.replace(' ', '_'))

In [None]:
# Preview the frame after the column names were rewritten with underscores.
df.head()

In [None]:
# Separate the target column ('Churn_Value') from the remaining feature columns.
y = df['Churn_Value']
X = df.drop(columns='Churn_Value')

In [None]:
# One-hot encode the features with pd.get_dummies, emitting the indicator
# columns as float.
X=pd.get_dummies(X, dtype=float)
X.head()

In [None]:
# Relative frequency of each target value (class balance check).
y.value_counts(normalize=True)

In [None]:
import tensorflow as tf
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report,roc_auc_score, accuracy_score, f1_score, precision_score, recall_score, ConfusionMatrixDisplay
from tensorflow.keras.layers import Normalization,Dense, InputLayer
from xgboost import XGBClassifier
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Independent copies of the feature frame and target series.
# NOTE(review): X1 / y1 are only converted to tensors in the next cell and are
# not referenced by any later cell visible here — confirm whether they are needed.
X1=X.copy()
y1=y.copy()

In [None]:
# Convert the copies to constant TensorFlow tensors.
# NOTE(review): neither tensor is used by any later cell visible here.
X1=tf.constant(X1)
y1=tf.constant(y1)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for testing, stratified on the target so both
# splits keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, stratify=y, random_state=42
)

# Convert the training split to constant tensors for Keras.
X_train = tf.constant(X_train)
y_train = tf.constant(y_train)

# Learn per-feature mean/variance from the training features only, then
# standardize them. adapt() builds the layer from the data it is given, so no
# fixed feature count has to be assumed here.
normalizer = Normalization(axis=-1)
normalizer.adapt(X_train)
X_train = normalizer(X_train)

def create_model():
    """Build and compile the dense binary classifier.

    Reads the module-level ``X_train`` tensor twice: its last dimension sets
    the input width, and a fresh ``Normalization`` layer is adapted to it and
    placed at the front of the network.

    NOTE: in this notebook ``X_train`` has already been passed through the
    module-level ``normalizer`` before this function is called, so the layer
    created here is adapted to already-standardized data.

    Returns:
        A compiled ``tf.keras.Sequential`` model with a single sigmoid output,
        binary cross-entropy loss, the Adam optimizer and AUC as its metric.
    """
    # Derive the input width from the data instead of hard-coding it, so the
    # model still builds when the one-hot encoding yields a different number
    # of columns.
    n_features = X_train.shape[-1]

    normalizer = Normalization(axis=-1)
    normalizer.adapt(X_train)

    model = tf.keras.Sequential([
        InputLayer(input_shape=(n_features,)),
        normalizer,
        Dense(256, activation='relu'),
        Dense(128, activation='relu'),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['AUC'])

    return model

# Build the classifier (binary cross-entropy loss, AUC metric) and show its
# architecture. summary() prints the table itself and returns None, so it is
# called directly rather than wrapped in print().
model_clf = create_model()
model_clf.summary()

In [None]:
# Train the classifier for 20 epochs with mini-batches of 32, setting aside
# 20% of the training data for validation; the returned History object is
# kept in `history`.
history = model_clf.fit(X_train, y_train,
                   epochs=20,
                   batch_size=32,
                   validation_split=0.2,
                   verbose=1)

In [None]:
# Convert the held-out split to constant tensors.
X_test = tf.constant(X_test)
y_test = tf.constant(y_test)

# Standardize the test features with the statistics the normalizer learned
# from the training split. It must NOT be re-adapted here: fitting it on the
# test set would scale test data differently from what the model was trained
# on and would leak test-set statistics into the evaluation.
X_test = normalizer(X_test)

# Sigmoid outputs are churn probabilities; round at 0.5 for hard labels.
proba2 = model_clf.predict(X_test)
pred2 = np.round(proba2)
cm = confusion_matrix(y_test, pred2)
tn, fp, fn, tp = cm.ravel()

scores = {
        # ROC-AUC is a ranking metric, so it is computed from the predicted
        # probabilities rather than from the thresholded labels.
        'ROC-AUC': roc_auc_score(y_test, proba2.ravel()),
        'Accuracy': accuracy_score(y_test, pred2),
        'F1': f1_score(y_test, pred2),
        'Precision': precision_score(y_test, pred2),
        'Sensitivity (TPR / Recall)': recall_score(y_test, pred2),
        'Specificity (TNR)': tn / (tn+fp)
    }

print('Testing Set Scores:')
for metric, score in scores.items():
    print(f'- {metric}: {score:.4f}')
# Single blank line after the list, matching get_results().
print()

ConfusionMatrixDisplay(cm, display_labels=['Not Churned', 'Churned']).plot(cmap=plt.cm.Blues, colorbar=False)
plt.title('Confusion Matrix');

In [None]:
# Baseline: XGBoost with default hyperparameters, scored on rows it was not
# trained on. Fitting and predicting on the same data only measures in-sample
# fit, so a stratified 80/20 hold-out split is made here. It uses its own
# variable names so the X_train / X_test tensors used by the neural-network
# cells are left untouched.
X_base_train, X_base_test, y_base_train, y_base_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

clf = XGBClassifier()
clf.fit(X_base_train, y_base_train)

pred1 = clf.predict(X_base_test)
# Probability of the positive (churn) class, used for ROC-AUC.
proba1 = clf.predict_proba(X_base_test)[:, 1]
cm = confusion_matrix(y_base_test, pred1)
tn, fp, fn, tp = cm.ravel()
scores = {
        'ROC-AUC': roc_auc_score(y_base_test, proba1),
        'Accuracy': accuracy_score(y_base_test, pred1),
        'F1': f1_score(y_base_test, pred1),
        'Precision': precision_score(y_base_test, pred1),
        'Sensitivity (TPR / Recall)': recall_score(y_base_test, pred1),
        'Specificity (TNR)': tn / (tn+fp)
    }

print('Testing Set Scores:')
for metric, score in scores.items():
    print(f'- {metric}: {score:.4f}')
# Single blank line after the list, matching get_results().
print()

ConfusionMatrixDisplay(cm, display_labels=['Not Churned', 'Churned']).plot(cmap=plt.cm.Blues, colorbar=False)
plt.title('Confusion Matrix');

In [None]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
!pip install bayesian-optimization
from bayes_opt import BayesianOptimization

In [None]:
# Display the encoded feature frame (the notebook renders a truncated view).
X

In [None]:
# Re-split from the original X / y with a stratified 80/20 split. This
# rebinds X_train / X_test / y_train / y_test, which the earlier
# neural-network cells had overwritten with (normalized) tensors.
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2,stratify=y, random_state=42)

In [None]:
# Five stratified folds, shared by every cross-validation run below.
stratified_kfold = StratifiedKFold(n_splits=5)

def xgb_cv(learning_rate, n_estimators, scale_pos_weight, max_depth, gamma, subsample, colsample_bytree, reg_lambda):
    """
    Score one XGBoost hyperparameter combination by cross-validation.

    Builds an XGBClassifier from the given values (``max_depth`` and
    ``n_estimators`` are cast to int) and evaluates it on the module-level
    ``X_train`` / ``y_train`` using ``stratified_kfold``.

    Returns: Mean ROC AUC across the folds (float).
    """
    hyperparams = dict(
        learning_rate=learning_rate,
        n_estimators=int(n_estimators),
        scale_pos_weight=scale_pos_weight,
        max_depth=int(max_depth),
        gamma=gamma,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        reg_lambda=reg_lambda,
        random_state=42,
        eval_metric='auc',
    )
    candidate = XGBClassifier(**hyperparams)
    fold_scores = cross_val_score(
        candidate, X_train, y_train, cv=stratified_kfold, scoring='roc_auc'
    )
    return np.mean(fold_scores)

def optimize_xgb(pbounds=None, n_iter=20):
    '''
    Optimize hyperparameters for an XGBoost classifier using Bayesian Optimization.

    The objective is xgb_cv (mean cross-validated ROC AUC), passed to the
    optimizer directly.

    Parameters:
    pbounds: Optional dict mapping each xgb_cv argument name to a
             (lower, upper) bound. Defaults to the bounds listed below.
    n_iter: Number of optimization iterations passed to maximize()
            (default 20).

    Returns: The optimizer's best result (optimizer.max); its 'params' entry
             holds the best hyperparameters found.
    '''
    if pbounds is None:
        # NOTE(review): every default bound has lower == upper, so the search
        # space is a single point and each iteration scores the same
        # configuration. Pass wider bounds to perform an actual search.
        pbounds = {
            'scale_pos_weight': (3.6, 3.6),
            'max_depth': (3, 3),
            'gamma': (5.4, 5.4),
            'subsample': (1, 1),
            'colsample_bytree': (0.4, 0.4),
            'reg_lambda': (14, 14),
            'learning_rate': (0.07, 0.07),
            'n_estimators': (240, 240)
        }

    optimizer = BayesianOptimization(
        f=xgb_cv,
        pbounds=pbounds,
        random_state=42,
    )
    optimizer.maximize(n_iter=n_iter)
    return optimizer.max

best_params = optimize_xgb()['params']
print('Best Hyperparameters found by Bayesian Optimization:\n', best_params, '\n')

# Fit the final XGBoost classifier with the best hyperparameters.
# max_depth and n_estimators are cast to int before being passed on; every
# other value is forwarded as returned by the optimizer.
final_params = dict(best_params)
for int_key in ('max_depth', 'n_estimators'):
    final_params[int_key] = int(final_params[int_key])

best_xgb = XGBClassifier(random_state=42, **final_params)
best_xgb.fit(X_train, y_train)

In [None]:
def get_results(model):
    '''
    Print test-set metrics for a fitted classifier and plot its confusion matrix.

    Uses the module-level X_test / y_test. Hard predictions come from
    model.predict; ROC-AUC is computed from the second column of
    model.predict_proba.

    Parameters:
    model: The trained machine learning model

    Returns: None
    '''
    predicted_labels = model.predict(X_test)
    churn_probability = model.predict_proba(X_test)[:, 1]

    matrix = confusion_matrix(y_test, predicted_labels)
    true_neg, false_pos, _false_neg, _true_pos = matrix.ravel()

    report = {
        'ROC-AUC': roc_auc_score(y_test, churn_probability),
        'Accuracy': accuracy_score(y_test, predicted_labels),
        'F1': f1_score(y_test, predicted_labels),
        'Precision': precision_score(y_test, predicted_labels),
        'Sensitivity (TPR / Recall)': recall_score(y_test, predicted_labels),
        'Specificity (TNR)': true_neg / (true_neg + false_pos),
    }

    print('Testing Set Scores:')
    for name, value in report.items():
        print(f'- {name}: {value:.4f}')
    print()

    cm_display = ConfusionMatrixDisplay(matrix, display_labels=['Not Churned', 'Churned'])
    cm_display.plot(cmap=plt.cm.Blues, colorbar=False)
    plt.title('Confusion Matrix')

In [None]:
# Report test-set metrics and the confusion matrix for the tuned XGBoost model.
get_results(best_xgb)