# Customer Churn 

In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import warnings
# Silence every warning for the rest of the session. NOTE: this is a blanket
# filter, so deprecation and pandas chained-assignment warnings are hidden too.
warnings.filterwarnings('ignore')

In [None]:
# Load the Telco customer-churn CSV (path is relative to the working directory).
df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')

In [None]:
# Display the loaded DataFrame as the cell output.
df

In [None]:
# Print the column summary (names, non-null counts, dtypes).
df.info()

In [None]:
# Remove the 'customerID' column from df in place.
df.drop(columns='customerID', inplace=True)

 **Quick EDA using sweetviz**

In [None]:
import sweetviz as sv

# Quick EDA using sweetviz: analyze the whole DataFrame and keep the report object.
sweet_report = sv.analyze(df)


In [None]:
# Write the sweetviz report out as 'sweet_report.html'.
sweet_report.show_html('sweet_report.html')


In [None]:
# Class balance of the target: count rows per Churn value once, print the
# counts, then draw them as a pie chart.
churn_counts = df.value_counts(['Churn'])
print(churn_counts)

# Take the slice labels from the counts' own index so every label is
# guaranteed to match its slice, instead of relying on a hard-coded order.
plt.pie(
    churn_counts,
    labels=churn_counts.index.get_level_values(0),
    autopct='%.1f%%',
)
plt.show()

In [None]:
# Frequency of each distinct 'TotalCharges' value in df.
df['TotalCharges'].value_counts()

In [None]:
# (rows, columns) of df.
df.shape

In [None]:
# Keep only the rows whose 'TotalCharges' is not a single blank string.
# .copy() makes df1 an independent frame, so the column assignments and
# in-place replacements applied to df1 afterwards act on df1 itself rather
# than on a slice of df (chained-assignment hazard).
df1 = df[df['TotalCharges'] != ' '].copy()


In [None]:
# (rows, columns) of the filtered frame df1.
df1.shape

In [None]:
# Cast the 'TotalCharges' column of df1 to floating point.
df1['TotalCharges'] = df1['TotalCharges'].astype(float)

In [None]:
# Display the 'TotalCharges' column of df1.
df1['TotalCharges']

In [None]:
# Names of the df1 columns that have object dtype.
obj_col = df1.select_dtypes(include='object').columns

In [None]:
# Print the distinct values of every object-typed column. The column names
# come from df1, so read the values from df1 as well (not from the unfiltered
# df) to show exactly what the following preprocessing steps will see.
for col in obj_col:
    print(col, ':', df1[col].unique())

In [None]:
# Collapse service labels to plain 'Yes'/'No' across the whole frame, in place.
# Each (old, new) pair is applied in the listed order.
label_rewrites = (
    ('No internet service', 'No'),
    ('No phone service', 'No'),
    ('DSL', 'Yes'),
    ('Fiber optic', 'Yes'),
)
for old_label, new_label in label_rewrites:
    df1.replace(old_label, new_label, inplace=True)

In [None]:
# Plain Python list of the object-typed column names.
col = obj_col.tolist()

In [None]:
from sklearn.preprocessing import LabelEncoder
# Single encoder instance; it is re-fitted on each column it is applied to.
le = LabelEncoder()

In [None]:
# Integer-encode every listed column, fitting the encoder afresh per column.
for column_name in col:
    df1[column_name] = le.fit_transform(df1[column_name])

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Continuous columns rescaled with MinMaxScaler.
col_scale = ['tenure', 'MonthlyCharges', 'TotalCharges']

# NOTE(review): the scaler is fitted on all of df1, i.e. before any
# train/test split is made — confirm this is intended.
scaler = MinMaxScaler()
scaler.fit(df1[col_scale])
df1[col_scale] = scaler.transform(df1[col_scale])

In [None]:
# Display df1 after encoding and scaling.
df1

In [None]:
# Frequency of each value in the 'DeviceProtection' column.
df1['DeviceProtection'].value_counts()

In [None]:
# Target is the 'Churn' column; the features are every other column of df1.
y = df1['Churn']
X = df1.drop(columns='Churn')

In [None]:
# Shapes of the feature frame and the target series.
X.shape,y.shape

In [None]:
# Select features with SelectKBest, which keeps the features that score highest on the chi-square test against the target

In [None]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

In [None]:
# Score every feature against the target with the chi-square statistic.
bestfeatures = SelectKBest(k=6, score_func=chi2)
fit = bestfeatures.fit(X, y)

# Put feature names and their scores side by side in one table.
dfcolumns = pd.DataFrame(X.columns)
dfscores = pd.DataFrame(fit.scores_)
featureScores = pd.concat([dfcolumns, dfscores], axis=1)
featureScores.columns = ['Features', 'Score']

# Show the six highest-scoring features.
top_scores = featureScores.nlargest(6, 'Score')
print(top_scores)

In [None]:
# Feature matrix restricted to six hand-listed columns, as a NumPy array.
# NOTE(review): presumably these are the top-6 chi-square features printed
# by the feature-scoring cell — confirm against its output.
selected_features = [
    'Contract',
    'OnlineSecurity',
    'TechSupport',
    'tenure',
    'OnlineBackup',
    'MonthlyCharges',
]
X_new = df1[selected_features].values

In [None]:
# Shape of the reduced feature matrix.
X_new.shape

In [None]:
from sklearn.model_selection import train_test_split
# 70/30 train/test split, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_new,
    y,
    test_size=0.3,
    stratify=y,
    random_state=35,
)

In [None]:
# The target (Churn) is imbalanced, so a model trained on it may not generalize well; use SMOTE oversampling to balance the training data

In [None]:
from imblearn.over_sampling import SMOTE
# Oversample only the minority class, and only on the training split.
sm = SMOTE(sampling_strategy='minority', random_state=42)
resampled = sm.fit_resample(X_train, y_train)
X_train_, y_train_ = resampled

In [None]:
# Shapes of the resampled training features and labels.
X_train_.shape,y_train_.shape

In [None]:
# Class counts in the resampled training labels.
y_train_.value_counts()

In [None]:
import dabl
# dabl's SimpleClassifier with a fixed seed.
# NOTE(review): presumably used as a quick baseline sweep — confirm.
aml = dabl.SimpleClassifier(random_state = 42)


In [None]:
# Fit the dabl classifier on the resampled training data.
aml.fit(X_train_,y_train_)

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Depth-5 decision tree with balanced class weights, fitted on the resampled
# training data. random_state is pinned so repeated runs give the same tree,
# consistent with the seeded split and SMOTE steps.
dt = DecisionTreeClassifier(
    max_depth=5,
    class_weight='balanced',
    random_state=42,
)
dt.fit(X_train_, y_train_)

In [None]:
from sklearn.ensemble import AdaBoostClassifier
# AdaBoost ensemble using the decision tree `dt` as its base estimator,
# fitted on the resampled training data. random_state is pinned so the
# boosting run is reproducible, consistent with the seeded split and SMOTE.
adb = AdaBoostClassifier(dt, random_state=42)
adb.fit(X_train_, y_train_)

In [None]:
# Predict labels for the test features with the fitted AdaBoost model.
y_pred_ml = adb.predict(X_test)

In [None]:
from sklearn.metrics import confusion_matrix , classification_report

# Per-class precision/recall/F1 for the AdaBoost predictions on the test split.
print(classification_report(y_test,y_pred_ml))

In [None]:
import tensorflow as tf

**Build a neural network model in tensorflow/keras**

In [None]:
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization

In [None]:
# Feed-forward binary classifier over 6 input features: three ReLU dense
# layers (6, 18, 14 units) with batch normalization after the second and
# third, ending in a single sigmoid output unit.
model = Sequential([
    Dense(6, input_shape=(6,), activation='relu'),
    Dense(18, activation='relu'),
    BatchNormalization(),
    Dense(14, activation='relu'),
    BatchNormalization(),
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy loss, Adam optimizer, accuracy tracked as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Keras carves the validation_split fraction off the END of the arrays,
# before any shuffling. The resampled training set is passed as-is, so the
# last 20% need not have the same class mix as the rest
# (NOTE(review): SMOTE presumably appends its synthetic minority rows last —
# confirm). Shuffle once with a fixed seed so the held-out slice is
# representative and the run is reproducible.
shuffle_order = np.random.default_rng(42).permutation(len(X_train_))
X_fit = np.asarray(X_train_)[shuffle_order]
y_fit = np.asarray(y_train_)[shuffle_order]

# Train for 25 epochs, holding out 20% of the shuffled data for validation.
model.fit(X_fit, y_fit, epochs=25, validation_split=0.2)

In [None]:

# Plot training and validation accuracy per epoch from the last fit.
training_log = model.history.history
for metric_key in ('accuracy', 'val_accuracy'):
    plt.plot(training_log[metric_key])
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['accuracy', 'validation accuracy'])

In [None]:
# Evaluate the network on the test split (loss plus the compiled accuracy metric).
model.evaluate(X_test, y_test)

In [None]:
# Print the layer-by-layer summary of the network.
model.summary()

In [None]:
# Raw sigmoid outputs of the network for the test features.
yp= model.predict(X_test)

In [None]:
# Threshold each predicted score at 0.5: strictly greater than 0.5 -> 1, otherwise 0.
y_pred = [1 if score > 0.5 else 0 for score in yp]

In [None]:
# Number of thresholded predictions.
len(y_pred)

In [None]:
from sklearn.metrics import confusion_matrix , classification_report

# Per-class precision/recall/F1 for the neural-network predictions on the test split.
print(classification_report(y_test,y_pred))