<a href="https://colab.research.google.com/github/apriandito/dl-python/blob/main/ANN_Predicting_Customer_Churn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Predicting Customer Churn**

### **Import Packages**

In [None]:
# Import Packages
import pandas as pd
import matplotlib.pyplot as plt


# Import Modules
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics

### **Load Data**

In [None]:
# Load the churn dataset into a DataFrame.
# The file is fetched over HTTP from GitHub and is semicolon-delimited.
url = 'https://raw.githubusercontent.com/rc-dbe/bigdatacertification/master/dataset/churn.csv'
df = pd.read_csv(
    url,
    sep=';',
)

In [None]:
# Preview the first 5 rows of the raw data
df.head(5)

In [None]:
# Print a concise summary of the raw frame (columns, non-null counts, dtypes);
# the non-null counts reveal which columns contain missing values
df.info()

### **Data Preprocessing**

##### **Handling Missing Values**

In [None]:
# Compute the median of TotalCharges (NaN entries are skipped by default)
median = df['TotalCharges'].median()

# Replace missing values with the median.
# Assign the result back to the column instead of calling
# `fillna(..., inplace=True)` on `df['TotalCharges']`: the in-place call acts
# on a column selection, which raises a FutureWarning in pandas 2.x and
# leaves `df` unchanged once Copy-on-Write is enabled.
df['TotalCharges'] = df['TotalCharges'].fillna(median)

# Fail loudly if the imputation did not take effect
assert df['TotalCharges'].notna().all(), "TotalCharges still contains missing values"

# Check for Missing Values
df.info()

##### **Encode Categorical Variable**

In [None]:
# Categorical columns to one-hot encode. Listed once so that the encoder input,
# the generated column names and the dropped columns cannot drift apart.
categorical_cols = [
    'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'PhoneService',
    'InternetService', 'MultipleLines', 'OnlineSecurity', 'OnlineBackup',
    'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
    'Contract', 'PaperlessBilling', 'PaymentMethod',
]

# Encoder
# `sparse_output=False` makes the encoder return a dense array; it replaces the
# `sparse` argument, which was deprecated in scikit-learn 1.2 and removed in 1.4.
encoder = OneHotEncoder(sparse_output=False)

# Encode Categorical Data
# `get_feature_names_out` replaces `get_feature_names` (removed in scikit-learn 1.2).
# The original index is reused so the concat below aligns row by row.
df2 = pd.DataFrame(
    encoder.fit_transform(df[categorical_cols]),
    columns=encoder.get_feature_names_out(categorical_cols),
    index=df.index,
)

# Replace Categorical Data with Encoded Data
# `df` itself is left untouched (no `inplace=True`), so this cell can be re-run
# without a KeyError on the already-dropped columns.
df_encoded = pd.concat([df.drop(columns=categorical_cols), df2], axis=1)

# Replace Churn Values
# Map the labels to 0/1 and assign back; `astype(int)` raises if any label
# other than 'No'/'Yes' is present instead of silently producing NaN.
df_encoded['Churn'] = df_encoded['Churn'].map({'No': 0, 'Yes': 1}).astype(int)

# Drop Unwanted Column
df_encoded = df_encoded.drop(columns='customerID')

In [None]:
# Preview the first 5 rows of the encoded frame
df_encoded.head(5)

##### **Normalization**

In [None]:
# Initialize min-max scaler
mm_scaler = MinMaxScaler()

# Every column except the target is scaled
column_names = list(df_encoded.columns)
column_names.remove('Churn')

# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so test-set min/max values influence the scaling — consider fitting
# on the training portion only.
scaled_values = mm_scaler.fit_transform(df_encoded[column_names])

# Work on a copy so the encoded frame stays unscaled
df_norm = df_encoded.copy()
df_norm[column_names] = scaled_values
df_norm = df_norm.sort_index()

In [None]:
# Preview the first 5 rows of the normalized frame
df_norm.head(5)

### **Set Feature**

In [None]:
# The target column is the only one excluded from the feature matrix
feature = ['Churn']

# Target vector
train_target = df_norm["Churn"]

# Feature matrix: everything except the target
train_feature = df_norm.drop(columns=feature)

In [None]:
# Preview the first 5 rows of the feature matrix
train_feature.head()

### **Split Data**

In [None]:
# Shuffle and split: 70% training / 30% test, seeded for a repeatable split
X_train, X_test, y_train, y_test = train_test_split(
    train_feature,
    train_target,
    test_size=0.3,
    shuffle=True,
    random_state=1,
)

In [None]:
# Preview the first 5 rows of the training features
X_train.head(5)

### **Training**

In [None]:
# Fitting Model
# Multi-layer perceptron with three hidden layers of 5 units each.
# `random_state` fixes the weight initialisation and batch shuffling so the
# training run (and every metric computed from it) is reproducible; the value
# matches the seed used for the train/test split.
mlp = MLPClassifier(hidden_layer_sizes=(5, 5, 5),
                    activation='relu',
                    solver='adam',
                    max_iter=10000,
                    random_state=1,
                    verbose=True)

# Train on the training split; `verbose=True` prints the loss per iteration
mlp.fit(X_train, y_train)

In [None]:
# Summarise the fitted network: layer count, iterations run and final loss
training_summary = (
    ('Number of Layer =', mlp.n_layers_),
    ('Number of Iteration =', mlp.n_iter_),
    ('Current loss computed with the loss function =', mlp.loss_),
)
for label, value in training_summary:
    print(label, value)

### **Evaluation**

In [None]:
# Predict class labels for the held-out test set with the fitted model;
# `y_pred` is compared against `y_test` in the evaluation cells
y_pred = mlp.predict(X_test)

In [None]:
# Confusion Matrix (rows: actual class, columns: predicted class)
cnf_matrix = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)
cnf_matrix

In [None]:
# Report Accuracy, Precision, Recall and F1 Score on the test set
score_functions = (
    ("Accuracy:", metrics.accuracy_score),
    ("Precision:", metrics.precision_score),
    ("Recall:", metrics.recall_score),
    ("F1 Score:", metrics.f1_score),
)
for label, score_fn in score_functions:
    print(label, score_fn(y_test, y_pred))

In [None]:
# Set Size and Style
plt.rcParams['figure.figsize'] = (10, 10)
plt.style.use('ggplot')

# ROC Curve
# Use the predicted probability of the positive class (column 1) as the score.
y_pred_proba = mlp.predict_proba(X_test)[:, 1]

# The curve must be built from the continuous scores, not from the hard 0/1
# predictions: with `y_pred` there is only one threshold, so the "curve"
# degenerates to three points and disagrees with the AUC computed below.
fpr, tpr, _ = metrics.roc_curve(y_test, y_pred_proba)
auc = metrics.roc_auc_score(y_test, y_pred_proba)

plt.plot(fpr, tpr, label="MLP, auc=" + str(auc))
plt.title('ROC Curve - MLP')
plt.xlabel('false positive rate')
plt.ylabel('true positive rate')
plt.legend(loc=4)
plt.show()