<a href="https://colab.research.google.com/github/emincingoz/Machine-Learning-Collection/blob/main/Customer_Churn_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Customer Churn Analysis

## Data Preprocessing

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
data = pd.read_csv('https://raw.githubusercontent.com/emincingoz/Machine-Learning-Collection/main/Datasets/Churn_Modelling.csv')
data

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


### Check Missing Values

In [3]:
print(data.isnull().sum())

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64


### Data Slicing

In [4]:
x = data.iloc[:, 3:13].values     # [CreditScore, EstimatedSalary]
y = data.iloc[:, 13].values       # Exited

### Data Encoding

In [5]:
from sklearn.preprocessing import LabelEncoder

le_geography = LabelEncoder()
x[:, 1] = le_geography.fit_transform(x[:, 1])

le_gender = LabelEncoder()
x[:, 2] = le_gender.fit_transform(x[:, 2])   # Female -> 0, Male -> 1


from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

ohe =ColumnTransformer([("ohe", OneHotEncoder(dtype = float), [1])], remainder = "passthrough")
x = ohe.fit_transform(x)
x = x[:, 1:]     # Dummy variable removed
print(x)

[[0.0 0.0 619 ... 1 1 101348.88]
 [0.0 1.0 608 ... 0 1 112542.58]
 [0.0 0.0 502 ... 1 0 113931.57]
 ...
 [0.0 0.0 709 ... 0 1 42085.58]
 [1.0 0.0 772 ... 1 0 92888.52]
 [0.0 0.0 792 ... 1 0 38190.78]]


In [6]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.33, random_state = 0)

### Data Scaling

In [7]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train = sc.fit_transform(x_train)
X_test = sc.fit_transform(x_test)

## Artificial Neural Network Build

In [8]:
import keras
from keras.models import Sequential
from keras.layers import Dense         # for layers

# Create an ann model
ann = Sequential()

# Adding a hidden Layer to artificial neural network model
# units: Number of neuron
# activation: Activation function
  # relu: Rectified Linear Unit Activation Function (max(x, 0))
ann.add(Dense(units = 6, 
              #init = 'uniform', 
              activation = 'relu', 
              input_dim = 11))

# Hidden Layer
ann.add(Dense(units = 6, 
              #init = 'uniform', 
              activation = 'relu'))

# Output Layer
ann.add(Dense(units = 1,
              #init = 'uniform',
              activation = 'sigmoid'))

ann.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 6)                 72        
                                                                 
 dense_1 (Dense)             (None, 6)                 42        
                                                                 
 dense_2 (Dense)             (None, 1)                 7         
                                                                 
Total params: 121
Trainable params: 121
Non-trainable params: 0
_________________________________________________________________


### Compiling Layers and Training


In [9]:
ann.compile(optimizer = 'adam',
            loss = 'binary_crossentropy',
            metrics = ['accuracy'])

ann.fit(X_train, y_train, epochs = 50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7faea9111950>

### Making a Prediction

In [10]:
y_pred = ann.predict(X_test)
print(y_pred)

[[0.315771  ]
 [0.29263633]
 [0.13466904]
 ...
 [0.20480934]
 [0.57217294]
 [0.03467488]]


In [11]:
y_pred = (y_pred > 0.5)
print(y_pred)

[[False]
 [False]
 [False]
 ...
 [False]
 [ True]
 [False]]


### Confusion Matrix

In [15]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
 
cm = confusion_matrix(y_pred, y_test)
print(cm)

[[2503  364]
 [ 114  319]]
