# **Churn modelling using ANN**

## Setting up the development environment by importing required libraries and modules.

In [None]:
import pandas as pd 
import numpy as np 
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder, OneHotEncoder,StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,accuracy_score

#### NumPy: Provides support for efficient numerical computation.
#### Pandas: A convenient library that supports dataframes. Working with pandas makes many crucial data operations easier.
### sklearn.preprocessing:
####  * LabelEncoder: Encodes labels with a value between 0 and n_classes-1, where n_classes is the number of distinct labels.
####  * OneHotEncoder: One-hot encoding is used in machine learning as a method to quantify categorical data. In short, this method produces a vector with length equal to the number of categories in the data set.
#### * StandardScaler: Standardizes features by removing the mean and scaling to unit variance.
### sklearn.compose:
#### * ColumnTransformer: This estimator allows different columns or column subsets of the input to be transformed separately; the features generated by each transformer are concatenated to form a single feature space.
### sklearn.model_selection:
#### * train_test_split: It splits data in training and test set as x_train,x_test,y_train,y_test. 
### sklearn.metrics:
#### * confusion_matrix: Computes the confusion matrix to evaluate the accuracy of a classification.
#### * accuracy_score: Computes the fraction of samples whose predicted label matches the true label. (In multilabel classification, this function computes subset accuracy: the set of labels predicted for a sample must exactly match the corresponding set of labels in y_true.)

## importing dataset


In [None]:
# Load the bank-customer churn dataset into a DataFrame.
data = pd.read_csv(filepath_or_buffer="../input/churnmodelling/Churn_Modelling.csv")

In [None]:
# Preview the first five rows of the dataset.
data.head(n=5)

In [None]:
# Show the dimensions of the loaded DataFrame as (rows, columns).
data.shape

## About dataset 
 ### * This data set contains details of a bank's customers. The target variable is binary and indicates whether the customer left the bank (closed their account) or continues to be a customer.
 ### * The dataset consists of 14 columns, of which 13 are independent variables and the 14th column is the dependent variable.
 ### * It consists of 10000 rows.

# Data Preprocessing

## Separating the independent variables (x) and the dependent variable (y).

In [None]:
# Features: every column from index 3 up to (but excluding) the last one.
# Target: the last column.
x, y = data.iloc[:, 3:-1], data.iloc[:, -1]

# Preview both, each under its own label.
print("x:", x.head(), sep="\n")
print("  y:", y.head(), sep="\n")

### As we can see, we have two categorical features: 1. Geography, 2. Gender
### Checking their unique values so that we can encode them.

In [None]:
# Print the distinct values of the two categorical columns
# (positions 1 and 2 of the feature frame).
for categorical_position in (1, 2):
    print(x.iloc[:, categorical_position].unique())

### Converting the data to NumPy arrays by taking only the values of each column

In [None]:
# Re-slice the DataFrame and keep only the underlying arrays:
# features from columns 3..second-to-last, target from the last column.
feature_frame = data.iloc[:, 3:-1]
target_series = data.iloc[:, -1]
x, y = feature_frame.values, target_series.values
x

### From the above we can see that Gender has two values (Male and Female), so we will label-encode the Gender column. The Geography column has 3 values (France, Spain, Germany), so we will one-hot encode it.

In [None]:
# Label-encode the column at index 2 of x in place:
# learn the classes first, then map the column to integer codes.
Le = LabelEncoder().fit(x[:, 2])
x[:, 2] = Le.transform(x[:, 2])
print(Le.classes_)


In [None]:
# One-hot encode the column at index 1; every other column is passed
# through unchanged and appended after the encoded columns.
ct = ColumnTransformer(
    transformers=[("OneHotEncoder", OneHotEncoder(), [1])],
    remainder="passthrough",
)
encoded_features = ct.fit_transform(x)
x = np.array(encoded_features)
x

### So now, after encoding, we get the following values:
 ### Female: 0, Male: 1
 ### France: 1 0 0, Spain: 0 0 1, Germany: 0 1 0

### Splitting the data into training and test sets.

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

## Scaling the features so that they can be fed into the neural network

In [None]:
# Standardize the features (zero mean, unit variance).
# The scaler is fitted on the training set ONLY; the test set is then
# transformed with those same training statistics. Refitting on the test set
# would scale train and test data differently and would also overwrite the
# statistics that any later `sc.transform(...)` call relies on.
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

# Building Artificial Neural Network

## Initializing ANN

In [None]:
# Start with an empty sequential model; layers are stacked onto it afterwards.
ann = tf.keras.Sequential()

##  hidden Layers

In [None]:
# Stack three identical fully connected hidden layers (6 units, ReLU).
for _ in range(3):
    ann.add(tf.keras.layers.Dense(units=6, activation="relu"))

## Output Layer

In [None]:
# Single sigmoid unit: the model outputs one value in (0, 1) per sample.
output_layer = tf.keras.layers.Dense(units=1, activation="sigmoid")
ann.add(output_layer)


# Training ANN

## Compiling ANN

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss,
# and accuracy reported as the monitoring metric.
ann.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

## Fitting ANN

In [None]:
# Train on the scaled training set for 200 epochs in mini-batches of 64.
ann.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=200,
)

## As we can see, the model reaches a training accuracy of about 86%

## Making prediction and evaluating results.

In [None]:
# Predict on the test set and threshold the sigmoid outputs at 0.5
# to obtain boolean class predictions.
y_pred = ann.predict(x_test) > 0.5
y_pred


### calculating accuracy and confusion matrix

In [None]:
# Compute both evaluation results first, then report them.
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print('confusion_matrix:', cm, sep="\n")
print("acuuracy:", accuracy)

### Predicting whether the following customer will leave the bank:
### Geography: Spain
### Credit Score : 600
### Gender: Male
### Age: 40 years old
### Tenure: 3 years
### Balance: 60000 usd
### Number of Products: 2
### Does this customer have a credit card ? Yes
### Is this customer an Active Member: Yes
### Estimated Salary: 50000 usd

In [None]:
# Single-customer prediction.
# The first three values are the one-hot Geography columns. OneHotEncoder
# orders categories alphabetically (France, Germany, Spain), so Spain must be
# encoded as 0, 0, 1 -- the previous 1, 0, 0 described a customer from France.
# Remaining values, in order: Credit Score, Gender (Male = 1), Age, Tenure,
# Balance, Number of Products, Has Credit Card, Is Active Member,
# Estimated Salary.
new_customer = np.array([[0, 0, 1, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]])

# Reuse the already-fitted scaler (transform only, never refit on a new sample).
new_pred = ann.predict(sc.transform(new_customer))
new_pred = (new_pred > 0.5)
print(new_pred)