<a href="https://colab.research.google.com/github/agnel-kf/Disney-Dataset-Creation-Project/blob/main/Churn_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import tensorflow as tf
import numpy as np

Data Preprocessing

In [5]:
dataset = pd.read_csv('/Churn_Modelling.csv')
X = dataset.iloc[:,3:-1].values
y = dataset.iloc[:,-1].values
y.reshape(len(y),1)
print(X)
print(y)

[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]
[1 0 1 ... 1 1 0]


# Encoding
## Label Encoding

In [6]:
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
from sklearn.compose import ColumnTransformer
le = LabelEncoder()
X[:,2] = le.fit_transform(X[:,2])
print(X)

[[619 'France' 0 ... 1 1 101348.88]
 [608 'Spain' 0 ... 0 1 112542.58]
 [502 'France' 0 ... 1 0 113931.57]
 ...
 [709 'France' 0 ... 0 1 42085.58]
 [772 'Germany' 1 ... 1 0 92888.52]
 [792 'France' 0 ... 1 0 38190.78]]


## One Hot Encoding

In [7]:
ct = ColumnTransformer( transformers = [
                         ('encoder',OneHotEncoder(),[1])
],remainder = 'passthrough')
X = np.array(ct.fit_transform(X))
print(X)

[[1.0 0.0 0.0 ... 1 1 101348.88]
 [0.0 0.0 1.0 ... 0 1 112542.58]
 [1.0 0.0 0.0 ... 1 0 113931.57]
 ...
 [1.0 0.0 0.0 ... 0 1 42085.58]
 [0.0 1.0 0.0 ... 1 0 92888.52]
 [1.0 0.0 0.0 ... 1 0 38190.78]]


In [8]:
from sklearn.model_selection import train_test_split
train_X,val_X,train_y,val_y = train_test_split(X,y,test_size = 0.25,random_state = 1)

# Feature Scaling

In [19]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
train_X = sc.fit_transform(train_X)
val_X = sc.transform(val_X)

In [10]:
print(train_X)

[[ 1.00080032 -0.58207179 -0.57324462 ...  0.64262086  0.98333878
  -1.48050131]
 [ 1.00080032 -0.58207179 -0.57324462 ...  0.64262086 -1.01694352
  -1.56523002]
 [-0.99920032 -0.58207179  1.74445599 ...  0.64262086 -1.01694352
  -1.18210827]
 ...
 [ 1.00080032 -0.58207179 -0.57324462 ...  0.64262086  0.98333878
  -0.13896318]
 [ 1.00080032 -0.58207179 -0.57324462 ...  0.64262086  0.98333878
   0.01979185]
 [-0.99920032  1.71800114 -0.57324462 ...  0.64262086 -1.01694352
  -1.15605488]]


# Building the model
## Initialising the model

In [11]:
ann = tf.keras.models.Sequential()

## Adding the input layer and hidden layer

In [12]:
ann.add(tf.keras.layers.Dense(units = 6,activation = 'relu'))

## Adding the second hidden layer

In [13]:
ann.add(tf.keras.layers.Dense(units = 6,activation = 'relu'))

## Adding the output layer

In [14]:
ann.add(tf.keras.layers.Dense(units = 1,activation = 'sigmoid'))

# Training ANN on the Training Test
## Compiling the ANN

In [15]:
ann.compile(optimizer = 'adam',loss = 'binary_crossentropy',metrics = ['accuracy'])

In [None]:
ann.fit(train_X,train_y,batch_size =32,epochs = 100 )

# Predicting and Evaluating the Test Set

In [24]:
y_preds = ann.predict(val_X)
y_preds = (y_preds > 0.5)
print(np.concatenate((y_preds.reshape(len(y_preds),1), val_y.reshape(len(val_y),1)),1))

[[1 0]
 [1 0]
 [1 0]
 ...
 [1 1]
 [1 0]
 [1 0]]


## Predicting an individual row

In [28]:
example = sc.transform([[ 1,0,0,1,600,40,3,60000,2,1,1,50000]])
example_prediction = ann.predict(example)
print(example_prediction)

[[1.]]
