In [None]:
import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation and pandas copy warnings raised by later cells.
warnings.filterwarnings("ignore")

#import necessary libraries
#pandas for data manipulation and analysis
#matplotlib and seaborn for visualization
#numpy for manipulation of numeric data and arrays

import pandas as pd, matplotlib.pyplot as plt, numpy as np
import seaborn as sns

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split

In [None]:
from tensorflow import keras
from tensorflow.keras.models import Sequential , load_model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Activation
from tensorflow.keras.utils import to_categorical

In [None]:
# Read the churn dataset into a DataFrame.
# NOTE(review): the path is absolute and looks like a Kaggle input directory -- adjust when running elsewhere.
data = pd.read_csv('/kaggle/input/churn-modelling/Churn_Modelling.csv')
data.head() # preview the first 5 rows (the pandas default)

In [None]:
data.isnull().sum() # number of missing values in each column

In [None]:
data.duplicated().sum() # number of rows that are exact duplicates of an earlier row

In [None]:
data.info() # column names, non-null counts and dtypes

In [None]:
data.describe() # descriptive statistics (count, mean, std, quartiles, ...) of the numeric columns

In [None]:
# Columns removed before modelling (row counter, customer id, surname).
cols = ["RowNumber", "CustomerId", "Surname"]
data = data.drop(columns=cols)  # `columns=` already selects the axis

In [None]:
data.head() # preview the frame after the columns above were dropped

In [None]:
# Box plots of the DataFrame's numeric columns, all drawn on one 12x5 figure.
plt.figure(figsize=(12, 5))
data.boxplot()
plt.show()

In [None]:
# Split the frame into features and target; the target is the last column.
# .copy() gives `x` its own data: later cells add and drop columns on `x`,
# and without the copy those assignments would be made on a slice of `data`
# (pandas SettingWithCopyWarning, hidden here by the global warnings filter).
x = data.iloc[:, :-1].copy()  # features: every column except the last
y = data.iloc[:, -1]          # target: the last column

In [None]:
x.head() # preview the feature columns

In [None]:
y.head() # preview the target column

### Removing outliers

In [None]:
# Skewness of each numeric feature. `x` still contains the string columns
# Geography and Gender at this point, so restrict the computation explicitly:
# numeric_only=None is deprecated and no longer skips them in pandas 2.x.
x.skew(numeric_only=True)

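Rule of thumb: if |skew| < 0.5, the distribution is approximately symmetric (close to a normal distribution), so extreme values (outliers) are unlikely to dominate it.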

We can see that the variable "Age" has many outliers.

In [None]:
# Box plot of the Age column only; the trailing ';' suppresses the returned object's repr.
x.boxplot(column=['Age'], return_type='axes');

In [None]:
from scipy.stats import norm

# Distribution of Age compared with a fitted normal curve.
# sns.distplot is deprecated, so the same picture is drawn explicitly:
# density histogram + KDE from histplot, plus the normal pdf fitted to the data.
fig, ax = plt.subplots()
sns.histplot(data.Age, stat="density", kde=True, ax=ax)
mu, sigma = norm.fit(data.Age)  # mean / std of the fitted normal
grid = np.linspace(data.Age.min(), data.Age.max(), 200)
ax.plot(grid, norm.pdf(grid, mu, sigma), color="black", label="fitted normal")
ax.set(title="Age vs. fitted normal distribution", xlabel="Age")
ax.legend();

In [None]:
# Natural log of Age, intended to reduce the skew of the column.
# NOTE(review): in the cells visible here age_log is only plotted; it is never
# written back into `data` or `x`, so the model still sees the untransformed Age.
age_log = np.log(data.Age)

In [None]:
# Distribution of log(Age) compared with a fitted normal curve.
# sns.distplot is deprecated, so the same picture is drawn explicitly:
# density histogram + KDE from histplot, plus the normal pdf fitted to the data.
fig, ax = plt.subplots()
sns.histplot(age_log, stat="density", kde=True, ax=ax)
mu, sigma = norm.fit(age_log)  # mean / std of the fitted normal
grid = np.linspace(age_log.min(), age_log.max(), 200)
ax.plot(grid, norm.pdf(grid, mu, sigma), color="black", label="fitted normal")
ax.set(title="log(Age) vs. fitted normal distribution", xlabel="log(Age)")
ax.legend();

### Converting categorical variables:

In [None]:
data.select_dtypes(exclude='number') # show only the non-numeric columns, i.e. the ones that need encoding

#### Transform column : gender

In [None]:
ohe = OneHotEncoder() # one-hot encoder used for the "Gender" column
transformed = ohe.fit_transform(x[['Gender']]) # learn the Gender categories and encode the column
#print(transformed.toarray())

In [None]:
print(ohe.categories_) # categories the encoder learned for "Gender"

In [None]:
# Convert the encoded Gender matrix to a dense numpy array and add it to x,
# one indicator column per category, each named after its category.
x[ohe.categories_[0]] = transformed.toarray() 
x.head()

#### Transform column Geography:

In [None]:
data.Geography.unique() # distinct values of the Geography column

In [None]:
ohe1 = OneHotEncoder() # one-hot encoder used for the "Geography" column
transformed1 = ohe1.fit_transform(x[['Geography']]) # learn the Geography categories and encode the column
#print(transformed1.toarray())

In [None]:
print(ohe1.categories_) # categories the encoder learned for "Geography"

In [None]:
# Convert the encoded Geography matrix to a dense numpy array and add it to x,
# one indicator column per category, each named after its category.
x[ohe1.categories_[0]] = transformed1.toarray() 
x.head()

In [None]:
# Remove the "Spain" indicator column -- presumably to keep one Geography
# category as the implicit baseline (avoids a redundant dummy column).
x = x.drop(columns=["Spain"])
x.head()

In [None]:
# Remove the two original text columns and the "Male" indicator column.
cols = ["Geography", "Gender", "Male"]
x = x.drop(columns=cols)  # `columns=` already selects the axis
x.head()

In [None]:
x.shape # (rows, feature columns) after encoding; the model below is built for 11 input features

In [None]:
# Split features x and target y into training and test sets.
# test_size=0.2 -> 80% training / 20% testing; random_state=0 makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=0)

### Standard Scaler

In [None]:
scaler = StandardScaler() # initialize the standard scaler object

# Columns to scale; the remaining columns of X_train/X_test are left unchanged.
cols=['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'EstimatedSalary']
X_train[cols]=scaler.fit_transform(X_train[cols]) # fit the scaler on the training set and scale it
X_test[cols]=scaler.transform(X_test[cols]) # scale the test set with the statistics learned from the training set (no re-fit)

In [None]:
X_train.head() # preview the scaled training features

In [None]:
X_train.shape # (training rows, feature columns)

### Implementing our first Deep learning model :

In [None]:
model = Sequential() # empty sequential model; layers are added in the next cell

In [None]:
# First hidden layer: 11 input features -> 6 units, sigmoid activation.
model.add(Dense(6, input_shape=(11,))) # 6 = (11 input variables + 1 target variable) / 2
model.add(Activation('sigmoid'))

# Second hidden layer: 6 units, sigmoid activation.
model.add(Dense(6))
model.add(Activation('sigmoid'))

# Output layer: a single sigmoid unit for the binary target.
model.add(Dense(1))
model.add(Activation('sigmoid'))

In [None]:
# Binary cross-entropy loss, accuracy metric, Adam optimizer with default settings.
model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='adam')

In [None]:
# Train for 100 epochs with mini-batches of 10; the training history is kept in `hist`.
hist = model.fit(X_train, y_train, 
                batch_size= 10,
                epochs= 100, 
                verbose=2,
                validation_split= 0.2)
# With the 10000 rows assumed by the original note: 8000 train + 2000 test (test_size=0.2)
# validation_split=0.2 holds out 1600 of the 8000 training rows => 6400 rows are used for training
# 6400 rows / batch_size 10 => 640 steps per epoch

val_loss is greater than the training loss, so next we try changing the activation function.

In [None]:
model.summary() # print the layers with their output shapes and parameter counts

dense : params = 11 (input variables) × 6 (units in layer_1) + 6 (biases of layer_1) = 72

dense_1 : params = 6 (units in layer_1) × 6 (units in layer_2) + 6 (biases of layer_2) = 42


## Changing the activation function:

In [None]:
# Build a NEW network for the ReLU experiment. Calling model.add() on the
# existing, already-trained `model` would append these layers after its sigmoid
# output layer instead of replacing the hidden activations.
model = Sequential()

# First hidden layer: 11 input features -> 6 units, ReLU activation.
model.add(Dense(6, input_shape=(11,))) # 6 = (11 input variables + 1 target variable) / 2
model.add(Activation('relu'))

# Second hidden layer: 6 units, ReLU activation.
model.add(Dense(6))
model.add(Activation('relu'))

# Output layer stays sigmoid: the target is binary.
model.add(Dense(1))
model.add(Activation('sigmoid'))

In [None]:
# Compile with the same loss / metric / optimizer as the first experiment.
model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='adam')

# Train with the same settings (batch size 10, 100 epochs, 20% of the training rows held out for validation).
hist = model.fit(X_train, y_train, 
                batch_size= 10,
                epochs= 100, 
                verbose=2,
                validation_split= 0.2)

In [None]:
# Predicted probabilities for the test set, thresholded at 0.5 into boolean labels.
y_pred = model.predict(X_test) > 0.5

In [None]:
#y_test = y_test.values

In [None]:
# Print the first 10 true labels next to the corresponding predictions.
# y_test is a pandas Series that keeps the row labels of the original frame after
# the shuffled split, so it must be indexed by position: y_test[i] would look up
# the row *labelled* i (KeyError, or a row that does not match y_pred[i]).
for i in range(10):
    print(y_test.iloc[i], y_pred[i])

In [None]:
"""
confusion matrix
"""

### Changing parameters of Adam Optimizer

In [None]:
# Adam with a custom learning rate (0.003) and learning-rate decay (0.0001).
# NOTE(review): this recompiles the existing `model`; compiling does not appear to
# reset the weights, so the fit below would continue from the previous training
# rather than start fresh -- confirm that this is intended.
# NOTE(review): `keras.optimizers.legacy` may not exist in every Keras release -- verify against the installed version.
opt = keras.optimizers.legacy.Adam(learning_rate=0.003, decay=  0.0001)
model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer=opt)

In [None]:
# Train with the same settings as before (batch size 10, 100 epochs, 20% validation split);
# `hist` is overwritten with the history of this run.
hist = model.fit(X_train, y_train, 
                batch_size= 10,
                epochs= 100, 
                verbose=2,
                validation_split= 0.2)

In [None]:
#create a model-building function + grid search over the parameters: batch size [10, 20], epochs [100, 120]

In [None]:
def create_model():
    """Build and compile a fresh binary classifier for the 11-feature input.

    A new ``Sequential`` instance is created on every call, so the function can
    be called repeatedly (for example once per grid-search fit) without touching
    any previously built or trained model.

    Architecture: Dense(6) + ReLU -> Dense(6) + ReLU -> Dense(1) + sigmoid.

    Returns:
        The model, compiled with binary cross-entropy loss, the accuracy metric
        and an Adam optimizer (learning_rate=0.003, decay=0.0001).
    """
    # Local model: previously the layers were appended to the notebook-global
    # `model`, which accumulated layers across calls and fails once that name is
    # rebound to something that is not a Sequential model.
    model = Sequential()

    model.add(Dense(6, input_shape=(11,))) #6 = (11 input variables + 1 target variable) / 2
    model.add(Activation('relu'))

    model.add(Dense(6))
    model.add(Activation('relu'))

    # Single sigmoid unit: the target is binary.
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    opt = keras.optimizers.legacy.Adam(learning_rate=0.003, decay=  0.0001)
    model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer=opt)

    return model

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# NOTE(review): this wrapper module may not be available in newer Keras/TensorFlow
# releases -- verify against the installed version.
from keras.wrappers.scikit_learn import KerasClassifier

# Wrap the model-building function so scikit-learn can treat it as an estimator.
# The keyword is `build_fn` (it was misspelled `build_tn`, so create_model was
# never registered as the builder).
model = KerasClassifier(build_fn = create_model)

# Hyper-parameter grid: 2 batch sizes x 2 epoch counts = 4 combinations.
parameters = {'batch_size':[10,20],
             'epochs':[100,120]}

# 10-fold cross-validated search scored on accuracy (4 combinations x 10 folds = 40 fits).
grid_search = GridSearchCV(estimator = model, 
                           param_grid = parameters, 
                           scoring='accuracy',
                           cv=10)
grid_search= grid_search.fit(X_train, y_train)

# Best parameter combination and its mean cross-validated accuracy.
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_