In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [54]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,InputLayer,Input
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard


In [4]:
data = pd.read_csv('Data/Churn_Modelling.csv')
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [8]:
useful_features = [col for col in data.columns if col not in ['RowNumber','CustomerId','Surname']]
useful_features

['CreditScore',
 'Geography',
 'Gender',
 'Age',
 'Tenure',
 'Balance',
 'NumOfProducts',
 'HasCrCard',
 'IsActiveMember',
 'EstimatedSalary',
 'Exited']

In [12]:
data[useful_features].describe(include='all').T

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
CreditScore,10000,,,,650.529,96.6533,350.0,584.0,652.0,718.0,850.0
Geography,10000,3.0,France,5014.0,,,,,,,
Gender,10000,2.0,Male,5457.0,,,,,,,
Age,10000,,,,38.9218,10.4878,18.0,32.0,37.0,44.0,92.0
Tenure,10000,,,,5.0128,2.89217,0.0,3.0,5.0,7.0,10.0
Balance,10000,,,,76485.9,62397.4,0.0,0.0,97198.5,127644.0,250898.0
NumOfProducts,10000,,,,1.5302,0.581654,1.0,1.0,1.0,2.0,4.0
HasCrCard,10000,,,,0.7055,0.45584,0.0,0.0,1.0,1.0,1.0
IsActiveMember,10000,,,,0.5151,0.499797,0.0,0.0,1.0,1.0,1.0
EstimatedSalary,10000,,,,100090.0,57510.5,11.58,51002.1,100194.0,149388.0,199992.0


In [13]:
X = data[useful_features].drop('Exited',axis=1)
y=data[useful_features]['Exited']

In [19]:
missing_cols = [col for col in X.columns if X[col].isnull().any()]
missing_cols

[]

In [20]:
y.value_counts()

0    7963
1    2037
Name: Exited, dtype: int64

In [23]:
cat_cols = [col for col in X.columns if X[col].dtype=='object']
num_cols = [col for col in X.columns if X[col].dtype in ['int64','float64']]

In [27]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.impute import SimpleImputer

In [36]:
numerical_transformer = Pipeline(steps=
                                 [
                                     ('imputer',SimpleImputer(strategy='mean')),
                                     ('scaler',StandardScaler())
                                 ])
categorical_transformer = Pipeline(steps=
                                   [
                                       ('imputer',SimpleImputer(strategy='most_frequent')),
                                       ('one_hot',OneHotEncoder(handle_unknown='ignore'))
                                   ]
                                   )
preprocessor = ColumnTransformer(transformers=[('num',numerical_transformer,num_cols),
                                 ('cat',categorical_transformer,cat_cols)])

In [45]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [46]:
X_train.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
9254,686,France,Male,32,6,0.0,2,1,1,179093.26
1561,632,Germany,Male,42,4,119624.6,2,1,1,195978.86
1670,559,Spain,Male,24,3,114739.92,1,1,0,85891.02
6087,561,France,Female,27,9,135637.0,1,1,0,153080.4
6669,517,France,Male,56,9,142147.32,1,0,0,39488.04


In [52]:
X_train = pd.DataFrame(preprocessor.fit_transform(X_train))
X_train = pd.DataFrame(preprocessor.transform(X_test))

In [53]:
X_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,-0.577496,-0.655786,-0.695393,0.329937,0.808436,-1.540351,-1.025834,-1.019605,0.0,1.0,0.0,0.0,1.0
1,-0.297297,0.390011,-1.389442,-1.218471,0.808436,0.649203,0.974817,0.798883,1.0,0.0,0.0,0.0,1.0
2,-0.525607,0.485083,-0.348369,-1.218471,0.808436,0.649203,-1.025834,-0.72798,0.0,0.0,1.0,1.0,0.0
3,-1.511492,1.91117,1.039728,0.689272,0.808436,0.649203,0.974817,1.221387,0.0,1.0,0.0,0.0,1.0
4,-0.951094,-1.131148,0.692704,0.782839,-0.916688,0.649203,0.974817,0.24756,0.0,0.0,1.0,1.0,0.0


In [58]:
input = Input(shape=(X_train.shape[1],))
model = Dense(64,activation='relu')(input)
model = Dense(32,activation='relu')(model)
model = Dense(1,activation='sigmoid')(model)


AttributeError: 'Tensor' object has no attribute 'summary'

In [56]:
help(Dense)

Help on class Dense in module tensorflow.python.keras.layers.core:

class Dense(tensorflow.python.keras.engine.base_layer.Layer)
 |  Just your regular densely-connected NN layer.
 |  
 |  `Dense` implements the operation:
 |  `output = activation(dot(input, kernel) + bias)`
 |  where `activation` is the element-wise activation function
 |  passed as the `activation` argument, `kernel` is a weights matrix
 |  created by the layer, and `bias` is a bias vector created by the layer
 |  (only applicable if `use_bias` is `True`).
 |  
 |  Note: If the input to the layer has a rank greater than 2, then
 |  it is flattened prior to the initial dot product with `kernel`.
 |  
 |  Example:
 |  
 |  ```python
 |  # as first layer in a sequential model:
 |  model = Sequential()
 |  model.add(Dense(32, input_shape=(16,)))
 |  # now the model will take as input arrays of shape (*, 16)
 |  # and output arrays of shape (*, 32)
 |  
 |  # after the first layer, you don't need to specify
 |  # the size 