In [26]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, OneHotEncoder,StandardScaler
from sklearn.pipeline import Pipeline
from scikeras.wrappers import KerasClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
import pickle

In [27]:
data = pd.read_csv("Churn_Modelling.csv")

In [15]:
data = data.drop(["RowNumber","CustomerId","Surname"], axis=1)

In [16]:
label_encoder_gender = LabelEncoder()
data["Gender"] = label_encoder_gender.fit_transform(data["Gender"])

In [17]:
onehot_encoder_geo = OneHotEncoder(handle_unknown="ignore")
geo_encoder = onehot_encoder_geo.fit_transform(data[["Geography"]]).toarray()
geo_encoder_df = pd.DataFrame(geo_encoder,columns=onehot_encoder_geo.get_feature_names_out(["Geography"]))
geo_encoder_df

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,1.0,0.0


In [22]:
data = pd.concat([data.drop("Geography", axis=1), geo_encoder_df], axis=1)
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [23]:
X = data.drop("Exited", axis=1)
y = data["Exited"]

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [25]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [28]:
with open("label_encoder_gender.pkl", "wb") as file:
    pickle.dump(label_encoder_gender, file)
    
    
with open("onehot_encoder_geo.pkl", "wb") as file:
    pickle.dump(onehot_encoder_geo,file)
    
with open("scaler.pkl", "wb") as file:
    pickle.dump(scaler,file)

In [35]:
#define a function to create a model and try different parameter()

def create_models(neurons=32, layers=1):
    model=Sequential()
    model.add(Dense(neurons, activation="relu",input_shape= (X_train.shape[1], )))


    for _ in range(layers-1):
        model.add(Dense(neurons, activation="relu"))
        
        
    model.add(Dense(1, activation="sigmoid"))
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    
    return model   

In [42]:
model = KerasClassifier(
    model=create_models,
    layers=1,
    neurons=32,
    fit__epochs=50,
    fit__batch_size=10,
    verbose=0
)


In [44]:
param_grid = {
    "neurons": [16, 32, 64],       
    "layers": [1, 2, 3],           
    "fit__epochs": [20, 50]
}

In [45]:
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train,y_train)

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Best: 0.856874 using {'fit__epochs': 20, 'layers': 1, 'neurons': 16}
