In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler,LabelEncoder,OneHotEncoder
from sklearn.pipeline import Pipeline
from scikeras.wrappers import KerasClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
import pickle





In [2]:
# Load the churn dataset from a CSV file in the current working directory.
csv_path = 'Churn_Modelling.csv'
data = pd.read_csv(csv_path)

In [3]:
# NOTE: this cell rebinds `data`. Re-running it without first re-running the
# load cell raises KeyError, because the dropped columns are already gone.

# Drop the columns that are not used as model features.
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])

# Encode categorical variables
# 'Gender' is replaced by integer codes; the fitted encoder is kept so it can
# be pickled and reused later.
label_encoder_gender = LabelEncoder()
data['Gender'] = label_encoder_gender.fit_transform(data['Gender'])

# OneHotEncode 'Geography'
# (OneHotEncoder comes from the import cell at the top of the notebook.)
ohe = OneHotEncoder()
geo_encoder = ohe.fit_transform(data[['Geography']])

# Build the one-hot frame on the SAME index as `data`. pd.concat(axis=1)
# aligns rows by index label, so a frame with a fresh 0..n-1 index would
# introduce NaN rows whenever `data` does not carry the default index.
geo_encoded_df = pd.DataFrame(
    geo_encoder.toarray(),
    columns=ohe.get_feature_names_out(['Geography']),
    index=data.index,
)

## Combine one hot encoder columns with original data
data = pd.concat([data.drop(columns='Geography'), geo_encoded_df], axis=1)

# Divide the dataset into independent and dependent features
X = data.drop(columns='Exited')
y = data['Exited']

## Split the data in training and testing set
# NOTE: both encoders above were fit on the full dataset before this split;
# only the scaler below is fit on the training portion alone.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

## Scale these feature
# Fit on the training split only, then apply the same transform to the test
# split. After this step X_train / X_test are NumPy arrays, not DataFrames.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
## Save the encoders and scaler

# Each fitted preprocessing object is pickled to its own file, in this order.
artifacts = (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('one_hot_encoder_geo.pkl', ohe),
    ('scaler.pkl', scaler),
)

for pkl_name, fitted_obj in artifacts:
    with open(pkl_name, 'wb') as file:
        pickle.dump(fitted_obj, file)

In [16]:
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Your model builder function
def create_model(neurons=32, layers=1, meta=None):
    """Build and compile a fully connected binary classifier.

    Parameters
    ----------
    neurons : int, default 32
        Number of units in every hidden layer.
    layers : int, default 1
        Number of hidden layers. The first hidden layer is always added, so
        values below 1 still produce a single hidden layer.
    meta : dict or None, default None
        Metadata dict that SciKeras passes to builder functions declaring a
        ``meta`` parameter. Its ``n_features_in_`` entry sets the input
        width. When None (e.g. the function is called directly), the width
        falls back to the notebook-level ``X_train.shape[1]``.

    Returns
    -------
    tensorflow.keras.models.Sequential
        Model compiled with the Adam optimizer, binary cross-entropy loss
        and the accuracy metric.
    """
    # Prefer the feature count of the data actually being fitted over the
    # notebook global, so the builder does not depend on hidden kernel state.
    if meta is not None:
        n_features = meta["n_features_in_"]
    else:
        n_features = X_train.shape[1]

    model = Sequential()
    # Declare the input explicitly instead of passing `input_shape=` to the
    # first Dense layer.
    model.add(tf.keras.Input(shape=(n_features,)))
    model.add(Dense(neurons, activation='relu'))
    for _ in range(layers - 1):
        model.add(Dense(neurons, activation='relu'))
    # Single sigmoid unit: one probability per sample.
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Correct usage with scikeras
# random_state seeds TensorFlow's random number generators for every fit, so
# the weight initialisation (and hence the reported best score / params) is
# repeatable across runs instead of changing on each execution.
model = KerasClassifier(model=create_model, verbose=0, random_state=42)

# Correct param grid using `model__` prefix for model-level args
# `model__*` entries are routed to create_model(); `epochs` and `batch_size`
# are parameters of the KerasClassifier wrapper itself.
param_grid = {
    'model__neurons': [16, 32, 64],
    'model__layers': [1, 2, 3],
    'epochs': [50, 100],
    'batch_size': [16, 32, 64]
}

# 3 * 3 * 2 * 3 = 54 parameter combinations, each evaluated with 3-fold CV
# (162 fits), run in parallel on all available cores (n_jobs=-1).
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train, y_train)

In [17]:
# Report the best mean cross-validated score and the parameters that produced it.
best_score = grid_result.best_score_
best_params = grid_result.best_params_
print("Best: %f using %s" % (best_score, best_params))

Best: 0.858500 using {'batch_size': 32, 'epochs': 50, 'model__layers': 1, 'model__neurons': 16}
