In [13]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.pipeline import Pipeline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.callbacks import EarlyStopping
from scikeras.wrappers import KerasClassifier
import pickle

In [4]:
data = pd.read_csv('./Churn_Modelling.csv')

data = data.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)

label_encoder_gender = LabelEncoder()
data['Gender'] = label_encoder_gender.fit_transform(data['Gender'])

ohe_geography = OneHotEncoder(handle_unknown='ignore')
geo_encoded = ohe_geography.fit_transform(data[['Geography']]).toarray()
geo_encoded_df = pd.DataFrame(geo_encoded, columns=ohe_geography.get_feature_names_out(['Geography']))

data = pd.concat([data.drop('Geography', axis=1), geo_encoded_df], axis=1)

X = data.drop('Exited', axis=1)
y = data['Exited']

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
with open('label_encoder_gender.pkl', 'wb') as file:
    pickle.dump(label_encoder_gender, file)
    
with open('ohe_geography.pkl', 'wb') as file:
    pickle.dump(ohe_geography, file)
    
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)

In [22]:
# Define a function to create the model (different way of creating a model)
def create_model(neurons=32, layers=1):
    model = Sequential()
    model.add(Input(shape=(X_train.shape[1],)))
    model.add(Dense(neurons, activation='relu'))
    
    for _ in range(layers - 1): 
        model.add(Dense(neurons, activation='relu'))
        
    model.add(Dense(1, activation='sigmoid')) # Output layer
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) # Compile model
    
    return model

In [23]:
# Create a KerasClassifier
model = KerasClassifier(layers=1, neurons=32, model=create_model, epochs=50, batch_size=10, verbose=0)

In [24]:
# Define the grid search parameters
param_grid = {
    'neurons': [16, 32, 64, 128], 
    'layers': [1, 2],
    'epochs': [50, 100]
}

In [25]:
# Perform grid search
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train, y_train)

# Print the best parameters
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

2024-10-23 21:42:37.753090: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-10-23 21:42:37.763558: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-10-23 21:42:37.780396: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024

Best: 0.856750 using {'epochs': 50, 'layers': 1, 'neurons': 16}
