In [136]:
import pandas as pd
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import StandardScaler,LabelEncoder,OneHotEncoder
from sklearn.pipeline import Pipeline
from scikeras.wrappers import KerasClassifier  # ✅ Correct
# from scikeras.wrappers import KerasClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
from scikeras.wrappers import KerasRegressor

In [137]:
df=pd.read_csv('./Churn_Modelling.csv')
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [138]:
data=df.drop(['RowNumber','CustomerId','Surname'],axis=1)

In [139]:
label_encoder_gender=LabelEncoder()

data['Gender']=label_encoder_gender.fit_transform(data['Gender'])

In [140]:
# One hot encoder Geography

one_hot_enc_geo=OneHotEncoder(handle_unknown='ignore')
geo_encoded=one_hot_enc_geo.fit_transform(data[['Geography']]).toarray()
_df=pd.DataFrame(geo_encoded,columns=one_hot_enc_geo.get_feature_names_out(['Geography']))

_df

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,1.0,0.0


In [141]:
data=pd.concat([data.drop('Geography',axis=1),_df],axis=1)
data

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.00,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.80,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.00,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.10,0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,0,1.0,0.0,0.0
9996,516,1,35,10,57369.61,1,1,1,101699.77,0,1.0,0.0,0.0
9997,709,0,36,7,0.00,1,0,1,42085.58,1,1.0,0.0,0.0
9998,772,1,42,3,75075.31,2,1,0,92888.52,1,0.0,1.0,0.0


In [142]:
# split the data
X=data.drop('EstimatedSalary',axis=1)
y=data['EstimatedSalary']

In [143]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)
# scale
scalar=StandardScaler()

X_train=scalar.fit_transform(X_train)
X_test=scalar.transform(X_test)
y_train
# y_train=scalar.fit_transform(y_train)


9254    179093.26
1561    195978.86
1670     85891.02
6087    153080.40
6669     39488.04
          ...    
5734     69381.05
5191       706.50
5390     92220.12
860      97508.04
7270     53581.14
Name: EstimatedSalary, Length: 8000, dtype: float64

In [144]:
import pickle
with open('label_encoder_gender.pkl','wb') as file:
    pickle.dump(label_encoder_gender,file)
with open('one_ht_enc_geo.pkl','wb') as file:
    pickle.dump(one_hot_enc_geo,file)
with open('scalar.pkl','wb') as file:
    pickle.dump(scalar,file)

In [145]:
# Define functions create model and tray diffrent parameter(KerasClassifier)

def create_model(neurons=32,layers=1):
    model=Sequential()
    model.add(Dense(neurons,activation='relu',input_shape=(X_train.shape[1], )))
    
    # adding hidden layer as much of needed
    for _ in range(layers-1):#we have to create remaining layer means layer-1
        model.add(Dense(neurons,activation='relu'))
        
        
    # Now Adding output layer
    model.add(Dense(1,activation='sigmoid'))
    model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
    # model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

    
    return model
    

In [147]:
# create keras classifier
model=KerasClassifier(layers=1,neurons=32,build_fn=create_model,epochs=50,batch_size=10,verbose=0)
# Batch size: after how many epoches of batch weight should be updated
# verbose:control the level of output by default it is 0

# model = KerasRegressor(model=create_model, epochs=50, batch_size=10, verbose=0)

In [148]:
# # Define the grid search parameter
param_grid={
    'build_fn__neuron':[16,32,64,128],
    # 'layers':[1,2,3],
    # 'batch_size':[10,20],
    'epochs':[50,100]
}


# param_grid = {
#     'build_fn__neurons': [16, 32, 64, 128],
#     'build_fn__layer': [1, 2, 3],
#     'epochs': [50, 100]
# }


In [149]:
# Perform Grid Search CV
grid=GridSearchCV(estimator=model,param_grid=param_grid,n_jobs=-1,cv=3,verbose=1)
# print(X_train, y_train)
# Error occuring due to compitability issue you need to use scikit-learn 1.5.2


grid_result=grid.fit(X_train, y_train)

# Print the best parameter
# print("Best : %f using %s"%(grid_result.best_score_,grid_result.best_params_))

Fitting 3 folds for each of 8 candidates, totalling 24 fits


AttributeError: 'function' object has no attribute 'set_params'