In [17]:
%reset -f

import pandas as pd
from keras.layers import Dense, Input
from keras.models import Sequential
from scikeras.wrappers import KerasRegressor
from sklearn.compose import ColumnTransformer
from sklearn.compose import TransformedTargetRegressor
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, RobustScaler

import Helpers

# Read the cleaned dataset from disk into a DataFrame.
DATA_PATH = 'cleaned_data.csv'
data = pd.read_csv(DATA_PATH)

# Partitioning into train/test features and targets is delegated to Helpers.
X_train, X_test, y_train, y_test = Helpers.split_data(data)


In [18]:
# Column groups consumed by the preprocessing step:
# the first list is one-hot encoded, the second is scaled as numeric input.
categorical_features = [
    'Geography',
    'Gender',
]
# NOTE(review): 'HasCrCard' looks like a 0/1 flag by name; it is scaled along
# with the other numeric columns here -- confirm that is intended.
numerical_features = [
    'CreditScore',
    'Age',
    'Balance',
    'NumOfProducts',
    'HasCrCard',
]


In [19]:
# Per-column-type preprocessing pipelines.

# Categorical columns: one-hot encode; categories not seen during fit are
# ignored at transform time instead of raising.
categorical_transformer = Pipeline([('onehot', OneHotEncoder(handle_unknown='ignore'))])

# Numerical columns: scale with RobustScaler.
numerical_transformer = Pipeline([('scaler', RobustScaler())])

def create_model(input_dim):
    """Build and compile the feed-forward regression network.

    Parameters
    ----------
    input_dim : int
        Number of input features (columns) per sample.

    Returns
    -------
    keras.models.Sequential
        Compiled model: Dense layers of 10, 200 and 400 ReLU units followed by
        a single linear output unit, optimised with Adam on mean squared error.
    """
    model = Sequential([
        # Declare the input width with an explicit Input object; Keras 3 warns
        # when `input_shape` is passed to a layer of a Sequential model.
        Input(shape=(input_dim,)),
        Dense(10, activation='relu'),
        Dense(200, activation='relu'),
        Dense(400, activation='relu'),
        Dense(1, activation='linear'),  # linear output for regression
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model



In [20]:
# Combine the per-type transformers into a single ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_features),
        ('num', numerical_transformer, numerical_features)
    ])


def build_regressor_from_meta(meta):
    """Build the Keras network sized to the data scikeras was fitted on.

    scikeras passes ``meta`` (a dict of attributes it recorded from the data
    given to ``fit``) to model-building callables that declare a ``meta``
    parameter; ``meta["n_features_in_"]`` is the number of input columns.

    Parameters
    ----------
    meta : dict
        Metadata supplied by scikeras at fit time.

    Returns
    -------
    The compiled model returned by ``create_model``.
    """
    return create_model(input_dim=meta["n_features_in_"])


# Define the Keras model for the pipeline.
# `model=` replaces the deprecated `build_fn=` alias. Reading the feature count
# from `meta` avoids re-transforming the whole training set just to learn its
# width, and removes the dependency on the global `preprocessor` already being
# fitted when the network is built.
model = KerasRegressor(model=build_regressor_from_meta,
                       epochs=100, batch_size=10, verbose=1)

# Wrap the Keras model so the target is scaled before fitting and predictions
# are mapped back to the original target scale.
target_scaler = RobustScaler()
tt_reg = TransformedTargetRegressor(regressor=model, transformer=target_scaler)




In [21]:
# Chain preprocessing and the target-scaled regressor into one estimator
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', tt_reg),
])

# Train the whole chain end to end on the training split
pipeline.fit(X_train, y_train)


Epoch 1/100


  X, y = self._initialize(X, y)
  self.units = units


[1m10042/10042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 884us/step - loss: 0.3821
Epoch 2/100
[1m10042/10042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 914us/step - loss: 0.3807
Epoch 3/100
[1m 9225/10042[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 890us/step - loss: 0.3817

KeyboardInterrupt: 

In [16]:
# Predict on the held-out split and evaluate.
# `pipeline` must have been fitted by the training cell before this runs;
# `mean_squared_error` is imported with the other dependencies at the top.
y_pred = pipeline.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# RMSE is expressed in the target's own units, which makes the error
# magnitude easier to interpret than the raw MSE.
rmse = mse ** 0.5
print(f'Root Mean Squared Error: {rmse}')

[1m2511/2511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 545us/step
Mean Squared Error: 2505925203.129788
