# Using Lasso for feature selection, then hyperparameter tuning with TensorFlow

In [6]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from scikeras.wrappers import KerasRegressor

# Import the functions from the module
from data_processing_module import load_data, preprocess_data, split_data

# Cell 2: Run the imported data helpers, then select features with Lasso

# Inputs handed to the data-processing helpers
file_path = 'merged3.csv'  # Update this path to the location of your file
target_column = 'SalePrice_x'
drop_columns = ['Unnamed: 0', 'PID', 'Index']

# Load the file, run the preprocessing helper, and split into train/test sets
data = load_data(file_path)
X_preprocessed, y, preprocessor = preprocess_data(
    data, target_column, drop_columns
)
X_train, X_test, y_train, y_test = split_data(X_preprocessed, y)

# Fit the Lasso on the training split; fit() returns the fitted estimator
lasso = Lasso(alpha=0.01, max_iter=10000).fit(X_train, y_train)

# Column indices whose Lasso coefficient is non-zero
selected_features = np.nonzero(lasso.coef_)[0]

# Restrict both splits to the selected columns
X_train_selected, X_test_selected = (
    X_train[:, selected_features],
    X_test[:, selected_features],
)

# Define the create_model function
def create_model(optimizer='adam', activation='relu', dropout_rate=0.0, meta=None):
    """Build and compile a small feed-forward regression network.

    Architecture: Input -> Dense(64) -> Dropout -> Dense(32) -> Dropout
    -> Dense(1, linear), compiled with a mean-squared-error loss.

    Args:
        optimizer: Optimizer passed to ``model.compile``.
        activation: Activation used by the two hidden Dense layers.
        dropout_rate: Rate used by both Dropout layers.
        meta: Optional dict of fit-time metadata. SciKeras supplies it
            automatically to model-building functions that declare a
            ``meta`` parameter; its ``n_features_in_`` entry is the number
            of columns in the data being fitted. When omitted (e.g. a
            direct call), the width of the notebook-level
            ``X_train_selected`` is used, as before.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    # Size the input from the data actually being fitted when SciKeras tells
    # us, so the builder is not tied to one particular global matrix.
    if meta is not None and 'n_features_in_' in meta:
        n_features = meta['n_features_in_']
    else:
        n_features = X_train_selected.shape[1]

    model = Sequential()
    model.add(Input(shape=(n_features,)))
    model.add(Dense(64, activation=activation))
    model.add(Dropout(dropout_rate))
    model.add(Dense(32, activation=activation))
    model.add(Dropout(dropout_rate))
    # Single linear unit: unbounded real-valued regression output
    model.add(Dense(1, activation='linear'))
    model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mean_squared_error'])
    return model

# Wrap the model-building function in a scikit-learn compatible regressor
model = KerasRegressor(model=create_model, verbose=0)

# Hyperparameter grid: `model__*` entries are routed to create_model,
# the remaining entries are fit settings of the wrapper
param_grid = dict(
    model__optimizer=['rmsprop'],
    model__activation=['relu'],
    model__dropout_rate=[0.0],
    batch_size=[16],
    epochs=[500, 1000],
)

# 3-fold cross-validated grid search scored by R^2, using all available cores
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='r2',
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train_selected, y_train)

# Best estimator found by the search, and the parameters that produced it
best_model = grid_search.best_estimator_
print(f"Best Parameters: {grid_search.best_params_}")

# Predict on the held-out test split with the best estimator
y_pred = best_model.predict(X_test_selected)

# Score the test-set predictions
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f'Model Mean Squared Error: {mse}')
print(f'Model R-squared: {r2}')




Fitting 3 folds for each of 2 candidates, totalling 6 fits


2024-07-24 22:10:19.179737: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2024-07-24 22:10:19.179772: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 32.00 GB
2024-07-24 22:10:19.179779: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 10.67 GB
2024-07-24 22:10:19.179798: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-07-24 22:10:19.179811: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2024-07-24 22:10:19.179949: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2024-07-24 22:10:19.179977: I metal_plugin/src/device/metal_device.cc:2

[CV] END batch_size=16, epochs=500, model__activation=relu, model__dropout_rate=0.0, model__optimizer=rmsprop; total time= 5.3min
[CV] END batch_size=16, epochs=500, model__activation=relu, model__dropout_rate=0.0, model__optimizer=rmsprop; total time= 5.3min
[CV] END batch_size=16, epochs=500, model__activation=relu, model__dropout_rate=0.0, model__optimizer=rmsprop; total time= 5.3min
Best Parameters: {'batch_size': 16, 'epochs': 500, 'model__activation': 'relu', 'model__dropout_rate': 0.0, 'model__optimizer': 'rmsprop'}
Model Mean Squared Error: 375966208.51770777
Model R-squared: 0.9368091225624084
[CV] END batch_size=16, epochs=1000, model__activation=relu, model__dropout_rate=0.0, model__optimizer=rmsprop; total time= 8.7min
[CV] END batch_size=16, epochs=1000, model__activation=relu, model__dropout_rate=0.0, model__optimizer=rmsprop; total time= 8.8min
[CV] END batch_size=16, epochs=1000, model__activation=relu, model__dropout_rate=0.0, model__optimizer=rmsprop; total time= 8.8m