# Using GridSearchCV with TensorFlow: Regression

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_percentage_error

import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

from scikeras.wrappers import KerasRegressor

In [None]:
# df = pd.read_excel("housing_data.xlsx")
url = "https://raw.githubusercontent.com/ie-ai-class/ai-2566-2/main/T6%20-%20ANN/S02%20-%20Regression/housing_data.csv"
df = pd.read_csv(url)
df.head()

In [None]:
# Shuffle data
df = df.sample(frac=1, random_state=1)

# Extract data

colsX = [i for i in df.columns if i != "MEDV"]
X = df[colsX].values
y = df["MEDV"].values
print(X.shape)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Standardization
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# Scale y data
nm = MinMaxScaler()
y_train = y_train.reshape(-1, 1)  # Required for sklearn api
y_test = y_test.reshape(-1, 1)
nm.fit(y_train)
y_train_scaled = nm.transform(y_train)
y_test_scaled = nm.transform(y_test)

In [None]:
def get_clf_model(hidden_layer_sizes, meta, compile_kwargs):
    """Build and compile a fully connected Keras network with one linear output.

    The function is passed as ``model=`` to ``KerasRegressor``; the ``meta``
    and ``compile_kwargs`` arguments are expected to be supplied by the
    scikeras wrapper when it builds the model.

    Args:
        hidden_layer_sizes: Iterable of ints. One ReLU ``Dense`` layer is
            added per entry, in order, named ``hidden1``, ``hidden2``, ...
        meta: Mapping that must contain ``"n_features_in_"``, the number of
            input features.
        compile_kwargs: Mapping that must contain ``"optimizer"``, the
            optimizer handed to ``model.compile``.

    Returns:
        The compiled ``tf.keras.Sequential`` model (MSE loss, MAPE metric).
    """
    model = tf.keras.Sequential()

    # ``(n)`` is just the int ``n``; the trailing comma makes a real 1-tuple.
    # Newer Keras releases reject a bare int as an input shape.
    model.add(tf.keras.layers.Input(shape=(meta["n_features_in_"],)))

    for layer_no, units in enumerate(hidden_layer_sizes, start=1):
        model.add(
            tf.keras.layers.Dense(
                units=units, activation="relu", name=f"hidden{layer_no}"
            )
        )

    # Single unit with no activation: an unbounded real-valued prediction.
    model.add(tf.keras.layers.Dense(units=1, name="output"))

    model.compile(
        optimizer=compile_kwargs["optimizer"],
        loss="mean_squared_error",
        metrics=[tf.keras.metrics.mean_absolute_percentage_error],
    )

    return model

In [None]:
# Reset Keras' global state so the models built below start from a clean session.
tf.keras.backend.clear_session()
# Set the Keras backend fuzz factor (epsilon) to 1.
# NOTE(review): presumably this keeps the percentage-error metric from blowing
# up when targets are close to zero — confirm that 1 is the intended value.
tf.keras.backend.set_epsilon(1)

In [None]:
# Base estimator for the grid search.
# validation_split is set to 0 on purpose: GridSearchCV already performs
# cross-validation, so holding out an additional slice of every training fold
# inside Keras would only reduce the data available for fitting.
# hidden_layer_sizes and optimizer__learning_rate are defaults that the
# parameter grid overrides during the search.

clf = KerasRegressor(
    model=get_clf_model,
    hidden_layer_sizes=(16,),
    optimizer="adam",
    optimizer__learning_rate=0.001,
    epochs=200,
    batch_size=16,
    validation_split=0.0,
    verbose=1,
)

In [None]:
# Hyper-parameter candidates: every learning rate is combined with every
# hidden-layer layout (2 x 2 = 4 parameter combinations).
set1 = dict(
    optimizer__learning_rate=[0.01, 0.1],
    hidden_layer_sizes=[(16, 3), (8, 3)],
)

# GridSearchCV accepts a list of grids; only one grid is used here.
param_grid = [set1]

In [None]:
# Exhaustive search over param_grid with 5-fold cross-validation.
# Scoring is negative MSE, so a higher (closer to zero) score is better.
# n_jobs=-1 asks for all available CPU cores.
gs = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    scoring="neg_mean_squared_error",
    cv=5,
    n_jobs=-1,
)

In [None]:
# Run the grid search on the standardized training features.
# The target passed here is y_train (reshaped, not min-max scaled).
gs.fit(X=X_train_std, y=y_train)

In [None]:
# Tabulate the cross-validation results, best-ranked parameter set first.
# Note: this rebinds df, which previously held the housing dataset.
df = pd.DataFrame(gs.cv_results_).sort_values(by="rank_test_score")
display(df.head())

In [None]:
# Best mean cross-validated score (negative MSE) and the parameters that achieved it.
print(gs.best_score_, gs.best_params_, sep="\n")

In [None]:
# Predict on the held-out test features with the grid search's predict method,
# then report the mean absolute percentage error against the true test targets.
y_pred = gs.predict(X_test_std)
print(mean_absolute_percentage_error(y_true=y_test, y_pred=y_pred))