# Using GridSearchCV with Tensorflor: Regression

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_percentage_error

import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

from scikeras.wrappers import KerasRegressor

In [10]:
# df = pd.read_excel("housing_data.xlsx")
url = "https://raw.githubusercontent.com/ie-ai-class/ai-2566-2/main/T6%20-%20ANN/S02%20-%20Regression/housing_data.csv"
df = pd.read_csv(url)
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [11]:
# Shuffle data
df = df.sample(frac=1, random_state=1)

# Extract data

colsX = [i for i in df.columns if i != "MEDV"]
X = df[colsX].values
y = df["MEDV"].values
print(X.shape)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Standardization
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# Scale y data
nm = MinMaxScaler()
y_train = y_train.reshape(-1, 1)  # Required for sklearn api
y_test = y_test.reshape(-1, 1)
nm.fit(y_train)
y_train_scaled = nm.transform(y_train)
y_test_scaled = nm.transform(y_test)

(506, 13)


In [12]:
def get_clf_model(hidden_layer_sizes, meta, compile_kwargs):

    model = tf.keras.Sequential()
    input_layer = tf.keras.layers.Input(shape=(meta["n_features_in_"]))
    model.add(input_layer)

    for i, hidden_layer_size in enumerate(hidden_layer_sizes):
        hidden_layer = tf.keras.layers.Dense(
            units=hidden_layer_size, activation="relu", name=f"hidden{i+1}"
        )
        model.add(hidden_layer)

    output_layer = tf.keras.layers.Dense(units=1, name="output")
    model.add(output_layer)

    model.compile(
        optimizer=compile_kwargs["optimizer"],
        loss="mean_squared_error",
        metrics=[tf.keras.metrics.mean_absolute_percentage_error],
    )

    return model

In [13]:
tf.keras.backend.clear_session()
tf.keras.backend.set_epsilon(1)

In [14]:
# Note that I am setting validation_split=0, because already use GridSearchCV which has cross-validation.

clf = KerasRegressor(
    model=get_clf_model,
    hidden_layer_sizes=(16,),
    optimizer="adam",
    optimizer__learning_rate=0.001,
    epochs=200,
    batch_size=16,
    validation_split=0.2,  
    verbose=1,
)

In [15]:
set1 = {
    "optimizer__learning_rate": [0.01, 0.1],
    "hidden_layer_sizes": [(16, 3), (8, 3)],
}

param_grid = [set1]

In [16]:
gs = GridSearchCV(estimator=clf, 
                  param_grid=param_grid, 
                  scoring='neg_mean_squared_error', 
                  cv=5,
                  n_jobs=-1)

In [17]:
gs.fit(X_train_std,y_train)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [18]:
df = pd.DataFrame(gs.cv_results_)
df = df.sort_values(by=['rank_test_score'])
display(df.head())

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_hidden_layer_sizes,param_optimizer__learning_rate,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
2,20.149172,0.225232,0.206127,0.050131,"(8, 3)",0.01,"{'hidden_layer_sizes': (8, 3), 'optimizer__lea...",-18.916374,-19.816294,-17.995046,-13.184765,-16.526369,-17.28777,2.322086,1
1,20.113577,0.253949,0.225431,0.042137,"(16, 3)",0.1,"{'hidden_layer_sizes': (16, 3), 'optimizer__le...",-11.590407,-31.522482,-26.945024,-20.004303,-23.142973,-22.641038,6.733871,2
3,11.585274,4.247267,0.103682,0.05368,"(8, 3)",0.1,"{'hidden_layer_sizes': (8, 3), 'optimizer__lea...",-21.931763,-25.40671,-38.883499,-15.388269,-19.38606,-24.19926,8.037746,3
0,20.17942,0.207322,0.235651,0.039468,"(16, 3)",0.01,"{'hidden_layer_sizes': (16, 3), 'optimizer__le...",-19.712135,-20.701989,-95.798145,-103.22606,-16.081027,-51.103871,39.62478,4


In [19]:
print(gs.best_score_)
print(gs.best_params_)

-17.287769574065674
{'hidden_layer_sizes': (8, 3), 'optimizer__learning_rate': 0.01}


In [20]:
y_pred = gs.predict(X_test_std)
print(mean_absolute_percentage_error(y_test, y_pred))

0.12311050304987613
