In [11]:
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# custom functions
import utils

# supress warnings
tf.get_logger().setLevel("ERROR")
tf.autograph.set_verbosity(0)

In [7]:
# load dataset from text file
data = np.loadtxt('./data/data_w3_ex1.csv', delimiter=',')

# split the inputs and outputs in separate arrays
x = data[:,0]
y = data[:,1]

# convert 1D arrays to 2D as commands will require it later
x = np.expand_dims(x, axis=1)
y = np.expand_dims(y, axis=1)

print(f"shape of input x: {x.shape}")
print(f"shape of targets y: {y.shape}")

shape of input x: (50, 1)
shape of targets y: (50, 1)


In [8]:
# splitting data in training, cross validation and test set
# 60% - training, 20% - cross validation / dev set, 20% - test set

# split 60% dataset as the training set. Put 40% in temp variables: x_ and y_
x_train, x_, y_train, y_ = train_test_split(x, y, test_size=0.40, random_state=1)

# split 40% subset of the above into two: one half for dev set, other for test set
x_cv, x_test, y_cv, y_test = train_test_split(x_, y_, test_size=0.50, random_state=1)

# delete temp variables
del x_, y_

print(f"the shape of the training set (input) is: {x_train.shape}")
print(f"the shape of the training set (target) is: {y_train.shape}\n")
print(f"the shape of the cross validation set (input) is: {x_cv.shape}")
print(f"the shape of the cross validation set (target) is: {y_cv.shape}\n")
print(f"the shape of the test set (input) is: {x_test.shape}")
print(f"the shape of the test set (target) is: {y_test.shape}")

the shape of the training set (input) is: (30, 1)
the shape of the training set (target) is: (30, 1)

the shape of the cross validation set (input) is: (10, 1)
the shape of the cross validation set (target) is: (10, 1)

the shape of the test set (input) is: (10, 1)
the shape of the test set (target) is: (10, 1)


#### NNs can learn non-linear relationships so we can opt to skip polynomial features
The code is still included below in case you want to try later and see what effect it will have on your results. The default `degree` is set to `1` to indicate that it will just use `x_train`, `x_cv`, and `x_test` as is (i.e. without any additional polynomial features).

In [9]:
# Add polynomial features
degree = 1
poly = PolynomialFeatures(degree, include_bias=False)
X_train_mapped = poly.fit_transform(x_train)
X_cv_mapped = poly.transform(x_cv)
X_test_mapped = poly.transform(x_test)

Next, you will scale the input features to help gradient descent converge faster. Again, notice that you are using the mean and standard deviation computed from the training set by just using `transform()` in the cross validation and test sets instead of `fit_transform()`.

In [10]:
# scaling the features using z-score
scaler = StandardScaler()
X_train_mapped_scaled = scaler.fit_transform(X_train_mapped)
X_cv_mapped_scaled = scaler.transform(X_cv_mapped)
X_test_mapped_scaled = scaler.transform(X_test_mapped)

In [16]:
# build and train models
nn_train_mses = []
nn_cv_mses = []

# Build the models
nn_models = utils.build_models()

for model in nn_models:

    # setup the loss and optimizer
    model.compile(
        loss="mse",
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.1)
    )

    print(f"training {model.name}")

    # train the model
    model.fit(
        X_train_mapped_scaled, y_train,
        epochs = 500,
        verbose = 0
    )

    print("Done!\n")

    # record the training MSEs
    yhat = model.predict(X_train_mapped_scaled)
    train_mse = mean_squared_error(y_train, yhat) / 2
    nn_train_mses.append(train_mse)

    # record the cross validation MSEs
    yhat = model.predict(X_cv_mapped_scaled)
    cv_mse = mean_squared_error(y_cv, yhat) / 2
    nn_cv_mses.append(cv_mse)

# print results
print("RESULTS: ")
for model_num in range(len(nn_train_mses)):
    print(
        f"Model {model_num + 1}: Training MSE: {nn_train_mses[model_num]:.2f}, " + 
        f"CV MSE {nn_cv_mses[model_num]:.2f}"
    )

training model_1
Done!

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
training model_2
Done!

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
training model_3
Done!

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
RESULTS: 
Model 1: Training MSE: 406.19, CV MSE 551.78
Model 2: Training MSE: 73.40, CV MSE 112.29
Model 3: Training MSE: 406.19, CV MSE 551.78


In [17]:
# select the model with lowest CV_MSE
model_num = 2

yhat = nn_models[model_num - 1].predict(X_test_mapped_scaled)
test_mse = mean_squared_error(y_test, yhat) / 2

print(f"Selected Model: {model_num}")
print(f"Training MSE: {nn_train_mses[model_num-1]:.2f}")
print(f"Cross Validation MSE: {nn_cv_mses[model_num-1]:.2f}")
print(f"Test MSE: {test_mse:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Selected Model: 2
Training MSE: 73.40
Cross Validation MSE: 112.29
Test MSE: 131.57
