Model Evaluation and Selection

In [83]:
import numpy as np
from sklearn.linear_model import LinearRegression
import tensorflow as tf
from tensorflow import keras 
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_squared_error

In [22]:
X, y = datasets.make_regression(n_samples=500, n_features=1, noise=0.1)
print(X.shape)

(500, 1)


In [25]:
#Split dataset for train, validation and test 
X_train, X_,y_train,y_ = train_test_split(X,y,test_size=0.4, random_state=1)
X_cv, X_test, y_cv, y_test = train_test_split(X_,y_,test_size=0.2,random_state=1)


Feature Scaling

In [77]:
scale_linear = StandardScaler()
X_train_scaled = scale_linear.fit_transform(X_train)

In [33]:
l_r = LinearRegression()
l_r.fit(X_train_scaled,y_train)

### Evaluate the Model$$J_{train}(\vec{w}, b) = \frac{1}{2m_{train}}\left[\sum_{i=1}^{m_{train}}(f_{\vec{w},b}(\vec{x}_{train}^{(i)}) - y_{train}^{(i)})^2\right]$$



In [38]:
# Feed the scaled training set and get the predictions
yhat = l_r.predict(X_train_scaled)

# Use scikit-learn's utility function and divide by 2
print(f"training MSE (using sklearn function): {mean_squared_error(y_train, yhat) / 2}")

# for-loop implementation
total_squared_error = 0

for i in range(len(yhat)):
    total_squared_error += (yhat[i] - y_train[i])**2                                            

mse = total_squared_error / (2*len(yhat))

print(f"training MSE (for-loop implementation): {mse.squeeze()}")

training MSE (using sklearn function): 0.0042610469232920644
training MSE (for-loop implementation): 0.004261046923292068


In [76]:
# Scale the cross validation set using the mean and standard deviation of the training set
X_cv_scaled = scale_linear.transform(X_cv)

print(f"Mean used to scale the CV set: {scale_linear.mean_.squeeze():.2f}")
print(f"Standard deviation used to scale the CV set: {scale_linear.scale_.squeeze():.2f}")

# Feed the scaled cross validation set
yhat = l_r.predict(X_cv_scaled)
print(f"Cross validation MSE: {mean_squared_error(y_cv, yhat) / 2}")

Mean used to scale the CV set: 0.13
Standard deviation used to scale the CV set: 1.05
Cross validation MSE: 0.004252644390496841


## Adding Polynomial Features


In [50]:
poly = PolynomialFeatures(degree=2, include_bias=False)

X_train_mapped = poly.fit_transform(X_train)

print(X_train_mapped[:5])

scale_poly = StandardScaler()

X_train_mapped_scaled = scale_poly.fit_transform(X_train_mapped)
print(X_train_mapped_scaled[:5])

[[ 2.18054717  4.75478595]
 [ 4.34185038 18.8516647 ]
 [-0.71742755  0.51470229]
 [-1.05853579  1.12049803]
 [-0.24239356  0.05875464]]
[[ 1.95309326e+00  2.06644947e+00]
 [ 4.00911016e+00  1.00836353e+01]
 [-8.03708737e-01 -3.44973595e-01]
 [-1.12820014e+00 -4.45061067e-04]
 [-3.51815677e-01 -6.04280434e-01]]


### Choose best model

In [66]:
train_mses = []
cv_mses = []
models = []
scalers = []

for degree in range(1,5):
    
    # Add polynomial features to the training set
    poly = PolynomialFeatures(degree, include_bias=False)
    X_train_mapped = poly.fit_transform(X_train)
    
    # Scale the training set
    scaler_poly = StandardScaler()
    X_train_mapped_scaled = scaler_poly.fit_transform(X_train_mapped)
    scalers.append(scaler_poly)
    
    # Create and train the model
    model = LinearRegression()
    model.fit(X_train_mapped_scaled, y_train )
    models.append(model)
    
    # Compute the training MSE
    yhat = model.predict(X_train_mapped_scaled)
    train_mse = mean_squared_error(y_train, yhat) / 2
    train_mses.append(train_mse)
    
    # Add polynomial features and scale the cross validation set
    poly = PolynomialFeatures(degree, include_bias=False)
    X_cv_mapped = poly.fit_transform(X_cv)
    X_cv_mapped_scaled = scaler_poly.transform(X_cv_mapped)
    
    # Compute the cross validation MSE
    yhat = model.predict(X_cv_mapped_scaled)
    cv_mse = mean_squared_error(y_cv, yhat) / 2
    cv_mses.append(cv_mse)
print(cv_mses)

[0.004252644390496841, 0.004272857651702618, 0.004275313424276005, 0.004344272626305992]


In [72]:
degree = np.argmin(cv_mses) + 1
print(f"Lowest CV MSE is found in the model with degree={degree}")

Lowest CV MSE is found in the model with degree=1


In [73]:
poly = PolynomialFeatures(degree, include_bias=False)
X_test_mapped = poly.fit_transform(X_test)

# Scale the test set
X_test_mapped_scaled = scalers[degree-1].transform(X_test_mapped)

# Compute the test MSE
yhat = models[degree-1].predict(X_test_mapped_scaled)
test_mse = mean_squared_error(y_test, yhat) / 2

print(f"Training MSE: {train_mses[degree-1]:.5f}")
print(f"Cross Validation MSE: {cv_mses[degree-1]:.5f}")
print(f"Test MSE: {test_mse:.5f}")

Training MSE: 0.00426
Cross Validation MSE: 0.00425
Test MSE: 0.00419


### Neural network

In [81]:
scale_linear = StandardScaler()
X_train_scaled = scale_linear.fit_transform(X_train)

X_cv_scaled = scale_linear.transform(X_cv)

In [100]:
models = []
train_mses = []
cv_mses = []

for i in range(5):

    model = keras.Sequential([
    keras.layers.Dense(i+6, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    keras.layers.Dense(i+3, activation='relu'),
    keras.layers.Dense(1, activation='linear')  #linear for regression
    ])

    # Compile the model
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    model.fit(X_train_scaled,y_train, epochs=5)
    
    y_hat = model.predict(X_train_scaled)
    mse = mean_squared_error(y_train,y_hat)/2
    train_mses.append(mse)

    y_hat = model.predict(X_cv_scaled)
    mse = mean_squared_error(y_cv,y_hat)/2
    cv_mses.append(mse)

    models.append(model)

    

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### Fidning best model

In [110]:
m_i = np.argmin(cv_mses)
print(f"Best model {models[m_i].summary()}")
X_test_scaled = scale_linear.transform(X_test)


Model: "sequential_22"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_66 (Dense)            (None, 7)                 14        
                                                                 
 dense_67 (Dense)            (None, 4)                 32        
                                                                 
 dense_68 (Dense)            (None, 1)                 5         
                                                                 
Total params: 51 (204.00 Byte)
Trainable params: 51 (204.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Best model None


[0.0, 0.0]

In [115]:
print(f"accuracy {models[m_i].evaluate(X_test_scaled,y_test)[1]:.2f} %")

accuracy 80.51 %
