### 6. Forward selection

##### 1.1 Forward selection algorithm

In [1]:
import numpy as np
from os import path
import util
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold

# Load the benchmark table: columns 0-5 are the candidate features, column 6 is the target.
data = np.genfromtxt(path.join('data/GPUbenchmark.csv'), delimiter=',', dtype=float)

y = data[:, 6]
X = data[:, :6].copy()  # copy: the selection loop below reorders the columns of X in place
Xe = util.extendMatrix(X)  # not used by the selection loop; kept in case other cells rely on it

n_samples, n_features = X.shape

count = 0  # number of candidate models trained
# Start from an explicit intercept column of ones. np.empty() would leave this
# column uninitialised, so every fit would depend on arbitrary memory contents.
forward_set = np.ones((n_samples, 1))
data_sets = []  # data_sets[i] is the design matrix (intercept + i+1 features) of model i+1
feature_ids = np.arange(n_features)  # original column index of each current column of X
selected_features = []  # original column indices, in the order they were selected

for k in range(n_features):
    # Columns 0..k-1 of X hold the features already selected (see the swap below),
    # so only positions k..n_features-1 are still candidates.
    best_index = None  # position of the best candidate in the *current* column order of X
    best_mse = None
    for p in range(k, n_features):
        count += 1  # Keep track of models trained
        # np.c_ builds a new array, so the current forward_set is never modified here.
        X_train = np.c_[forward_set, X[:, p]]
        beta = util.calcBeta(X_train, y)  # coefficients for this candidate model
        mse = util.cost(X_train, y, beta)  # training cost of this candidate model
        # NOTE: p is a position in the reordered X, not necessarily the original column.
        print(f"Training MSE: {mse}, Column: {p}")

        # The first candidate is always accepted; later ones must strictly improve on it.
        if best_index is None or mse < best_mse:
            best_mse = mse
            best_index = p

    # Grow the selected model by the winning feature.
    forward_set = np.c_[forward_set, X[:, best_index]]
    selected_features.append(int(feature_ids[best_index]))
    # Move the chosen feature to position k so it is not offered again; re-adding it
    # would duplicate a column in X_train and make the fit singular.
    X[:, [k, best_index]] = X[:, [best_index, k]]
    feature_ids[[k, best_index]] = feature_ids[[best_index, k]]
    print(f"Lowest MSE for {k + 1} features: {best_mse}")
    data_sets.append(forward_set)  # Save the best n_samples x (k + 2) design matrix

print(f"Feature selection iterations: {count}")
print(f"Selected features (original column indices, in selection order): {selected_features}")



Training MSE: 314.6663620242806, Column: 0
Training MSE: 790.2893804532456, Column: 1
Training MSE: 823.3111903599925, Column: 2
Training MSE: 277.2023525115445, Column: 3
Training MSE: 113.79190012368582, Column: 4
Training MSE: 254.72858164292313, Column: 5
Lowest MSE for 1 features: 113.79190012368582
Training MSE: 109.10136698634071, Column: 1
Training MSE: 108.13583641724789, Column: 2
Training MSE: 102.30662970774905, Column: 3
Training MSE: 45.60003264460982, Column: 4
Training MSE: 44.877148351027714, Column: 5
Lowest MSE for 2 features: 44.877148351027714
Training MSE: 28.94321706350117, Column: 2
Training MSE: 29.306382095054296, Column: 3
Training MSE: 43.05032819811991, Column: 4
Training MSE: 29.839557771498498, Column: 5
Lowest MSE for 3 features: 28.94321706350117
Training MSE: 26.703498055034814, Column: 3
Training MSE: 14.19990378140934, Column: 4
Training MSE: 28.924675042132726, Column: 5
Lowest MSE for 4 features: 14.19990378140934
Training MSE: 12.778051418804358, 

##### 1.2 Apply algorithm and use 3-fold cross validation

##### Results: 
Model 5 (the first five selected features) is the best model because it has the lowest generalization (validation) error under 
3-fold CV (MSE ≈ 34.19, versus ≈ 42.69 for the full six-feature model).

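The most important feature according to the forward selection algorithm is the one at column index 4 (zero-based), which is 
***Memory Config***: it is the first feature selected because it gives the lowest single-feature training MSE.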

In [2]:
# Score every forward-selection model with 3-fold cross-validation.
# Each matrix in data_sets already carries its own leading intercept column,
# so the estimator must not add a second one.
linreg = LinearRegression(fit_intercept=False)
kf = KFold(n_splits=3)  # shuffle defaults to False, so folds are consecutive blocks of rows
# The loop variable must not be called `set`: at notebook top level it would keep
# shadowing the builtin for every later cell.
for idx, design_matrix in enumerate(data_sets):
    # cross_val_score clones and refits the estimator on each fold, so no prior fit is needed.
    # The scorer returns negated MSE; flip the sign to report ordinary MSE.
    score = -cross_val_score(linreg, design_matrix, y, cv=kf, scoring='neg_mean_squared_error')
    print(f"Model_{idx+1} 3-fold MSE: {score.mean()}")


Model_1 3-fold MSE: 347.09190366265057
Model_2 3-fold MSE: 124.65212014718725
Model_3 3-fold MSE: 100.3940261066494
Model_4 3-fold MSE: 46.93936905803836
Model_5 3-fold MSE: 34.19240753366419
Model_6 3-fold MSE: 42.68779923885409
