# 5. Regresia liniară, Ridge și Lasso

In [10]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
import numpy as np
import sys

In [11]:
def normalize_data(train_data, type=None):

    scaled_x_train = train_data

    if type == 'standard':
        scaler = StandardScaler()
        scaler.fit(train_data)
        scaled_x_train = scaler.transform(train_data)
    elif type == 'l1':
        scaled_x_train = train_data / np.expand_dims(np.sum(abs(train_data), axis=1), axis=1)
    elif type == 'l2':
        scaled_x_train = train_data / np.expand_dims(np.sqrt(np.sum(train_data ** 2, axis=1)), axis=1)

    return scaled_x_train

In [26]:
def training_pipeline(train_data, train_labels, test_data, test_labels, model):

    model.fit(train_data, train_labels)
    predicted_labels = model.predict(test_data)

    mse_value = mean_squared_error(test_labels, predicted_labels)
    mae_value = mean_absolute_error(test_labels, predicted_labels)

    return mse_value, mae_value

In [4]:
training_data = np.load('data/training_data.npy')
prices = np.load('data/prices.npy')

training_data, prices = shuffle(training_data, prices, random_state=0)

### Cross validation

In [20]:
num_samples = int(len(training_data) / 3)

normalized_train_data = normalize_data(training_data, 'standard')

train1 = normalized_train_data[:num_samples]
prices1 = prices[:num_samples]

train2 = normalized_train_data[num_samples : num_samples * 2]
prices2 = prices[num_samples : num_samples * 2]

train3 = normalized_train_data[2 * num_samples :]
prices3 = prices[2 * num_samples :]


In [33]:
linear_regression_model = LinearRegression()

mse1, mae1 = training_pipeline(np.concatenate((train1, train2)), np.concatenate((prices1, prices2)), train3, prices3, linear_regression_model)
mse2, mae2 = training_pipeline(np.concatenate((train1, train3)), np.concatenate((prices1, prices3)), train2, prices2, linear_regression_model)
mse3, mae3 = training_pipeline(np.concatenate((train2, train3)), np.concatenate((prices2, prices3)), train1, prices1, linear_regression_model)

print(f'Mean MSE: {(mse1 + mse2 + mse3) / 3}\nMean MAE: {(mae1 + mae2 + mae3) / 3}\n')

Mean MSE: 3.167414665222168
Mean MAE: 1.3203471501668294


In [38]:
best_alpha = 1
minimum_mean_MSE = sys.maxsize

for alphaValue in [1, 10, 100, 1000]:

    ridge_regression_model = Ridge(alpha=alphaValue)


    mse1, mae1 = training_pipeline(np.concatenate((train1, train2)), np.concatenate((prices1, prices2)), train3, prices3, ridge_regression_model)
    mse2, mae2 = training_pipeline(np.concatenate((train1, train3)), np.concatenate((prices1, prices3)), train2, prices2, ridge_regression_model)
    mse3, mae3 = training_pipeline(np.concatenate((train2, train3)), np.concatenate((prices2, prices3)), train1, prices1, ridge_regression_model)

    mean_MSE = (mse1 + mse2 + mse3) / 3
    mean_MAE = (mae1 + mae2 + mae3) / 3

    if mean_MSE < minimum_mean_MSE:
        best_alpha = alphaValue
        minimum_mean_MSE = mean_MSE

    print(f'------- Alpha {alphaValue} ------')
    print(f'Mean MSE: {mean_MSE}\nMean MAE: {mean_MAE}\n')

print(best_alpha)

------- Alpha 1 ------
Mean MSE: 3.1674696604410806
Mean MAE: 1.3195749918619792

------- Alpha 10 ------
Mean MSE: 3.167327562967936
Mean MAE: 1.3193677266438801

------- Alpha 100 ------
Mean MSE: 3.17224915822347
Mean MAE: 1.318570613861084

------- Alpha 1000 ------
Mean MSE: 3.432858149210612
Mean MAE: 1.366493860880534

10


In [39]:
model = Ridge(alpha=10)
model.fit(normalized_train_data, prices)
print(f'Coefs: {model.coef_}')
print(f'Bias: {model.intercept_}')
print(f'Most significant feature: {np.argmax(model.coef_) +1}')
print(f'Least significant feature: {np.argmin(model.coef_) +1}') 

Coefs: [ 1.6635172  -0.15532357 -0.46031442  0.40461838  1.3357221   0.13253653
 -0.0868261   0.          0.36664298 -0.36663988  0.          0.
 -0.229365    0.22936533]
Bias: 5.695129871368408
Most significant feature: 1
Least significant feature: 3
