In [15]:
import numpy as np 
from sklearn.utils import shuffle 

# load training data 
data = np.load('data_lab6/data/training_data.npy') 
prices = np.load('data_lab6/data/prices.npy')

# shuffle 
data, prices = shuffle(data, prices, random_state=0)

In [16]:
# 1
def normalize(training_data, testing_data):
    mean_training_data = np.mean(training_data, axis=0)
    std_training_data = np.std(training_data, axis=0)
    std_training_data = np.add(std_training_data, 1e-10)
    normalized_training_data = np.divide(np.subtract(training_data, mean_training_data), std_training_data)
    mean_testing_data = np.mean(testing_data, axis=0)
    std_testing_data = np.std(testing_data, axis=0)
    std_testing_data = np.add(std_testing_data, 1e-10)
    normalized_testing_data = np.divide(np.subtract(testing_data, mean_testing_data), std_testing_data)
    return normalized_training_data, normalized_testing_data

In [17]:
# split data into training and testing data
from sklearn.model_selection import KFold
kf = KFold(n_splits=3)

In [18]:
# 2
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error

linear_regression_model = LinearRegression()

average_mse = 0
average_mae = 0

for train_index, test_index in kf.split(data):
    X_train, X_test = data[train_index], data[test_index]
    y_train, y_test = prices[train_index], prices[test_index]
    X_train, X_test = normalize(X_train, X_test)
    linear_regression_model.fit(X_train, y_train)
    y_pred = linear_regression_model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    average_mse += mse
    average_mae += mae
    
average_mse /= 3
average_mae /= 3
print(f'Average MSE: {average_mse}, Average MAE: {average_mae}')

Average MSE: 3.1793898741404214, Average MAE: 1.3224879105885823


In [19]:
# 3
from sklearn.linear_model import Ridge

alpha_values = [1, 10, 100, 1000]

for alpha in alpha_values:
    ridge_regression_model = Ridge(alpha=alpha)
    average_mse = 0
    average_mae = 0
    
    for train_index, test_index in kf.split(data):
        X_train, X_test = data[train_index], data[test_index]
        y_train, y_test = prices[train_index], prices[test_index]
        X_train, X_test = normalize(X_train, X_test)
        ridge_regression_model.fit(X_train, y_train)
        y_pred = ridge_regression_model.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        mae = mean_absolute_error(y_test, y_pred)
        average_mse += mse
        average_mae += mae
        
    average_mse /= 3
    average_mae /= 3
    print(f'Alpha: {alpha}, Average MSE: {average_mse}, Average MAE: {average_mae}')

Alpha: 1, Average MSE: 3.179354429244995, Average MAE: 1.3224618037541707
Alpha: 10, Average MSE: 3.179126739501953, Average MAE: 1.3222633600234985
Alpha: 100, Average MSE: 3.183377265930176, Average MAE: 1.3216320276260376
Alpha: 1000, Average MSE: 3.441384236017863, Average MAE: 1.368357261021932


In [23]:
# 4
ridge_regression_model = Ridge(alpha=10)
data, _ = normalize(data, [])
ridge_regression_model.fit(data, prices)

coefficients = ridge_regression_model.coef_
bias = ridge_regression_model.intercept_

print(f'Coefficients: {coefficients}')
print(f'Bias: {bias}')

abs_coefficients = np.abs(coefficients)
sorted_indices = np.argsort(abs_coefficients)[::-1]
print('Most significant feature:', sorted_indices[0] + 1)
print('Second most significant feature:', sorted_indices[1] + 1)
print('Least significant feature:', sorted_indices[-1] + 1)

Coefficients: [ 1.6635175  -0.15533401 -0.46034163  0.40463796  1.3356767   0.13251601
 -0.08683655  0.          0.36667582 -0.36666915  0.          0.
 -0.22941917  0.22933263]
Bias: 5.69512939453125
Most significant feature: 1
Second most significant feature: 5
Least significant feature: 8


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
