# Read Data

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numpy import arange
from pandas import read_csv
from sklearn.base import clone
from sklearn.datasets import make_regression
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import MultiTaskElasticNetCV
from sklearn.metrics import mean_squared_error,r2_score, make_scorer
from sklearn.model_selection import cross_val_predict, train_test_split, GridSearchCV
from sklearn.model_selection import RepeatedKFold
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR


In [None]:
def ReadFile(s, skiprows=7):
    """Load a comma-separated data file into a DataFrame indexed by ``Id``.

    Parameters
    ----------
    s : str, path-like or file-like
        Source of the CSV data; passed straight to ``pd.read_csv``.
    skiprows : int, default 7
        Number of leading lines to discard before the data rows start
        (presumably the descriptive header of the ML-CUP file -- confirm
        against the actual file).

    Returns
    -------
    pandas.DataFrame
        Columns ``i1`` .. ``i10`` followed by ``Y1`` .. ``Y3``; the first CSV
        field is used as the index and is named ``Id``.
    """
    column = ['Id'] + [f'i{k}' for k in range(1, 11)] + ['Y1', 'Y2', 'Y3']
    dataset = pd.read_csv(s, sep=",", names=column, skiprows=skiprows)
    # set_index returns a new frame, so nothing is mutated in place.
    return dataset.set_index('Id')

# Load the dataset (presumably the ML-CUP23 training set, going by the file name).
# The path is relative, so it is resolved against the current working directory.
data=ReadFile("Dataset_Cup/ML-CUP23-TR.csv")

In [None]:
# First 10 columns are the model inputs, the following 3 are the regression targets.
features = data.iloc[:, :10]
targets = data.iloc[:, 10:13]

# Hold out 25% of the rows as the test split (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    features,
    targets,
    test_size=0.25,
    random_state=42,
)

# Define functions for computing the MEE (Mean Euclidean Error) and for printing results

In [None]:
def mean_euclidean_error(y_true, y_pred):
    """Mean Euclidean Error (MEE) between targets and predictions.

    The Euclidean norm of the residual vector is computed for every sample
    (row) and the per-sample distances are then averaged.

    Parameters
    ----------
    y_true : array-like of shape (n_samples, n_outputs)
        Ground-truth target values.
    y_pred : array-like of shape (n_samples, n_outputs)
        Predicted target values, in the same row order as ``y_true``.

    Returns
    -------
    float
        Average of the per-sample Euclidean distances.
    """
    # Convert to plain arrays so rows are matched by position. Subtracting two
    # pandas objects aligns on index labels instead; with differing indexes
    # that yields NaNs which the sum then skips, silently reporting 0 error.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    errors = np.sqrt(np.sum((y_true - y_pred) ** 2, axis=1))
    return np.mean(errors)

In [None]:
def _print_split_scores(model, X, y, split_name):
    """Print the R², MEE and MSE of an already fitted ``model`` on one split."""
    predictions = model.predict(X)
    # Compute all three metrics before printing anything, so a failing metric
    # does not leave a half-printed report.
    r2 = r2_score(y, predictions)
    mee = mean_euclidean_error(y, predictions)
    mse = mean_squared_error(y, predictions)
    print(f"Score R² {split_name}: {r2}")
    print(f"Score MEE {split_name}: {mee}")
    print(f"Score MSE {split_name}: {mse}")


def print_results(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` and print R², MEE and MSE for training, validation and test data.

    ``X_train`` / ``y_train`` are split again (80% / 20%, ``random_state=42``)
    into a training part and a validation part. The model is fitted on the
    80% part only and then scored on that part, on the validation part and on
    the supplied test data.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is fitted in
        place, so after the call it holds the fit on the 80% training part.
    X_train, y_train
        Development data that is further split into training and validation.
    X_test, y_test
        Held-out data used for the final "Test" scores.

    Returns
    -------
    None
        The scores are only printed.
    """
    # Split to print Train, validation and test
    X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

    model.fit(X_tr, y_tr)

    _print_split_scores(model, X_tr, y_tr, "Training")
    print("\n")
    _print_split_scores(model, X_val, y_val, "Validation")
    print("\n")
    _print_split_scores(model, X_test, y_test, "Test")


# ElasticNet application

## Initial test with fixed hyperparameters

In [None]:
# Baseline: ElasticNet with hand-picked hyperparameters
alpha = 0.1     # overall penalty strength (multiplies both the L1 and the L2 term)
l1_ratio = 0.5  # L1/L2 mix: 0 -> L2 penalty only, 1 -> L1 penalty only
# fit() returns the estimator itself, so construction and training can be chained
elastic_net_1 = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42).fit(X_train, y_train)

# Predict on the held-out test split and compute the metrics
# (they are stored in variables only; nothing is displayed by this cell)
y_pred = elastic_net_1.predict(X_test)
mee = mean_euclidean_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# # Print the model coefficients
# print("ElasticNet Coefficients:", elastic_net_1.coef_)
# print("Intercept:", elastic_net_1.intercept_)

Printing results

In [None]:
# Start from a fresh, unfitted estimator with the same hyperparameters:
# print_results() fits the model it receives on its own internal training sub-split.
elastic_net_1 = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
print_results(elastic_net_1, X_train, y_train, X_test, y_test)

In [None]:
# Score R² Training: 0.9547931816284594
# Score MEE Training: 6.692036192113055
# Score MSE Training: 20.815623774474215


# Score R² Validation: 0.9571797782878969
# Score MEE Validation: 6.764854631297817
# Score MSE Validation: 20.579885097463862


# Score R² Test: 0.9545637482998135
# Score MEE Test: 6.780192014900597
# Score MSE Test: 22.207825323786604

Test with grid search + cross validation

In [None]:
# Exhaustive grid search over the ElasticNet hyperparameters, scored by (negated) MAE
model = ElasticNet()

# evaluation scheme: 10-fold cross-validation repeated 3 times (30 fits per candidate)
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# search space: 9 alpha values x 100 l1_ratio values (0.00, 0.01, ..., 0.99)
# NOTE(review): alpha=0.0 is part of the grid; scikit-learn's ElasticNet documentation
# advises against alpha=0 with this solver -- consider dropping it.
grid = {
    'alpha': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.0, 1.0, 10.0, 100.0],
    'l1_ratio': arange(0, 1, 0.01),
}

search = GridSearchCV(
    model,
    grid,
    scoring='neg_mean_absolute_error',
    cv=cv,
    n_jobs=-1,
)
# fit() returns the search object itself, so resultsMAE and search are the same object
resultsMAE = search.fit(X_train, y_train)

# summarize
# print('MAE: %.3f' % resultsMAE.best_score_)
# print('Config: %s' % resultsMAE.best_params_)

# recorded runtime: 268.86 seconds
# recorded best params: alpha 0.01, l1_ratio 0.99

In [None]:
# Evaluate the configuration selected with MAE scoring.
# print_results() re-fits whatever estimator it is given (on 80% of X_train), so
# hand it an unfitted clone: the refit model stored in resultsMAE.best_estimator_
# keeps its fit on the full X_train instead of being silently overwritten.
elastic_net_2 = clone(resultsMAE.best_estimator_)
print_results(elastic_net_2, X_train, y_train, X_test, y_test)


In [None]:
# Score R² Training: 0.962495086434597
# Score MEE Training: 6.105611159484347
# Score MSE Training: 18.051686330553537


# Score R² Validation: 0.9655414547518287
# Score MEE Validation: 6.243976710144644
# Score MSE Validation: 17.38034507309764


# Score R² Test: 0.9604116964227306
# Score MEE Test: 6.2972452453922765
# Score MSE Test: 19.371079092559786

Using MSE as metric

In [None]:
# Exhaustive grid search over the ElasticNet hyperparameters, scored by (negated) MSE
model = ElasticNet()

# evaluation scheme: 10-fold cross-validation repeated 3 times (30 fits per candidate)
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# search space: 9 alpha values x 100 l1_ratio values (0.00, 0.01, ..., 0.99)
# NOTE(review): alpha=0.0 is part of the grid; scikit-learn's ElasticNet documentation
# advises against alpha=0 with this solver -- consider dropping it.
grid = {
    'alpha': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.0, 1.0, 10.0, 100.0],
    'l1_ratio': arange(0, 1, 0.01),
}

search = GridSearchCV(
    model,
    grid,
    scoring='neg_mean_squared_error',
    cv=cv,
    n_jobs=-1,
)
# fit() returns the search object itself, so resultsMSE and search are the same object
resultsMSE = search.fit(X_train, y_train)

# summarize
# print('MSE: %.3f' % resultsMSE.best_score_)
# print('Config: %s' % resultsMSE.best_params_)

# recorded runtime: 254.36 seconds
# recorded best params: alpha 0.001, l1_ratio 0.0

In [None]:
# Use the best configuration from the MSE-scored grid search.
# print_results() re-fits whatever estimator it is given (on 80% of X_train), so
# hand it an unfitted clone: the refit model stored in resultsMSE.best_estimator_
# keeps its fit on the full X_train instead of being silently overwritten.
elastic_net_MSE = clone(resultsMSE.best_estimator_)
print_results(elastic_net_MSE, X_train, y_train, X_test, y_test)

In [None]:

# Score R² Training: 0.9625191542554449
# Score MEE Training: 6.106804360611419
# Score MSE Training: 18.048827258647616


# Score R² Validation: 0.9655258848986317
# Score MEE Validation: 6.249612319706419
# Score MSE Validation: 17.393419137094963


# Score R² Test: 0.9603150565532338
# Score MEE Test: 6.297340849273157
# Score MSE Test: 19.341945024913297

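Grid search with scikit-learn's built-in cross-validated estimator for ElasticNet, `MultiTaskElasticNetCV` (hyperparameters are selected by cross-validated mean squared error; R^2 is only what its `score` method reports)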

In [None]:
# Let MultiTaskElasticNetCV choose alpha and l1_ratio through its built-in cross-validation

# evaluation scheme: 10-fold cross-validation repeated 3 times
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# candidate values, the same ones used for the explicit grid searches
alphas = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.0, 1.0, 10.0, 100.0]
ratios = arange(0, 1, 0.01)

model = MultiTaskElasticNetCV(
    l1_ratio=ratios,
    alphas=alphas,
    cv=cv,
    n_jobs=-1,
)

In [None]:
# Run the cross-validated hyperparameter selection on the training split.
model.fit(X_train, y_train)
# Recorded runtime: 111 seconds

In [None]:

# summarize chosen configuration
# print('alpha: %f' % model.alpha_)
# print('l1_ratio_: %f' % model.l1_ratio_)

# params
# alpha: 0.01
# l1_ratio_: 0.91

In [None]:
# Build a fresh ElasticNet from the hyperparameters chosen by MultiTaskElasticNetCV
# and report its train / validation / test scores.
# NOTE(review): alpha_ / l1_ratio_ were selected for the multi-task estimator's penalty
# but are reused here in a plain ElasticNet -- confirm this is intended.
elastic_netLibrary = ElasticNet(alpha=model.alpha_, l1_ratio=model.l1_ratio_, random_state=42)
print_results(elastic_netLibrary, X_train, y_train, X_test, y_test)

In [None]:
# Score R² Training: 0.9624782198083132
# Score MEE Training: 6.107932564763391
# Score MSE Training: 18.056579885055363


# Score R² Validation: 0.9655356007546686
# Score MEE Validation: 6.244120916617933
# Score MSE Validation: 17.383572677145995


# Score R² Test: 0.9604744074115272
# Score MEE Test: 6.296618977387395
# Score MSE Test: 19.349389300052437