# **Support Vector Regression on dataset CUP**




In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_predict, train_test_split, GridSearchCV
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error,r2_score, make_scorer
from sklearn.model_selection import cross_val_score, KFold


# Read the Dataset

In [None]:
def ReadFile(s):
    """Load a CUP CSV file into a DataFrame indexed by ``Id``.

    The first 7 lines of the file are skipped and the columns are named
    explicitly: ``Id``, ten inputs ``i1``..``i10`` and three outputs
    ``Y1``..``Y3``.

    Parameters
    ----------
    s : path or file-like object handed to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed rows with ``Id`` as index and the remaining 13 columns.
    """
    input_names = [f'i{k}' for k in range(1, 11)]
    output_names = [f'Y{k}' for k in range(1, 4)]
    frame = pd.read_csv(
        s,
        sep=",",
        names=['Id', *input_names, *output_names],
        skiprows=7,
    )
    return frame.set_index('Id')

# Load the ML-CUP23 "TR" CSV file into a DataFrame indexed by 'Id'
data=ReadFile("Dataset_Cup/ML-CUP23-TR.csv")

In [None]:
# With 'Id' used as the index, the first ten columns (i1..i10) are the inputs
# and the following three (Y1..Y3) are the values to predict.
n_inputs = 10
features = data.iloc[:, :n_inputs]
targets = data.iloc[:, n_inputs:13]

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.2, random_state=42)

# Define function for MEE (Mean Euclidean Error)

In [None]:
def mean_euclidean_error(y_true, y_pred):
    """Return the Mean Euclidean Error (MEE) between targets and predictions.

    For every sample (row) the Euclidean distance between the true and the
    predicted output vector is computed; the MEE is the average of those
    distances.

    Parameters
    ----------
    y_true : array-like of shape (n_samples, n_outputs)
        Ground-truth values (ndarray, DataFrame or nested list).
    y_pred : array-like of shape (n_samples, n_outputs)
        Predicted values, row-aligned by position with ``y_true``.

    Returns
    -------
    float
        Mean of the per-sample Euclidean distances.
    """
    # Work on plain arrays: pandas arithmetic aligns operands on index/column
    # labels, which silently yields NaN (and a bogus score) when the two
    # inputs carry different labels. Rows must be compared by position.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    errors = np.linalg.norm(y_true - y_pred, axis=1)
    return np.mean(errors)

In [None]:
# Wrap MEE as a scikit-learn scorer; greater_is_better=False marks it as a loss (lower is better)
scoring=make_scorer(mean_euclidean_error,greater_is_better=False)

# Classic
Baseline SVR with default hyperparameters; the reported results are the mean over the cross-validation folds.

In [None]:

# Baseline model: a default SVR wrapped in MultiOutputRegressor so it can be
# fitted on the multi-column targets.
svr = SVR()
multioutput_regressor = MultiOutputRegressor(svr)
num_folds = 5
kf = KFold(n_splits=num_folds)

# Per-fold metric histories
r2_scores, mse_scores, mee_scores = [], [], []

for fit_idx, held_out_idx in kf.split(X_train):
    # Slice the fold's fitting part and its held-out (validation) part
    X_fit, y_fit = X_train.iloc[fit_idx], y_train.iloc[fit_idx]
    X_held, y_held = X_train.iloc[held_out_idx], y_train.iloc[held_out_idx]

    multioutput_regressor.fit(X_fit, y_fit)
    fold_predictions = multioutput_regressor.predict(X_held)

    r2_fold = r2_score(y_held, fold_predictions)
    mse_fold = mean_squared_error(y_held, fold_predictions)
    mee_fold = mean_euclidean_error(y_held, fold_predictions)

    for history, fold_value in (
        (r2_scores, r2_fold),
        (mse_scores, mse_fold),
        (mee_scores, mee_fold),
    ):
        history.append(fold_value)

    print(f"Fold R2 Score: {r2_fold}, MSE: {mse_fold}, MEE: {mee_fold}")

# Average each metric over the folds
mean_r2 = sum(r2_scores) / num_folds
mean_mse = sum(mse_scores) / num_folds
mean_mee = sum(mee_scores) / num_folds

print(f"\nMean R2 Score: {mean_r2}, Mean MSE: {mean_mse}, Mean MEE: {mean_mee}")

In [None]:
# Mean cross-validation metrics of the baseline SVR, ordered R², MSE, MEE so
# they line up with the other result lists and with the plot labels
# (the previous R², MEE, MSE order swapped the MSE and MEE bars in the chart).
svr_CV_results = [mean_r2, mean_mse, mean_mee]

# Grid Search + Cross validation

## 2 parameters

In [None]:

# Multi-output SVR whose kernel and C are tuned by an exhaustive grid search
svr = SVR()
multioutput_regressor = MultiOutputRegressor(svr)

# Hyper-parameter grid; the 'estimator__' prefix addresses the wrapped SVR
param_grid = {
    'estimator__kernel': ['linear', 'rbf'],
    'estimator__C': [1, 10, 100]
}

# Select the model on MEE through the `scoring` scorer built earlier in this
# notebook. Without it GridSearchCV falls back to the estimator's default
# score, so the MEE scorer was defined but never actually used.
grid_search = GridSearchCV(multioutput_regressor, param_grid, scoring=scoring, cv=5)
grid_search.fit(X_train, y_train)

# Best model
best_estimator = grid_search.best_estimator_

Refit the best model on a training split, then print the training, validation and test scores.


In [None]:
# Carve a validation split out of the training data so that training,
# validation and test scores can all be reported.
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

# Refit the selected model on the reduced training split only
best_estimator.fit(X_tr, y_tr)

# Training
train_pred = best_estimator.predict(X_tr)
r2_train, MEE_train, MSE_train = (
    r2_score(y_tr, train_pred),
    mean_euclidean_error(y_tr, train_pred),
    mean_squared_error(y_tr, train_pred),
)
print(f"Score R² Training: {r2_train}")
print(f"Score MEE Training: {MEE_train}")
print(f"Score MSE Training: {MSE_train}")

print("\n")

# Validation
val_pred = best_estimator.predict(X_val)
r2_val, MEE_val, MSE_val = (
    r2_score(y_val, val_pred),
    mean_euclidean_error(y_val, val_pred),
    mean_squared_error(y_val, val_pred),
)
print(f"Score R² Validation: {r2_val}")
print(f"Score MEE Validation: {MEE_val}")
print(f"Score MSE Validation: {MSE_val}")

print("\n")

# Test (held-out data never seen during the grid search)
predictions_test = best_estimator.predict(X_test)
r2_test, MEE_test, MSE_test = (
    r2_score(y_test, predictions_test),
    mean_euclidean_error(y_test, predictions_test),
    mean_squared_error(y_test, predictions_test),
)
print(f"Score R² rispetto ai dati di test: {r2_test}")
print(f"Score MEE rispetto ai dati di test: {MEE_test}")
print(f"Score MSE rispetto ai dati di test: {MSE_test}")


In [None]:
# Test-set metrics of the 2-parameter grid search, in the order R², MSE, MEE
svr_GSCV2_results = [r2_test, MSE_test, MEE_test]

## 4 parameters

In [None]:
# Multi-output SVR tuned over C, kernel, gamma and epsilon
svr = SVR()
multioutput_regressor = MultiOutputRegressor(svr)

# Hyper-parameter grid; the 'estimator__' prefix addresses the wrapped SVR
param_grid2 = {
    'estimator__C': [100, 250, 500],
    'estimator__kernel': ['linear', 'rbf'],
    'estimator__gamma': [0.1, 1, 'scale'],
    'estimator__epsilon': [0.1, 0.2]
}


# Search for the best parameters with a cross-validated grid search.
# The model is selected on MEE through the `scoring` scorer built earlier in
# this notebook; without it GridSearchCV falls back to the estimator's default
# score, so the MEE scorer was defined but never actually used.
grid_search = GridSearchCV(multioutput_regressor, param_grid2, scoring=scoring, cv=5)
grid_search.fit(X_train, y_train)

# Keep the best model found by the grid search
best_estimator2 = grid_search.best_estimator_

In [None]:
# Carve a validation split out of the training data so that training,
# validation and test scores can all be reported.
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

# Refit the selected model on the reduced training split only
best_estimator2.fit(X_tr, y_tr)

# Training
train_pred = best_estimator2.predict(X_tr)
r2_train, MEE_train, MSE_train = (
    r2_score(y_tr, train_pred),
    mean_euclidean_error(y_tr, train_pred),
    mean_squared_error(y_tr, train_pred),
)
print(f"Score R² Training: {r2_train}")
print(f"Score MEE Training: {MEE_train}")
print(f"Score MSE Training: {MSE_train}")

print("\n")

# Validation
val_pred = best_estimator2.predict(X_val)
r2_val, MEE_val, MSE_val = (
    r2_score(y_val, val_pred),
    mean_euclidean_error(y_val, val_pred),
    mean_squared_error(y_val, val_pred),
)
print(f"Score R² Validation: {r2_val}")
print(f"Score MEE Validation: {MEE_val}")
print(f"Score MSE Validation: {MSE_val}")

print("\n")

# Test (held-out data never seen during the grid search)
predictions_test = best_estimator2.predict(X_test)
r2_test, MEE_test, MSE_test = (
    r2_score(y_test, predictions_test),
    mean_euclidean_error(y_test, predictions_test),
    mean_squared_error(y_test, predictions_test),
)
print(f"Score R² test: {r2_test}")
print(f"Score MEE test: {MEE_test}")
print(f"Score MSE test: {MSE_test}")

In [None]:
# Test-set metrics of the 4-parameter grid search, in the order R², MSE, MEE
svr_GSCV4_results = [r2_test, MSE_test, MEE_test]

# Graphics

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Grouped bar chart: one group per metric, one bar per SVR configuration
labels = ['R²', 'MSE', 'MEE']

sns.set_palette("Set2")
palette = sns.color_palette()

bar_width = 0.2
index = range(len(labels))

fig, ax = plt.subplots(figsize=(8, 5))

# (legend label, metric values) for each configuration, in drawing order
series = [
    ('CV', svr_CV_results),
    ('GSCV - 2 params', svr_GSCV2_results),
    ('GSCV - 4 params', svr_GSCV4_results),
]

containers = []
for slot, (legend_label, values) in enumerate(series):
    # Shift each configuration by one bar width inside the metric group
    positions = [i + slot * bar_width for i in index]
    containers.append(
        ax.bar(positions, values, bar_width, label=legend_label, color=palette[slot])
    )
bar1, bar2, bar3 = containers

ax.set_xlabel('Metrics')
ax.set_ylabel('Values')
ax.set_title('Comparing different settings of SVR among Test')
# Put the tick under the middle bar of every group
ax.set_xticks([i + bar_width for i in index])
ax.set_xticklabels(labels)
ax.legend()

# Add values above the bars
for container, (_, values) in zip(containers, series):
    for rect, value in zip(container, values):
        x_center = rect.get_x() + rect.get_width() / 2
        ax.text(x_center, rect.get_height(), f'{round(value, 3)}',
                ha='center', va='bottom', fontsize=10)

ax.grid(axis='both', linestyle='--', alpha=0.7)

# One y tick per integer across the current axis range
y_low, y_high = ax.get_ylim()
ax.set_yticks(range(int(y_low), int(y_high) + 1, 1))

plt.show()
