In [1]:
from ucimlrepo import fetch_ucirepo
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import mean_squared_error
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV

abalone = fetch_ucirepo(id=1)
X = abalone.data.features
y = abalone.data.targets

In [2]:
X1= X.copy()
X1['Sex'].replace('M', 0, inplace = True)
X1['Sex'].replace('F', 1, inplace = True)
X1['Sex'].replace('I', 2, inplace = True)

In [3]:
scaler = StandardScaler()
X1 = scaler.fit_transform(X1)
y1 = y.values.ravel()
X_train, X_test, y_train, y_test = train_test_split(X1, y1, test_size=0.3, random_state=1)

In [6]:
svr = SVR()
svr.fit(X_train, y_train)
y_pred = svr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean squared error (default_svr): {mse}")

Mean squared error (default_svr): 4.756407810404979


In [5]:
param_grid = {
    'C': [1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': [0.1, 1],
    'degree': [1]
}

outer_kf = KFold(n_splits=10, shuffle=True, random_state=1)
inner_kf = KFold(n_splits=10, shuffle=True, random_state=1)
grid_search = GridSearchCV(SVR(), param_grid, cv=inner_kf, scoring='neg_mean_squared_error', n_jobs=-1)
grid_search.fit(X1, y1)
best_svr = grid_search.best_estimator_
nested_scores = cross_val_score(grid_search, X1, y1, cv=outer_kf, scoring='neg_mean_squared_error')

print(f"Mean squared error (Nested K-fold): {-nested_scores.mean()}")

best_svr.fit(X_train, y_train)
y_pred = best_svr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean squared error (best_svr): {mse}")

Mean squared error (Nested K-fold): 4.494939624809039
Mean squared error (best_svr): 4.526985451821141
