## Import Libraries

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV


## Import Dataset

In [2]:
df_pen = sns.load_dataset('penguins')

In [3]:
df_pen = df_pen.sample(frac=1).reset_index(drop=True)

In [4]:
X = df_pen.drop(['body_mass_g'], axis = 1)
y = df_pen['body_mass_g']

## Data Pre-processing

In [5]:
categorical_x = ['species', 'island', 'sex']
numerical_x = X.drop(categorical_x, axis = 1).columns

In [6]:
## If y is categorical:
# y.fillna(y.mode(), inplace= True)
##If y is numerical
y.fillna(y.mean(), inplace= True)
for i in numerical_x:
    X[i].fillna(X[i].mean(), inplace = True)

for i in categorical_x:
    X[i].fillna(X[i].mode().iloc[0], inplace = True)
    
categoricas = pd.get_dummies(X[categorical_x], drop_first=True)
X = pd.concat([categoricas, X[numerical_x]], axis = 1)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=45)

In [8]:
escalador = StandardScaler()
escalador.fit(X_train)

X_train = escalador.transform(X_train)
X_test = escalador.transform(X_test)

## Model implementation

In [9]:
base_model = SVR()

In [10]:
base_model.fit(X_train,y_train)

SVR()

In [11]:
base_preds = base_model.predict(X_test)

### Evaluation

In [12]:
print(f'Mean Absolute Error: {metrics.mean_absolute_error(y_test, base_preds)}')
print(f'Mean Squared Error: {metrics.mean_squared_error(y_test, base_preds)}')
print(f'Root Mean Squared Error: {np.sqrt(metrics.mean_squared_error(y_test, base_preds))}')

Mean Absolute Error: 667.8253728804834
Mean Squared Error: 660690.06728429
Root Mean Squared Error: 812.8284365622859


## Grid Search

In [13]:
parametros = {'C':[0.001,0.01,0.1,0.5,1],
             'kernel':['linear','rbf','poly'],
              'gamma':['scale','auto'],
              'degree':[1,2,3,4],
              'epsilon':[0,0.01,0.1,0.5,1,2]}

In [14]:
svr = SVR()
grid = GridSearchCV(svr,param_grid=parametros, scoring = 'neg_mean_squared_error')

In [15]:
grid.fit(X_train,y_train)

GridSearchCV(estimator=SVR(),
             param_grid={'C': [0.001, 0.01, 0.1, 0.5, 1],
                         'degree': [1, 2, 3, 4],
                         'epsilon': [0, 0.01, 0.1, 0.5, 1, 2],
                         'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'rbf', 'poly']},
             scoring='neg_mean_squared_error')

In [16]:
grid.best_params_

{'C': 1, 'degree': 1, 'epsilon': 0.5, 'gamma': 'scale', 'kernel': 'linear'}

In [17]:
grid_preds = grid.predict(X_test)

### Evaluation

In [18]:
print(f'Mean Absolute Error: {metrics.mean_absolute_error(y_test, grid_preds)}')
print(f'Mean Squared Error: {metrics.mean_squared_error(y_test, grid_preds)}')
print(f'Root Mean Squared Error: {np.sqrt(metrics.mean_squared_error(y_test, grid_preds))}')

Mean Absolute Error: 383.99742267398824
Mean Squared Error: 215296.67087939047
Root Mean Squared Error: 464.00072292981446
