In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_csv('AmesHousing_final.csv')

In [3]:
X = df.drop('SalePrice', axis=1)
y = df['SalePrice']

### Train Test Split

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

### Standard Scaler

In [6]:
from sklearn.preprocessing import StandardScaler

In [7]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Support Vector Regression

In [8]:
from sklearn.svm import SVR, LinearSVR

Try with base model (default hyperparameters: C = 1). Decreasing C corresponds to more regularization.

In [17]:
help(SVR())

Help on SVR in module sklearn.svm._classes object:

class SVR(sklearn.base.RegressorMixin, sklearn.svm._base.BaseLibSVM)
 |  SVR(*, kernel='rbf', degree=3, gamma='scale', coef0=0.0, tol=0.001, C=1.0, epsilon=0.1, shrinking=True, cache_size=200, verbose=False, max_iter=-1)
 |  
 |  Epsilon-Support Vector Regression.
 |  
 |  The free parameters in the model are C and epsilon.
 |  
 |  The implementation is based on libsvm. The fit time complexity
 |  is more than quadratic with the number of samples which makes it hard
 |  to scale to datasets with more than a couple of 10000 samples. For large
 |  datasets consider using :class:`sklearn.svm.LinearSVR` or
 |  :class:`sklearn.linear_model.SGDRegressor` instead, possibly after a
 |  :class:`sklearn.kernel_approximation.Nystroem` transformer.
 |  
 |  Read more in the :ref:`User Guide <svm_regression>`.
 |  
 |  Parameters
 |  ----------
 |  kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'
 |       Specifies the 

In [9]:
svr_model = SVR()

In [10]:
svr_model.fit(X_train, y_train)

SVR()

In [11]:
svr_preds = svr_model.predict(X_test)

### Base Model Evaluation

In [12]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

Base model performs poorly. Try grid search to attain better model.

In [13]:
mean_absolute_error(y_test, svr_preds)

56159.390862290325

In [14]:
np.sqrt(mean_squared_error(y_test, svr_preds))

81280.45930158136

In [15]:
y_test.mean()

179501.51535836176

### Grid Search Cross Validation

In [16]:
from sklearn.model_selection import GridSearchCV

In [19]:
param_grid = {'C':[0.001,0.01,0.1,0.5,1],
             'kernel':['linear','rbf','poly'],
              'gamma':['scale','auto'],
              'degree':[2,3,4],
              'epsilon':[0,0.01,0.1,0.5,1,2]}

In [20]:
svr = SVR()
grid_model = GridSearchCV(estimator=svr, param_grid=param_grid)

In [22]:
grid_model.fit(X_train, y_train)

GridSearchCV(estimator=SVR(),
             param_grid={'C': [0.001, 0.01, 0.1, 0.5, 1], 'degree': [2, 3, 4],
                         'epsilon': [0, 0.01, 0.1, 0.5, 1, 2],
                         'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'rbf', 'poly']})

In [24]:
grid_model.best_params_

{'C': 1, 'degree': 2, 'epsilon': 0, 'gamma': 'scale', 'kernel': 'linear'}

In [25]:
grid_preds = grid_model.predict(X_test)

In [26]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [27]:
mean_absolute_error(y_test, grid_preds)

35133.290327241855

In [28]:
np.sqrt(mean_squared_error(y_test, grid_preds))

59116.62483533184

While performing much better than base SVR model, the grid search model still underperforms the Elastic Net model.