In [10]:
#import libraries
import pandas as pd
import numpy as np
import hvplot.pandas
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import max_error, median_absolute_error, mean_squared_error, r2_score, explained_variance_score
from sklearn.model_selection import GridSearchCV
pd.options.display.float_format

In [11]:
file = Path("Resources/clean_data_v.csv")
df = pd.read_csv(file, index_col='Unnamed: 0')

y = df['price'].values
X = df.drop(columns = 'price')

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train , X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=7)

In [12]:
#set up a results data frame

results = pd.DataFrame(index = ['max_error', 'median_absolute_error', 'mean_squared_error', 'r2_score', 'explained_variance_score'], 
                       columns = ['SVR_rbf', 'SVR_lin', 'SVR_poly' ])



In [13]:
svr_rbf = SVR(kernel='rbf' ) 
svr_lin = SVR(kernel='linear', gamma='auto')
svr_poly = SVR(kernel='poly',  gamma='auto', degree=3, epsilon=.1, coef0=1)

svr_rbf.fit(X_train, y_train)

y_pred = svr_rbf.predict(X_test)

results.loc['max_error','SVR_rbf'] = max_error(y_test, y_pred)
results.loc['median_absolute_error','SVR_rbf'] = median_absolute_error(y_test, y_pred)
results.loc['mean_squared_error','SVR_rbf'] = mean_squared_error(y_test, y_pred)
results.loc['r2_score','SVR_rbf'] = r2_score(y_test, y_pred)
results.loc['explained_variance_score','SVR_rbf'] = explained_variance_score(y_test, y_pred)




svr_lin.fit(X_train, y_train)

y_pred_lin = svr_lin.predict(X_test)

results.loc['max_error','SVR_lin'] = max_error(y_test, y_pred_lin)
results.loc['median_absolute_error','SVR_lin'] = median_absolute_error(y_test, y_pred_lin)
results.loc['mean_squared_error','SVR_lin'] = mean_squared_error(y_test, y_pred_lin)
results.loc['r2_score','SVR_lin'] = r2_score(y_test, y_pred_lin)
results.loc['explained_variance_score','SVR_lin'] = explained_variance_score(y_test, y_pred_lin)



svr_poly.fit(X_train, y_train)

y_pred_poly = svr_poly.predict(X_test)

results.loc['max_error','SVR_poly'] = max_error(y_test, y_pred_poly)
results.loc['median_absolute_error','SVR_poly'] = median_absolute_error(y_test, y_pred_poly)
results.loc['mean_squared_error','SVR_poly'] = mean_squared_error(y_test, y_pred_poly)
results.loc['r2_score','SVR_poly'] = r2_score(y_test, y_pred_poly)
results.loc['explained_variance_score','SVR_poly'] = explained_variance_score(y_test, y_pred_poly)

In [14]:
results

Unnamed: 0,SVR_rbf,SVR_lin,SVR_poly
max_error,15762.9,11508.9,19283.7
median_absolute_error,669.892,397.003,215.908
mean_squared_error,11029200.0,1988310.0,1715110.0
r2_score,0.301902,0.87415,0.891442
explained_variance_score,0.409356,0.881403,0.898785


In [None]:
from sklearn.metrics import make_scorer
scorer = make_scorer(mean_squared_error, greater_is_better=False)


parameters = [{'kernel': ['rbf'], 'gamma': [ 0.01, 0.1, 0.2, 0.5, 0.6],'C': [10, 100, 1000]}]
print("Tuning hyper-parameters")
svr_gs = GridSearchCV(SVR(epsilon = 0.01), parameters, cv = 3, scoring=scorer)
svr_gs.fit(X_train, y_train)

# Checking the score for all parameters
print("Grid scores on training set:")
means = svr_gs.cv_results_['mean_test_score']
stds = svr_gs.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, svr_gs.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r"% (mean, std * 2, params))

Tuning hyper-parameters
