# Hyperparameter Tuning

## Imports

In [1]:
import pickle
import pandas as pd
from sklearn import svm

from cross_validation import custom_cross_validation, hyperparameter_search

## Load DataFrame

In [2]:
dirname = '../data/processed/'

# Read the trimmed housing data and, in the same chain, discard the
# non-numeric features (all of them except 'postal_code', which is kept).
df = (
    pd.read_csv(dirname + 'housing_data_2_trimmed.csv')
    .drop(columns=['city', 'state', 'sold_date'])
)

# Show (rows, columns) as a quick sanity check on the loaded frame.
df.shape

(5643, 51)

In [3]:
# Run the project's cross-validation helper on the full DataFrame. The result
# is indexed as [0] and [1] by the hyperparameter search in the next cell.
# NOTE(review): the second argument (5) is presumably the number of folds --
# confirm against cross_validation.custom_cross_validation.
train_validate_folds = custom_cross_validation(df, 5)

In [4]:
# Search a small 2x2 grid of SVM hyperparameters (C and gamma) using the first
# two elements of the cross-validation result.
# NOTE(review): presumably [0] holds the training folds and [1] the validation
# folds -- confirm against cross_validation.hyperparameter_search.
best_model = hyperparameter_search(
    train_validate_folds[0],
    train_validate_folds[1],
    param_grid=dict(
        C=[250_000, 500_000],
        gamma=[10, 50],
    ),
)

# Display the winning combination; the recorded output is a dict carrying the
# chosen 'C' and 'gamma' together with a 'score'.
best_model

{'C': 500000, 'gamma': 10, 'score': 0.8058787487874819}

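Next, we instantiate an SVR with the best hyperparameters found above (`C` and `gamma` from `best_model`) and pickle it to an external file. Note that the estimator is saved as configured here, without being fitted in this notebook.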

In [5]:
# Configure an RBF-kernel support vector regressor using the hyperparameters
# picked by the search above; epsilon is fixed at 1.0 rather than tuned.
# NOTE(review): no .fit() call happens in this cell, so the pickled object is
# an unfitted estimator -- confirm that training is done by whoever loads it.
model = svm.SVR(kernel='rbf', C=best_model['C'], gamma=best_model['gamma'], epsilon=1.0)

# Destination for the serialized estimator (relative to the notebook directory).
# Note that this rebinds `dirname`, which earlier pointed at the data directory.
dirname, basename = '../models/', 'best_svm.pkl'

# Serialize to bytes first, then write them out in binary mode.
with open(dirname + basename, 'wb') as model_file:
    model_file.write(pickle.dumps(model))