# Import Libraries and Dataset

This notebook uses the auto insurance dataset. The task is to predict the total amount paid out in claims (in thousands of Swedish kronor) from the number of claims in each geographical region.

In [1]:
import pandas as pd
from scipy.stats import loguniform
from sklearn.linear_model import Ridge
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import RandomizedSearchCV

# location of the raw auto-insurance CSV
url = (
    'https://raw.githubusercontent.com/'
    'jbrownlee/Datasets/master/auto-insurance.csv'
)
# header=None: no row is used as column names, so the columns get
# positional integer labels and the first row is kept as data
df = pd.read_csv(filepath_or_buffer=url, header=None)

In [3]:
# features: every column except the last one (kept 2-D)
X = df.values[:, :-1]
# target: the last column only (1-D)
y = df.values[:, -1]

# Define Model

In [6]:
# Ridge regression estimator used to predict the claims total; its
# hyperparameters are left at their defaults here and tuned by the search.
# random_state is fixed because the 'sag' solver included in the search space
# shuffles the data, so an unseeded model would make the results vary from
# run to run even though the CV splitter and the search are both seeded.
model = Ridge(random_state=1)

## Randomized Search for Hyperparameter Tuning

In [None]:
# Evaluation procedure: 10-fold cross-validation repeated 3 times. The fixed
# random_state makes the 30 train/test splits identical for every candidate.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# Search space: list-valued entries are sampled uniformly, while `alpha` is
# drawn from a log-uniform distribution over [1e-5, 100].
space = {
    'solver': ['svd', 'cholesky', 'lsqr', 'sag'],
    'alpha': loguniform(1e-5, 100),
    'fit_intercept': [True, False],
}
# `normalize` was deprecated for Ridge in scikit-learn 1.0 and removed in 1.2.
# Sampling it on a newer release makes every candidate fit fail with an
# invalid-parameter error, so only search over it when the installed
# estimator still exposes the parameter.
if 'normalize' in model.get_params():
    space['normalize'] = [True, False]

# Randomized search: 500 sampled configurations, each scored with negated
# mean absolute error (higher is better) on the CV splits defined above,
# using all available cores.
search = RandomizedSearchCV(model,
                            space,
                            n_iter=500,
                            scoring='neg_mean_absolute_error',
                            n_jobs=-1,
                            cv=cv,
                            random_state=1)