# Hyperparameter Tuning

In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.linear_model import Ridge

# Load the cleaned advertising/sales dataset and preview its first rows.
sales_df = pd.read_csv("datasets/advertising_and_sales_clean.csv")
print(sales_df.head())

# Features are every column except the target ("sales") and the
# "influencer" column; the target is the "sales" column on its own.
X = sales_df.drop(columns=["sales", "influencer"]).to_numpy()
y = sales_df["sales"].to_numpy()

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=5,
)

        tv     radio  social_media influencer      sales
0  16000.0   6566.23       2907.98       Mega   54732.76
1  13000.0   9237.76       2409.57       Mega   46677.90
2  41000.0  15886.45       2913.41       Mega  150177.83
3  83000.0  30020.03       6922.30       Mega  298246.34
4  15000.0   8437.41       1406.00      Micro   56594.18


## Grid Search CV

In [3]:
from sklearn.model_selection import GridSearchCV

# Hyperparameter grid: candidate alpha values starting at 0.001 and
# stepping by 0.001 up to (but excluding) 1.
param_grid = {"alpha": np.arange(0.001, 1, 0.001)}

# Cross-validation splitter: 5 shuffled folds, seeded for reproducibility.
kf = KFold(n_splits=5, shuffle=True, random_state=5)

# Exhaustive search: every alpha in the grid is cross-validated with `kf`.
ridge = Ridge()
ridge_cv = GridSearchCV(estimator=ridge, param_grid=param_grid, cv=kf)
ridge_cv.fit(X_train, y_train)

# Best alpha found and the cross-validation score it achieved.
print(ridge_cv.best_params_)
print(ridge_cv.best_score_)

# Score of the fitted search object on the held-out test split.
grid_score = ridge_cv.score(X_test, y_test)
print(grid_score)

{'alpha': 0.5750000000000001}
0.9989799026957102
0.9990124514317014


## Randomized Search CV

In [4]:
from sklearn.model_selection import RandomizedSearchCV

# Candidate alpha values to sample from: 0.001 up to (but excluding) 1
# in steps of 0.001.
param_grid = {"alpha": np.arange(0.001, 1, 0.001)}

# Cross-validation splitter: 5 shuffled folds, seeded for reproducibility.
kf = KFold(n_splits=5, shuffle=True, random_state=5)

# Randomized search: only a sample of the candidates is cross-validated.
# n_iter=10 is the library default, spelled out here so the search budget
# is visible; random_state fixes which candidates are drawn.
ridge = Ridge()
ridge_cv = RandomizedSearchCV(
    estimator=ridge,
    param_distributions=param_grid,
    n_iter=10,
    cv=kf,
    random_state=5,
)
ridge_cv.fit(X_train, y_train)

# Best sampled alpha and the cross-validation score it achieved.
print(ridge_cv.best_params_)
print(ridge_cv.best_score_)

# Score of the fitted search object on the held-out test split.
random_score = ridge_cv.score(X_test, y_test)
print(random_score)

{'alpha': 0.545}
0.9989799026957101
0.9990124514317015
