In [1]:
from threading import get_ident

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the advertising table from the shared DATA directory (path is
# relative to this notebook's location).
df = pd.read_csv(filepath_or_buffer='../../DATA/Advertising.csv')

In [3]:
# Preview the first five rows to sanity-check the columns that were loaded.
df.head(n=5)

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [4]:
# Feature matrix: every column of df except the 'sales' target.
X = df.drop(columns='sales')

In [5]:
# Target vector: the 'sales' column as a Series.
y = df.loc[:, 'sales']

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
# Hold out 30% of the rows for final evaluation; the fixed seed keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

In [8]:
from sklearn.preprocessing import StandardScaler

In [9]:
# Standardising transformer: centres each feature and scales it to unit
# variance (both options are the library defaults, spelled out here).
scaler = StandardScaler(with_mean=True, with_std=True)

In [10]:
# Learn per-feature mean and standard deviation from the training split only.
# StandardScaler is unsupervised, so the target is deliberately not passed:
# its `y` argument is ignored and supplying it suggests a dependence on the
# labels that does not exist.
scaler.fit(X_train)

In [11]:
scaled_X_train = scaler.fit_transform(X_train)

In [12]:
scaled_X_test = scaler.fit(X_test)

In [13]:
from sklearn.linear_model import ElasticNet

In [14]:
base_elastic_model = ElasticNet()

In [15]:
param_grid = {
    'alpha': [0.1, 1, 5, 10, 50, 100],
    'l1_ratio': [.1, .5, .7, .9, .95, .99, 1]
}

In [16]:
from sklearn.model_selection import GridSearchCV

In [17]:
grid_model = GridSearchCV(
    estimator=base_elastic_model,
    param_grid=param_grid,
    n_jobs=-1,
    scoring='neg_mean_squared_error',
    cv=5,
    verbose=2,
)

In [18]:
grid_model.fit(X_train, y_train)

Fitting 5 folds for each of 42 candidates, totalling 210 fits


In [19]:
grid_model.best_estimator_

In [20]:
grid_model.best_params_

{'alpha': 5, 'l1_ratio': 0.5}

In [22]:
y_pred = grid_model.predict(X_test)

In [24]:
# Echo the predictions made for the held-out test split (the whole array is
# displayed as the cell's output).
y_pred

array([15.4095113 , 19.53355871, 11.54088067, 16.69266165,  9.17231854,
        7.13757738, 20.18893265, 17.26482361, 10.06282362, 19.07712098,
       12.19334511, 13.98676834, 13.97635539, 20.93501503, 18.26762922,
        9.96837394, 15.7955504 ,  7.99162344,  7.73331946, 20.19380089,
        7.85988809, 18.01929464, 24.31813657, 22.54529234,  8.17839859,
       12.91870851, 21.37223179,  8.2300128 , 12.59781481, 12.24239666,
       10.65678831, 19.0981798 , 10.31790056,  6.84434676, 17.17666064,
        8.07575689,  9.42310188,  8.53390137, 10.43168003, 10.90979259,
       12.8752984 , 10.06203325, 10.34551019,  8.10270215, 11.53527041,
       10.13613175,  8.93337872, 16.34754064, 13.41324646, 20.48349901,
       12.30851888, 14.15679545, 17.71362678, 11.26237379, 12.83170693,
        5.69128547, 22.98583286, 12.39928078, 18.78672222, 15.19549754])

In [25]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [26]:
# Error metrics on the held-out split, comparing true sales to predictions.
# RMSE is derived from MSE so it is expressed in the target's own units.
MSE = mean_squared_error(y_true=y_test, y_pred=y_pred)
RMSE = np.sqrt(MSE)
MAE = mean_absolute_error(y_true=y_test, y_pred=y_pred)

In [27]:
# Report the three test-set error metrics on a single line.
metrics_report = f'MAE: {MAE}, MSE: {MSE}, RMSE: {RMSE}'
print(metrics_report)

MAE: 1.2091998053856274, MSE: 2.369712034469649, RMSE: 1.5393869021365776
