In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the advertising dataset; the relative path is resolved against the
# notebook's current working directory.
df = pd.read_csv(
    filepath_or_buffer='../../data/advertising_dataset_regression.csv',
)

In [3]:
# Peek at the first five rows to confirm the columns loaded as expected.
df.head(n=5)

Unnamed: 0,TV,radio,newspaper,sales
0,112.4,2.6,16.9,8.8
1,285.2,26.6,27.9,19.9
2,219.6,27.0,17.7,21.6
3,179.6,31.9,8.9,19.1
4,46.8,36.3,12.1,14.5


In [4]:
# Target is the 'sales' column; the feature matrix is every other column.
y = df['sales']
X = df.drop(columns='sales')

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
# Hold out 30% of the rows for the final evaluation; the fixed random_state
# keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

In [7]:
from sklearn.preprocessing import StandardScaler

In [8]:
# Standardizer for the feature columns; it is fitted on the training split in
# the next cell.
scaler = StandardScaler()

In [9]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted transform to the test split so no test information reaches the
# scaler. Both names are rebound to the scaled output, so this cell is meant
# to be run once per fresh train/test split.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
from sklearn.linear_model import ElasticNet

In [11]:
# Unconfigured ElasticNet; the grid search below supplies alpha and l1_ratio
# for each candidate it evaluates.
base_elasticnet_model = ElasticNet()

In [12]:
# Hyper-parameter candidates for the grid search: 5 alpha values x 6 l1_ratio
# values = 30 combinations. An l1_ratio of 1 is a pure L1 (lasso) penalty.
parameter_grid = dict(
    alpha=[0.1, 1, 5, 50, 100],
    l1_ratio=[0.1, 0.5, 0.7, 0.95, 0.99, 1],
)

In [13]:
from sklearn.model_selection import GridSearchCV

In [14]:
# Exhaustive search over parameter_grid with 5-fold cross-validation.
# Scoring is negated MSE, so the best candidate is the one with the highest
# (closest to zero) score. verbose=2 prints one line per individual fit.
# NOTE(review): the features were standardized on the whole training split
# before this search, so every validation fold contributed to the scaling
# statistics; using a Pipeline (scaler + model) as the estimator would avoid that.
grid_model = GridSearchCV(
    estimator=base_elasticnet_model,
    param_grid=parameter_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    verbose=2,
)

In [15]:
# Run the search on the scaled training data
# (30 candidates x 5 folds = 150 cross-validation fits).
grid_model.fit(X=X_train, y=y_train)

Fitting 5 folds for each of 30 candidates, totalling 150 fits
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.5; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.5; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.5; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.5; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.5; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.7; total time=   0.0s
[CV] END ............................alpha=0.1,

In [16]:
# Display the estimator that the search refit using the best-scoring parameters.
grid_model.best_estimator_

In [17]:
# Display the winning parameter combination.
# NOTE(review): the recorded result picks the smallest alpha and the largest
# l1_ratio in the grid, i.e. both sit on the grid boundary. Consider extending
# the grid with smaller alpha values to confirm the optimum is not outside it.
grid_model.best_params_

{'alpha': 0.1, 'l1_ratio': 1}

In [18]:
# Raw per-candidate cross-validation results as a dict of arrays (fit timings,
# scores, ...). The following cells load this same dict into a DataFrame,
# which is easier to read than this raw dump.
grid_model.cv_results_

{'mean_fit_time': array([0.00910029, 0.00083823, 0.00079064, 0.00086107, 0.00087895,
        0.00077519, 0.00078135, 0.00070095, 0.00097265, 0.00086713,
        0.00074029, 0.00074744, 0.0008739 , 0.00083547, 0.00074582,
        0.00076852, 0.00073228, 0.00086684, 0.0008275 , 0.00073609,
        0.00072279, 0.00078731, 0.00086451, 0.00073476, 0.00072904,
        0.00072203, 0.00085416, 0.00096598, 0.00088544, 0.00092916]),
 'std_fit_time': array([1.59729667e-02, 1.20331763e-04, 2.10254369e-05, 7.77341977e-05,
        1.09639225e-04, 6.06079724e-05, 6.57953036e-05, 9.71039974e-06,
        1.64142740e-04, 9.83570701e-05, 2.43929123e-05, 1.62229784e-05,
        1.76160233e-04, 8.77796727e-05, 3.25957176e-05, 7.38127287e-05,
        8.41938341e-06, 1.17084973e-04, 1.18916189e-04, 1.50143899e-05,
        5.07467659e-06, 8.81521414e-05, 8.89199663e-05, 2.10140789e-05,
        1.68164721e-05, 7.57045630e-06, 1.24974334e-04, 1.61942815e-04,
        1.25344282e-04, 1.18283530e-04]),
 'mean_scor

In [20]:
# Tabulate the cross-validation results: one row per parameter combination.
results = pd.DataFrame(data=grid_model.cv_results_)

In [21]:
# Show the first five candidates with their timings and per-split scores.
results.head(n=5)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_l1_ratio,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.0091,0.015973,0.000649,0.000197,0.1,0.1,"{'alpha': 0.1, 'l1_ratio': 0.1}",-5.35125,-4.4014,-2.943594,-2.621422,-5.354233,-4.13438,1.161647,6
1,0.000838,0.00012,0.00047,1.2e-05,0.1,0.5,"{'alpha': 0.1, 'l1_ratio': 0.5}",-5.12202,-4.17795,-2.936291,-2.653581,-5.208929,-4.019754,1.06719,5
2,0.000791,2.1e-05,0.000487,5.4e-05,0.1,0.7,"{'alpha': 0.1, 'l1_ratio': 0.7}",-5.032514,-4.075753,-2.959047,-2.681846,-5.143003,-3.978433,1.019516,4
3,0.000861,7.8e-05,0.000491,3.8e-05,0.1,0.95,"{'alpha': 0.1, 'l1_ratio': 0.95}",-4.949762,-3.959094,-3.017035,-2.730438,-5.068289,-3.944923,0.95998,3
4,0.000879,0.00011,0.000532,9.4e-05,0.1,0.99,"{'alpha': 0.1, 'l1_ratio': 0.99}",-4.939913,-3.941719,-3.029687,-2.739703,-5.057222,-3.941649,0.950514,2


In [22]:
# Predict on the held-out, scaled test features using the fitted grid search.
y_pred = grid_model.predict(X=X_test)

In [23]:
from sklearn.metrics import mean_squared_error

In [24]:
# Mean squared error of the tuned model on the test split (lower is better).
mean_squared_error(y_true=y_test, y_pred=y_pred)

3.7865573526818177