In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics as m 

In [2]:
advert=pd.read_csv("Advertising_sales-4.csv")

In [3]:
advert.corr()

Unnamed: 0.1,Unnamed: 0,TV,Radio,Newspaper,Sales
Unnamed: 0,1.0,0.017715,-0.11068,-0.154944,-0.051616
TV,0.017715,1.0,0.054809,0.056648,0.782224
Radio,-0.11068,0.054809,1.0,0.354104,0.576223
Newspaper,-0.154944,0.056648,0.354104,1.0,0.228299
Sales,-0.051616,0.782224,0.576223,0.228299,1.0


In [4]:
x=advert[['TV','Radio','Newspaper']]

In [5]:
y=advert.Sales

In [6]:
from sklearn.preprocessing import StandardScaler    
st_x= StandardScaler()    
x= st_x.fit_transform(x)

In [7]:
models=[]   # model[] is empty
scores=[]   #scores[] is empty

# GridSearchCV- RidgeRegression

In [8]:
parameters = {'alpha':[0.0001,0.001,0.01,0.1]}        
c1 = Ridge()
grid = GridSearchCV(c1, parameters, cv=10)
grid.fit(x,y)
print(grid.best_params_)
print(grid.best_estimator_)
print(grid.best_score_)

models.append(c1)   #models[c1]
scores.append(grid.best_score_)   #scores[0.8853679599908248]

{'alpha': 0.1}
Ridge(alpha=0.1)
0.8853679599908248


# GridSearchCV- LassoRegression

In [9]:
parameters = {'alpha':[0.0001,0.001,0.01,0.1]}        
c2 = Lasso()
grid = GridSearchCV(c2, parameters, cv=10)
grid.fit(x,y)
print(grid.best_params_)
print(grid.best_estimator_)
print(grid.best_score_)
models.append(c2)   #models[c1,c2]
scores.append(grid.best_score_)   #scores[0.8863844264592868]

{'alpha': 0.1}
Lasso(alpha=0.1)
0.8863844264592868


# GridSearchCV- DecisionTreeRegressor

In [10]:
parameters = {'criterion':['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
              'splitter':['best', 'random']
              }                  
c3 = DecisionTreeRegressor()
grid = GridSearchCV(c3, parameters, cv=10)
grid.fit(x,y)
print(grid.best_params_)
print(grid.best_estimator_)
print(grid.best_score_)

models.append(c3)   #models[c1]
scores.append(grid.best_score_)   #scores[0.8863844264592868]()



{'criterion': 'absolute_error', 'splitter': 'best'}
DecisionTreeRegressor(criterion='absolute_error')
0.9570272347905128


# GridSearchCV- Support Vector Regressor(SVR)

In [11]:
parameters = {'C': [0.01, 0.05],
              'degree': [2, 3],
              'gamma':[0.001, 0.01],
              'kernel': ['rbf']
              }        
c4 = SVR()
grid = GridSearchCV(c4, parameters, cv=10)
grid.fit(x,y)
print(grid.best_params_)
print(grid.best_estimator_)
print(grid.best_score_)

models.append(c4)   #models[c1,c2,c3]
scores.append(grid.best_score_) 

{'C': 0.05, 'degree': 2, 'gamma': 0.01, 'kernel': 'rbf'}
SVR(C=0.05, degree=2, gamma=0.01)
-0.03169236078848042


# GrdidSearchCV- KNeighborsRegressor(KNN)

In [12]:
parameters = {'n_neighbors': range(30),
              'metric':['manhattan','euclidean']}        
c5 = KNeighborsRegressor()  
grid = GridSearchCV(c5, parameters, cv=10)
grid.fit(x,y)
print(grid.best_params_)
print(grid.best_estimator_)
print(grid.best_score_)

models.append(c5)     #models[c1,c2,c3,c4]
scores.append(grid.best_score_) 

{'metric': 'manhattan', 'n_neighbors': 5}
KNeighborsRegressor(metric='manhattan')
0.9504601196854001


20 fits failed out of a total of 600.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
20 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/anaconda3/lib/python3.8/site-packages/sklearn/neighbors/_regression.py", line 213, in fit
    return self._fit(X, y)
  File "/opt/anaconda3/lib/python3.8/site-packages/sklearn/neighbors/_base.py", line 569, in _fit
    raise ValueError("Expected n_neighbors > 0. Got %d" % self.n_neighbors)
ValueError: Expected n_neighbors > 0. Got 0

 0.94507557 0.9384593  0.93519919 0.93224244 0.93196243 0.93108275

# GrdidSearchCV- RandomForestRegressor

In [13]:
parameters = {'n_estimators':range(1,20),
              'criterion':['squared_error', 'friedman_mse', 'absolute_error', 'poisson']}        
c6 = RandomForestRegressor()  
grid = GridSearchCV(c6, parameters, cv=10)
grid.fit(x,y)
print(grid.best_params_)
print(grid.best_estimator_)
print(grid.best_score_)


models.append(c6)      #models[c1,c2,c3,c4,c5]
scores.append(grid.best_score_) 

{'criterion': 'friedman_mse', 'n_estimators': 17}
RandomForestRegressor(criterion='friedman_mse', n_estimators=17)
0.9790119427271111


# GridSearchCV- LinearRegression

In [14]:
parameters = {'n_jobs':['-1','1'],
              'fit_intercept':[True,False]}        
c7 = LinearRegression()  
grid = GridSearchCV(c7, parameters, cv=10)
grid.fit(x,y)
print(grid.best_params_)
print(grid.best_estimator_)
print(grid.best_score_)


models.append(c7)      #models[c1,c2,c3,c4,c5]
scores.append(grid.best_score_) 

{'fit_intercept': True, 'n_jobs': '-1'}
LinearRegression(n_jobs='-1')
0.8853562237979616


In [15]:
result_data=pd.DataFrame({"Models":models,"Score":scores})

In [16]:
print(result_data)

                    Models     Score
0                  Ridge()  0.885368
1                  Lasso()  0.886384
2  DecisionTreeRegressor()  0.957027
3                    SVR() -0.031692
4    KNeighborsRegressor()  0.950460
5  RandomForestRegressor()  0.979012
6       LinearRegression()  0.885356
