# Linear Model 2: Ridge, RidgeCV, LassoCV and ElasticNetCV


In [1]:
import numpy as np 
import pandas as pd

In [2]:
# Load the advertising data; the path is relative to the notebook's working directory.
df = pd.read_csv("Advertising.csv")

In [3]:
# Preview the first rows to confirm the columns loaded as expected.
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [4]:
# Check dtypes and non-null counts before modelling.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   TV         200 non-null    float64
 1   radio      200 non-null    float64
 2   newspaper  200 non-null    float64
 3   sales      200 non-null    float64
dtypes: float64(4)
memory usage: 6.4 KB


# Polynomial features are used to increase the accuracy

In [5]:
from sklearn.preprocessing import PolynomialFeatures

In [6]:
# Predictors are every column except the target; 'sales' is the value to predict.
X = df.drop(columns='sales')
y = df['sales']

# Degree-4 polynomial expansion; include_bias=False leaves out the constant column.
poly_converter = PolynomialFeatures(degree=4, include_bias=False)

In [7]:
# Expand the raw predictors into all polynomial and interaction terms up to degree 4.
polynomial_featured = poly_converter.fit_transform(X)

In [8]:
from sklearn.model_selection import train_test_split 

In [9]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
pX_train, pX_test, y_train, y_test = train_test_split(
    polynomial_featured,
    y,
    test_size=0.3,
    random_state=101,
)

In [10]:
from sklearn.linear_model import Ridge

In [11]:
from sklearn.preprocessing import StandardScaler

In [12]:
scaler = StandardScaler()

# Ridge with a hand-picked regularization strength.
model = Ridge(alpha=10)

# Learn the scaling statistics from the training split only, so nothing
# from the test split leaks into preprocessing.
scaler.fit(pX_train)

StandardScaler()

In [13]:
# Apply the scaling fitted on the training split to both splits.
# NOTE(review): this cell overwrites its own inputs, so running it a second
# time would scale already-scaled data. Re-run from the train/test split
# cell if it needs to be executed again.
pX_train = scaler.transform(pX_train)
pX_test = scaler.transform(pX_test)

In [14]:
# Fit the fixed-alpha Ridge model on the scaled training features.
model.fit(pX_train,y_train)

Ridge(alpha=10)

In [15]:
# Ridge predictions for the held-out test split.
Rpred = model.predict(pX_test)

In [16]:
from sklearn.metrics import mean_squared_error , mean_absolute_error 

In [17]:
# Root mean squared error of the Ridge predictions on the test split.
np.sqrt(mean_squared_error(y_test,Rpred))

0.9160353702370551

In [18]:
# Mean absolute error of the Ridge predictions on the test split.
mean_absolute_error(y_test,Rpred)

0.6079107187126626

# RidgeCV

In [19]:
from sklearn.linear_model import RidgeCV

In [20]:
# RidgeCV cross-validates each candidate alpha and keeps the best one.
modelrcv = RidgeCV(alphas=(0.1, 1.0, 10))

In [21]:
# Fit on the scaled training split (alpha is selected during fit),
# then predict the held-out test split.
Rcvpred = modelrcv.fit(pX_train, y_train).predict(pX_test)

In [22]:
# Root mean squared error of the RidgeCV predictions on the test split.
np.sqrt(mean_squared_error(y_test,Rcvpred))

0.6242818489681624

In [23]:
# Mean absolute error of the RidgeCV predictions on the test split.
mean_absolute_error(y_test,Rcvpred)

0.42422716289459417

In [24]:
# Show the best alpha value selected by cross-validation.
# NOTE(review): if this is the smallest candidate in the grid, the optimum may
# lie below the grid — consider adding smaller alphas to the candidates.
modelrcv.alpha_

0.1

In [25]:
# With RidgeCV the results are better than with the fixed-alpha Ridge model

In [26]:
# Coefficients of the RidgeCV model, one per polynomial feature.
modelrcv.coef_

array([ 5.07665809,  0.69581004,  0.3719049 , -4.01391514,  3.8488346 ,
       -0.56376907, -0.84743483,  0.48029363, -0.43721227, -0.46487399,
       -1.74834461, -0.84928794,  2.18893967, -0.30026264,  0.41337395,
       -0.68655586,  0.28318356, -0.51573875,  0.87590086,  1.10213681,
        1.53908841,  0.88777377, -2.14518671,  0.23163482,  0.52256028,
        0.60556143, -0.37374377,  0.22760633, -0.78495661,  0.4796754 ,
       -0.07194254, -0.07792478,  0.12231745, -0.3446115 ])

In [27]:
# Coefficients of the fixed-alpha Ridge model, for comparison with RidgeCV.
model.coef_

array([ 1.99444162,  0.63522105,  0.0052737 ,  0.37599236,  1.67014155,
        0.36018879,  0.05348105,  0.2031058 , -0.05596467, -0.22530702,
        0.49691819, -0.1863154 ,  0.90236119,  0.29966359,  0.07309324,
       -0.16156678, -0.02552639,  0.03503848, -0.02780858, -0.44022449,
       -0.03254158, -0.34956696,  0.0172993 , -0.18691283, -0.15906923,
        0.45349576,  0.0098378 ,  0.09218193,  0.04638876, -0.21778765,
       -0.14881337, -0.07961385,  0.01444889,  0.01302114])

In [28]:
# Many more scoring metrics are available; collect them by name.
# sklearn.metrics.SCORERS was removed in scikit-learn 1.3, so build the same
# name -> scorer mapping from get_scorer_names() and fall back to the old
# import only on releases that predate get_scorer_names().
try:
    from sklearn.metrics import get_scorer, get_scorer_names
    SCORERS = {name: get_scorer(name) for name in get_scorer_names()}
except ImportError:
    from sklearn.metrics import SCORERS

In [29]:
# List the names of all available scoring metrics.
SCORERS.keys()

dict_keys(['explained_variance', 'r2', 'max_error', 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_absolute_percentage_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_root_mean_squared_error', 'neg_mean_poisson_deviance', 'neg_mean_gamma_deviance', 'accuracy', 'top_k_accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted', 'balanced_accuracy', 'average_precision', 'neg_log_loss', 'neg_brier_score', 'adjusted_rand_score', 'rand_score', 'homogeneity_score', 'completeness_score', 'v_measure_score', 'mutual_info_score', 'adjusted_mutual_info_score', 'normalized_mutual_info_score', 'fowlkes_mallows_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'jaccard', 'jaccard_macro', 'jaccard_micro', 'jaccard_samples', 'jaccard_wei

In [30]:
# All of these metrics are read as "the higher the value, the better" (which is why the error metrics carry a neg_ prefix), unlike the error functions used earlier

# Lasso

In [31]:
# LassoCV: if no alpha values are supplied, a grid of alphas is generated automatically
# eps = alpha_min / alpha_max

In [32]:
from sklearn.linear_model import LassoCV

In [33]:
# All of these arguments are the default values, written out explicitly.
# If a convergence warning appears, max_iter can be increased (default = 1000).
lassocv_model = LassoCV(eps=0.001, n_alphas=100, cv=5)

In [34]:
# Fit LassoCV on the scaled training split; the best alpha is chosen during fit.
lassocv_model.fit(pX_train,y_train)

LassoCV(cv=5)

In [35]:
# Show the alpha value selected by cross-validation.
lassocv_model.alpha_

0.004943070909225827

In [36]:
# LassoCV predictions for the held-out test split.
lassocv_pred = lassocv_model.predict(pX_test)

In [37]:
# Root mean squared error of the LassoCV predictions on the test split.
np.sqrt(mean_squared_error(y_test,lassocv_pred))

0.589236408649863

In [38]:
# Mean absolute error of the LassoCV predictions on the test split.
mean_absolute_error(y_test,lassocv_pred)

0.42009796286848056

In [39]:
# Coefficients of the LassoCV model; the L1 penalty can set entries to exactly zero.
lassocv_model.coef_

array([ 5.06722606e+00,  1.37480166e-01,  2.64113607e-01, -4.66509073e+00,
        4.48924376e+00, -2.84145793e-01, -0.00000000e+00,  3.46812297e-02,
       -0.00000000e+00,  0.00000000e+00, -7.42380716e-01, -0.00000000e+00,
        0.00000000e+00, -4.05258140e-02, -0.00000000e+00, -0.00000000e+00,
        0.00000000e+00, -0.00000000e+00, -0.00000000e+00,  1.29109806e+00,
       -0.00000000e+00,  4.87012825e-02, -0.00000000e+00, -0.00000000e+00,
       -0.00000000e+00,  8.80333598e-02, -0.00000000e+00, -0.00000000e+00,
       -4.92565307e-03,  0.00000000e+00,  0.00000000e+00, -0.00000000e+00,
       -0.00000000e+00, -0.00000000e+00])

In [40]:
# LassoCV often does not give results as good as RidgeCV (although in this run it scored slightly better: RMSE 0.589 vs 0.624), but it sets many coefficients to exactly zero, which gives a less complex model


# ElasticNetCV

In [41]:
from sklearn.linear_model import ElasticNetCV

In [42]:
# l1_ratio mixes the two penalties: values near 0 behave like Ridge (L2),
# larger values move the model toward Lasso (L1), and 1 is the pure Lasso penalty.
# Every candidate is cross-validated, so each value is listed only once;
# a repeated value would just refit the same regularization path.
elasticcv_model = ElasticNetCV(
    l1_ratio=[0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1],
    eps=0.001,
    n_alphas=100,
    max_iter=1000000,
)

In [43]:
# Fit ElasticNetCV on the scaled training split; alpha and l1_ratio are chosen during fit.
elasticcv_model.fit(pX_train,y_train)

ElasticNetCV(l1_ratio=[0.1, 0.5, 0.7, 0.9, 0.9, 0.95, 0.99, 1],
             max_iter=1000000)

In [44]:
# Show the l1_ratio selected by cross-validation; a value of 1.0 means the
# chosen model is a pure Lasso (L1) fit.
elasticcv_model.l1_ratio_

1.0

In [45]:
# ElasticNetCV predictions for the held-out test split.
pred_elasticcv = elasticcv_model.predict(pX_test)

In [46]:
# Root mean squared error of the ElasticNetCV predictions on the test split.
np.sqrt(mean_squared_error(y_test,pred_elasticcv))

0.589236408649863

In [47]:
# Mean absolute error of the ElasticNetCV predictions on the test split.
mean_absolute_error(y_test,pred_elasticcv)

0.42009796286848056

In [48]:
# Coefficients of the selected ElasticNetCV model, one per polynomial feature.
elasticcv_model.coef_

array([ 5.06722606e+00,  1.37480166e-01,  2.64113607e-01, -4.66509073e+00,
        4.48924376e+00, -2.84145793e-01, -0.00000000e+00,  3.46812297e-02,
       -0.00000000e+00,  0.00000000e+00, -7.42380716e-01, -0.00000000e+00,
        0.00000000e+00, -4.05258140e-02, -0.00000000e+00, -0.00000000e+00,
        0.00000000e+00, -0.00000000e+00, -0.00000000e+00,  1.29109806e+00,
       -0.00000000e+00,  4.87012825e-02, -0.00000000e+00, -0.00000000e+00,
       -0.00000000e+00,  8.80333598e-02, -0.00000000e+00, -0.00000000e+00,
       -4.92565307e-03,  0.00000000e+00,  0.00000000e+00, -0.00000000e+00,
       -0.00000000e+00, -0.00000000e+00])