In [7]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [8]:
# Read the advertising data from the relative data directory and preview
# the first five rows.
df = pd.read_csv(filepath_or_buffer='../data/Advertising.csv')
df.head(5)

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [9]:
# Target is the 'sales' column; every remaining column is a predictor.
y = df['sales']
X = df.drop(columns='sales')

In [10]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Expand the predictors into every polynomial/interaction term up to
# degree 3, without the constant (bias) column.
polynomial_converter = PolynomialFeatures(include_bias=False, degree=3)
polynomial_features = polynomial_converter.fit(X).transform(X)

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    polynomial_features,
    y,
    random_state=42,
    test_size=0.3,
)

In [11]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, then apply
# that same transformation to the held-out test rows.
scaler = StandardScaler()
scaled_X_train = scaler.fit_transform(X_train)
scaled_X_test = scaler.transform(X_test)

In [12]:
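# PolynomialFeatures example: inputs A, B, C --> A, B, C, A^2, AB, AC, B^2, BC, C^2, ... (continuing through the degree-3 terms)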

In [13]:
from sklearn.linear_model import LassoCV


# Fit a cross-validated Lasso over a grid of 100 candidate alphas.
# eps is the ratio alpha_min / alpha_max of that grid, so eps=0.1 spans a
# single order of magnitude.
# NOTE(review): with a grid this narrow the selected alpha_ may sit on the
# lower edge of the grid -- confirm before trusting the sparsity below.
lasso_cv_model = LassoCV(n_alphas=100, eps=0.1).fit(scaled_X_train, y_train)

# Predict on the scaled held-out rows, then display the fitted coefficients.
test_predictions = lasso_cv_model.predict(scaled_X_test)
lasso_cv_model.coef_

array([0.97675148, 0.        , 0.        , 0.        , 3.8148913 ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        ])

In [14]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Error metrics of the held-out predictions against the true targets.
MSE = mean_squared_error(y_test, test_predictions)
MAE = mean_absolute_error(y_test, test_predictions)
RMSE = np.sqrt(MSE)  # back in the units of the target

print(f'MAE: {MAE}', f'MSE: {MSE}', f'RMSE: {RMSE}', sep='\n')

MAE: 0.6811456342837983
MSE: 1.071044372269007
RMSE: 1.034912736547873


In [15]:
# Regularisation strength (alpha) selected by the fitted LassoCV model.
lasso_cv_model.alpha_

0.4924531806474871

In [16]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Rebuild the degree-3 polynomial features (no bias column) from X.
polynomial_converter = PolynomialFeatures(include_bias=False, degree=3)
polynomial_converter.fit(X)
polynomial_features = polynomial_converter.transform(X)

# Re-split with only 10% held out. This rebinds X_train / X_test /
# y_train / y_test, replacing the earlier 70/30 split.
X_train, X_test, y_train, y_test = train_test_split(
    polynomial_features,
    y,
    random_state=42,
    test_size=0.1,
)

In [17]:
from sklearn.preprocessing import StandardScaler

# Fit a fresh scaler on the current training rows, then push both the
# training and the test rows through it.
scaler = StandardScaler().fit(X_train)
scaled_X_train, scaled_X_test = (
    scaler.transform(rows) for rows in (X_train, X_test)
)

In [18]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of a Ridge model (alpha=0.1) on the scaled
# training rows. The scorer is *negated* MSE, so values closer to zero
# are better.
# NOTE(review): scaled_X_train was standardised with statistics from the
# whole training split before CV, so each validation fold has influenced
# the scaling -- consider a Pipeline if that matters here.
ridge_model = Ridge(alpha=0.1)

scores = cross_val_score(
    estimator=ridge_model,
    X=scaled_X_train,
    y=y_train,
    cv=5,
    scoring='neg_mean_squared_error',
)

scores

array([-0.33674462, -0.35896679, -0.31072905, -0.11171404, -0.8580868 ])

In [19]:
# Average the per-fold scores and drop the sign to report a positive value.
abs(np.mean(scores))

0.39524826132766167

In [20]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_validate

# Same Ridge model and 5 folds as before, but cross_validate reports
# several metrics at once (negated MSE and negated MAE) together with
# fit/score timings. The result is wrapped in a DataFrame so each fold is
# one row.
ridge_model = Ridge(alpha=0.1)

scores = pd.DataFrame(
    cross_validate(
        ridge_model,
        scaled_X_train,
        y_train,
        cv=5,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error'],
    )
)
scores


Unnamed: 0,fit_time,score_time,test_neg_mean_squared_error,test_neg_mean_absolute_error
0,0.001001,0.001036,-0.336745,-0.470286
1,0.001005,0.002302,-0.358967,-0.444996
2,0.003603,0.002287,-0.310729,-0.358024
3,0.001015,0.000999,-0.111714,-0.276194
4,0.001008,0.0,-0.858087,-0.427102
