In [23]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import polars as pl
import polars.selectors as cs

In [24]:
df = pl.read_csv('../../DATA/Advertising.csv')

In [25]:
df

TV,radio,newspaper,sales
f64,f64,f64,f64
230.1,37.8,69.2,22.1
44.5,39.3,45.1,10.4
17.2,45.9,69.3,9.3
151.5,41.3,58.5,18.5
180.8,10.8,58.4,12.9
…,…,…,…
38.2,3.7,13.8,7.6
94.2,4.9,8.1,9.7
177.0,9.3,6.4,12.8
283.6,42.0,66.2,25.5


In [26]:
X = df.drop('sales')

In [27]:
y = df['sales']

In [28]:
from sklearn.preprocessing import PolynomialFeatures

In [29]:
polynomial_converter = PolynomialFeatures(degree=3, include_bias=False)

In [30]:
poly_features = polynomial_converter.fit_transform(X)

In [31]:
from sklearn.model_selection import train_test_split

In [32]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=101)

In [33]:
from sklearn.preprocessing import StandardScaler

In [34]:
scaler = StandardScaler()

In [35]:
scaler.fit(X_train, y_train)

In [36]:
X_train = scaler.fit_transform(X_train)

In [37]:
X_test = scaler.transform(X_test)

In [38]:
from sklearn.linear_model import Ridge

In [39]:
ridge_model = Ridge(alpha=10)

In [40]:
ridge_model.fit(X_train, y_train)

In [41]:
test_predictions = ridge_model.predict(X_test)

In [72]:
predictions = pl.DataFrame({
    'Sales Test': y_test,
    'Predictions': test_predictions,
    'Residuals': y_test - test_predictions,
    'MAE': np.abs(y_test - test_predictions),
    'MSE': (y_test - test_predictions) ** 2,
    'RMSE': np.sqrt((y_test - test_predictions) ** 2),
    'MAPE': np.abs((y_test - test_predictions) / y_test),
    'MAPE%': np.abs((y_test - test_predictions) / y_test) * 100,
    'MAPE% Mean': np.abs((y_test - test_predictions) / y_test) * 100,
    'MAPE% Max': np.abs((y_test - test_predictions) / y_test) * 100,
    'MAPE% Min': np.abs((y_test - test_predictions) / y_test) * 100,
    'MAPE% Std': np.abs((y_test - test_predictions) / y_test) * 100,
    'MAPE% Skew': np.abs((y_test - test_predictions) / y_test) * 100,
    'MAPE% Kurt': np.abs((y_test - test_predictions) / y_test) * 100,
    'MAPE% Sum': np.abs((y_test - test_predictions) / y_test) * 100,
})

In [51]:
predictions['MAE'].mean()

1.2587692456910877

In [61]:
predictions['MSE'].mean()

2.582186472392257

In [62]:
predictions['RMSE'].mean()

1.2587692456910877

In [68]:
predictions['MAPE'].mean()

12.378173208938344

In [73]:
predictions

Sales Test,Predictions,Residuals,MAE,MSE,RMSE,MAPE,MAPE%,MAPE% Mean,MAPE% Max,MAPE% Min,MAPE% Std,MAPE% Skew,MAPE% Kurt,MAPE% Sum
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
14.7,15.685845,-0.985845,0.985845,0.971891,0.985845,0.067064,6.706432,6.706432,6.706432,6.706432,6.706432,6.706432,6.706432,6.706432
19.8,19.159734,0.640266,0.640266,0.409941,0.640266,0.032337,3.233667,3.233667,3.233667,3.233667,3.233667,3.233667,3.233667,3.233667
11.9,11.670629,0.229371,0.229371,0.052611,0.229371,0.019275,1.927491,1.927491,1.927491,1.927491,1.927491,1.927491,1.927491,1.927491
16.7,16.892858,-0.192858,0.192858,0.037194,0.192858,0.011548,1.15484,1.15484,1.15484,1.15484,1.15484,1.15484,1.15484,1.15484
9.5,9.396865,0.103135,0.103135,0.010637,0.103135,0.010856,1.085627,1.085627,1.085627,1.085627,1.085627,1.085627,1.085627,1.085627
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
3.2,5.996513,-2.796513,2.796513,7.820487,2.796513,0.87391,87.391041,87.391041,87.391041,87.391041,87.391041,87.391041,87.391041,87.391041
25.4,22.587598,2.812402,2.812402,7.909606,2.812402,0.110724,11.072449,11.072449,11.072449,11.072449,11.072449,11.072449,11.072449,11.072449
10.8,12.623552,-1.823552,1.823552,3.32534,1.823552,0.168847,16.884737,16.884737,16.884737,16.884737,16.884737,16.884737,16.884737,16.884737
17.4,18.362699,-0.962699,0.962699,0.926788,0.962699,0.055328,5.53275,5.53275,5.53275,5.53275,5.53275,5.53275,5.53275,5.53275


In [64]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

In [65]:
MAE = mean_absolute_error(y_test, test_predictions)
MSA = mean_squared_error(y_test, test_predictions)
RMSE = np.sqrt(mean_squared_error(y_test, test_predictions))
MAPE = mean_absolute_percentage_error(y_test, test_predictions)

In [66]:
print(f'MAE: {MAE},\n MSA: {MSA},\n RMSE: {RMSE}, \n MAPE: {MAPE}')

MAE: 1.2587692456910873,
 MSA: 2.582186472392257,
 RMSE: 1.6069183154075557, 
 MAPE: 0.12378173208938341


In [74]:
train_predictions = ridge_model.predict(X_train)

In [75]:
MAE = mean_absolute_error(y_train, train_predictions)
MSA = mean_squared_error(y_train, train_predictions)
RMSE = np.sqrt(mean_squared_error(y_train, train_predictions))

In [76]:
print(f'MAE: {MAE},\n MSA: {MSA},\n RMSE: {RMSE}')

MAE: 1.269635017752412,
 MSA: 3.1059138871631857,
 RMSE: 1.7623603170643583


In [77]:
from sklearn.linear_model import RidgeCV

In [78]:
ridge_cv_model = RidgeCV(alphas=(0.1, 1.0, 10.0), scoring='neg_mean_absolute_error')

In [79]:
ridge_cv_model.fit(X_train, y_train)

In [80]:
ridge_cv_model.alpha_

0.1

In [82]:
ridge_cv_model.coef_

array([ 3.76333559,  2.76339947, -0.00593674])

In [84]:
pl.DataFrame({
    'Columns': X.columns,
    'Weights': ridge_cv_model.coef_
})

Columns,Weights
str,f64
"""TV""",3.763336
"""radio""",2.763399
"""newspaper""",-0.005937


In [85]:
test_predictions = ridge_cv_model.predict(X_test)

In [87]:
pl.DataFrame({
    'Sales Test': y_test,
    'Predictions': test_predictions,
    'Residuals': y_test - test_predictions,
    'MAE': np.abs(y_test - test_predictions),
    'MSE': (y_test - test_predictions) ** 2,
    'RMSE': np.sqrt((y_test - test_predictions) ** 2),
})

Sales Test,Predictions,Residuals,MAE,MSE,RMSE
f64,f64,f64,f64,f64,f64
14.7,15.740723,-1.040723,1.040723,1.083104,1.040723
19.8,19.6057,0.1943,0.1943,0.037753,0.1943
11.9,11.451302,0.448698,0.448698,0.20133,0.448698
16.7,17.006987,-0.306987,0.306987,0.094241,0.306987
9.5,9.175161,0.324839,0.324839,0.105521,0.324839
…,…,…,…,…,…
3.2,5.513869,-2.313869,2.313869,5.35399,2.313869
25.4,23.286948,2.113052,2.113052,4.464989,2.113052
10.8,12.623977,-1.823977,1.823977,3.32689,1.823977
17.4,18.769493,-1.369493,1.369493,1.875511,1.369493


In [88]:
MAE = mean_absolute_error(y_test, test_predictions)
MSA = mean_squared_error(y_test, test_predictions)
RMSE = np.sqrt(mean_squared_error(y_test, test_predictions))

In [89]:
print(f'MAE: {MAE},\n MSA: {MSA},\n RMSE: {RMSE}')

MAE: 1.2140617628442483,
 MSA: 2.3006527003549277,
 RMSE: 1.5167902624802572


In [90]:
from sklearn.linear_model import LassoCV

In [91]:
lasso_cv_model = LassoCV(eps=.1, n_jobs=-1, cv=5, n_alphas=100)

In [92]:
lasso_cv_model.fit(X_train, y_train)

In [93]:
lasso_cv_model.alpha_

0.39636306046337183

In [94]:
lasso_cv_model.coef_

array([3.39549312, 2.39355522, 0.        ])

In [95]:
pl.DataFrame({
    'Columns': X.columns,
    'Weights': lasso_cv_model.coef_
})

Columns,Weights
str,f64
"""TV""",3.395493
"""radio""",2.393555
"""newspaper""",0.0


In [96]:
test_predictions = lasso_cv_model.predict(X_test)

In [103]:
predictions = pl.DataFrame({
    'Sales Test': y_test,
    'Predictions': test_predictions,
    'Residuals': y_test - test_predictions,
    'MAE': np.abs(y_test - test_predictions),
    'MSE': (y_test - test_predictions) ** 2,
    'RMSE': np.sqrt((y_test - test_predictions) ** 2),
    'RMSE %': np.sqrt((y_test - test_predictions) ** 2) / y_test * 100,
})

In [105]:
predictions['RMSE %'].mean()

12.603728954561761

In [106]:
MAE = mean_absolute_error(y_test, test_predictions)
MSA = mean_squared_error(y_test, test_predictions)
RMSE = np.sqrt(mean_squared_error(y_test, test_predictions))

In [107]:
print(f'MAE: {MAE},\n MSA: {MSA},\n RMSE: {RMSE}')

MAE: 1.2873601365766183,
 MSA: 2.871612584474154,
 RMSE: 1.6945833070327803


In [108]:
from sklearn.linear_model import ElasticNetCV

In [109]:
elastic_model = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1], eps=.1, n_alphas=100, cv=5)

In [110]:
elastic_model.fit(X_train, y_train)

In [111]:
elastic_model.alpha_

0.39636306046337183

In [112]:
elastic_model.l1_ratio_

1.0

In [113]:
elastic_model.coef_

array([3.39549312, 2.39355522, 0.        ])

In [114]:
pl.DataFrame({
    'Columns': X.columns,
    'Weights': elastic_model.coef_
})

Columns,Weights
str,f64
"""TV""",3.395493
"""radio""",2.393555
"""newspaper""",0.0


In [115]:
test_predictions = elastic_model.predict(X_test)

In [116]:
predictions = pl.DataFrame({
    'Sales Test': y_test,
    'Predictions': test_predictions,
    'Residuals': y_test - test_predictions,
    'MAE': np.abs(y_test - test_predictions),
    'MSE': (y_test - test_predictions) ** 2,
    'RMSE': np.sqrt((y_test - test_predictions) ** 2),
    'RMSE %': np.sqrt((y_test - test_predictions) ** 2) / y_test * 100,
})

In [117]:
predictions

Sales Test,Predictions,Residuals,MAE,MSE,RMSE,RMSE %
f64,f64,f64,f64,f64,f64,f64
14.7,15.42887,-0.72887,0.72887,0.531251,0.72887,4.958297
19.8,19.058217,0.741783,0.741783,0.550242,0.741783,3.746378
11.9,11.773407,0.126593,0.126593,0.016026,0.126593,1.063807
16.7,16.583614,0.116386,0.116386,0.013546,0.116386,0.696921
9.5,9.661931,-0.161931,0.161931,0.026222,0.161931,1.704537
…,…,…,…,…,…,…
3.2,6.447325,-3.247325,3.247325,10.545117,3.247325,101.478894
25.4,22.265198,3.134802,3.134802,9.826983,3.134802,12.34174
10.8,12.66293,-1.86293,1.86293,3.470509,1.86293,17.249353
17.4,18.34912,-0.94912,0.94912,0.900828,0.94912,5.45471


In [118]:
predictions['RMSE %'].mean()

12.603728954561761

In [120]:
MAE = mean_absolute_error(y_test, test_predictions)
MSA = mean_squared_error(y_test, test_predictions)
RMSE = np.sqrt(mean_squared_error(y_test, test_predictions))

In [121]:
print(f'MAE: {MAE},\n MSA: {MSA},\n RMSE: {RMSE}')

MAE: 1.2873601365766183,
 MSA: 2.871612584474154,
 RMSE: 1.6945833070327803
