In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
df = pd.read_csv("../../DATA/Advertising.csv")

In [4]:
df

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9
...,...,...,...,...
195,38.2,3.7,13.8,7.6
196,94.2,4.9,8.1,9.7
197,177.0,9.3,6.4,12.8
198,283.6,42.0,66.2,25.5


In [5]:
X = df.drop('sales', axis=1)

In [6]:
y = df['sales']

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

In [9]:
from sklearn.preprocessing import StandardScaler

In [10]:
scaler = StandardScaler()

In [13]:
scaler.fit(X_train)

In [14]:
X_train = scaler.fit_transform(X_train)

In [15]:
X_test = scaler.transform(X_test)

In [16]:
from sklearn.linear_model import Ridge

In [17]:
model = Ridge(alpha=100)

In [18]:
model.fit(X_train, y_train)

In [19]:
y_pred = model.predict(X_test)

In [20]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [21]:
MAE = mean_absolute_error(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)

In [22]:
print(f'MAE: {MAE}, MSE: {MSE}, RMSE: {RMSE}')

MAE: 2.1631741364394363, MSE: 7.341775789034126, RMSE: 2.7095711448556075


In [23]:
model = Ridge(alpha=1)

In [24]:
model.fit(X_train, y_train)

In [25]:
y_pred = model.predict(X_test)

In [26]:
MAE = mean_absolute_error(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)

In [27]:
print(f'MAE: {MAE}, MSE: {MSE}, RMSE: {RMSE}')

MAE: 1.2168768443580575, MSE: 2.3190215794287514, RMSE: 1.5228334050147283


In [28]:
from sklearn.model_selection import train_test_split

X_train, X_OTHER, y_train, y_OTHER = train_test_split(X, y, test_size=0.3, random_state=101)
X_eval, X_test, y_eval, y_test = train_test_split(X_OTHER, y_OTHER, test_size=0.5, random_state=101)

In [29]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_eval = scaler.transform(X_eval)
X_test = scaler.transform(X_test)

In [31]:
from sklearn.linear_model import Ridge

In [43]:
model = Ridge(alpha=1)

In [44]:
model.fit(X_train, y_train)

In [45]:
y_final_test_pred = model.predict(X_test)

In [46]:
MAE = mean_absolute_error(y_test, y_final_test_pred)
MSE = mean_squared_error(y_test, y_final_test_pred)
RMSE = np.sqrt(MSE)

In [47]:
print(f'MAE: {MAE}, MSE: {MSE}, RMSE: {RMSE}')

MAE: 1.238610264692412, MSE: 2.254260083800517, RMSE: 1.5014193564093001


In [48]:
from sklearn.model_selection import cross_val_score

In [49]:
X = df.drop('sales', axis=1)

In [50]:
y = df['sales']

In [51]:
from sklearn.model_selection import train_test_split

In [52]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [53]:
from sklearn.preprocessing import StandardScaler

In [54]:
scaler = StandardScaler()

In [55]:
scaler.fit(X_train)

In [56]:
X_train = scaler.fit_transform(X_train)

In [57]:
X_test = scaler.transform(X_test)

In [58]:
model = Ridge(alpha=100)

In [59]:
from sklearn.model_selection import cross_val_score

In [61]:
scores = cross_val_score(
    model,
    X_train, y_train,
    n_jobs=-1,
    scoring='neg_mean_squared_error',
    cv=5,
)

In [62]:
scores

array([ -7.20530399,  -6.89358457,  -8.55331817,  -5.60050497,
       -13.19842747])

In [63]:
abs(scores.mean())

8.290227832496518

In [64]:
model = Ridge(alpha=1)

In [65]:
scores = cross_val_score(
    model, X_train, y_train,
    n_jobs=-1, 
    scoring = 'neg_mean_squared_error',
    cv=5,
)

In [66]:
scores

array([-2.44875574, -2.48797343, -1.8124339 , -2.64472743, -5.92159202])

In [67]:
abs(scores.mean())

3.063096503033666

In [68]:
model.fit(X_train, y_train)

In [69]:
y_final_test_pred = model.predict(X_test)

In [70]:
MAE = mean_absolute_error(y_test, y_final_test_pred)
MSE = mean_squared_error(y_test, y_final_test_pred)
RMSE = np.sqrt(MSE)

In [71]:
print(f'MAE: {MAE}, MSE: {MSE}, RMSE: {RMSE}')

MAE: 1.2303187160667517, MSE: 2.697329571005994, RMSE: 1.6423548858288801


# Cross Validation with cross validate

In [72]:
X = df.drop('sales', axis=1)

In [73]:
y = df['sales']

In [74]:
from sklearn.model_selection import train_test_split

In [75]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

In [76]:
from sklearn.preprocessing import StandardScaler

In [77]:
scaler = StandardScaler()

In [79]:
from sklearn.linear_model import Ridge

In [80]:
model = Ridge(alpha=100)

In [88]:
from sklearn.model_selection import cross_val_score, cross_validate

In [96]:
scores = cross_validate(
    model, X_train, y_train,
    n_jobs=-1,
    scoring=['neg_mean_absolute_error', 'neg_mean_squared_error', 'max_error'],
    cv=5,
    return_train_score=True,
)

In [98]:
pd.DataFrame(scores)

Unnamed: 0,fit_time,score_time,test_neg_mean_absolute_error,train_neg_mean_absolute_error,test_neg_mean_squared_error,train_neg_mean_squared_error,test_max_error,train_max_error
0,0.006735,0.000931,-1.549049,-1.231634,-3.157441,-3.015602,-3.033079,-8.55944
1,0.006164,0.000999,-1.030048,-1.340412,-1.611905,-3.381343,-2.890241,-8.965684
2,0.007632,0.001002,-1.391145,-1.207895,-5.375887,-2.466132,-9.323436,-5.348275
3,0.007517,0.001158,-1.16827,-1.30173,-2.239846,-3.211144,-4.002848,-8.841384
4,0.00651,0.002503,-1.465326,-1.171855,-4.326403,-2.791283,-6.436729,-9.333214


In [100]:
pd.DataFrame(scores).mean()

fit_time                         0.006911
score_time                       0.001319
test_neg_mean_absolute_error    -1.320768
train_neg_mean_absolute_error   -1.250705
test_neg_mean_squared_error     -3.342296
train_neg_mean_squared_error    -2.973101
test_max_error                  -5.137267
train_max_error                 -8.209599
dtype: float64

In [101]:
model = Ridge(alpha=1)

In [102]:
scores = cross_validate(
    model, X_train, y_train,
    n_jobs=-1,
    scoring=['neg_mean_absolute_error','neg_mean_squared_error','max_error'],
    cv=5,
)

In [103]:
pd.DataFrame(scores)

Unnamed: 0,fit_time,score_time,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_max_error
0,0.013072,0.002512,-1.545354,-3.139509,-3.0345
1,0.013072,0.002512,-1.030709,-1.622356,-2.910301
2,0.013072,0.002512,-1.387887,-5.373856,-9.343527
3,0.013072,0.001506,-1.169156,-2.242221,-4.011364
4,0.013072,0.002512,-1.467583,-4.341516,-6.45374


In [105]:
pd.DataFrame(scores).mean()

fit_time                        0.013072
score_time                      0.002311
test_neg_mean_absolute_error   -1.320138
test_neg_mean_squared_error    -3.343892
test_max_error                 -5.150686
dtype: float64

In [104]:
model.fit(X_train, y_train)

In [106]:
y_final_tst_pred = model.predict(X_test)

In [107]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [108]:
MAE = mean_absolute_error(y_test, y_final_tst_pred)
MSE = mean_squared_error(y_test, y_final_tst_pred)
RMSE = np.sqrt(MSE)

In [109]:
print(f'MAE: {MAE}, MSE: {MSE}, RMSE: {RMSE}')

MAE: 1.2137460281856998, MSE: 2.298727428933738, RMSE: 1.5161554765042198
