In [1]:
import pandas as pd

import sklearn.metrics as mt
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet

In [2]:
def model_r2_rmse(name, model, splits=None):
    """Fit ``model`` on a training split and score it on a test split.

    Parameters
    ----------
    name : str
        Label stored as the first element of the returned Series.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is fitted in
        place, so the caller's instance is trained after this call.
    splits : tuple, optional
        ``(X_train, X_test, y_train, y_test)`` in that order. When omitted,
        the notebook-level variables of the same names are used, which keeps
        existing two-argument calls working unchanged.

    Returns
    -------
    pandas.Series
        Three positional entries: ``name``, the R2 score and the RMSE of the
        predictions on the test split.
    """
    if splits is None:
        # Fall back to the split defined at notebook level; those names must
        # already exist in the kernel when the function is called.
        splits = (X_train, X_test, y_train, y_test)
    features_train, features_test, target_train, target_test = splits

    model.fit(features_train, target_train)
    predictions = model.predict(features_test)

    r2 = mt.r2_score(target_test, predictions)
    rmse = mt.root_mean_squared_error(target_test, predictions)
    return pd.Series([name, r2, rmse])

<p style="background-image: linear-gradient(to right, #0D9276, #41b675)"> &nbsp;</p>

In [3]:
# Read the advertising CSV (path is relative to the notebook's working
# directory) and preview the last three rows as the cell output.
data = pd.read_csv(filepath_or_buffer='data/02_advertising.csv')
data.tail(n=3)

Unnamed: 0,TV,Radio,Newspaper,Sales
197,177.0,9.3,6.4,14.8
198,283.6,42.0,66.2,25.5
199,232.1,8.6,8.7,18.4


In [4]:
# Every column except 'Sales' is a predictor; 'Sales' is the target.
X = data.drop(columns='Sales')
y = data['Sales']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

<p style="background-image: linear-gradient(#0D9276, #FFFFFF 10%)"> &nbsp; </p>

In [5]:
# Candidate regressors, all with default hyper-parameters. The tree-based
# estimators get a fixed seed so their scores are reproducible.
models = {
    'Linear': LinearRegression(),
    'Ridge': Ridge(),
    'Lasso': Lasso(),
    'ElasticNet': ElasticNet(),
    'SVR': SVR(),
    'DecisionTree': DecisionTreeRegressor(random_state=0),
    'Bagging': BaggingRegressor(random_state=0),
    'Random Forest': RandomForestRegressor(random_state=0)
}

# Score every model first, then build the frame in a single step. Growing a
# DataFrame with pd.concat inside the loop re-copies it on each iteration, and
# the concat-then-transpose approach leaves R2/RMSE as object dtype. Building
# from plain row lists lets pandas infer float columns.
score_rows = [
    model_r2_rmse(name, model).tolist()
    for name, model in models.items()
]

results = (
    pd.DataFrame(score_rows, columns=['Model', 'R2', 'RMSE'])
    .set_index('Model')
)
results

Unnamed: 0_level_0,R2,RMSE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1
Linear,0.864505,2.126629
Ridge,0.864506,2.126623
Lasso,0.866266,2.112764
ElasticNet,0.865541,2.118488
SVR,0.833844,2.35499
DecisionTree,0.877292,2.023796
Bagging,0.95092,1.279925
Random Forest,0.944961,1.355394
