# Stacking Regression


In [1]:
import pandas as pd
from joblib import dump
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.linear_model import Ridge
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
    mean_squared_log_error,
    r2_score,
    root_mean_squared_error,
    root_mean_squared_log_error,
)
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR

In [2]:
# Read the IceCreamData CSV into a DataFrame and preview its first rows.
dataset_path = "../../Datasets/IceCreamData.csv"
df = pd.read_csv(dataset_path)
df.head()

Unnamed: 0,Temperature,Revenue
0,24.6,534.8
1,26.0,625.19
2,27.8,660.63
3,20.6,487.71
4,11.5,316.24


In [3]:
# Print a summary of the frame: column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Temperature  500 non-null    float64
 1   Revenue      500 non-null    float64
dtypes: float64(2)
memory usage: 7.9 KB


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
df.describe()

Unnamed: 0,Temperature,Revenue
count,500.0,500.0
mean,22.234,521.57066
std,8.094131,175.40473
min,0.0,10.0
25%,17.1,405.5575
50%,22.4,529.37
75%,27.725,642.26
max,45.0,1000.0


In [5]:
# Count missing values per column (isna is the canonical spelling of isnull).
df.isna().sum()

Temperature    0
Revenue        0
dtype: int64

In [6]:
# Show the column labels; the last column is used as the target further down.
df.columns

Index(['Temperature', 'Revenue'], dtype='object')

In [7]:
# Pairwise correlation between the columns of the frame.
df.corr()

Unnamed: 0,Temperature,Revenue
Temperature,1.0,0.989759
Revenue,0.989759,1.0


In [8]:
# Feature matrix: every column except the last. Target vector: the last column.
feature_frame = df.iloc[:, :-1]
target_series = df.iloc[:, -1]
X = feature_frame.to_numpy()
y = target_series.to_numpy()

In [9]:
# Hold out 10% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=0,
)

In [10]:
# Base learners whose predictions are combined by the final (meta) estimator.
# The random forest is seeded so that Restart & Run All reproduces the same
# fitted model and the same metrics; the seed matches the train/test split.
estimators = [
    ("rfr", RandomForestRegressor(random_state=0)),
    ("svr", SVR()),
]
stacking = StackingRegressor(
    estimators=estimators, final_estimator=Ridge()
)  # By default final_estimator is RidgeCV
# Fit the base learners and the Ridge meta-learner on the training split.
stacking.fit(X_train, y_train)

In [11]:
# Predict the target for the held-out test features with the fitted stacking model.
y_pred = stacking.predict(X_test)

In [12]:
# R^2 on the held-out set, and the adjusted variant that accounts for the
# number of test samples and the number of predictors.
r2 = r2_score(y_test, y_pred)
n_samples = len(y_test)
n_features = X_test.shape[1]
adjusted_r2_score = 1 - ((1 - r2) * (n_samples - 1)) / (n_samples - n_features - 1)
print("The coefficient of determination :", r2)
print("The adjusted coefficient of determination :", adjusted_r2_score)

The coefficient of determination : 0.9761246728683085
The adjusted coefficient of determination : 0.9756272702197316


In [13]:
# Error metrics on the held-out predictions, reported in a fixed order.
# Each entry pairs the printed label with the scoring function it uses.
error_metrics = (
    ("Mean Absolute Error (MAE) :", mean_absolute_error),
    ("Mean Squared Error (MSE) :", mean_squared_error),
    ("Root Mean Squared Error (RMSE) :", root_mean_squared_error),
    ("Mean Squared Log Error (MSLE) :", mean_squared_log_error),
    ("Root Mean Squared Log Error (RMSLE) :", root_mean_squared_log_error),
    ("Mean Absolute Percentage Error (MAPE) :", mean_absolute_percentage_error),
)
for label, metric in error_metrics:
    print(label, metric(y_test, y_pred))

Mean Absolute Error (MAE) : 19.375684334441196
Mean Squared Error (MSE) : 583.7468777687426
Root Mean Squared Error (RMSE) : 24.16085424335701
Mean Squared Log Error (MSLE) : 0.0023730726102770055
Root Mean Squared Log Error (RMSLE) : 0.04871419310916486
Mean Absolute Percentage Error (MAPE) : 0.039104669000752795


In [14]:
# Serialize the fitted stacking model to disk with joblib.
model_path = "../../Models/stackingRegressor.joblib"
dump(stacking, model_path)

['../../Models/stackingRegressor.joblib']