In [78]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.compose import make_column_selector, ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier, StackingClassifier, StackingRegressor, AdaBoostClassifier, AdaBoostRegressor
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.metrics import r2_score, accuracy_score, mean_absolute_error, matthews_corrcoef
from sklearn.linear_model import ElasticNet
from xgboost import XGBRegressor

In [13]:
from ucimlrepo import fetch_ucirepo

# Fetch the dataset registered under id=1 in the UCI ML repository.
abalone = fetch_ucirepo(id=1)

# Split the payload into the feature table and the "Rings" target column
# (both are delivered as pandas objects).
X = abalone.data.features
y = abalone.data.targets["Rings"]

In [14]:
# Hold out a test set with the default split proportions. The split is seeded
# so that the same rows land in train/test on every Restart & Run All, which
# keeps the scores reported further down comparable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [79]:
# Stacking Regressor
#
# Preprocessing: one-hot encode object-dtype columns (dropping the first level
# of each) and standardize every numeric column.
# NOTE(review): `handle_unknown="ignore"` combined with `drop="first"` encodes an
# unseen category as all zeros, i.e. the same row as the dropped level -- confirm
# that this is acceptable for this data.
ct = ColumnTransformer(
    [
        (
            "dummify",
            OneHotEncoder(sparse_output=False, handle_unknown="ignore", drop="first"),
            make_column_selector(dtype_include=object),
        ),
        (
            "standardize",
            StandardScaler(),
            make_column_selector(dtype_include=np.number),
        ),
    ]
)

# Base learners whose predictions feed the stacking meta-learner.
# Every estimator that draws random numbers is seeded so the fit is
# reproducible; KNeighborsRegressor exposes no random_state parameter.
estimators = [
    ("DecisionTree", DecisionTreeRegressor(random_state=42)),
    ("KNeighbors", KNeighborsRegressor()),
    ("RandomForest", RandomForestRegressor(random_state=42)),
    ("ADABoost", AdaBoostRegressor(random_state=42)),
    ("XGBoost", XGBRegressor(random_state=42)),
]

# No final_estimator is passed, so StackingRegressor uses its library default.
my_pipeline = Pipeline(
    [
        ("preprocessing", ct),
        ("stacking", StackingRegressor(estimators=estimators, n_jobs=-1)),
    ]
)

# Fit on the training split, then predict the held-out rows.
fitted_pipeline = my_pipeline.fit(X=X_train, y=y_train)

y_pred = fitted_pipeline.predict(X_test)

In [80]:
# Coefficient of determination of the current predictions on the held-out split.
r2_score(y_test, y_pred)

0.5482137533646217

In [56]:
# Bare expression: the cell's output is the repr of whichever pipeline
# `fitted_pipeline` currently refers to.
fitted_pipeline

In [73]:
# Stacking Classifier
#
# Preprocessing: one-hot encode object-dtype columns (dropping the first level
# of each) and standardize every numeric column.
# NOTE(review): `handle_unknown="ignore"` combined with `drop="first"` encodes an
# unseen category as all zeros, i.e. the same row as the dropped level -- confirm
# that this is acceptable for this data.
ct = ColumnTransformer(
    [
        (
            "dummify",
            OneHotEncoder(sparse_output=False, handle_unknown="ignore", drop="first"),
            make_column_selector(dtype_include=object),
        ),
        (
            "standardize",
            StandardScaler(),
            make_column_selector(dtype_include=np.number),
        ),
    ]
)

# Base learners whose predictions feed the stacking meta-learner.
# Every estimator that draws random numbers is seeded so the fit is
# reproducible; KNeighborsClassifier exposes no random_state parameter.
estimators = [
    ("DecisionTree", DecisionTreeClassifier(random_state=42)),
    ("KNeighbors", KNeighborsClassifier()),
    ("RandomForest", RandomForestClassifier(random_state=42)),
    ("ADABoost", AdaBoostClassifier(random_state=42)),
]

# A random forest (also seeded) combines the base learners' outputs.
my_pipeline = Pipeline(
    [
        ("preprocessing", ct),
        (
            "stacking",
            StackingClassifier(
                estimators=estimators,
                final_estimator=RandomForestClassifier(random_state=42),
                n_jobs=-1,
            ),
        ),
    ]
)

# Fit on the training split, then predict the held-out rows.
# NOTE(review): `y_train` is used directly as the class label here -- confirm
# that treating the target values as discrete classes is intended.
fitted_pipeline = my_pipeline.fit(X=X_train, y=y_train)

y_pred = fitted_pipeline.predict(X_test)



In [74]:
# Matthews correlation coefficient of the current predictions on the held-out split.
matthews_corrcoef(y_test, y_pred)

0.1450822738837459

In [72]:
# Bare expression: the cell's output is the repr of whichever pipeline
# `fitted_pipeline` currently refers to.
fitted_pipeline