STACKING

In [1]:
# Import Required Libraries
# vecstack: is Python package for stacking
!pip install vecstack

Collecting vecstack
  Downloading vecstack-0.5.2-py3-none-any.whl.metadata (2.0 kB)
Downloading vecstack-0.5.2-py3-none-any.whl (22 kB)
Installing collected packages: vecstack
Successfully installed vecstack-0.5.2


In [3]:
# Import necessary libraries like pandas, sklearn and vecstack
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from vecstack import stacking
from sklearn.preprocessing import LabelEncoder


# Location of the UCI Wine dataset (CSV read without a header row)
link = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'

# Column labels: the class label first, followed by the 13 measurements
names = [
    'Class',
    'Alcohol',
    'Malic acid',
    'Ash',
    'Alkalinity of ash',
    'Magnesium',
    'Total phenols',
    'Flavanoids',
    'Nonflavanoid phenols',
    'Proanthocyanins',
    'Color intensity',
    'Hue',
    'OD280/OD315 of diluted wines',
    'Proline',
]

# Load the data; the column labels are supplied explicitly via `names`
df = pd.read_csv(link, names=names, header=None)

# Feature matrix: every column except the class label
X = df.drop(columns='Class')

# Target vector: LabelEncoder maps the class labels of the full target
# column (not only a training subset) to integers 0..n_classes-1
le = LabelEncoder()
y = le.fit_transform(df['Class'])


In [4]:
# Split the dataset into training and testing sets
# (20% held out for the final evaluation; fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define base models; this list is what gets passed to vecstack's `stacking`
models = [KNeighborsClassifier(n_neighbors=5),
          RandomForestClassifier(n_estimators=100, random_state=42),
          # `use_label_encoder=False` was removed: the installed XGBoost ignores it
          # and warns 'Parameters: { "use_label_encoder" } are not used.' on every fit.
          # The target is already integer-encoded by LabelEncoder, so nothing else changes.
          XGBClassifier(eval_metric='mlogloss')
]

# Define meta-model (fitted later on the stacked predictions of the base models)
meta_model = LogisticRegression(max_iter=1000)

In [5]:
# Run the base models through vecstack's cross-validated stacking.
# S_train / S_test hold the base-model predictions that serve as
# input features for the meta-model.
S_train, S_test = stacking(
    models, X_train, y_train, X_test,
    regression=False,        # classification task
    metric=accuracy_score,   # score reported for every fold
    n_folds=5,
    stratified=True,
    shuffle=True,
    random_state=42,
    verbose=2,               # print per-fold scores for each model
)

# Fit the meta-model on the stacked training features
meta_model.fit(S_train, y_train)

# Predict the held-out test set from the stacked test features
y_pred = meta_model.predict(S_test)

# Evaluate the stacked ensemble on the held-out test set
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy of the stacking ensemble with Logistic Regression as meta-model: {accuracy:.2f}')

task:         [classification]
n_classes:    [3]
metric:       [accuracy_score]
mode:         [oof_pred_bag]
n_models:     [3]

model  0:     [KNeighborsClassifier]
    fold  0:  [0.82758621]
    fold  1:  [0.65517241]
    fold  2:  [0.64285714]
    fold  3:  [0.64285714]
    fold  4:  [0.75000000]
    ----
    MEAN:     [0.70369458] + [0.07382427]
    FULL:     [0.70422535]

model  1:     [RandomForestClassifier]
    fold  0:  [1.00000000]
    fold  1:  [0.96551724]
    fold  2:  [1.00000000]
    fold  3:  [0.92857143]
    fold  4:  [1.00000000]
    ----
    MEAN:     [0.97881773] + [0.02845227]
    FULL:     [0.97887324]

model  2:     [XGBClassifier]


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


    fold  0:  [1.00000000]
    fold  1:  [0.86206897]
    fold  2:  [0.92857143]
    fold  3:  [0.92857143]
    fold  4:  [1.00000000]
    ----
    MEAN:     [0.94384236] + [0.05188574]
    FULL:     [0.94366197]

Accuracy of the stacking ensemble with Logistic Regression as meta-model: 0.97


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
