# Exercise 10: Test the StackingClassifier model
 
## SIB - Intelligent Systems for Bioinformatics

Bárbara Freitas PG55693

In [2]:
import numpy as np
from typing import List
from si.io.csv_file import read_csv 
from si.model_selection.split import stratified_train_test_split 
from si.ensemble.stacking_classifier import StackingClassifier
from si.metrics.accuracy import accuracy
from si.base.model import Model


from si.models.knn_classifier import KNNClassifier 
from si.models.logistic_regression import LogisticRegression 
from si.models.decision_tree_classifier import DecisionTreeClassifier 


# --- Load the breast-bin dataset from its CSV file ---
# NOTE(review): label=True presumably tells read_csv to treat one column as
# the target — confirm against si.io.csv_file.read_csv.
path_to_breast = "../datasets/breast_bin/breast-bin.csv"
breast_dataset = read_csv(path_to_breast, sep=",", label=True)

# Banner, then the dataset shape, then a 40-character rule.
print("--- StackingClassifier Test Protocol ---")
print(f"Shape: {breast_dataset.shape()}")
print("-" * 40)

# --- Stratified split of the dataset into train and test sets ---
# TEST_SIZE is the fraction passed as test_size (0.3 here).
# NOTE(review): RANDOM_STATE presumably seeds the split so it is
# reproducible — confirm against stratified_train_test_split.
TEST_SIZE = 0.3
RANDOM_STATE = 42

train_set, test_set = stratified_train_test_split(
    breast_dataset, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Report how many samples (first entry of shape()) ended up in each split.
print(
    f"2. Split completed. Train: {train_set.shape()[0]} samples"
    f" | Test: {test_set.shape()[0]} samples"
)
print("-" * 40)

# --- Base models and the final model of the stack ---
base_models: List[Model] = [
    # 3. KNNClassifier model
    KNNClassifier(k=5),
    # 4. LogisticRegression model
    LogisticRegression(l2_penalty=0.1),
    # 5. DecisionTree model
    DecisionTreeClassifier(max_depth=5),
]

# 6. Second KNNClassifier model, passed to the ensemble as its final model
final_model: Model = KNNClassifier(k=3)

# --- Assemble the StackingClassifier from the base models and final model ---
stacking_model = StackingClassifier(models=base_models, final_model=final_model)

print(
    f"7. Stacking model initialized with {len(base_models)} base models"
    f" and KNN(3) as final."
)
print("-" * 40)

# --- Train the StackingClassifier model ---
# Only the training split is passed to fit(); the test split is kept for
# the scoring step below.
stacking_model.fit(train_set)

print("8. Stacking Model Training Complete.")
print("   (All base models and the final meta-model are now fitted.)")
print("-" * 40)

# --- Get the score of the model on the test set ---
test_accuracy = stacking_model.score(test_set)

# Plain string literal: nothing is interpolated, so no f-prefix is needed.
print("Final Result:")
# The score is reported with four decimal places.
print(f"Accuracy on Test Set (StackingClassifier): {test_accuracy:.4f}")

--- StackingClassifier Test Protocol ---
Shape: (698, 9)
----------------------------------------
2. Split completed. Train: 488 samples | Test: 210 samples
----------------------------------------
7. Stacking model initialized with 3 base models and KNN(3) as final.
----------------------------------------
8. Stacking Model Training Complete.
   (All base models and the final meta-model are now fitted.)
----------------------------------------
Final Result:
Accuracy on Test Set (StackingClassifier): 0.9286
