In [None]:
from __future__ import annotations
import collections
import itertools
import math
import random
import numpy as np

from river import base
from river.drift import ADWIN
from river.metrics import Accuracy
from river.tree import HoeffdingTreeClassifier
from river.utils.random import poisson

# Utility to generate a random feature subspace.
def random_subspace(all_features: list, k: int, rng: random.Random):
    return rng.sample(all_features, k=k)

###############################################################################
# Base Ensemble and Estimator Classes
###############################################################################

class BaseSRPEnsemble(base.Wrapper, base.Ensemble):
    _TRAIN_RANDOM_SUBSPACES = "subspaces"
    _TRAIN_RESAMPLING = "resampling"
    _TRAIN_RANDOM_PATCHES = "patches"

    _FEATURES_SQRT = "sqrt"
    _FEATURES_SQRT_INV = "rmsqrt"

    def __init__(
        self,
        model: base.Estimator | None = None,
        n_models: int = 10,
        subspace_size: int | float | str = 0.6,
        training_method: str = "patches",
        lam: float = 6.0,
        drift_detector: base.DriftDetector | None = None,
        warning_detector: base.DriftDetector | None = None,
        disable_weighted_vote: bool = False,
        disable_detector: str = "off",
        seed: int | None = None,
        metric: Accuracy | None = None,
    ):
        super().__init__([])  # start with an empty ensemble
        self.model = model
        self.n_models = n_models
        self.subspace_size = subspace_size
        self.training_method = training_method
        self.lam = lam
        self.drift_detector = drift_detector if drift_detector is not None else ADWIN(delta=1e-5)
        self.warning_detector = warning_detector if warning_detector is not None else ADWIN(delta=1e-4)
        self.disable_weighted_vote = disable_weighted_vote
        self.disable_detector = disable_detector
        self.metric = metric if metric is not None else Accuracy()
        self.seed = seed
        self._rng = random.Random(self.seed)
        self._n_samples_seen = 0
        self._subspaces: list = []
        # This attribute must be set by the subclass.
        self._base_learner_class = None

    def _init_ensemble(self, features: list):
        self._generate_subspaces(features)
        subspace_indexes = list(range(self.n_models))
        if self.training_method in {self._TRAIN_RANDOM_PATCHES, self._TRAIN_RANDOM_SUBSPACES}:
            self._rng.shuffle(subspace_indexes)
        for i in range(self.n_models):
            subspace = self._subspaces[subspace_indexes[i]]
            self.append(
                self._base_learner_class(
                    idx_original=i,
                    model=self.model,
                    metric=self.metric,
                    created_on=self._n_samples_seen,
                    drift_detector=self.drift_detector,
                    warning_detector=self.warning_detector,
                    is_background_learner=False,
                    rng=self._rng,
                    features=subspace,
                )
            )

    def _generate_subspaces(self, features: list):
        n_features = len(features)
        # If we're not doing resampling, we try to compute a subspace
        if self.training_method != self._TRAIN_RESAMPLING:
            if isinstance(self.subspace_size, float) and 0.0 < self.subspace_size <= 1:
                k = self.subspace_size
                percent = (1.0 + k)
                k = round(n_features * percent)
                if k < 2:
                    k = round(n_features * percent) + 1
            elif isinstance(self.subspace_size, int) and self.subspace_size > 2:
                k = self.subspace_size
            elif self.subspace_size == self._FEATURES_SQRT:
                k = round(math.sqrt(n_features)) + 1
            elif self.subspace_size == self._FEATURES_SQRT_INV:
                k = n_features - round(math.sqrt(n_features)) + 1
            else:
                raise ValueError(f"Invalid subspace_size: {self.subspace_size}.")
            if k < 0:
                k = n_features + k
            if k != 0 and k < n_features:
                if n_features <= 20 or k < 2:
                    if k == 1 and n_features > 2:
                        k = 2
                    self._subspaces = []
                    for i, combination in enumerate(
                        itertools.cycle(itertools.combinations(features, k))
                    ):
                        if i == self.n_models:
                            break
                        self._subspaces.append(list(combination))
                else:
                    self._subspaces = [
                        random_subspace(all_features=features, k=k, rng=self._rng)
                        for _ in range(self.n_models)
                    ]
            else:
                # When k is not less than the number of features, switch to resampling
                self.training_method = self._TRAIN_RESAMPLING
                self._subspaces = [None] * self.n_models
        else:
            self._subspaces = [None] * self.n_models


    def reset(self):
        self.data = []
        self._n_samples_seen = 0
        self._rng = random.Random(self.seed)

class BaseSRPEstimator:
    def __init__(
        self,
        idx_original: int,
        model: base.Estimator,
        metric: Accuracy,
        created_on: int,
        drift_detector: base.DriftDetector,
        warning_detector: base.DriftDetector,
        is_background_learner,
        rng: random.Random,
        features=None,
    ):
        self.idx_original = idx_original
        self.created_on = created_on
        self.model = model.clone()
        self.metric = metric.clone()
        self.features = features
        if drift_detector is not None:
            self.disable_drift_detector = False
            self.drift_detector = drift_detector.clone()
        else:
            self.disable_drift_detector = True
            self.drift_detector = None
        if warning_detector is not None:
            self.disable_background_learner = False
            self.warning_detector = warning_detector.clone()
        else:
            self.disable_background_learner = True
            self.warning_detector = None
        self.is_background_learner = is_background_learner
        self.n_drifts_detected = 0
        self.n_warnings_detected = 0
        self.rng = rng
        self._background_learner = None

###############################################################################
# Base Learner for Classification with Weighting
###############################################################################

class BaseSRPClassifier(BaseSRPEstimator):
    def __init__(
        self,
        idx_original: int,
        model: base.Classifier,
        metric: Accuracy,
        created_on: int,
        drift_detector: base.DriftDetector,
        warning_detector: base.DriftDetector,
        is_background_learner,
        rng: random.Random,
        features=None,
    ):
        super().__init__(
            idx_original, model, metric, created_on, drift_detector, warning_detector, is_background_learner, rng, features
        )
        self.weight = 1.0  # initialize the base learner's weight

    def reset(self, all_features: list, n_samples_seen: int):
        # Optionally, update the feature subspace using a new random sample
        if self.features is not None:
            self.features = random_subspace(all_features, len(self.features), self.rng)
        # Reset the underlying model and metrics
        self.model = self.model.clone()
        self.metric = self.metric.clone()
        self.created_on = n_samples_seen
        if self.drift_detector is not None:
            self.drift_detector = self.drift_detector.clone()
        if self.warning_detector is not None:
            self.warning_detector = self.warning_detector.clone()
        self.weight = 1.0

    def learn_one(self, x: dict, y: base.typing.ClfTarget, *, w: int, n_samples_seen: int, **kwargs):
        if self.features is not None:
            x_subset = {k: x[k] for k in self.features if k in x}
        else:
            x_subset = x
        for _ in range(int(w)):
            self.model.learn_one(x=x_subset, y=y, **kwargs)
        if self._background_learner:
            self._background_learner.learn_one(x=x, y=y, w=w, n_samples_seen=n_samples_seen)
        if not self.disable_drift_detector and not self.is_background_learner:
            y_pred = self.model.predict_one(x_subset)
            correctly_classifies = (y_pred == y)
            # Update the learner's weight: decrease if correct, increase if wrong
            if correctly_classifies:
                self.weight *= 0.9
            else:
                self.weight *= 1.1
            self.warning_detector.update(int(not correctly_classifies))
            if self.warning_detector.drift_detected:
                all_features = list(x.keys())
                self.n_warnings_detected += 1
                # (Optional: trigger background learning here)
            self.drift_detector.update(int(not correctly_classifies))
            if self.drift_detector.drift_detected:
                all_features = list(x.keys())
                self.n_drifts_detected += 1
                # Call the newly added reset method
                self.reset(all_features=all_features, n_samples_seen=n_samples_seen)

    def predict_proba_one(self, x, **kwargs):
        if self.features is not None:
            x_subset = {k: x[k] for k in self.features if k in x}
        else:
            x_subset = x
        return self.model.predict_proba_one(x_subset, **kwargs)

    def predict_one(self, x: dict, **kwargs) -> base.typing.ClfTarget:
        y_pred = self.predict_proba_one(x, **kwargs)
        if y_pred:
            return max(y_pred, key=y_pred.get)
        return None


###############################################################################
# Standard SRPClassifier (for reference)
###############################################################################

class SRPClassifier(BaseSRPEnsemble, base.Classifier):
    def __init__(
        self,
        model: base.Estimator | None = None,
        n_models: int = 10,
        subspace_size: int | float | str = 0.6,
        training_method: str = "patches",
        lam: int = 6,
        drift_detector: base.DriftDetector | None = None,
        warning_detector: base.DriftDetector | None = None,
        disable_detector: str = "off",
        disable_weighted_vote: bool = False,
        seed: int | None = None,
        metric: Accuracy | None = None,
    ):
        if model is None:
            model = HoeffdingTreeClassifier(grace_period=50, delta=0.01)
        if drift_detector is None:
            drift_detector = ADWIN(delta=1e-5)
        if warning_detector is None:
            warning_detector = ADWIN(delta=1e-4)
        if metric is None:
            metric = Accuracy()
        super().__init__(
            model=model,
            n_models=n_models,
            subspace_size=subspace_size,
            training_method=training_method,
            lam=lam,
            drift_detector=drift_detector,
            warning_detector=warning_detector,
            disable_detector=disable_detector,
            disable_weighted_vote=disable_weighted_vote,
            seed=seed,
            metric=metric,
        )
        self._base_learner_class = BaseSRPClassifier

    def predict_proba_one(self, x, **kwargs):
        y_pred = collections.Counter()
        if not self:
            self._init_ensemble(features=list(x.keys()))
            return y_pred
        for model in self:
            y_proba_temp = model.predict_proba_one(x, **kwargs)
            if not self.disable_weighted_vote:
                # Use the learner's weight for vote weighting
                weight = model.weight
                y_proba_temp = {k: val * weight for k, val in y_proba_temp.items()}
            y_pred.update(y_proba_temp)
        total = sum(y_pred.values())
        if total > 0:
            return {label: proba / total for label, proba in y_pred.items()}
        return y_pred

    def predict_one(self, x, **kwargs):
        y_pred = self.predict_proba_one(x, **kwargs)
        if y_pred:
            return max(y_pred, key=y_pred.get)
        return None

###############################################################################
# Dynamic SRPClassifier: Adaptive Ensemble Size
###############################################################################

class DynamicSRPClassifier(SRPClassifier):
    @property
    def _min_number_of_models(self):
        return 0

    @property
    def _wrapped_model(self):
        return self.model

    def __init__(
        self,
        model: base.Estimator | None = None,
        n_models: int = 10,
        max_models: int = 100,
        subspace_size: int | float | str = 0.6,
        training_method: str = "patches",
        lam: int = 6,
        drift_detector: base.DriftDetector | None = None,
        warning_detector: base.DriftDetector | None = None,
        disable_detector: str = "off",
        disable_weighted_vote: bool = False,
        seed: int | None = None,
        metric: Accuracy | None = None,
        window_size: int = 100,
        performance_threshold: float = 0.8,
    ):
        super().__init__(
            model=model,
            n_models=n_models,
            subspace_size=subspace_size,
            training_method=training_method,
            lam=lam,
            drift_detector=drift_detector,
            warning_detector=warning_detector,
            disable_detector=disable_detector,
            disable_weighted_vote=disable_weighted_vote,
            seed=seed,
            metric=metric,
        )
        self._max_models = max_models
        self._window_size = window_size
        self._performance_threshold = performance_threshold
        self._sliding_window = collections.deque(maxlen=window_size)

    def learn_one(self, x: dict, y: base.typing.ClfTarget, **kwargs):
        self._n_samples_seen += 1
        if not self:
            self._init_ensemble(features=list(x.keys()))
        for model in self:
            y_pred = model.predict_one(x)
            if y_pred is not None:
                model.metric.update(y_true=y, y_pred=y_pred)
            if self.training_method == self._TRAIN_RANDOM_SUBSPACES:
                k = 1
            else:
                k = poisson(rate=self.lam, rng=self._rng)
                if k == 0:
                    continue
            model.learn_one(x=x, y=y, w=k, n_samples_seen=self._n_samples_seen, **kwargs)
        ensemble_pred = self.predict_one(x)
        correct = 1 if ensemble_pred == y else 0
        self._sliding_window.append(correct)
        if len(self._sliding_window) == self._window_size:
            window_accuracy = sum(self._sliding_window) / self._window_size
            if window_accuracy < self._performance_threshold:
                features = list(x.keys())
                if len(self) < self._max_models:
                    self._add_learner(features)
                else:
                    self._remove_worst_learner()
                    self._add_learner(features)
                self._sliding_window.clear()

    def _generate_subspace_single(self, features: list):
        n_features = len(features)
        if self.training_method != self._TRAIN_RESAMPLING:
            if isinstance(self.subspace_size, float) and 0.0 < self.subspace_size <= 1:
                k = self.subspace_size
                percent = (1.0 + k)
                k = round(n_features * percent)
                if k < 2:
                    k = round(n_features * percent) + 1
            elif isinstance(self.subspace_size, int) and self.subspace_size > 2:
                k = self.subspace_size
            elif self.subspace_size == self._FEATURES_SQRT:
                k = round(math.sqrt(n_features)) + 1
            elif self.subspace_size == self._FEATURES_SQRT_INV:
                k = n_features - round(math.sqrt(n_features)) + 1
            else:
                raise ValueError(f"Invalid subspace_size: {self.subspace_size}.")
            if k < 0:
                k = n_features + k
            if k != 0 and k < n_features:
                return random_subspace(all_features=features, k=k, rng=self._rng)
            else:
                self.training_method = self._TRAIN_RESAMPLING
                return None
        else:
            return None

    def _add_learner(self, features: list):
        subspace = self._generate_subspace_single(features)
        new_learner = self._base_learner_class(
            idx_original=len(self),
            model=self.model,
            metric=self.metric.clone(),
            created_on=self._n_samples_seen,
            drift_detector=self.drift_detector,
            warning_detector=self.warning_detector,
            is_background_learner=False,
            rng=self._rng,
            features=subspace,
        )
        new_learner.weight = 1.0
        self.append(new_learner)

    def _remove_worst_learner(self):
        worst_index = None
        worst_weight = float('inf')
        for i, learner in enumerate(self):
            if learner.weight < worst_weight:
                worst_weight = learner.weight
                worst_index = i
        if worst_index is not None:
            del self[worst_index]


In [None]:
from river.datasets import synth
dataset = synth.Agrawal( classification_function=0,seed=42).take(10000)


In [None]:
from river import ensemble, evaluate, metrics, datasets, tree

# Create a synthetic concept drift stream
dataset = datasets.synth.ConceptDriftStream(
    seed=42,
    position=500,
    width=50
).take(1000)

# Define a base model (Hoeffding Tree)
base_model = tree.HoeffdingTreeClassifier(
    grace_period=50, delta=0.01,
    nominal_attributes=['age', 'car', 'zipcode']
)


# Reinitialize the stream for a fair comparison
dataset = datasets.synth.ConceptDriftStream(
    seed=42,
    position=500,
    width=50
).take(1000)

# --- Dynamic SRPClassifier ---
dynamic_model = DynamicSRPClassifier(
    model=base_model,
    n_models=10,        # starting with 10 learners
    max_models=100,     # ensemble will grow up to 100 learners
    seed=42,
    window_size=100,           # sliding window size for performance monitoring
    performance_threshold=0.8  # if accuracy over window falls below 80%, add a learner
)

result_dynamic = evaluate.progressive_val_score(dataset, dynamic_model, metric)
print("Dynamic SRPClassifier Accuracy:", result_dynamic)


Dynamic SRPClassifier Accuracy: Accuracy: 74.44%


In [None]:
from river import ensemble, evaluate, metrics, datasets, tree

# Create a synthetic concept drift stream
dataset = datasets.synth.ConceptDriftStream(
    seed=42,
    position=500,
    width=50
).take(10000)

# Define a base model (Hoeffding Tree)
base_model = tree.HoeffdingTreeClassifier(
    grace_period=50, delta=0.01,
    nominal_attributes=['age', 'car', 'zipcode']
)

# --- Original SRPClassifier ---
original_model = ensemble.SRPClassifier(
    model=base_model,
    n_models=10,  # e.g., 3 base learners
    seed=42,
)

metric = metrics.Accuracy()

result_original = evaluate.progressive_val_score(dataset, original_model, metric)
print("Original SRPClassifier Accuracy:", result_original)

# Reinitialize the stream for a fair comparison
dataset = datasets.synth.ConceptDriftStream(
    seed=42,
    position=500,
    width=50
).take(10000)

# --- Dynamic SRPClassifier ---
dynamic_model = DynamicSRPClassifier(
    model=base_model,
    n_models=10,        # starting with 10 learners
    max_models=100,     # ensemble will grow up to 100 learners
    seed=42,
    window_size=100,           # sliding window size for performance monitoring
    performance_threshold=0.8  # if accuracy over window falls below 80%, add a learner
)

result_dynamic = evaluate.progressive_val_score(dataset, dynamic_model, metric)
print("Dynamic SRPClassifier Accuracy:", result_dynamic)


Original SRPClassifier Accuracy: Accuracy: 68.68%
Dynamic SRPClassifier Accuracy: Accuracy: 60.80%


# Here look kind gentelman

In [24]:
from __future__ import annotations
import collections
import itertools
import math
import random
import numpy as np

from river import base
from river.drift import ADWIN
from river.metrics import Accuracy
from river.tree import HoeffdingTreeClassifier
from river.utils.random import poisson

# Utility to generate a random feature subspace.
def random_subspace(all_features: list, k: int, rng: random.Random):
    return rng.sample(all_features, k=k)

###############################################################################
# Base Ensemble and Estimator Classes (unchanged)
###############################################################################

class BaseSRPEnsemble(base.Wrapper, base.Ensemble):
    _TRAIN_RANDOM_SUBSPACES = "subspaces"
    _TRAIN_RESAMPLING = "resampling"
    _TRAIN_RANDOM_PATCHES = "patches"

    _FEATURES_SQRT = "sqrt"
    _FEATURES_SQRT_INV = "rmsqrt"

    def __init__(
        self,
        model: base.Estimator | None = None,
        n_models: int = 10,
        subspace_size: int | float | str = 0.6,
        training_method: str = "patches",
        lam: float = 6.0,
        drift_detector: base.DriftDetector | None = None,
        warning_detector: base.DriftDetector | None = None,
        disable_weighted_vote: bool = False,
        disable_detector: str = "off",
        seed: int | None = None,
        metric: Accuracy | None = None,
    ):
        super().__init__([])  # start with an empty ensemble
        self.model = model
        self.n_models = n_models
        self.subspace_size = subspace_size
        self.training_method = training_method
        self.lam = lam
        self.drift_detector = drift_detector if drift_detector is not None else ADWIN(delta=1e-5)
        self.warning_detector = warning_detector if warning_detector is not None else ADWIN(delta=1e-4)
        self.disable_weighted_vote = disable_weighted_vote
        self.disable_detector = disable_detector
        self.metric = metric if metric is not None else Accuracy()
        self.seed = seed
        self._rng = random.Random(self.seed)
        self._n_samples_seen = 0
        self._subspaces: list = []
        # This attribute must be set by the subclass.
        self._base_learner_class = None

    def _init_ensemble(self, features: list):
        self._generate_subspaces(features)
        subspace_indexes = list(range(self.n_models))
        if self.training_method in {self._TRAIN_RANDOM_PATCHES, self._TRAIN_RANDOM_SUBSPACES}:
            self._rng.shuffle(subspace_indexes)
        for i in range(self.n_models):
            subspace = self._subspaces[subspace_indexes[i]]
            self.append(
                self._base_learner_class(
                    idx_original=i,
                    model=self.model,
                    metric=self.metric,
                    created_on=self._n_samples_seen,
                    drift_detector=self.drift_detector,
                    warning_detector=self.warning_detector,
                    is_background_learner=False,
                    rng=self._rng,
                    features=subspace,
                )
            )

    def _generate_subspaces(self, features: list):
        n_features = len(features)
        # If we're not doing resampling, try to compute a subspace.
        if self.training_method != self._TRAIN_RESAMPLING:
            if isinstance(self.subspace_size, float) and 0.0 < self.subspace_size <= 1:
                k = self.subspace_size
                percent = (1.0 + k)
                k = round(n_features * percent)
                if k < 2:
                    k = round(n_features * percent) + 1
            elif isinstance(self.subspace_size, int) and self.subspace_size > 2:
                k = self.subspace_size
            elif self.subspace_size == self._FEATURES_SQRT:
                k = round(math.sqrt(n_features)) + 1
            elif self.subspace_size == self._FEATURES_SQRT_INV:
                k = n_features - round(math.sqrt(n_features)) + 1
            else:
                raise ValueError(f"Invalid subspace_size: {self.subspace_size}.")
            if k < 0:
                k = n_features + k
            if k != 0 and k < n_features:
                if n_features <= 20 or k < 2:
                    if k == 1 and n_features > 2:
                        k = 2
                    self._subspaces = []
                    for i, combination in enumerate(
                        itertools.cycle(itertools.combinations(features, k))
                    ):
                        if i == self.n_models:
                            break
                        self._subspaces.append(list(combination))
                else:
                    self._subspaces = [
                        random_subspace(all_features=features, k=k, rng=self._rng)
                        for _ in range(self.n_models)
                    ]
            else:
                # When k is not less than the number of features, switch to resampling.
                self.training_method = self._TRAIN_RESAMPLING
                self._subspaces = [None] * self.n_models
        else:
            self._subspaces = [None] * self.n_models

    def reset(self):
        self.data = []
        self._n_samples_seen = 0
        self._rng = random.Random(self.seed)

class BaseSRPEstimator:
    def __init__(
        self,
        idx_original: int,
        model: base.Estimator,
        metric: Accuracy,
        created_on: int,
        drift_detector: base.DriftDetector,
        warning_detector: base.DriftDetector,
        is_background_learner,
        rng: random.Random,
        features=None,
    ):
        self.idx_original = idx_original
        self.created_on = created_on
        self.model = model.clone()
        self.metric = metric.clone()
        self.features = features
        if drift_detector is not None:
            self.disable_drift_detector = False
            self.drift_detector = drift_detector.clone()
        else:
            self.disable_drift_detector = True
            self.drift_detector = None
        if warning_detector is not None:
            self.disable_background_learner = False
            self.warning_detector = warning_detector.clone()
        else:
            self.disable_background_learner = True
            self.warning_detector = None
        self.is_background_learner = is_background_learner
        self.n_drifts_detected = 0
        self.n_warnings_detected = 0
        self.rng = rng
        self._background_learner = None

###############################################################################
# Base Learner for Classification with Weighting (including reset)
###############################################################################

class BaseSRPClassifier(BaseSRPEstimator):
    def __init__(
        self,
        idx_original: int,
        model: base.Classifier,
        metric: Accuracy,
        created_on: int,
        drift_detector: base.DriftDetector,
        warning_detector: base.DriftDetector,
        is_background_learner,
        rng: random.Random,
        features=None,
    ):
        super().__init__(idx_original, model, metric, created_on, drift_detector, warning_detector, is_background_learner, rng, features)
        self.weight = 1.0  # initialize the learner's weight

    def reset(self, all_features: list, n_samples_seen: int):
        # Optionally update the feature subspace
        if self.features is not None:
            self.features = random_subspace(all_features, len(self.features), self.rng)
        self.model = self.model.clone()
        self.metric = self.metric.clone()
        self.created_on = n_samples_seen
        if self.drift_detector is not None:
            self.drift_detector = self.drift_detector.clone()
        if self.warning_detector is not None:
            self.warning_detector = self.warning_detector.clone()
        self.weight = 1.0

    def learn_one(self, x: dict, y: base.typing.ClfTarget, *, w: int, n_samples_seen: int, **kwargs):
        if self.features is not None:
            x_subset = {k: x[k] for k in self.features if k in x}
        else:
            x_subset = x
        for _ in range(int(w)):
            self.model.learn_one(x=x_subset, y=y, **kwargs)
        if self._background_learner:
            self._background_learner.learn_one(x=x, y=y, w=w, n_samples_seen=n_samples_seen)
        if not self.disable_drift_detector and not self.is_background_learner:
            y_pred = self.model.predict_one(x_subset)
            correctly_classifies = (y_pred == y)
            # Update learner's weight
            if correctly_classifies:
                self.weight *= 0.9
            else:
                self.weight *= 1.1
            self.warning_detector.update(int(not correctly_classifies))
            if self.warning_detector.drift_detected:
                all_features = list(x.keys())
                self.n_warnings_detected += 1
            self.drift_detector.update(int(not correctly_classifies))
            if self.drift_detector.drift_detected:
                all_features = list(x.keys())
                self.n_drifts_detected += 1
                self.reset(all_features=all_features, n_samples_seen=n_samples_seen)

    def predict_proba_one(self, x, **kwargs):
        if self.features is not None:
            x_subset = {k: x[k] for k in self.features if k in x}
        else:
            x_subset = x
        return self.model.predict_proba_one(x_subset, **kwargs)

    def predict_one(self, x: dict, **kwargs) -> base.typing.ClfTarget:
        y_pred = self.predict_proba_one(x, **kwargs)
        if y_pred:
            return max(y_pred, key=y_pred.get)
        return None

###############################################################################
# Standard SRPClassifier (for reference)
###############################################################################

class SRPClassifier(BaseSRPEnsemble, base.Classifier):
    def __init__(
        self,
        model: base.Estimator | None = None,
        n_models: int = 10,
        subspace_size: int | float | str = 0.6,
        training_method: str = "patches",
        lam: int = 6,
        drift_detector: base.DriftDetector | None = None,
        warning_detector: base.DriftDetector | None = None,
        disable_detector: str = "off",
        disable_weighted_vote: bool = False,
        seed: int | None = None,
        metric: Accuracy | None = None,
    ):
        if model is None:
            model = HoeffdingTreeClassifier(grace_period=50, delta=0.01)
        if drift_detector is None:
            drift_detector = ADWIN(delta=1e-5)
        if warning_detector is None:
            warning_detector = ADWIN(delta=1e-4)
        if metric is None:
            metric = Accuracy()
        super().__init__(
            model=model,
            n_models=n_models,
            subspace_size=subspace_size,
            training_method=training_method,
            lam=lam,
            drift_detector=drift_detector,
            warning_detector=warning_detector,
            disable_detector=disable_detector,
            disable_weighted_vote=disable_weighted_vote,
            seed=seed,
            metric=metric,
        )
        self._base_learner_class = BaseSRPClassifier

    def predict_proba_one(self, x, **kwargs):
        y_pred = collections.Counter()
        if not self:
            self._init_ensemble(features=list(x.keys()))
            return y_pred
        for model in self:
            y_proba_temp = model.predict_proba_one(x, **kwargs)
            # Here we use the individual learner's weight
            weight = model.weight
            y_proba_temp = {k: val * weight for k, val in y_proba_temp.items()}
            y_pred.update(y_proba_temp)
        total = sum(y_pred.values())
        if total > 0:
            return {label: proba / total for label, proba in y_pred.items()}
        return y_pred

    def predict_one(self, x, **kwargs):
        y_pred = self.predict_proba_one(x, **kwargs)
        if y_pred:
            return max(y_pred, key=y_pred.get)
        return None

###############################################################################
# Dynamic SRPClassifier: Adaptive Ensemble Size with Per-Tree Weighting
###############################################################################

class DynamicSRPClassifier(SRPClassifier):
    @property
    def _min_number_of_models(self):
        return 0

    @property
    def _wrapped_model(self):
        return self.model

    def __init__(
        self,
        model: base.Estimator | None = None,
        n_models: int = 10,
        max_models: int = 100,
        subspace_size: int | float | str = 0.6,
        training_method: str = "patches",
        lam: int = 6,
        drift_detector: base.DriftDetector | None = None,
        warning_detector: base.DriftDetector | None = None,
        disable_detector: str = "off",
        disable_weighted_vote: bool = False,
        seed: int | None = None,
        metric: Accuracy | None = None,
        window_size: int = 100,
        performance_threshold: float = 0.8,
    ):
        super().__init__(
            model=model,
            n_models=n_models,
            subspace_size=subspace_size,
            training_method=training_method,
            lam=lam,
            drift_detector=drift_detector,
            warning_detector=warning_detector,
            disable_detector=disable_detector,
            disable_weighted_vote=disable_weighted_vote,
            seed=seed,
            metric=metric,
        )
        self._max_models = max_models
        self._window_size = window_size
        self._performance_threshold = performance_threshold
        self._sliding_window = collections.deque(maxlen=window_size)
        # Initialize per-learner performance metrics
        self.tree_accuracies = [1.0] * n_models
        self.correct_predictions = [0] * n_models
        self.total_predictions = [0] * n_models
        self.ensemble_weights = [1.0 / n_models] * n_models

    def learn_one(self, x: dict, y: base.typing.ClfTarget, **kwargs):
        self._n_samples_seen += 1
        if not self:
            self._init_ensemble(features=list(x.keys()))
        # Train each base learner with online bagging
        for model in self:
            y_pred = model.predict_one(x)
            if y_pred is not None:
                model.metric.update(y_true=y, y_pred=y_pred)
            if self.training_method == self._TRAIN_RANDOM_SUBSPACES:
                k = 1
            else:
                k = poisson(rate=self.lam, rng=self._rng)
                if k == 0:
                    continue
            model.learn_one(x=x, y=y, w=k, n_samples_seen=self._n_samples_seen, **kwargs)
        # Update per-learner performance metrics:
        for i, learner in enumerate(self):
            pred = learner.predict_one(x)
            if pred == y:
                self.tree_accuracies[i] *= 0.9
                self.correct_predictions[i] += 1
            else:
                self.tree_accuracies[i] *= 1.1
            self.total_predictions[i] += 1
        # Update ensemble weights:
        self.ensemble_weights = [1 / (1 + acc) for acc in self.tree_accuracies]
        total_weight = sum(self.ensemble_weights)
        self.ensemble_weights = [w / total_weight for w in self.ensemble_weights]
        # Update sliding window for ensemble-level performance:
        ensemble_pred = self.predict_one(x)
        correct = 1 if ensemble_pred == y else 0
        self._sliding_window.append(correct)
        if len(self._sliding_window) == self._window_size:
            window_accuracy = sum(self._sliding_window) / self._window_size
            if window_accuracy < self._performance_threshold:
                features = list(x.keys())
                if len(self) < self._max_models:
                    self._add_learner(features)
                    self.tree_accuracies.append(1.0)
                    self.correct_predictions.append(0)
                    self.total_predictions.append(0)
                    self.ensemble_weights.append(1.0 / (len(self)))
                else:
                    worst_index = self._remove_worst_learner()
                    self._add_learner(features)
                    # Remove corresponding metrics of removed learner:
                    del self.tree_accuracies[worst_index]
                    del self.correct_predictions[worst_index]
                    del self.total_predictions[worst_index]
                    del self.ensemble_weights[worst_index]
                    # Append default values for the new learner:
                    self.tree_accuracies.append(1.0)
                    self.correct_predictions.append(0)
                    self.total_predictions.append(0)
                    self.ensemble_weights.append(1.0 / (len(self)))
                self._sliding_window.clear()

    def _generate_subspace_single(self, features: list):
        n_features = len(features)
        if self.training_method != self._TRAIN_RESAMPLING:
            if isinstance(self.subspace_size, float) and 0.0 < self.subspace_size <= 1:
                k = self.subspace_size
                percent = (1.0 + k)
                k = round(n_features * percent)
                if k < 2:
                    k = round(n_features * percent) + 1
            elif isinstance(self.subspace_size, int) and self.subspace_size > 2:
                k = self.subspace_size
            elif self.subspace_size == self._FEATURES_SQRT:
                k = round(math.sqrt(n_features)) + 1
            elif self.subspace_size == self._FEATURES_SQRT_INV:
                k = n_features - round(math.sqrt(n_features)) + 1
            else:
                raise ValueError(f"Invalid subspace_size: {self.subspace_size}.")
            if k < 0:
                k = n_features + k
            if k != 0 and k < n_features:
                return random_subspace(all_features=features, k=k, rng=self._rng)
            else:
                self.training_method = self._TRAIN_RESAMPLING
                return None
        else:
            return None

    def _add_learner(self, features: list):
        subspace = self._generate_subspace_single(features)
        new_learner = self._base_learner_class(
            idx_original=len(self),
            model=self.model,
            metric=self.metric.clone(),
            created_on=self._n_samples_seen,
            drift_detector=self.drift_detector,
            warning_detector=self.warning_detector,
            is_background_learner=False,
            rng=self._rng,
            features=subspace,
        )
        new_learner.weight = 1.0
        self.append(new_learner)

    def _remove_worst_learner(self):
        worst_index = None
        worst_weight = float('inf')
        for i, _ in enumerate(self):
            if self.ensemble_weights[i] < worst_weight:
                worst_weight = self.ensemble_weights[i]
                worst_index = i
        if worst_index is not None:
            del self[worst_index]
            return worst_index
        return None

    def predict_proba_one(self, x, **kwargs):
        y_pred = collections.Counter()
        if not self:
            self._init_ensemble(features=list(x.keys()))
            return y_pred
        for i, learner in enumerate(self):
            learner_proba = learner.predict_proba_one(x, **kwargs)
            weight = self.ensemble_weights[i]
            learner_proba = {k: val * weight for k, val in learner_proba.items()}
            y_pred.update(learner_proba)
        total = sum(y_pred.values())
        if total > 0:
            return {label: proba / total for label, proba in y_pred.items()}
        return y_pred

    def predict_one(self, x, **kwargs):
        y_pred = self.predict_proba_one(x, **kwargs)
        if y_pred:
            return max(y_pred, key=y_pred.get)
        return None


In [None]:
from river import ensemble, evaluate, metrics, datasets, tree

# Create a synthetic concept drift stream
dataset = datasets.synth.ConceptDriftStream(
    seed=42,
    position=500,
    width=50
).take(10000)

# Define a base model (Hoeffding Tree)
base_model = tree.HoeffdingTreeClassifier(
    grace_period=50, delta=0.01,
    nominal_attributes=['age', 'car', 'zipcode']
)

# --- Original SRPClassifier ---
original_model = ensemble.SRPClassifier(
    model=base_model,
    n_models=10,  # e.g., 3 base learners
    seed=42,
)

metric = metrics.Accuracy()

result_original = evaluate.progressive_val_score(dataset, original_model, metric)
print("Original SRPClassifier Accuracy:", result_original)

# Reinitialize the stream for a fair comparison
dataset = datasets.synth.ConceptDriftStream(
    seed=42,
    position=500,
    width=50
).take(10000)

# --- Dynamic SRPClassifier ---
dynamic_model = DynamicSRPClassifier(
    model=base_model,
    n_models=10,        # starting with 10 learners
    max_models=100,     # ensemble will grow up to 100 learners
    seed=42,
    window_size=100,           # sliding window size for performance monitoring
    performance_threshold=0.8  # if accuracy over window falls below 80%, add a learner
)

result_dynamic = evaluate.progressive_val_score(dataset, dynamic_model, metric)
print("Dynamic SRPClassifier Accuracy:", result_dynamic)


Original SRPClassifier Accuracy: Accuracy: 68.68%
Dynamic SRPClassifier Accuracy: Accuracy: 75.58%


In [2]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

def get_subpage_links(main_url):
    response = requests.get(main_url)
    soup = BeautifulSoup(response.text, 'html.parser')

    base_domain = urlparse(main_url).netloc
    links = set()

    for a_tag in soup.find_all('a', href=True):
        href = a_tag['href']
        full_url = urljoin(main_url, href)
        if urlparse(full_url).netloc == base_domain:
            links.add(full_url)

    return list(links)

def extract_content(url):
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        # You can customize this to get more specific content
        return soup.get_text()
    except Exception as e:
        print(f"Failed to extract {url}: {e}")
        return ""

def scrape_website(main_url):
    subpages = get_subpage_links(main_url)
    contents = {}

    for sub_url in subpages:
        print(f"Scraping {sub_url}")
        contents[sub_url] = extract_content(sub_url)

    return contents

# Example usage
main_url = 'https://riverml.xyz/dev/api/datasets/AirlinePassengers/'
website_content = scrape_website(main_url)

# Print first 500 characters from each page
for url, content in website_content.items():
    print(f"\nURL: {url}\nContent:\n{content[:500]}\n{'-'*80}")


[1;30;43mStrumieniowane dane wyjściowe obcięte do 5000 ostatnich wierszy.[0m







            River
          



            
              PoissonInclusion
            
          




































            Initializing search
          












    online-ml/river
  










          
  
    
  
  Introduction 🍼

        



          
  
    
  
  Recipes 🌮

        



          
  
    
  
  API reference 🍱

        



          
  
    

--------------------------------------------------------------------------------

URL: https://riverml.xyz/dev/api/base/Base/
Content:













Base - River















          Skip to content
        

















            River
          



            
              Base
            
          




































            Initializing search
          












    online-ml/river
  










          
  
    
  
  Introduction 🍼

        



          
  
    
  
  Re

In [12]:
from river import datasets

# List of available datasets
available_datasets = [
    datasets.AirlinePassengers(),
    datasets.Bananas(),
    datasets.Bikes(),
    datasets.ChickWeights(),
    datasets.CreditCard(),
    datasets.Elec2(),
    datasets.Higgs(),
    datasets.ImageSegments(),
    datasets.Insects(),
    datasets.Keystroke(),
    datasets.HTTP(),
    datasets.Higgs(),
    datasets.MaliciousURL(),
    datasets.Phishing(),
    datasets.SMSSpam(),
    datasets.SMTP(),
    datasets.TREC07(),
    datasets.ImageSegments(),
    datasets.Insects(),
    datasets.Keystroke(),
    datasets.Music(),

    datasets.TrumpApproval(),
    datasets.WaterFlow(),
    datasets.WebTraffic()
]

# Filter classification datasets
classification_datasets = [ds for ds in available_datasets if 'classification' in ds.task.lower()]

# Display information about each classification dataset
for ds in classification_datasets:
    print(f"Name: {ds.__class__.__name__}")
    print(f"Task: {ds.task}")
    print(f"Samples: {ds.n_samples}")
    print(f"Features: {ds.n_features}")
    print(f"Classes: {ds.n_classes}")
    print(f"Sparse: {ds.sparse}")
    print(f"Path: {ds.path}")
    print("-" * 40)


Name: Bananas
Task: Binary classification
Samples: 5300
Features: 2
Classes: None
Sparse: False
Path: /usr/local/lib/python3.11/dist-packages/river/datasets/banana.zip
----------------------------------------
Name: CreditCard
Task: Binary classification
Samples: 284807
Features: 30
Classes: None
Sparse: False
Path: /root/river_data/CreditCard/creditcard.csv
----------------------------------------
Name: Elec2
Task: Binary classification
Samples: 45312
Features: 8
Classes: None
Sparse: False
Path: /root/river_data/Elec2/electricity.csv
----------------------------------------
Name: Higgs
Task: Binary classification
Samples: 11000000
Features: 28
Classes: None
Sparse: False
Path: /root/river_data/Higgs/HIGGS.csv.gz
----------------------------------------
Name: ImageSegments
Task: Multi-class classification
Samples: 2310
Features: 18
Classes: 7
Sparse: False
Path: /usr/local/lib/python3.11/dist-packages/river/datasets/segment.csv.zip
----------------------------------------
Name: Insects

In [18]:
from river import datasets

# 🟩 Regression Datasets
regression_datasets = [
    datasets.AirlinePassengers(),
    datasets.Bikes(),
    datasets.ChickWeights(),
    datasets.MovieLens100K(),
    datasets.Restaurants(),
    datasets.Taxis(),
    datasets.TrumpApproval(),
    datasets.WaterFlow()
]

# 🟦 Binary Classification Datasets
binary_classification_datasets = [
    datasets.Bananas(),
    datasets.CreditCard(),
    datasets.Elec2(),
    datasets.Higgs(),
    datasets.HTTP(),
    datasets.MaliciousURL(),
    datasets.Phishing(),
    datasets.SMSSpam(),
    datasets.SMTP(),
    datasets.TREC07()
]

# 🟨 Multi-class Classification Datasets
multi_class_classification_datasets = [
    datasets.ImageSegments(),
    datasets.Insects(),
    datasets.Keystroke()
]

# 🟪 Multi-output Binary Classification Dataset
multi_output_binary_classification_datasets = [
    datasets.Music()
]

# 🟧 Multi-output Regression Datasets
multi_output_regression_datasets = [
    datasets.SolarFlare(),
    datasets.WebTraffic()
]

# ✅ Combine all datasets
available_datasets = (
    regression_datasets +
    binary_classification_datasets +
    multi_class_classification_datasets +
    multi_output_binary_classification_datasets +
    multi_output_regression_datasets
)

# 🔍 Filter classification datasets
classification_datasets = [
    ds for ds in available_datasets
    if "classification" in ds.task.lower()
]

# 📋 Display metadata for classification datasets
print("Real-world Classification Dataset Summary\n" + "="*45)
for ds in classification_datasets:
    print(f"Name: {ds.__class__.__name__}")
    print(f"Task: {ds.task}")
    print(f"Samples: {ds.n_samples}")
    print(f"Features: {ds.n_features}")
    print(f"Classes: {ds.n_classes}")
    print(f"Sparse: {ds.sparse}")
    print(f"Path: {ds.path}")
    print("-" * 40)


Real-world Classification Dataset Summary
Name: Bananas
Task: Binary classification
Samples: 5300
Features: 2
Classes: None
Sparse: False
Path: /usr/local/lib/python3.11/dist-packages/river/datasets/banana.zip
----------------------------------------
Name: CreditCard
Task: Binary classification
Samples: 284807
Features: 30
Classes: None
Sparse: False
Path: /root/river_data/CreditCard/creditcard.csv
----------------------------------------
Name: Elec2
Task: Binary classification
Samples: 45312
Features: 8
Classes: None
Sparse: False
Path: /root/river_data/Elec2/electricity.csv
----------------------------------------
Name: Higgs
Task: Binary classification
Samples: 11000000
Features: 28
Classes: None
Sparse: False
Path: /root/river_data/Higgs/HIGGS.csv.gz
----------------------------------------
Name: HTTP
Task: Binary classification
Samples: 567498
Features: 3
Classes: None
Sparse: False
Path: /root/river_data/HTTP/kdd99_http.csv
----------------------------------------
Name: Malicious

In [15]:
from river.datasets import synth

# List of available synthetic datasets
synthetic_datasets = [
    synth.Agrawal(),
    synth.AnomalySine(),
    synth.ConceptDriftStream(),
    synth.Friedman(),
    synth.FriedmanDrift(),
    synth.Hyperplane(),
    synth.LED(),
    synth.LEDDrift(),
    synth.Logical(),
    synth.Mixed(),
    synth.Mv(),
    synth.Planes2D(),
    synth.RandomRBF(),
    synth.RandomRBFDrift(),
    synth.RandomTree(),
    synth.SEA(),
    synth.STAGGER(),
    synth.Sine(),
    synth.Waveform()
]

# Example: Print task type for each synthetic dataset
for ds in synthetic_datasets:
    print(f"Name: {ds.__class__.__name__}")
    print(f"Task: {ds.task}")
    print(f"Features: {ds.n_features}")
    print(f"Classes: {ds.n_classes}")
    print("-" * 40)


Name: Agrawal
Task: Binary classification
Features: 9
Classes: 2
----------------------------------------
Name: AnomalySine
Task: Binary classification
Features: 2
Classes: 1
----------------------------------------
Name: ConceptDriftStream
Task: Binary classification
Features: 9
Classes: 2
----------------------------------------
Name: Friedman
Task: Regression
Features: 10
Classes: None
----------------------------------------
Name: FriedmanDrift
Task: Regression
Features: 10
Classes: None
----------------------------------------
Name: Hyperplane
Task: Binary classification
Features: 10
Classes: 2
----------------------------------------
Name: LED
Task: Multi-class classification
Features: 7
Classes: 10
----------------------------------------
Name: LEDDrift
Task: Multi-class classification
Features: 7
Classes: 10
----------------------------------------
Name: Logical
Task: Multi-output binary classification
Features: 2
Classes: None
----------------------------------------
Name: Mix

In [17]:
from river.datasets import synth

# 🟦 Binary Classification Datasets
binary_classification_datasets = [
    synth.Agrawal(),
    synth.AnomalySine(),
    synth.ConceptDriftStream(),
    synth.Hyperplane(),
    synth.Mixed(),
    synth.SEA(),
    synth.STAGGER(),
    synth.Sine()
]

# 🟨 Multi-class Classification Datasets
multi_class_classification_datasets = [
    synth.LED(),
    synth.LEDDrift(),
    synth.RandomRBF(),
    synth.RandomRBFDrift(),
    synth.RandomTree(),
    synth.Waveform()
]

# 🟩 Regression Datasets
regression_datasets = [
    synth.Friedman(),
    synth.FriedmanDrift(),
    synth.Mv(),
    synth.Planes2D()
]

# 🟪 Multi-output Binary Classification Dataset
multi_output_binary_classification_datasets = [
    synth.Logical()
]

# Combine all for general use
all_synthetic_datasets = (
    binary_classification_datasets +
    multi_class_classification_datasets +
    regression_datasets +
    multi_output_binary_classification_datasets
)

# Optional: print all dataset metadata
print("Synthetic Dataset Summary\n" + "="*30)
for ds in all_synthetic_datasets:
    print(f"Name: {ds.__class__.__name__}")
    print(f"Task: {ds.task}")
    print(f"Features: {ds.n_features}")
    print(f"Classes: {ds.n_classes}")
    print("-" * 40)


Synthetic Dataset Summary
Name: Agrawal
Task: Binary classification
Features: 9
Classes: 2
----------------------------------------
Name: AnomalySine
Task: Binary classification
Features: 2
Classes: 1
----------------------------------------
Name: ConceptDriftStream
Task: Binary classification
Features: 9
Classes: 2
----------------------------------------
Name: Hyperplane
Task: Binary classification
Features: 10
Classes: 2
----------------------------------------
Name: Mixed
Task: Binary classification
Features: 4
Classes: 2
----------------------------------------
Name: SEA
Task: Binary classification
Features: 3
Classes: None
----------------------------------------
Name: STAGGER
Task: Binary classification
Features: 3
Classes: 2
----------------------------------------
Name: Sine
Task: Binary classification
Features: 2
Classes: 2
----------------------------------------
Name: LED
Task: Multi-class classification
Features: 7
Classes: 10
----------------------------------------
Name:

In [23]:
from river import evaluate, metrics, tree, ensemble, naive_bayes
from river.datasets import synth

# Create a concept drift stream dataset
stream = synth.ConceptDriftStream(
    stream=synth.SEA(seed=42, variant=0),
    drift_stream=synth.SEA(seed=42, variant=1),
    seed=1,
    position=500,
    width=50
).take(100000)

# List of models to evaluate
models = {
    'Hoeffding Adaptive Tree': tree.HoeffdingAdaptiveTreeClassifier(
        grace_period=100, delta=1e-5, leaf_prediction='nb',
        nb_threshold=10, seed=0
    ),
    'Hoeffding Tree': tree.HoeffdingTreeClassifier(
        grace_period=50, delta=0.01
    ),
    'Naive Bayes': naive_bayes.GaussianNB(),
    'SRP Ensemble': ensemble.SRPClassifier(
        model=tree.HoeffdingTreeClassifier(), n_models=3, seed=42
    )
}

# Evaluate each model
for name, model in models.items():
    print(f"Evaluating {name}...")

    # Create a fresh metric for each evaluation
    metric = metrics.Accuracy()

    # Make a fresh copy of the stream for each model (since it's an iterator)
    dataset = synth.ConceptDriftStream(
        stream=synth.SEA(seed=42, variant=0),
        drift_stream=synth.SEA(seed=42, variant=1),
        seed=1,
        position=500,
        width=50
    ).take(1000)

    final_metric = evaluate.progressive_val_score(
        dataset=dataset,
        model=model,
        metric=metric,
        print_every=0
    )

    # Print actual metric value
    print(f"{name} Accuracy: {final_metric.get():.4f}")
    print("-" * 40)


Evaluating Hoeffding Adaptive Tree...
Hoeffding Adaptive Tree Accuracy: 0.9149
----------------------------------------
Evaluating Hoeffding Tree...
Hoeffding Tree Accuracy: 0.9139
----------------------------------------
Evaluating Naive Bayes...
Naive Bayes Accuracy: 0.9109
----------------------------------------
Evaluating SRP Ensemble...
SRP Ensemble Accuracy: 0.9199
----------------------------------------


In [26]:
from river import evaluate, metrics, tree, ensemble, forest
from river.datasets import synth
from river.metrics import Accuracy
from copy import deepcopy

# -------------------------------
# Define Stream Generators
# -------------------------------
binary_classification_datasets = [
    synth.Agrawal(),
    synth.AnomalySine(),
    synth.ConceptDriftStream(),
    synth.Hyperplane(),
    synth.Mixed(),
    synth.SEA(),
    synth.STAGGER(),
    synth.Sine()
]

multi_class_classification_datasets = [
    synth.LED(),
    synth.LEDDrift(),
    synth.RandomRBF(),
    synth.RandomRBFDrift(),
    synth.RandomTree(),
    synth.Waveform()
]

all_datasets = {
    ds.__class__.__name__: ds for ds in binary_classification_datasets + multi_class_classification_datasets
}

# -------------------------------
# Evaluation Loop
# -------------------------------
for dataset_name, stream in all_datasets.items():
    print(f"\n🔍 Dataset: {dataset_name}")

    # Re-initialize stream (take 1000 samples)
    dataset = deepcopy(stream).take(1000)

    # Define base model
    base_model = tree.HoeffdingTreeClassifier(grace_period=50, delta=0.01)

    # Define models
    models = {
        "Dynamic SRPClassifier": DynamicSRPClassifier(
            model=base_model.clone(), n_models=10, max_models=100,
            window_size=100, performance_threshold=0.8, seed=42
        ),
        "Static SRPClassifier": ensemble.SRPClassifier(
            model=base_model.clone(), n_models=10, seed=42
        ),
        "ARFClassifier": forest.ARFClassifier(
            seed=8, leaf_prediction="mc"
        )
    }

    # Evaluate each model
    for model_name, model in models.items():
        metric = Accuracy()
        dataset = deepcopy(stream).take(1000)  # Reset stream again
        result = evaluate.progressive_val_score(
            dataset=dataset,
            model=model,
            metric=metric,
            print_every=0
        )
        print(f"{model_name:>25}: Accuracy = {result.get():.4f}")



🔍 Dataset: Agrawal
    Dynamic SRPClassifier: Accuracy = 0.9740
     Static SRPClassifier: Accuracy = 0.9870
            ARFClassifier: Accuracy = 0.8378

🔍 Dataset: AnomalySine
    Dynamic SRPClassifier: Accuracy = 0.7838
     Static SRPClassifier: Accuracy = 0.9560
            ARFClassifier: Accuracy = 0.9790

🔍 Dataset: ConceptDriftStream
    Dynamic SRPClassifier: Accuracy = 0.9820
     Static SRPClassifier: Accuracy = 0.9720
            ARFClassifier: Accuracy = 0.8048

🔍 Dataset: Hyperplane
    Dynamic SRPClassifier: Accuracy = 0.7828
     Static SRPClassifier: Accuracy = 0.7437
            ARFClassifier: Accuracy = 0.7558

🔍 Dataset: Mixed
    Dynamic SRPClassifier: Accuracy = 0.9159
     Static SRPClassifier: Accuracy = 0.8438
            ARFClassifier: Accuracy = 0.9089

🔍 Dataset: SEA
    Dynamic SRPClassifier: Accuracy = 0.9309
     Static SRPClassifier: Accuracy = 0.9369
            ARFClassifier: Accuracy = 0.9510

🔍 Dataset: STAGGER
    Dynamic SRPClassifier: Accuracy = 