In [1]:
from skmultiflow.classification import MultiOutputClassifier
from skmultiflow.core import BaseSKMObject, ClassifierMixin, MetaEstimatorMixin
from skmultiflow.core.utils.utils import get_dimensions
from skmultiflow.drift_detection import ADWIN
from skmultiflow.options import FloatOption, IntOption
from skmultiflow.core.measurements import Measurement
from skmultiflow.utils import check_random_state
from skmultiflow.utils.utils import get_max_value_key
from skmultiflow.classification.base import BaseClassifier
from skmultiflow.utils.utils import calculate_object_size
from skmultiflow.utils import check_random_state
from skmultiflow.utils.utils import get_max_value_key
import numpy as np
from sklearn.utils import check_random_state
from skmultiflow.utils.data_structures import DoubleVector
from typing import Optional, List
from sklearn.base import is_classifier, clone
from sklearn.utils import check_random_state

class CALMID(BaseSKMObject, ClassifierMixin, MetaEstimatorMixin):
    def __init__(self, base_learner=None, ensemble_size=10, weight_shrink=6.0, delta_adwin=0.002,
                 active_learning_threshold=0.5, active_learning_budget=0.2, rand_ratio=0.1, init_instance_num=500,
                 size_window=500, random_seed=0):
        """Store the hyper-parameters and build the initial ensemble state.

        Parameters
        ----------
        base_learner : estimator or None, default=None
            Prototype that is cloned once per ensemble member. ``None``
            selects a freshly constructed ``HoeffdingTree``. The default is
            built per instance rather than in the signature, so it is never
            shared between instances and the class can be defined without
            ``HoeffdingTree`` being available at module level.
        ensemble_size : int, default=10
            Number of cloned learners and of ADWIN detectors to create.
        weight_shrink : float, default=6.0
            Stored as-is; not read by this method.
        delta_adwin : float, default=0.002
            ``delta`` argument passed to every ``ADWIN`` detector.
        active_learning_threshold : float, default=0.5
            Initial value of every pairwise margin threshold used by
            ``train_on_instance``.
        active_learning_budget : float, default=0.2
            Upper bound on ``labeling_cost / processed_instance`` checked by
            ``train_on_instance`` before it considers requesting a label.
        rand_ratio : float, default=0.1
            Probability used by ``train_on_instance`` for its random
            labelling branch; also scales the per-class sample window.
        init_instance_num : int, default=500
            Instances processed before this count are always learned by
            ``train_on_instance``.
        size_window : int, default=500
            Window size used by ``train_on_instance`` when sizing the
            per-class sample arrays and computing the imbalance ratio.
        random_seed : int, default=0
            Stored as-is; not read by this method.
        """
        super().__init__()

        if base_learner is None:
            # Imported here so this method does not depend on a module-level
            # name the file never imports.
            # NOTE(review): assumes scikit-multiflow exposes ``HoeffdingTree``
            # under ``skmultiflow.trees`` -- confirm for the installed version.
            from skmultiflow.trees import HoeffdingTree
            base_learner = HoeffdingTree()

        self.base_learner = base_learner
        self.ensemble_size = ensemble_size
        self.weight_shrink = weight_shrink
        self.delta_adwin = delta_adwin
        self.active_learning_threshold = active_learning_threshold
        self.active_learning_budget = active_learning_budget
        self.rand_ratio = rand_ratio
        self.init_instance_num = init_instance_num
        self.size_window = size_window
        self.random_seed = random_seed

        # Placeholder class count; train_on_instance overwrites it on its
        # first call.
        self.number_of_classes = 2

        # Ensemble members, drift detectors and all running counters are
        # created by reset_learning so the two code paths cannot drift apart.
        self.reset_learning()

    def reset_learning(self):
        """Discard all learned state and return to the untrained configuration.

        Rebuilds ``ensemble`` as ``ensemble_size`` clones of ``base_learner``
        and ``adwin_ensemble`` as the same number of ``ADWIN`` detectors
        created with ``delta_adwin``, then restores every running counter and
        cached index to its starting value. Hyper-parameters are left
        untouched.
        """
        n_members = self.ensemble_size

        # One independent clone of the prototype learner per member.
        self.ensemble = list(map(clone, [self.base_learner] * n_members))

        # One change detector per member.
        detectors = []
        for _ in range(n_members):
            detectors.append(ADWIN(delta=self.delta_adwin))
        self.adwin_ensemble = detectors

        # Counters and cached indices start from zero.
        self.labeling_cost = self.processed_instance = 0
        self.margin_value = self.cur_real_class_index = 0
        self.first_max_index = self.second_max_index = 0

        # Per-class structures are allocated by the first training call.
        self.rand_imbalance_count = self.mutual_margin_threshold = None
        self.init_first_time = True

    def train_on_instance(self, X, y):
        """Decide whether to request the label of one instance and update thresholds.

        On the first call the per-class structures (sample windows, imbalance
        counters, pairwise margin thresholds) are allocated. Afterwards the
        instance is always learned while fewer than ``init_instance_num``
        instances have been processed or when no votes are available;
        otherwise, while the labelling budget is not exhausted, the decision
        is based on the margin between the two highest votes compared with a
        per-class-pair threshold, plus a random component.

        Parameters
        ----------
        X
            Feature representation of the instance; passed unchanged to
            ``Sample`` and ``get_votes_for_instance``.
        y
            True class of the instance. It is used directly as an index into
            the per-class arrays, so it is presumably an integer class
            index -- TODO confirm against the caller.
        """
        if self.init_first_time:
            # NOTE(review): the class count comes from this first ``y`` only;
            # for a single label it evaluates to 1 -- confirm what callers
            # pass on the first call.
            self.number_of_classes = len(np.unique(y))
            self.size_sample_win = int(np.ceil((self.size_window * self.rand_ratio) / self.number_of_classes))
            self.rand_imbalance_count = np.zeros(self.number_of_classes)
            # One margin threshold per (predicted, runner-up) class pair.
            self.mutual_margin_threshold = np.full((self.number_of_classes, self.number_of_classes),
                                                    self.active_learning_threshold)
            # The cells hold Sample objects, so the array needs dtype=object
            # (the dtype argument was previously left empty, a syntax error).
            # All cells start out filled from one placeholder Sample.
            self.sample_arrays = np.full((self.number_of_classes, self.size_sample_win),
                                         fill_value=Sample(-1, 0, X), dtype=object)
            self.cur_pos_arrays = np.full(self.number_of_classes, -1, dtype=int)
            self.init_first_time = False

        self.cur_real_class_index = y
        change = False
        weighted_inst = Sample(self.processed_instance, 0, X)

        learn_or_not = False
        count = self.get_votes_for_instance(X)
        self.processed_instance += 1
        diff = 0.0
        adapted_weight = 0
        imb = 1.0

        if self.processed_instance < self.init_instance_num or len(count) < 1:
            # Warm-up phase, or no votes available: always learn.
            # diff = -1.0 marks this case for the weighting step below.
            learn_or_not = True
            diff = -1.0
            if self.processed_instance < self.init_instance_num:
                self.add_to_sampling_window(self.cur_real_class_index)
        else:
            # Fraction of processed instances whose label was requested.
            cost_now = self.labeling_cost / self.processed_instance
            if cost_now < self.active_learning_budget:
                # NOTE(review): draws from the global NumPy RNG; ``random_seed``
                # is not consulted in the visible code.
                if np.random.rand() < self.rand_ratio:
                    self.add_to_sampling_window(self.cur_real_class_index)
                    learn_or_not = True
                else:
                    self.add_to_sampling_window(-1)

                first_max_index = np.argmax(count)
                max_distr = count[first_max_index]
                if len(count) > 1:
                    # Mask the winner in place so argmax yields the runner-up.
                    count[first_max_index] = -1.0
                    second_max_index = np.argmax(count)
                    self.margin_value = max_distr - count[second_max_index]
                else:
                    second_max_index = 1
                    self.margin_value = max_distr

                num_rand_imb_count = max(1, self.rand_imbalance_count[self.cur_real_class_index])
                # NOTE(review): ``self.qty_nans`` is not assigned anywhere in
                # the visible code; presumably maintained by
                # add_to_sampling_window -- confirm.
                imb = num_rand_imb_count * self.number_of_classes / (self.size_window - self.qty_nans)

                if self.margin_value < self.mutual_margin_threshold[first_max_index][second_max_index]:
                    # Margin below the pair's threshold: request the label.
                    learn_or_not = True

                    # Shrink the threshold by 1% when the true class is the
                    # top vote (twice when imb > 0.5), or once when it is the
                    # runner-up and imb > 0.5.
                    if self.cur_real_class_index == first_max_index:
                        self.mutual_margin_threshold[first_max_index][second_max_index] *= (1.0 - 0.01)
                        if imb > 0.5:
                            self.mutual_margin_threshold[first_max_index][second_max_index] *= (1.0 - 0.01)
                    elif self.cur_real_class_index == second_max_index and imb > 0.5:
                        self.mutual_margin_threshold[first_max_index][second_max_index] *= (1.0 - 0.01)
                else:
                    # Margin at or above the threshold: label with a
                    # probability that falls as the margin exceeds the
                    # threshold and as the remaining budget shrinks.
                    sampling_budget = self.active_learning_budget - cost_now
                    p = self.margin_value - self.mutual_margin_threshold[first_max_index][second_max_index]
                    sampling_budget = sampling_budget / (sampling_budget + p)
                    if np.random.rand() < sampling_budget:
                        learn_or_not = True

                    if learn_or_not:
                        if self.cur_real_class_index == second_max_index:
                            # Raise the threshold by 1%, never letting it sit
                            # below the configured base threshold.
                            self.mutual_margin_threshold[first_max_index][self.cur_real_class_index] = max(
                                self.active_learning_threshold,
                                self.mutual_margin_threshold[first_max_index][self.cur_real_class_index] * (1.0 + 0.01))

        if learn_or_not:
            self.labeling_cost += 1
            if diff < 0:
                adapted_weight = 0
            else:
                # NOTE(review): f and s are computed but not used in the
                # visible part of this method.
                f = 1.0 if self.cur_real_class_index == first_max_index else -1.0
                s = 1.0 if self.cur_real_class_index == second_max_index else 0.0


AttributeError: module 'numpy' has no attribute 'float'.
`np.float` was a deprecated alias for the builtin `float`. To avoid this error in existing code, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations