# Classifier

## Authors

In [5]:
# Authorship metadata for this notebook/module.
__author__ = 'Anderson Carlos Ferreira da Silva'

## Imports

In [6]:
import logging
import math
import random
import sys
from operator import attrgetter
from random import randint

# NOTE: the relative order of the skmultiflow imports is kept as in the original so that
# the star import cannot shadow the explicitly imported names.
from skmultiflow.core.utils.utils import *
from skmultiflow.classification.base import BaseClassifier
from skmultiflow.classification.trees.hoeffding_tree import HoeffdingTree

## Constants

In [7]:
# Accepted values for the ``feature_mode`` option, which controls how the
# per-tree feature-subset size is derived from the total number of features.
FEATURES_M = ""                 # use the configured number of features as given
FEATURES_SQRT = "sqrt"          # size derived from sqrt(n)
FEATURES_SQRT_INV = "sqrt_inv"  # size derived from n - sqrt(n)
FEATURES_PERCENT = "percent"    # configured number is read as a percentage of n

## ARFHoeffdingTree

### References

- [Hoeffding Tree](https://github.com/scikit-multiflow/scikit-multiflow/blob/17327dc81b7d6e35d533795ae13493ad08118708/skmultiflow/classification/trees/hoeffding_tree.py)
- [Adaptive Random Forest Hoeffding Tree](https://github.com/Waikato/moa/blob/f5cdc1051a7247bb61702131aec3e62b40aa82f8/moa/src/main/java/moa/classifiers/trees/ARFHoeffdingTree.java)

In [10]:
class ARFHoeffdingTree(HoeffdingTree):
    """Hoeffding Tree whose leaves only monitor a random subset of the features.

    Every learning node draws, on the first instance it sees, ``nb_attributes``
    distinct feature indices and from then on only updates the attribute
    observers of those features.

    NOTE(review): ``MAJORITY_CLASS``, ``NAIVE_BAYES``, ``do_naive_bayes_prediction``,
    ``NominalAttributeClassObserver`` and ``GaussianNumericAttributeClassObserver``
    are used below but are not explicitly imported in this file; they are presumably
    expected to come from the skmultiflow imports -- confirm.
    """

    class RandomLearningNode(HoeffdingTree.ActiveLearningNode):
        """Learning node that observes a random subset of the features.

        Parameters
        ----------
        initial_class_observations: dict (class_value, weight) or None
            Initial class observations
        nb_attributes: int
            Number of features this node monitors.
        """
        def __init__(self, initial_class_observations, nb_attributes):
            super().__init__(initial_class_observations)
            self.nb_attributes = nb_attributes
            # Indices of the monitored features; drawn lazily on the first instance,
            # because the total number of features is only known from ``X``.
            self.list_attributes = None

        def learn_from_instance(self, X, y, weight, ht):
            """Update the node with the provided instance.

            Parameters
            ----------
            X: numpy.ndarray of length equal to the number of features.
                Instance attributes for updating the node.
            y: int
                Instance class.
            weight: float
                Instance weight.
            ht: HoeffdingTree
                Hoeffding Tree to update.
            """
            # The class may not have been observed yet, so do not rely on ``+=``.
            self._observed_class_distribution[y] = \
                self._observed_class_distribution.get(y, 0.0) + weight

            if self.list_attributes is None:
                # Sample distinct indices among ALL features of the instance (not
                # among range(nb_attributes), which would always select the first
                # nb_attributes features). The size is clamped to what X offers.
                nb_features = len(X)
                subset_size = max(0, min(self.nb_attributes, nb_features))
                self.list_attributes = random.sample(range(nb_features), subset_size)

            for i in self.list_attributes:
                try:
                    obs = self._attribute_observers[i]
                except KeyError:
                    # No observer registered for this feature yet.
                    obs = None
                if obs is None:
                    if i in ht.nominal_attributes:
                        obs = NominalAttributeClassObserver()
                    else:
                        obs = GaussianNumericAttributeClassObserver()
                    self._attribute_observers[i] = obs
                # Must run for every selected feature, hence inside the loop.
                obs.observe_attribute_class(X[i], int(y), weight)

    class LearningNodeNB(RandomLearningNode):
        """Random learning node that predicts with Naive Bayes once enough weight was seen.

        Parameters
        ----------
        initial_class_observations: dict (class_value, weight) or None
            Initial class observations
        nb_attributes: int
            Number of features this node monitors.
        """
        def __init__(self, initial_class_observations, nb_attributes):
            super().__init__(initial_class_observations, nb_attributes)

        def get_class_votes(self, X, ht):
            """Get the votes per class for a given instance.

            Parameters
            ----------
            X: numpy.ndarray of length equal to the number of features.
                Instance attributes.
            ht: HoeffdingTree
                Hoeffding Tree.

            Returns
            -------
            dict (class_value, weight)
                Class votes for the given instance.
            """
            if self.get_weight_seen() >= ht.nb_threshold:
                return do_naive_bayes_prediction(X, self._observed_class_distribution, self._attribute_observers)
            else:
                return super().get_class_votes(X, ht)

    class LearningNodeNBAdaptive(LearningNodeNB):
        """Learning node that uses Adaptive Naive Bayes models.

        The node tracks the weight correctly predicted by the majority class and
        by Naive Bayes, and votes with whichever has been more accurate so far.

        Parameters
        ----------
        initial_class_observations: dict (class_value, weight) or None
            Initial class observations
        nb_attributes: int
            Number of features this node monitors.
        """
        def __init__(self, initial_class_observations, nb_attributes):
            """LearningNodeNBAdaptive class constructor. """
            super().__init__(initial_class_observations, nb_attributes)
            self._mc_correct_weight = 0.0
            self._nb_correct_weight = 0.0

        def learn_from_instance(self, X, y, weight, ht):
            """Update the node with the provided instance.

            Both predictors are scored on the instance before the node learns from it.

            Parameters
            ----------
            X: numpy.ndarray of length equal to the number of features.
                Instance attributes for updating the node.
            y: int
                Instance class.
            weight: float
                The instance's weight.
            ht: HoeffdingTree
                The Hoeffding Tree to update.
            """
            if self._observed_class_distribution == {}:
                # All classes equal, default to class 0
                if 0 == y:
                    self._mc_correct_weight += weight
            elif max(self._observed_class_distribution, key=self._observed_class_distribution.get) == y:
                self._mc_correct_weight += weight
            nb_prediction = do_naive_bayes_prediction(X, self._observed_class_distribution, self._attribute_observers)
            if max(nb_prediction, key=nb_prediction.get) == y:
                self._nb_correct_weight += weight
            super().learn_from_instance(X, y, weight, ht)

        def get_class_votes(self, X, ht):
            """Get the votes per class for a given instance.

            Parameters
            ----------
            X: numpy.ndarray of length equal to the number of features.
                Instance attributes.
            ht: HoeffdingTree
                Hoeffding Tree.

            Returns
            -------
            dict (class_value, weight)
                Class votes for the given instance.
            """
            if self._mc_correct_weight > self._nb_correct_weight:
                return self._observed_class_distribution
            return do_naive_bayes_prediction(X, self._observed_class_distribution, self._attribute_observers)

    def __init__(self, max_byte_size = 33554432, memory_estimate_period = 1000000, grace_period = 200,
                     split_criterion = 'info_gain', split_confidence = 0.0000001, tie_threshold = 0.05,
                     binary_split = False, stop_mem_management = False, remove_poor_atts = False, no_preprune = False,
                     leaf_prediction = 'mc', nb_threshold = 0, nominal_attributes = None, nb_attributes = 2):
        """ARFHoeffdingTree class constructor.

        All parameters except ``nb_attributes`` are forwarded unchanged to
        ``HoeffdingTree``.

        Parameters
        ----------
        nb_attributes: int
            Number of features each learning node monitors.
        """
        super().__init__(max_byte_size, memory_estimate_period, grace_period, split_criterion, split_confidence,
                        tie_threshold, binary_split, stop_mem_management, remove_poor_atts, no_preprune,
                        leaf_prediction, nb_threshold, nominal_attributes)
        self.nb_attributes = nb_attributes
        # NOTE(review): not read anywhere in this file; kept as in the original.
        self.remove_poor_attributes_option = None

    def _new_learning_node(self, initial_class_observations=None):
        """Create a learning node matching the configured leaf prediction strategy.

        NOTE(review): the referenced scikit-multiflow ``HoeffdingTree`` appears to
        create its leaves through a hook with this name; overriding it is what makes
        the tree use the random-subspace nodes -- confirm against the parent class.

        Parameters
        ----------
        initial_class_observations: dict (class_value, weight) or None
            Initial class observations

        Returns
        -------
        RandomLearningNode
            A new learning node (or one of its Naive Bayes subclasses).
        """
        if self._leaf_prediction == MAJORITY_CLASS:
            return self.RandomLearningNode(initial_class_observations, self.nb_attributes)
        elif self._leaf_prediction == NAIVE_BAYES:
            return self.LearningNodeNB(initial_class_observations, self.nb_attributes)
        else:  # NAIVE_BAYES_ADAPTIVE
            return self.LearningNodeNBAdaptive(initial_class_observations, self.nb_attributes)

    def newLearningNode(self, initial_class_observations):
        """Create a learning node; kept for callers that use the original name."""
        return self._new_learning_node(initial_class_observations)

    @staticmethod
    def isRandomizable():
        """Return True: this learner draws random feature subsets."""
        return True

    

## Adaptive Random Forest

- [Adaptive Random Forest](https://github.com/Waikato/moa/blob/master/moa/src/main/java/moa/classifiers/meta/AdaptiveRandomForest.java)

In [None]:
class AdaptiveRandomForest(BaseClassifier):
    """Adaptive Random Forest: an ensemble of ``ARFHoeffdingTree`` base learners.

    Work in progress: ``fit``, ``predict``, ``predict_proba``, ``score`` and
    ``get_info`` are not implemented, and the drift/warning detectors are not
    configured yet.

    NOTE(review): ``ARFBaseLearner`` and ``TrainingRunnable`` are referenced below but
    are not defined in the visible part of this file -- confirm they are provided
    elsewhere.
    """

    def __init__(self, nb_ensemble = 10, feature_mode = 'sqrt', nb_features_per_tree = 2,
                 disable_drift_detection = False, disable_background_learner = False,
                 lambda_value = 6):
        """AdaptiveRandomForest class constructor.

        Parameters
        ----------
        nb_ensemble: int
            Number of base learners in the ensemble.
        feature_mode: str
            One of FEATURES_M, FEATURES_SQRT, FEATURES_SQRT_INV or FEATURES_PERCENT;
            selects how the per-tree feature-subset size is computed in ``init``.
        nb_features_per_tree: int
            Base value (m) from which the per-tree feature-subset size is derived.
        disable_drift_detection: bool
            Forwarded (negated) to each base learner.
        disable_background_learner: bool
            Forwarded (negated) to each base learner.
        lambda_value: float
            Mean of the Poisson distribution from which the per-learner training
            weight ``k`` of each instance is drawn.
        """
        super().__init__()
        self.nb_ensemble = nb_ensemble
        self.feature_mode = feature_mode
        self.nb_features_per_tree = nb_features_per_tree
        self.disable_drift_detection = disable_drift_detection
        # Spelled the way ``init`` reads it (the original stored it under a
        # differently-cased name, so ``init`` could never find it).
        self.disable_background_learner = disable_background_learner
        self.lambda_value = lambda_value
        self.x_seen = 0
        self.ensemble = None
        self.nb_attributes = None
        # Optional executor for training the learners in parallel; None trains them
        # in place. NOTE(review): nothing in this file ever sets an executor.
        self.executor = None
        # TODO: not ported yet. The MOA defaults are "ADWINChangeDetector -a 1.0E-5"
        # for drift detection and "ADWINChangeDetector -a 1.0E-4" for warning
        # detection (the warning starts the training of the background learner).
        self.drift_detection_method = None
        self.warning_detection_method = None

    def fit(self, X, y, classes=None, weight=None):
        raise NotImplementedError

    @staticmethod
    def _poisson(lam):
        """Draw a sample from a Poisson distribution of mean ``lam`` (Knuth's method)."""
        if lam <= 0:
            return 0
        limit = math.exp(-lam)
        k = 0
        p = random.random()
        while p > limit:
            k += 1
            p *= random.random()
        return k

    def partial_fit(self, X, y, classes=None, weight=None):
        """Train every base learner on the instance with a Poisson-distributed weight.

        Parameters
        ----------
        X: instance attributes.
        y: instance class.
        classes: unused.
        weight: unused.
        """
        self.x_seen = self.x_seen + 1

        if not self.ensemble:
            # The feature-subset size depends on the dimensions of X.
            self.init(X)

        # TODO: the per-learner evaluation of the instance before training (present
        # in the MOA implementation) is not ported yet.
        trainers = []
        for learner in self.ensemble:
            k = self._poisson(self.lambda_value)
            if k <= 0:
                # Weight 0: this learner skips the instance.
                continue
            if self.executor:
                trainers.append(TrainingRunnable(learner, X, y, k, self.x_seen))
            else:
                # Single-threaded: train in place.
                learner.partial_fit(X, y, k, self.x_seen)

        if self.executor:
            # NOTE(review): `invokeAll` mirrors the Java executor API -- confirm the
            # interface of whatever executor object is plugged in.
            self.executor.invokeAll(trainers)

    def predict(self, X):
        raise NotImplementedError

    def predict_proba(self, X):
        raise NotImplementedError

    def reset(self):
        """Reset attributes."""
        self.ensemble = None
        self.nb_attributes = 0
        self.x_seen = 0

    def score(self, X, y):
        raise NotImplementedError

    def get_info(self):
        raise NotImplementedError

    def init(self, X):
        """Compute the per-tree feature-subset size and build the ensemble.

        Parameters
        ----------
        X: instance attributes; only its dimensions (via ``get_dimensions``) are used.
        """
        self.ensemble = [None] * self.nb_ensemble

        self.nb_attributes = self.nb_features_per_tree

        # The size of m depends on the number of features n and on feature_mode.
        _, n = get_dimensions(X)

        if self.feature_mode == FEATURES_SQRT:
            self.nb_attributes = int(round(math.sqrt(n)) + 1)
        elif self.feature_mode == FEATURES_SQRT_INV:
            self.nb_attributes = n - int(round(math.sqrt(n) + 1))
        elif self.feature_mode == FEATURES_PERCENT:
            percent = (100 + self.nb_attributes) / 100.0 if self.nb_attributes < 0 else self.nb_attributes / 100.0
            self.nb_attributes = int(round(n * percent))

        # Notice that if the selected feature_mode was FEATURES_M then nothing is
        # performed, still it is necessary to check (and adjust) for when a negative
        # value was used.

        # m is negative, use size(features) + -m
        if self.nb_attributes < 0:
            self.nb_attributes = n + self.nb_attributes
        # Other sanity checks to avoid runtime errors.
        # m <= 0 (m can be negative if nb_attributes was negative and abs(m) > n),
        # then use m = 1
        if self.nb_attributes <= 0:
            self.nb_attributes = 1
        # m > n, then it should use n
        if self.nb_attributes > n:
            self.nb_attributes = n

        for i in range(self.nb_ensemble):
            self.ensemble[i] = ARFBaseLearner(
                i, ARFHoeffdingTree(nb_attributes = self.nb_attributes), self.x_seen,
                not self.disable_background_learner, not self.disable_drift_detection,
                self.drift_detection_method, self.warning_detection_method, False)