In [5]:
!pip install -e ./stream-learn

Obtaining file:///home/jovyan/work/stream-learn
Installing collected packages: stream-learn
  Found existing installation: stream-learn 0.7.1
    Uninstalling stream-learn-0.7.1:
      Successfully uninstalled stream-learn-0.7.1
  Running setup.py develop for stream-learn
Successfully installed stream-learn


In [1]:
!pip install attrs



In [2]:
from strlearn.streams import StreamGenerator

In [11]:
# Stream used by the cells below, configured with two classes and one drift.
stream = StreamGenerator(n_classes=2, n_drifts=1)

In [42]:
import numpy as np
from attr import attrs, attrib, Factory
from copy import deepcopy
from typing import List, Callable, NewType, Any, Optional, Dict
from sklearn.naive_bayes import GaussianNB
from abc import ABC, abstractmethod

In [89]:
# Names used in the annotations of the classes below. Both are NewTypes over
# Any, so they document intent without constraining the values.
Classifier = NewType('Classifier', Any)
Instance = NewType('Instance', Any)

In [92]:
class OALELabelingStrategy(ABC):
    """Interface of the labeling strategies defined in this notebook.

    A concrete strategy implements :meth:`label`, which decides for a single
    instance whether it should be labeled.
    """

    @abstractmethod
    def label(self, x: Instance) -> bool:
        """Decide whether ``x`` should be labeled.

        :param x: the instance under consideration.
        :return: a boolean decision; the concrete strategies in this notebook
            return ``True`` or ``False``.
        :raises NotImplementedError: always, in this abstract base version.
        """
        raise NotImplementedError

In [91]:
@attrs(auto_attribs=True)
class RandomStrategy(OALELabelingStrategy):
    """Labeling strategy whose decision depends only on one random draw."""

    # Value compared against a draw from np.random.uniform(); a fraction, so
    # it is annotated as float rather than int.
    _threshold_adjustment_step: float = attrib(default=0.1)

    def label(self, x: Instance) -> bool:
        """Decide at random whether ``x`` should be labeled.

        :param x: the instance under consideration; it does not influence the
            decision.
        :return: ``True`` when the configured threshold is less than or equal
            to a fresh ``np.random.uniform()`` draw, ``False`` otherwise.
        """
        # NOTE(review): the comparison direction is kept exactly as drafted.
        # With the default of 0.1 this answers True for most draws; confirm
        # that this is the intended behavior.
        return bool(self._threshold_adjustment_step <= np.random.uniform())
        

In [90]:
@attrs(auto_attribs=True)
class UncertaintyStrategy(OALELabelingStrategy):
    """Placeholder for an uncertainty-based labeling strategy.

    The decision logic has not been written yet: :meth:`label` currently
    answers ``True`` for every instance and ignores both thresholds.
    """

    # Both values are mandatory constructor arguments.
    # NOTE(review): annotated as float on the assumption that they are
    # fractional, like the 0.1 / 0.5 defaults used elsewhere in this
    # notebook; confirm.
    _threshold_margin: float = attrib()
    _threshold_adjustment_step: float = attrib()

    def label(self, x: Instance) -> bool:
        """Decide whether ``x`` should be labeled.

        :param x: the instance under consideration (currently unused).
        :return: always ``True`` until the strategy is implemented.
        """
        return True  # TODO(bgulowaty)

In [40]:
@attrs(auto_attribs=True)
class OALE:
    """Work-in-progress online ensemble fed one instance at a time.

    The object keeps a *stable* classifier, a sliding list of *dynamic*
    classifiers and a cache holding the first ``block_size`` ``(x, y)`` pairs.
    Two steps were never written in this notebook and are explicit stubs that
    raise ``NotImplementedError``: :meth:`_deal_instance` and
    :meth:`_update_weights`. Until they are implemented, :meth:`partial_fit`
    cannot complete.

    ``attrs`` strips the leading underscore when it generates ``__init__``, so
    the constructor arguments are ``classifier_provider``, ``block_size``,
    ``dynamics_classifiers`` and so on.
    """

    # Called as provider(instances, labels); must return a trained classifier.
    _classifier_provider: Optional[Callable[[List[Instance], List[Any]], Classifier]] = None
    _block_size: int = 5  # I
    _dynamics_classifiers: int = 20  # D, upper bound on the number of dynamic classifiers kept
    _threshold_adjustment_step: float = attrib(default=0.5)  # s
    _selection_ratio: float = 0.5  # r, how many of the cached instances are used in building new classifier
    _margin_threshold: float = attrib(default=1)  # theta
    # sigma; handed to _deal_instance. Optional so that it can follow the
    # defaulted attributes above (attrs rejects a mandatory attribute there).
    _random_strategy: Optional['OALELabelingStrategy'] = None

    _classes: List = attrib(factory=list, init=False)

    _cache: List = attrib(factory=list, init=False)  # of tuples (x,y)
    _processed_instances: int = attrib(default=0, init=False)
    _dynamic_clfs_count: int = attrib(default=0, init=False)  # k
    _theta_m: float = attrib(default=0, init=False)
    _stable_clf: Optional[Classifier] = attrib(default=None, init=False)
    _dynamic_clfs: List[Classifier] = attrib(factory=list, init=False)

    ## SINGLE INSTANCE
    def partial_fit(self, x, y, classes):
        """Consume a single instance.

        :param x: the instance.
        :param y: the value stored next to ``x`` in the cache and later passed
            to the classifiers as its label.
        :param classes: the known classes; stored on every call, used to
            derive ``_theta_m`` and forwarded to the stable classifier.
        :raises NotImplementedError: from the unfinished steps of the algorithm.
        """
        self._classes = classes
        self._processed_instances += 1
        seen = self._processed_instances

        if seen <= self._block_size:
            # First fill of the cache. Entries are appended (the list starts
            # empty, so assigning by index would fail) and stored as (x, y)
            # pairs, which is what the sampling helper unpacks.
            self._cache.append((x, y))
            if seen == self._block_size:
                # The cache just became full: create C_s and the first
                # dynamic classifier from it.
                new_clf = self._create_new_classifier()
                self._dynamic_clfs_count = 1
                self._stable_clf = new_clf
                self._dynamic_clfs.append(deepcopy(new_clf))
        else:  # more instances processed than block size
            slot = (seen - 1) % self._block_size
            self._deal_instance(x, slot, self._random_strategy, self._theta_m,
                                self._threshold_adjustment_step)
            if (slot + 1) % self._block_size == 0:
                # A whole block has been processed.
                # NOTE(review): the draft incremented an undefined `self._k`;
                # `_dynamic_clfs_count` is presumably that counter - confirm.
                self._dynamic_clfs_count += 1
                new_clf = self._create_new_classifier()
                self._theta_m = self._margin_threshold * 2 / len(self._classes)
                if self._dynamic_clfs_count > self._dynamics_classifiers:
                    # Ensemble is full: drop the oldest dynamic classifier.
                    self._dynamic_clfs.pop(0)
                # NOTE(review): the draft stored the new classifier only when
                # the ensemble was already full, and at an out-of-range index;
                # appending in both cases is assumed to be the intent.
                self._dynamic_clfs.append(new_clf)
                self._update_weights()

        # NOTE(review): kept from the draft, where this loop ran on every call
        # over `range(0, block_size - 1)` of an undefined `self._A`. It now
        # walks every pair actually cached; confirm that it belongs here
        # rather than in an end-of-stream step.
        for slot, (cached_x, _) in enumerate(self._cache):
            self._deal_instance(cached_x, slot, self._random_strategy, self._theta_m,
                                self._threshold_adjustment_step)

    def _create_new_classifier(self):
        """Train a new classifier on a random sample of the cache.

        If a stable classifier already exists, it is updated with the same
        sample before the new classifier is built.

        :return: whatever ``classifier_provider`` returns for the sample.
        :raises ValueError: if no ``classifier_provider`` was configured.
        """
        if self._classifier_provider is None:
            raise ValueError("OALE needs a classifier_provider to create classifiers")

        instances, labels = self._get_randomly_selected_cache_instances()

        if self._stable_clf is not None:
            self._update_stable_classifier(instances, labels)

        return self._classifier_provider(instances, labels)

    def _update_stable_classifier(self, x, y):
        """Update the stable classifier with a batch.

        Uses ``partial_fit(x, y, classes)`` when the classifier offers it and
        falls back to ``fit(x, y)`` otherwise. The fallback is called with
        this batch only.

        :param x: instances of the batch.
        :param y: labels of the batch.
        """
        incremental_fit = getattr(self._stable_clf, "partial_fit", None)
        if callable(incremental_fit):
            incremental_fit(x, y, self._classes)
        else:
            self._stable_clf.fit(x, y)

    def _get_randomly_selected_cache_instances(self):
        """Sample cached pairs at random and split them into two lists.

        The sample size is ``ceil(len(cache) * selection_ratio)``. The draw
        uses the defaults of ``np.random.choice``, i.e. it is made with
        replacement.

        :return: ``(instances, labels)``; both lists are empty when there is
            nothing to sample.
        """
        sample_size = int(np.ceil(len(self._cache) * self._selection_ratio))
        if sample_size <= 0:
            return [], []

        # Draw positions rather than the pairs themselves: np.random.choice
        # needs an integer size and only accepts one-dimensional input.
        picked = np.random.choice(len(self._cache), sample_size)
        instances = [self._cache[position][0] for position in picked]
        labels = [self._cache[position][1] for position in picked]

        return instances, labels

    def _deal_instance(self, x_new, i, sigma, theta_m, s):
        """Process one instance against cache slot ``i`` - not implemented yet.

        The draft of this method stopped mid-statement, right after reading
        cache slot ``i`` and starting to assign a ``labeling`` value, so the
        intended behavior cannot be recovered from the notebook.

        :param x_new: the instance to process.
        :param i: index of the cache slot.
        :param sigma: ``partial_fit`` passes the configured random strategy.
        :param theta_m: ``partial_fit`` passes the current ``_theta_m``.
        :param s: ``partial_fit`` passes the threshold adjustment step.
        :raises NotImplementedError: always.
        """
        raise NotImplementedError("OALE._deal_instance has not been implemented yet")

    def _update_weights(self):
        """Weight update run after each completed block - not implemented yet.

        ``partial_fit`` calls this method, but the notebook never defined it.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError("OALE._update_weights has not been implemented yet")
    


In [12]:
# Pull a chunk from the stream; the cell output shows a tuple holding a 2-D
# float array and a 1-D array of 0/1 values.
stream.get_chunk()

(array([[ 0.85801241, -0.37418577, -2.22585776, ..., -0.92462048,
         -1.45106402, -1.02171288],
        [ 0.34225386,  0.87562143,  0.49849884, ...,  0.38081094,
         -0.49283377, -0.71939448],
        [ 0.08939893,  0.38352274,  1.17993825, ..., -0.5758609 ,
          1.02622519, -0.84828413],
        ...,
        [-0.27052349, -1.30232049,  2.61901435, ...,  0.95900937,
          0.74613897, -0.33467108],
        [-0.26074908, -0.21394778, -2.06927428, ...,  0.39128634,
          0.46021334,  1.36650102],
        [ 0.40400803, -0.87146794, -0.36472071, ...,  0.81808583,
          1.49451413,  1.10835393]]),
 array([1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1,
        0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
        1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
   