In [21]:
import numpy as np
import pandas as pd
import xgboost as xgb

from imblearn.over_sampling import SMOTE 
from imblearn.under_sampling import NeighbourhoodCleaningRule

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

from skmultiflow.core.base import BaseSKMObject, ClassifierMixin
from skmultiflow.drift_detection import ADWIN
from skmultiflow.utils import get_dimensions

import cryptoaml.datareader as cdr

In [2]:
# Load the "elliptic" data set and take its train/test split
# (feature set "AF", no meta columns, no unknown-class rows).
elliptic = cdr.get_data("elliptic")
split = elliptic.train_test_split(train_size=0.7, 
                                  feat_set="AF", 
                                  inc_meta=False,
                                  inc_unknown=False)

# Attach the labels as a trailing "class" column; the evaluation cells rely on
# it being the last column (they slice features with ``iloc[:, :-1]``).
# ``assign`` returns new frames, so the frames held by ``split`` are not
# mutated and re-running this cell gives the same result.
train_data = split.train_X.assign(**{"class": split.train_y})
test_data = split.test_X.assign(**{"class": split.test_y})

# Single frame with train rows followed by test rows. ``pd.concat`` replaces
# ``DataFrame.append``, which newer pandas releases no longer provide.
data = pd.concat([train_data, test_data], ignore_index=True)

In [3]:
class AdaptiveStackedBoostClassifier():
    """
    Stacked ensemble of XGBoost classifiers trained incrementally on mini-batches.

    Incoming samples are buffered until ``window_size`` rows are available.
    Each full window is split into a base-level part and a meta-level part:
    every existing base model continues training on the window, one new base
    model is created per window, and an XGBoost meta-learner is trained on the
    class probabilities the base models predict for the meta-level part.
    Once ``n_base_models`` base models exist, the new model replaces the one
    selected by ``_get_weakest_base_learner``.

    The meta-feature layout assumes two probability columns per base model
    (binary classification).

    Parameters
    ----------
    window_size: int (default=2000)
        Number of buffered samples that triggers training on a mini-batch.
    n_base_models: int (default=5)
        Maximum number of base-level models. Must be > 1.
    n_rounds_eval_base_model: int (default=3)
        Number of per-round scores kept for each base model; a base model only
        becomes a replacement candidate after this many rounds.
        Must be > 0 and <= ``n_base_models``.
    meta_learner_train_ratio: float (default=0.4)
        Fraction of each window reserved for the meta-level training set.
    verbose: bool (default=True)
        Stored, but not used by the current implementation.

    Raises
    ------
    ValueError
        If ``n_base_models`` or ``n_rounds_eval_base_model`` is out of range.
    """

    def __init__(self,
                 window_size=2000,
                 n_base_models=5,
                 n_rounds_eval_base_model=3,
                 meta_learner_train_ratio=0.4,
                 verbose=True):

        self._first_run = True
        self._window_size = window_size
        # validate 'n_base_models'
        if n_base_models <= 1:
            raise ValueError("'n_base_models' must be > 1")
        self._n_base_models = n_base_models
        # validate 'n_rounds_eval_base_model'
        if n_rounds_eval_base_model > n_base_models or n_rounds_eval_base_model <= 0:
            raise ValueError("'n_rounds_eval_base_model' must be > 0 and <= to 'n_base_models'")
        self._n_rounds_eval_base_model = n_rounds_eval_base_model
        self._meta_learner = xgb.XGBClassifier()
        self.meta_learner_train_ratio = meta_learner_train_ratio
        self._verbose = verbose
        self._X_buffer = np.array([])
        self._y_buffer = np.array([])

        # 3*N matrix, one column per base-model slot:
        # 1st row - base-level model (None while the slot is empty)
        # 2nd row - number of evaluation rounds the model has been through
        # 3rd row - array with the model's scores for its most recent rounds
        self._base_models = [[None for x in range(n_base_models)] for y in range(3)]

    def partial_fit(self, X, y):
        """
        Buffer the samples and train on every complete window.

        Parameters
        ----------
        X: numpy.ndarray
            Array of shape (n_samples, n_features).
        y: numpy.ndarray
            Array with the n_samples targets for X.

        Returns
        -------
        AdaptiveStackedBoostClassifier
            self
        """
        if self._first_run:
            # the feature count is only known once the first batch arrives
            self._X_buffer = np.array([]).reshape(0, X.shape[1])
            self._y_buffer = np.array([])
            self._first_run = False

        self._X_buffer = np.concatenate((self._X_buffer, X))
        self._y_buffer = np.concatenate((self._y_buffer, y))
        while self._X_buffer.shape[0] >= self._window_size:
            self._train_on_mini_batch(X=self._X_buffer[0:self._window_size, :],
                                      y=self._y_buffer[0:self._window_size])
            # drop the window that was just consumed
            self._X_buffer = self._X_buffer[self._window_size:, :]
            self._y_buffer = self._y_buffer[self._window_size:]
        return self

    def _train_new_base_model(self, X_base, y_base, X_meta, y_meta):
        """
        Train a fresh base model and return it with its meta-level features.

        Returns
        -------
        tuple
            ``(model, probabilities)`` where ``probabilities`` are the
            ``predict_proba`` outputs for ``X_meta``, taken before the model
            has been trained on the meta-level part.
        """
        # new base-level model
        new_base_model = xgb.XGBClassifier()
        # first train the base model on the base-level training set
        new_base_model.fit(X_base, y_base)
        # then extract the predicted probabilities to be added as meta-level features
        y_predicted = new_base_model.predict_proba(X_meta)
        # once the meta-features for this specific base-model are extracted,
        # we incrementally fit this base-model to the rest of the data,
        # this is done so this base-model is trained on a full batch
        new_base_model.fit(X_meta, y_meta, xgb_model=new_base_model.get_booster())
        return new_base_model, y_predicted

    def _construct_meta_features(self, meta_features):
        """
        Right-pad the meta-features with zero columns up to ``2 * n_base_models``.

        The meta-learner always sees the same number of columns, even while
        some base-model slots are still empty.
        """
        # get size of meta-features
        meta_features_shape = meta_features.shape[1]
        # get expected number of features,
        # binary probabilities from the total number of base-level models
        meta_features_expected = self._n_base_models * 2

        # since the base-level models list is not full,
        # we need to fill the features until the list is full,
        # so we set the remaining expected meta-features as 0
        if meta_features_shape < meta_features_expected:
            diff = meta_features_expected - meta_features_shape
            empty_features = np.zeros((meta_features.shape[0], diff))
            meta_features = np.hstack((meta_features, empty_features))
        return meta_features

    def _get_weakest_base_learner(self):
        """
        Return the slot index of the base model to replace.

        Among the models that have completed at least
        ``n_rounds_eval_base_model`` rounds, the one with the lowest summed
        score is chosen. If no model qualifies, the model with the most rounds
        is returned instead, so the caller always gets a valid index.
        """
        worst_model_idx = None
        # scores are summed over several rounds and may exceed 1, so start
        # from +inf rather than 1 to guarantee an eligible model is selected
        worst_performance = np.inf
        for idx in range(len(self._base_models[0])):
            current_round = self._base_models[1][idx]
            if current_round is None or current_round < self._n_rounds_eval_base_model:
                continue

            current_performance = self._base_models[2][idx].sum()
            if current_performance < worst_performance:
                worst_performance = current_performance
                worst_model_idx = idx

        if worst_model_idx is None:
            # nothing eligible (or no comparable score): replace the oldest model
            rounds = [-1 if r is None else r for r in self._base_models[1]]
            worst_model_idx = int(np.argmax(rounds))

        return worst_model_idx

    def _train_on_mini_batch(self, X, y):
        """
        Update the ensemble with one window of ``window_size`` samples.
        """
        # ----------------------------------------------------------------------------
        # STEP 1: split mini batch to base-level and meta-level training set
        # ----------------------------------------------------------------------------
        base_idx = int(self._window_size * (1.0 - self.meta_learner_train_ratio))
        X_base = X[0: base_idx, :]
        y_base = y[0: base_idx]
        # this part will be used to train the meta-level model,
        # and to continue training the base-level models on the rest of this batch
        X_meta = X[base_idx:self._window_size, :]
        y_meta = y[base_idx:self._window_size]

        # ----------------------------------------------------------------------------
        # STEP 2: train previous base-models
        # ----------------------------------------------------------------------------
        meta_features = []
        base_models_len = self._n_base_models - self._base_models[0].count(None)
        if base_models_len > 0: # check if we have any base-level models
            # importances of the meta-learner as fitted on the previous window;
            # columns 2*i and 2*i+1 belong to base model i
            base_model_performances = self._meta_learner.feature_importances_
            for b_idx in range(base_models_len): # loop and train and extract meta-level features

                # continuation of training (incremental) on base-level model,
                # using the base-level training set
                base_model = self._base_models[0][b_idx]
                base_model.fit(X_base, y_base, xgb_model=base_model.get_booster())
                y_predicted = base_model.predict_proba(X_meta) # extract meta-level features

                # extract meta-features
                meta_features = y_predicted if b_idx == 0 else np.hstack((meta_features, y_predicted))

                # once the meta-features for this specific base-model are extracted,
                # we incrementally fit this base-model to the rest of the data,
                # this is done so this base-model is trained on a full batch
                base_model.fit(X_meta, y_meta, xgb_model=base_model.get_booster())

                # update base-level model list; the score goes into a ring
                # buffer holding the last 'n_rounds_eval_base_model' rounds
                self._base_models[0][b_idx] = base_model
                current_round = self._base_models[1][b_idx]
                last_performance = base_model_performances[b_idx * 2] + base_model_performances[(b_idx*2)+1]
                self._base_models[2][b_idx][current_round%self._n_rounds_eval_base_model] = last_performance
                self._base_models[1][b_idx] = current_round + 1

        # ----------------------------------------------------------------------------
        # STEP 3: with each new batch, we create/train a new base model
        # ----------------------------------------------------------------------------
        new_base_model, new_base_model_meta_features = self._train_new_base_model(X_base, y_base, X_meta, y_meta)

        insert_idx = base_models_len
        if base_models_len == 0:
            meta_features = new_base_model_meta_features
        elif base_models_len > 0 and base_models_len < self._n_base_models:
            meta_features = np.hstack((meta_features, new_base_model_meta_features))
        else:
            # ensemble is full: the new model takes over the weakest model's
            # slot and its two meta-feature columns
            insert_idx = self._get_weakest_base_learner()
            meta_features[:, insert_idx * 2] = new_base_model_meta_features[:,0]
            meta_features[:, (insert_idx * 2) + 1] = new_base_model_meta_features[:,1]

        self._base_models[0][insert_idx] = new_base_model
        self._base_models[1][insert_idx] = 0
        self._base_models[2][insert_idx] = np.zeros(self._n_rounds_eval_base_model)

        # ----------------------------------------------------------------------------
        # STEP 4: train the meta-level model
        # ----------------------------------------------------------------------------
        meta_features = self._construct_meta_features(meta_features)
        if base_models_len == 0:
            self._meta_learner.fit(meta_features, y_meta)
        else:
            self._meta_learner.fit(meta_features, y_meta, xgb_model=self._meta_learner.get_booster())

    def predict(self, X):
        """
        Predict the class label for the samples in X.

        Parameters
        ----------
        X: numpy.ndarray
            Array of shape (n_samples, n_features).

        Returns
        -------
        numpy.ndarray
            Predicted labels. Zeros are returned while no model has been
            trained yet (fewer than ``window_size`` samples seen so far).
        """
        base_models_len = self._n_base_models - self._base_models[0].count(None)

        # nothing trained yet, return default values (0)
        if base_models_len == 0:
            return np.zeros(np.shape(X)[0], dtype=int)

        # only one model in ensemble use its predictions
        if base_models_len == 1:
            return self._base_models[0][0].predict(X)

        # predict via meta learner
        meta_features = []
        for b_idx in range(base_models_len):
            y_predicted = self._base_models[0][b_idx].predict_proba(X)
            meta_features = y_predicted if b_idx == 0 else np.hstack((meta_features, y_predicted))
        meta_features = self._construct_meta_features(meta_features)
        return self._meta_learner.predict(meta_features)

# Prequential evaluation of the stacked ensemble: learn time step `ts`,
# then score the predictions made for time step `ts + 1`.
true_test, predictions_test = [], []

# NOTE: the two resamplers are created here but not used further down in this cell.
ncr = NeighbourhoodCleaningRule(n_neighbors=3, threshold_cleaning=0.5)
sm = SMOTE()
adpBoost = AdaptiveStackedBoostClassifier()

first_ts, last_ts = data["ts"].min(), data["ts"].max()
for ts in np.arange(first_ts, last_ts):
    # every column except the trailing "class" label is used as a feature
    train_set = data[data["ts"] == ts]
    train_set_X, train_set_y = train_set.iloc[:, :-1], train_set["class"]
    adpBoost.partial_fit(train_set_X.values, train_set_y.values)

    test_set = data[data["ts"] == ts + 1]
    test_set_X = test_set.iloc[:, :-1].values
    test_set_y = test_set["class"].values

    y_pred = adpBoost.predict(test_set_X)
    evaluation = f1_score(test_set_y, y_pred, average='binary')
    print("Proposed TS {}: {}".format(ts+1, evaluation))

    # presumably time step 35 is where the held-out part of the split starts — TODO confirm
    if ts+1 >= 35:
        true_test.append(test_set_y)
        predictions_test.append(y_pred)

# Pool the held-out time steps once and reuse them for every metric.
y_true_all = np.concatenate(true_test, axis=0)
y_pred_all = np.concatenate(predictions_test, axis=0)

f1_score_test = f1_score(y_true_all, y_pred_all, average='binary')
print("F1-Score on test set: {}".format(round(f1_score_test, 3)))

recall_score_test = recall_score(y_true_all, y_pred_all, average='binary')
print("Recall on test set: {}".format(round(recall_score_test, 3)))

precision_score_test = precision_score(y_true_all, y_pred_all, average='binary')
print("Precision on test set: {}".format(round(precision_score_test, 3)))

confusion_matrix_test = confusion_matrix(y_true_all, y_pred_all)
print("Confusion_matrix: {}".format(confusion_matrix_test))

Proposed TS 2: 0.7142857142857143
Proposed TS 3: 0.6666666666666665
Proposed TS 4: 0.8524590163934426
Proposed TS 5: 0.2105263157894737
Proposed TS 6: 0.045454545454545456
Proposed TS 7: 0.4714285714285715
Proposed TS 8: 0.45714285714285713
Proposed TS 9: 0.9444444444444444
Proposed TS 10: 0.6666666666666666
Proposed TS 11: 0.9392712550607287
Proposed TS 12: 0.6666666666666666
Proposed TS 13: 0.9513513513513514
Proposed TS 14: 0.8478260869565216
Proposed TS 15: 0.9605263157894738
Proposed TS 16: 0.9571984435797667
Proposed TS 17: 0.867579908675799
Proposed TS 18: 0.8620689655172413
Proposed TS 19: 0.8313253012048193
Proposed TS 20: 0.8352941176470587
Proposed TS 21: 0.8725490196078431
Proposed TS 22: 0.8722741433021807
Proposed TS 23: 0.6391752577319587
Proposed TS 24: 0.45771144278606957
Proposed TS 25: 0.9009009009009009
Proposed TS 26: 0.7980769230769232
Proposed TS 27: 0.9387755102040817
Proposed TS 28: 0.9767441860465116
Proposed TS 29: 0.8189509306260576
Proposed TS 30: 0.9285714

In [4]:
class AdaptiveXGBoostClassifier(BaseSKMObject, ClassifierMixin):
    _PUSH_STRATEGY = 'push'
    _REPLACE_STRATEGY = 'replace'
    _UPDATE_STRATEGIES = [_PUSH_STRATEGY, _REPLACE_STRATEGY]

    def __init__(self,
                 n_estimators=30,
                 learning_rate=0.3,
                 max_depth=6,
                 max_window_size=1000,
                 min_window_size=None,
                 detect_drift=False,
                 update_strategy='replace'):
        """
        Adaptive XGBoost classifier.
        Parameters
        ----------
        n_estimators: int (default=30)
            The number of estimators in the ensemble.
        learning_rate: float (default=0.3)
            Learning rate, a.k.a eta.
        max_depth: int (default = 6)
            Max tree depth.
        max_window_size: int (default=1000)
            Max window size.
        min_window_size: int (default=None)
            Min window size. If this parameter is not set, then a fixed size
            window of size ``max_window_size`` will be used.
        detect_drift: bool (default=False)
            If set will use a drift detector (ADWIN).
        update_strategy: str (default='replace')
            | The update strategy to use:
            | 'push' - the ensemble resembles a queue
            | 'replace' - oldest ensemble members are replaced by newer ones
        Raises
        ------
        AttributeError
            If ``update_strategy`` is not one of the valid options.
        Notes
        -----
        The Adaptive XGBoost [1]_ (AXGB) classifier is an adaptation of the
        XGBoost algorithm for evolving data streams. AXGB creates new members
        of the ensemble from mini-batches of data as new data becomes
        available.  The maximum ensemble  size is fixed, but learning does not
        stop once this size is reached, the ensemble is updated on new data to
        ensure consistency with the current data distribution.
        References
        ----------
        .. [1] Montiel, Jacob, Mitchell, Rory, Frank, Eibe, Pfahringer,
           Bernhard, Abdessalem, Talel, and Bifet, Albert. “AdaptiveXGBoost for
           Evolving Data Streams”. In:IJCNN’20. International Joint Conference
           on Neural Networks. 2020. Forthcoming.
        """
        super().__init__()
        self.learning_rate = learning_rate
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.max_window_size = max_window_size
        self.min_window_size = min_window_size
        self._first_run = True
        self._ensemble = None
        self.detect_drift = detect_drift
        self._drift_detector = None
        self._X_buffer = np.array([])
        self._y_buffer = np.array([])
        self._samples_seen = 0
        self._model_idx = 0
        if update_strategy not in self._UPDATE_STRATEGIES:
            raise AttributeError("Invalid update_strategy: {}\n"
                                 "Valid options: {}".format(update_strategy,
                                                            self._UPDATE_STRATEGIES))
        self.update_strategy = update_strategy
        self._configure()

    def _configure(self):
        """
        (Re)create the ensemble container, window size, booster parameters
        and, if enabled, the drift detector.
        """
        if self.update_strategy == self._PUSH_STRATEGY:
            # queue that grows up to 'n_estimators' boosters
            self._ensemble = []
        elif self.update_strategy == self._REPLACE_STRATEGY:
            # fixed slots, None marks a slot that has not been trained yet
            self._ensemble = [None] * self.n_estimators
        self._reset_window_size()
        self._init_margin = 0.0
        # NOTE(review): newer XGBoost releases appear to use "verbosity"
        # instead of "silent" — confirm against the installed version.
        self._boosting_params = {"silent": True,
                                 "objective": "binary:logistic",
                                 "eta": self.learning_rate,
                                 "max_depth": self.max_depth}
        if self.detect_drift:
            self._drift_detector = ADWIN()

    def reset(self):
        """
        Reset the estimator.
        """
        self._first_run = True
        # The ensemble is rebuilt empty by _configure(), so the slot pointer and
        # the sample counter must restart too; a stale '_model_idx' would make
        # the next _train_booster call use ensemble slots that are still None.
        self._samples_seen = 0
        self._model_idx = 0
        self._configure()

    def partial_fit(self, X, y, classes=None, sample_weight=None):
        """
        Partially (incrementally) fit the model.
        Parameters
        ----------
        X: numpy.ndarray
            An array of shape (n_samples, n_features) with the data upon which
            the algorithm will create its model.
        y: Array-like
            An array of shape (, n_samples) containing the classification
            targets for all samples in X. Only binary data is supported.
        classes: Not used.
        sample_weight: Not used.
        Returns
        -------
        AdaptiveXGBoostClassifier
            self
        """
        # samples are fed one at a time so that the window logic and the
        # drift detector see every instance individually
        for i in range(X.shape[0]):
            self._partial_fit(np.array([X[i, :]]), np.array([y[i]]))
        return self

    def _partial_fit(self, X, y):
        """
        Buffer the sample(s), train on every complete window and, if
        enabled, update the drift detector.
        """
        if self._first_run:
            self._X_buffer = np.array([]).reshape(0, get_dimensions(X)[1])
            self._y_buffer = np.array([])
            self._first_run = False
        self._X_buffer = np.concatenate((self._X_buffer, X))
        self._y_buffer = np.concatenate((self._y_buffer, y))
        while self._X_buffer.shape[0] >= self.window_size:
            self._train_on_mini_batch(X=self._X_buffer[0:self.window_size, :],
                                      y=self._y_buffer[0:self.window_size])
            # drop the window that was just consumed
            self._X_buffer = self._X_buffer[self.window_size:, :]
            self._y_buffer = self._y_buffer[self.window_size:]

            # Check window size and adjust it if necessary
            self._adjust_window_size()

        # Support for concept drift
        if self.detect_drift:
            correctly_classifies = self.predict(X) == y
            # Check for warning
            self._drift_detector.add_element(int(not correctly_classifies))
            # Check if there was a change
            if self._drift_detector.detected_change():
                # Reset window size
                self._reset_window_size()
                if self.update_strategy == self._REPLACE_STRATEGY:
                    self._model_idx = 0

    def _adjust_window_size(self):
        """
        Double the dynamic window size, capping the effective window at
        ``max_window_size``.
        """
        if self._dynamic_window_size < self.max_window_size:
            self._dynamic_window_size *= 2
            if self._dynamic_window_size > self.max_window_size:
                self.window_size = self.max_window_size
            else:
                self.window_size = self._dynamic_window_size

    def _reset_window_size(self):
        """
        Set the window back to ``min_window_size`` if it is set,
        otherwise to ``max_window_size``.
        """
        if self.min_window_size:
            self._dynamic_window_size = self.min_window_size
        else:
            self._dynamic_window_size = self.max_window_size
        self.window_size = self._dynamic_window_size

    def _train_on_mini_batch(self, X, y):
        """
        Train one booster on the mini-batch and add it to the ensemble
        according to the update strategy.
        """
        if self.update_strategy == self._REPLACE_STRATEGY:
            booster = self._train_booster(X, y, self._model_idx)
            # Update ensemble
            self._ensemble[self._model_idx] = booster
            self._samples_seen += X.shape[0]
            self._update_model_idx()
        else:   # self.update_strategy == self._PUSH_STRATEGY
            booster = self._train_booster(X, y, len(self._ensemble))
            # Update ensemble
            if len(self._ensemble) == self.n_estimators:
                self._ensemble.pop(0)
            self._ensemble.append(booster)
            self._samples_seen += X.shape[0]

    def _train_booster(self, X: np.ndarray, y: np.ndarray, last_model_idx: int):
        """
        Train a single boosting round on (X, y), using the summed margins of
        ensemble members ``0 .. last_model_idx - 1`` as base margin.
        """
        d_mini_batch_train = xgb.DMatrix(X, y.astype(int))
        # Get margins from trees in the ensemble
        margins = np.asarray([self._init_margin] * d_mini_batch_train.num_row())
        for j in range(last_model_idx):
            margins = np.add(margins,
                             self._ensemble[j].predict(d_mini_batch_train, output_margin=True))
        d_mini_batch_train.set_base_margin(margin=margins)
        booster = xgb.train(params=self._boosting_params,
                            dtrain=d_mini_batch_train,
                            num_boost_round=1,
                            verbose_eval=False)
        return booster

    def _update_model_idx(self):
        """
        Advance the replace-strategy slot pointer, wrapping at ``n_estimators``.
        """
        self._model_idx += 1
        if self._model_idx == self.n_estimators:
            self._model_idx = 0

    def predict(self, X):
        """
        Predict the class label for sample X
        Parameters
        ----------
        X: numpy.ndarray
            An array of shape (n_samples, n_features) with the samples to
            predict the class label for.
        Returns
        -------
        numpy.ndarray
            A 1D array of shape (, n_samples), containing the
            predicted class labels for all instances in X.
        """
        if self._ensemble:
            if self.update_strategy == self._REPLACE_STRATEGY:
                trees_in_ensemble = sum(i is not None for i in self._ensemble)
            else:   # self.update_strategy == self._PUSH_STRATEGY
                trees_in_ensemble = len(self._ensemble)
            if trees_in_ensemble > 0:
                d_test = xgb.DMatrix(X)
                # chain the members: each margin output becomes the base
                # margin for the next member's prediction
                for i in range(trees_in_ensemble - 1):
                    margins = self._ensemble[i].predict(d_test, output_margin=True)
                    d_test.set_base_margin(margin=margins)
                predicted = self._ensemble[trees_in_ensemble - 1].predict(d_test)
                return np.array(predicted > 0.5).astype(int)
        # Ensemble is empty, return default values (0)
        return np.zeros(get_dimensions(X)[0])

    def predict_proba(self, X):
        """
        Not implemented for this method.
        """
        raise NotImplementedError("predict_proba is not implemented for this method.")

In [5]:
# Hyper-parameters for the Adaptive XGBoost classifier ('replace' strategy)

# Number of members in the ensemble
n_estimators = 30
# Learning rate or eta
learning_rate = 0.3
# Max depth for each tree in the ensemble
max_depth = 6
# Max window size
max_window_size = 1000
# Set to activate the dynamic window strategy
min_window_size = 1
# Enable/disable drift detection
detect_drift = False

AXGBr = AdaptiveXGBoostClassifier(
    n_estimators=n_estimators,
    learning_rate=learning_rate,
    max_depth=max_depth,
    max_window_size=max_window_size,
    min_window_size=min_window_size,
    detect_drift=detect_drift,
    update_strategy='replace',
)

In [6]:
# Prequential evaluation of AXGB ('replace'): learn time step `ts`,
# then score the predictions made for time step `ts + 1`.
true_test, predictions_test = [], []

first_ts, last_ts = data["ts"].min(), data["ts"].max()
for ts in np.arange(first_ts, last_ts):
    # every column except the trailing "class" label is used as a feature
    train_set = data[data["ts"] == ts]
    train_set_X, train_set_y = train_set.iloc[:, :-1], train_set["class"]
    AXGBr.partial_fit(train_set_X.values, train_set_y.values)

    test_set = data[data["ts"] == ts + 1]
    test_set_X = test_set.iloc[:, :-1].values
    test_set_y = test_set["class"].values

    y_pred = AXGBr.predict(test_set_X)
    evaluation = f1_score(test_set_y, y_pred, average='binary')
    print("Proposed TS {}: {}".format(ts+1, evaluation))

    # presumably time step 35 is where the held-out part of the split starts — TODO confirm
    if ts+1 >= 35:
        true_test.append(test_set_y)
        predictions_test.append(y_pred)

# Pool the held-out time steps once and reuse them for every metric.
y_true_all = np.concatenate(true_test, axis=0)
y_pred_all = np.concatenate(predictions_test, axis=0)

f1_score_test = f1_score(y_true_all, y_pred_all, average='binary')
print("F1-Score on test set: {}".format(round(f1_score_test, 3)))

recall_score_test = recall_score(y_true_all, y_pred_all, average='binary')
print("Recall on test set: {}".format(round(recall_score_test, 3)))

precision_score_test = precision_score(y_true_all, y_pred_all, average='binary')
print("Precision on test set: {}".format(round(precision_score_test, 3)))

confusion_matrix_test = confusion_matrix(y_true_all, y_pred_all)
print("Confusion_matrix: {}".format(confusion_matrix_test))

Proposed TS 2: 0.0
Proposed TS 3: 0.3076923076923077
Proposed TS 4: 0.8679245283018869
Proposed TS 5: 0.2222222222222222
Proposed TS 6: 0.0
Proposed TS 7: 0.03809523809523809
Proposed TS 8: 0.48888888888888893
Proposed TS 9: 0.7562189054726367
Proposed TS 10: 0.30769230769230765
Proposed TS 11: 0.9000000000000001
Proposed TS 12: 0.9032258064516129
Proposed TS 13: 0.9382716049382716
Proposed TS 14: 0.8860759493670887
Proposed TS 15: 0.9791666666666666
Proposed TS 16: 0.9435483870967741
Proposed TS 17: 0.8791208791208791
Proposed TS 18: 0.8723404255319148
Proposed TS 19: 0.822695035460993
Proposed TS 20: 0.84
Proposed TS 21: 0.8361581920903954
Proposed TS 22: 0.9019607843137254
Proposed TS 23: 0.5591397849462365
Proposed TS 24: 0.33333333333333337
Proposed TS 25: 0.9292035398230089
Proposed TS 26: 0.8023255813953488
Proposed TS 27: 0.9199999999999999
Proposed TS 28: 0.9822485207100591
Proposed TS 29: 0.47240618101545256
Proposed TS 30: 0.8679245283018868
Proposed TS 31: 0.593939393939394

In [27]:
# Hyper-parameters for the Adaptive XGBoost classifier ('push' strategy)

# Number of members in the ensemble
n_estimators = 30
# Learning rate or eta
learning_rate = 0.3
# Max depth for each tree in the ensemble
max_depth = 6
# Max window size
max_window_size = 1000
# Set to activate the dynamic window strategy
min_window_size = 1
# Enable/disable drift detection
detect_drift = False

AXGBp = AdaptiveXGBoostClassifier(
    n_estimators=n_estimators,
    learning_rate=learning_rate,
    max_depth=max_depth,
    max_window_size=max_window_size,
    min_window_size=min_window_size,
    detect_drift=detect_drift,
    update_strategy='push',
)

# Prequential evaluation: learn time step `ts`, then score the predictions
# made for time step `ts + 1`.
true_test, predictions_test = [], []

first_ts, last_ts = data["ts"].min(), data["ts"].max()
for ts in np.arange(first_ts, last_ts):
    # every column except the trailing "class" label is used as a feature
    train_set = data[data["ts"] == ts]
    train_set_X, train_set_y = train_set.iloc[:, :-1], train_set["class"]
    AXGBp.partial_fit(train_set_X.values, train_set_y.values)

    test_set = data[data["ts"] == ts + 1]
    test_set_X = test_set.iloc[:, :-1].values
    test_set_y = test_set["class"].values

    y_pred = AXGBp.predict(test_set_X)
    evaluation = f1_score(test_set_y, y_pred, average='binary')
    print("Proposed TS {}: {}".format(ts+1, evaluation))

    # presumably time step 35 is where the held-out part of the split starts — TODO confirm
    if ts+1 >= 35:
        true_test.append(test_set_y)
        predictions_test.append(y_pred)

# Pool the held-out time steps once and reuse them for every metric.
y_true_all = np.concatenate(true_test, axis=0)
y_pred_all = np.concatenate(predictions_test, axis=0)

f1_score_test = f1_score(y_true_all, y_pred_all, average='binary')
print("F1-Score on test set: {}".format(round(f1_score_test, 3)))

recall_score_test = recall_score(y_true_all, y_pred_all, average='binary')
print("Recall on test set: {}".format(round(recall_score_test, 3)))

precision_score_test = precision_score(y_true_all, y_pred_all, average='binary')
print("Precision on test set: {}".format(round(precision_score_test, 3)))

confusion_matrix_test = confusion_matrix(y_true_all, y_pred_all)
print("Confusion_matrix: {}".format(confusion_matrix_test))

Proposed TS 2: 0.0
Proposed TS 3: 0.3076923076923077
Proposed TS 4: 0.8679245283018869
Proposed TS 5: 0.2222222222222222
Proposed TS 6: 0.0
Proposed TS 7: 0.03809523809523809
Proposed TS 8: 0.48888888888888893
Proposed TS 9: 0.7562189054726367
Proposed TS 10: 0.30769230769230765
Proposed TS 11: 0.9000000000000001
Proposed TS 12: 0.9032258064516129
Proposed TS 13: 0.9382716049382716
Proposed TS 14: 0.8860759493670887
Proposed TS 15: 0.9791666666666666
Proposed TS 16: 0.9435483870967741
Proposed TS 17: 0.8791208791208791
Proposed TS 18: 0.8723404255319148
Proposed TS 19: 0.822695035460993
Proposed TS 20: 0.84
Proposed TS 21: 0.8361581920903954
Proposed TS 22: 0.9019607843137254
Proposed TS 23: 0.5591397849462365
Proposed TS 24: 0.3278688524590164
Proposed TS 25: 0.9090909090909091
Proposed TS 26: 0.7042253521126761
Proposed TS 27: 0.8518518518518519
Proposed TS 28: 0.9655172413793103
Proposed TS 29: 0.8655834564254062
Proposed TS 30: 0.8426966292134832
Proposed TS 31: 0.5268292682926828


In [28]:
# Baseline: a single XGBoost classifier that keeps training on each new time
# step, evaluated on the following time step.
incremental_xgb = xgb.XGBClassifier()
true_test = []
predictions_test = []

first_ts, last_ts = data["ts"].min(), data["ts"].max()
for ts in np.arange(first_ts, last_ts):
    train_set = data[data["ts"] == ts]
    # every column except the trailing "class" label is used as a feature
    train_set_X = train_set.iloc[:,:-1]
    train_set_y = train_set["class"]      
    
    # Warm-start from the existing booster on every step but the first.
    # Comparing against the actual first time step (rather than a hard-coded 1)
    # keeps this correct if the series does not start at 1.
    if ts > first_ts:
        incremental_xgb.fit(train_set_X.values, train_set_y.values, xgb_model=incremental_xgb.get_booster())
    else:
        incremental_xgb.fit(train_set_X.values, train_set_y.values)    
    
    test_set = data[data["ts"] == ts + 1]
    test_set_X = test_set.iloc[:,:-1].values
    test_set_y = test_set["class"].values

    y_pred = incremental_xgb.predict(test_set_X)
    evaluation = f1_score(test_set_y, y_pred, average='binary')
    print("Proposed TS {}: {}".format(ts+1, evaluation))
    
    # presumably time step 35 is where the held-out part of the split starts — TODO confirm
    if ts+1 >= 35:
        true_test.append(test_set_y)
        predictions_test.append(y_pred)

# Pool the held-out time steps once and reuse them for every metric.
y_true_all = np.concatenate(true_test, axis=0)
y_pred_all = np.concatenate(predictions_test, axis=0)

f1_score_test = f1_score(y_true_all, y_pred_all, average='binary')

accuracy_test = accuracy_score(y_true_all, y_pred_all, normalize=True)
print("Accuracy Score on test set: {}".format(round(accuracy_test, 3)))      

print("F1-Score on test set: {}".format(round(f1_score_test, 3)))     

recall_score_test = recall_score(y_true_all, y_pred_all, average='binary')
print("Recall on test set: {}".format(round(recall_score_test, 3)))      

precision_score_test = precision_score(y_true_all, y_pred_all, average='binary')
print("Precision on test set: {}".format(round(precision_score_test, 3)))    

confusion_matrix_test = confusion_matrix(y_true_all, y_pred_all)
print("Confusion_matrix: {}".format(confusion_matrix_test))    

Proposed TS 2: 0.9411764705882353
Proposed TS 3: 0.9
Proposed TS 4: 0.9090909090909091
Proposed TS 5: 0.23529411764705882
Proposed TS 6: 0.0
Proposed TS 7: 0.7955801104972375
Proposed TS 8: 0.832116788321168
Proposed TS 9: 0.9637096774193549
Proposed TS 10: 0.8648648648648649
Proposed TS 11: 0.9453124999999999
Proposed TS 12: 0.9032258064516129
Proposed TS 13: 0.9775474956822107
Proposed TS 14: 0.9047619047619047
Proposed TS 15: 0.9795918367346939
Proposed TS 16: 0.9561752988047808
Proposed TS 17: 0.8807339449541284
Proposed TS 18: 0.8867924528301887
Proposed TS 19: 0.8456375838926175
Proposed TS 20: 0.896414342629482
Proposed TS 21: 0.8703703703703703
Proposed TS 22: 0.8513119533527698
Proposed TS 23: 0.6236559139784946
Proposed TS 24: 0.5311203319502076
Proposed TS 25: 0.9380530973451328
Proposed TS 26: 0.8627450980392156
Proposed TS 27: 0.9166666666666666
Proposed TS 28: 0.9764705882352941
Proposed TS 29: 0.858085808580858
Proposed TS 30: 0.8862275449101796
Proposed TS 31: 0.7692307

In [25]:
from sklearn.tree import DecisionTreeClassifier
from skmultiflow.meta import AccuracyWeightedEnsemble

# Baseline: scikit-multiflow Accuracy Weighted Ensemble over decision trees,
# trained incrementally one timestep at a time and scored on the next timestep.
awej485 = AccuracyWeightedEnsemble(n_estimators=30,
                                   window_size=1000,
                                   base_estimator=DecisionTreeClassifier())

# First timestep whose predictions count towards the aggregated test metrics.
# NOTE(review): presumably mirrors the 70/30 split used when loading the data
# -- confirm before changing.
TEST_START_TS = 35

# Reset the accumulators. Without this the lists still contain whatever an
# earlier cell appended, so the metrics below would mix predictions coming
# from several different models.
true_test = []
predictions_test = []

for ts in np.arange(data["ts"].min(), data["ts"].max()):
    # Train on every sample of the current timestep; the last column is the
    # "class" label, so it is excluded from the features.
    train_set = data[data["ts"] == ts]
    train_set_X = train_set.iloc[:, :-1]
    train_set_y = train_set["class"]

    awej485.partial_fit(train_set_X.values, train_set_y.values)

    # Evaluate on the following timestep, which the model has not seen yet.
    test_set = data[data["ts"] == ts + 1]
    test_set_X = test_set.iloc[:, :-1].values
    test_set_y = test_set["class"].values

    y_pred = awej485.predict(test_set_X)
    evaluation = f1_score(test_set_y, y_pred, average='binary')
    # Label the per-timestep score with the model actually being evaluated.
    print("AWE TS {}: {}".format(ts + 1, evaluation))

    if ts + 1 >= TEST_START_TS:
        true_test.append(test_set_y)
        predictions_test.append(y_pred)

# Concatenate once; every aggregated metric uses the same two arrays.
y_true_test = np.concatenate(true_test, axis=0)
y_pred_test = np.concatenate(predictions_test, axis=0)

f1_score_test = f1_score(y_true_test, y_pred_test, average='binary')
print("F1-Score on test set: {}".format(round(f1_score_test, 3)))

recall_score_test = recall_score(y_true_test, y_pred_test, average='binary')
print("Recall on test set: {}".format(round(recall_score_test, 3)))

accuracy_test = accuracy_score(y_true_test, y_pred_test, normalize=True)
print("Accuracy Score on test set: {}".format(round(accuracy_test, 3)))

# Precision is reported exactly once; the former second copy printed the
# precision value under an "Accuracy" label.
precision_score_test = precision_score(y_true_test, y_pred_test, average='binary')
print("Precision on test set: {}".format(round(precision_score_test, 3)))

confusion_matrix_test = confusion_matrix(y_true_test, y_pred_test)
print("Confusion_matrix: {}".format(confusion_matrix_test))

Proposed TS 2: 0.0
Proposed TS 3: 0.0
Proposed TS 4: 0.0
Proposed TS 5: 0.0
Proposed TS 6: 0.020408163265306124
Proposed TS 7: 0.15632183908045977
Proposed TS 8: 0.0
Proposed TS 9: 0.0
Proposed TS 10: 0.0
Proposed TS 11: 0.0
Proposed TS 12: 0.0
Proposed TS 13: 0.0
Proposed TS 14: 0.0
Proposed TS 15: 0.0
Proposed TS 16: 0.0
Proposed TS 17: 0.0
Proposed TS 18: 0.0
Proposed TS 19: 0.0
Proposed TS 20: 0.0
Proposed TS 21: 0.0
Proposed TS 22: 0.0
Proposed TS 23: 0.0
Proposed TS 24: 0.0
Proposed TS 25: 0.0
Proposed TS 26: 0.0
Proposed TS 27: 0.0
Proposed TS 28: 0.0
Proposed TS 29: 0.0
Proposed TS 30: 0.0
Proposed TS 31: 0.0
Proposed TS 32: 0.0
Proposed TS 33: 0.0
Proposed TS 34: 0.0
Proposed TS 35: 0.0
Proposed TS 36: 0.0
Proposed TS 37: 0.0
Proposed TS 38: 0.0
Proposed TS 39: 0.0
Proposed TS 40: 0.0
Proposed TS 41: 0.0
Proposed TS 42: 0.0
Proposed TS 43: 0.0
Proposed TS 44: 0.0
Proposed TS 45: 0.0
Proposed TS 46: 0.0
Proposed TS 47: 0.0
Proposed TS 48: 0.0
Proposed TS 49: 0.0
F1-Score on tes

In [32]:
from skmultiflow.meta import AdaptiveRandomForest

# Baseline: scikit-multiflow Adaptive Random Forest, trained incrementally one
# timestep at a time and scored on the next timestep.
arf = AdaptiveRandomForest(performance_metric="kappa")

# First timestep whose predictions count towards the aggregated test metrics.
# NOTE(review): presumably mirrors the 70/30 split used when loading the data
# -- confirm before changing.
TEST_START_TS = 35

# Reset the accumulators. Without this the lists still contain whatever an
# earlier cell appended, so the metrics below would mix predictions coming
# from several different models.
true_test = []
predictions_test = []

for ts in np.arange(data["ts"].min(), data["ts"].max()):
    # Train on every sample of the current timestep; the last column is the
    # "class" label, so it is excluded from the features.
    train_set = data[data["ts"] == ts]
    train_set_X = train_set.iloc[:, :-1]
    train_set_y = train_set["class"]

    arf.partial_fit(train_set_X.values, train_set_y.values)

    # Evaluate on the following timestep, which the model has not seen yet.
    test_set = data[data["ts"] == ts + 1]
    test_set_X = test_set.iloc[:, :-1].values
    test_set_y = test_set["class"].values

    y_pred = arf.predict(test_set_X)
    evaluation = f1_score(test_set_y, y_pred, average='binary')
    # Label the per-timestep score with the model actually being evaluated.
    print("ARF TS {}: {}".format(ts + 1, evaluation))

    if ts + 1 >= TEST_START_TS:
        true_test.append(test_set_y)
        predictions_test.append(y_pred)

# Concatenate once; every aggregated metric uses the same two arrays.
y_true_test = np.concatenate(true_test, axis=0)
y_pred_test = np.concatenate(predictions_test, axis=0)

f1_score_test = f1_score(y_true_test, y_pred_test, average='binary')
print("F1-Score on test set: {}".format(round(f1_score_test, 3)))

recall_score_test = recall_score(y_true_test, y_pred_test, average='binary')
print("Recall on test set: {}".format(round(recall_score_test, 3)))

accuracy_test = accuracy_score(y_true_test, y_pred_test, normalize=True)
print("Accuracy Score on test set: {}".format(round(accuracy_test, 3)))

# Precision is reported exactly once; the former second copy printed the
# precision value under an "Accuracy" label.
precision_score_test = precision_score(y_true_test, y_pred_test, average='binary')
print("Precision on test set: {}".format(round(precision_score_test, 3)))

confusion_matrix_test = confusion_matrix(y_true_test, y_pred_test)
print("Confusion_matrix: {}".format(confusion_matrix_test))

Proposed TS 2: 0.1739130434782609
Proposed TS 3: 0.16666666666666669
Proposed TS 4: 0.18181818181818182
Proposed TS 5: 0.0
Proposed TS 6: 0.0
Proposed TS 7: 0.01941747572815534
Proposed TS 8: 0.66
Proposed TS 9: 0.9075268817204301
Proposed TS 10: 0.7058823529411765
Proposed TS 11: 0.8851063829787233
Proposed TS 12: 0.896551724137931
Proposed TS 13: 0.9516994633273702
Proposed TS 14: 0.875
Proposed TS 15: 0.9757785467128027
Proposed TS 16: 0.959349593495935
Proposed TS 17: 0.9417989417989419
Proposed TS 18: 0.9052631578947368
Proposed TS 19: 0.8904109589041096
Proposed TS 20: 0.8842105263157894
Proposed TS 21: 0.8272251308900523
Proposed TS 22: 0.9266666666666666
Proposed TS 23: 0.6506024096385542
Proposed TS 24: 0.34523809523809523
Proposed TS 25: 0.9174311926605504
Proposed TS 26: 0.8143712574850299
Proposed TS 27: 0.9787234042553191
Proposed TS 28: 0.9820359281437125
Proposed TS 29: 0.9064039408866995
Proposed TS 30: 0.9056603773584905
Proposed TS 31: 0.6424242424242425
Proposed TS 3