In [2]:
import numpy as np
import xgboost as xgb
import cryptoaml.datareader as cdr
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import confusion_matrix

from skmultiflow.drift_detection import DDM
from imblearn.over_sampling import SMOTE 
from imblearn.under_sampling import NeighbourhoodCleaningRule

In [5]:
# Fetch the "eth_accounts" dataset through the cryptoaml data reader and
# split it, requesting train_size=0.7.
elliptic = cdr.get_data("eth_accounts")
data = elliptic.train_test_split(train_size=0.7)

# Show the feature-matrix shapes stored under the "ALL" key:
# training part first, test part second.
display(data["ALL"].train_X.shape, data["ALL"].test_X.shape)

# train_data = data.train_X

# display(train_data)
# train_data["class"] = data.train_y
# test_data = data.test_X
# test_data["class"] = data.test_y 
# # data = train_data.append(test_data, ignore_index=True)

(3276, 42)

(1405, 42)

In [4]:
import numpy as np

import xgboost as xgb

from skmultiflow.core.base import BaseSKMObject, ClassifierMixin
from skmultiflow.drift_detection import ADWIN
from skmultiflow.utils import get_dimensions


class AdaptiveXGBoostClassifier(BaseSKMObject, ClassifierMixin):
    """Adaptive XGBoost (AXGB) binary classifier for data streams.

    See ``__init__`` for the parameter description and references.
    """

    _PUSH_STRATEGY = 'push'
    _REPLACE_STRATEGY = 'replace'
    _UPDATE_STRATEGIES = [_PUSH_STRATEGY, _REPLACE_STRATEGY]

    def __init__(self,
                 n_estimators=30,
                 learning_rate=0.3,
                 max_depth=6,
                 max_window_size=1000,
                 min_window_size=None,
                 detect_drift=False,
                 update_strategy='replace'):
        """
        Adaptive XGBoost classifier.

        Parameters
        ----------
        n_estimators: int (default=30)
            The number of estimators in the ensemble.

        learning_rate: float (default=0.3)
            Learning rate, a.k.a eta.

        max_depth: int (default=6)
            Max tree depth.

        max_window_size: int (default=1000)
            Max window size.

        min_window_size: int (default=None)
            Min window size. If this parameter is not set, then a fixed size
            window of size ``max_window_size`` will be used.

        detect_drift: bool (default=False)
            If set will use a drift detector (ADWIN).

        update_strategy: str (default='replace')
            | The update strategy to use:
            | 'push' - the ensemble resembles a queue
            | 'replace' - oldest ensemble members are replaced by newer ones

        Raises
        ------
        AttributeError
            If ``update_strategy`` is not one of the supported strategies.

        Notes
        -----
        The Adaptive XGBoost [1]_ (AXGB) classifier is an adaptation of the
        XGBoost algorithm for evolving data streams. AXGB creates new members
        of the ensemble from mini-batches of data as new data becomes
        available.  The maximum ensemble  size is fixed, but learning does not
        stop once this size is reached, the ensemble is updated on new data to
        ensure consistency with the current data distribution.

        References
        ----------
        .. [1] Montiel, Jacob, Mitchell, Rory, Frank, Eibe, Pfahringer,
           Bernhard, Abdessalem, Talel, and Bifet, Albert. “AdaptiveXGBoost for
           Evolving Data Streams”. In:IJCNN’20. International Joint Conference
           on Neural Networks. 2020. Forthcoming.
        """
        super().__init__()
        self.learning_rate = learning_rate
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.max_window_size = max_window_size
        self.min_window_size = min_window_size
        self._first_run = True
        self._ensemble = None
        self.detect_drift = detect_drift
        self._drift_detector = None
        self._X_buffer = np.array([])
        self._y_buffer = np.array([])
        self._samples_seen = 0
        self._model_idx = 0
        if update_strategy not in self._UPDATE_STRATEGIES:
            raise AttributeError("Invalid update_strategy: {}\n"
                                 "Valid options: {}".format(update_strategy,
                                                            self._UPDATE_STRATEGIES))
        self.update_strategy = update_strategy
        self._configure()

    def _configure(self):
        """Build the empty ensemble, window size, booster params and detector."""
        if self.update_strategy == self._PUSH_STRATEGY:
            # Queue that grows up to n_estimators members
            self._ensemble = []
        elif self.update_strategy == self._REPLACE_STRATEGY:
            # Fixed-size slots, filled (and later overwritten) by position
            self._ensemble = [None] * self.n_estimators
        self._reset_window_size()
        self._init_margin = 0.0
        # NOTE(review): recent XGBoost releases document `verbosity` rather
        # than `silent`; confirm against the installed version before changing.
        self._boosting_params = {"silent": True,
                                 "objective": "binary:logistic",
                                 "eta": self.learning_rate,
                                 "max_depth": self.max_depth}
        if self.detect_drift:
            self._drift_detector = ADWIN()

    def reset(self):
        """
        Reset the estimator to its untrained state.
        """
        self._first_run = True
        self._X_buffer = np.array([])
        self._y_buffer = np.array([])
        self._samples_seen = 0
        # The write position has to be rewound together with the ensemble:
        # with the 'replace' strategy a stale index would make the next
        # mini-batch request margins from the freshly emptied (None) slots.
        self._model_idx = 0
        self._configure()

    def partial_fit(self, X, y, classes=None, sample_weight=None):
        """
        Partially (incrementally) fit the model.

        Parameters
        ----------
        X: numpy.ndarray or array-like
            An array of shape (n_samples, n_features) with the data upon which
            the algorithm will create its model.

        y: Array-like
            An array of shape (, n_samples) containing the classification
            targets for all samples in X. Only binary data is supported.

        classes: Not used.

        sample_weight: Not used.

        Returns
        -------
        AdaptiveXGBoostClassifier
            self
        """
        # Coerce to ndarrays so that positional indexing is used even when a
        # DataFrame / Series (possibly with a non-contiguous index) is given.
        X = np.asarray(X)
        y = np.asarray(y)
        # Samples are fed one at a time so that buffering and drift detection
        # operate per instance.
        for i in range(X.shape[0]):
            self._partial_fit(X[i:i + 1, :], y[i:i + 1])
        return self

    def _partial_fit(self, X, y):
        """Buffer the sample(s) and train on every complete window."""
        if self._first_run:
            # Shape the feature buffer once the number of features is known
            self._X_buffer = np.array([]).reshape(0, get_dimensions(X)[1])
            self._y_buffer = np.array([])
            self._first_run = False
        self._X_buffer = np.concatenate((self._X_buffer, X))
        self._y_buffer = np.concatenate((self._y_buffer, y))
        while self._X_buffer.shape[0] >= self.window_size:
            self._train_on_mini_batch(X=self._X_buffer[0:self.window_size, :],
                                      y=self._y_buffer[0:self.window_size])
            # Drop the consumed window from the front of both buffers
            self._X_buffer = self._X_buffer[self.window_size:]
            self._y_buffer = self._y_buffer[self.window_size:]

            # Check window size and adjust it if necessary
            self._adjust_window_size()

        # Support for concept drift
        if self.detect_drift:
            # X holds a single sample here (see partial_fit), so the
            # comparison yields a one-element array.
            correctly_classifies = self.predict(X) == y
            # Feed the detector with the error indicator (1 = misclassified)
            self._drift_detector.add_element(int(not correctly_classifies))
            # Check if there was a change
            if self._drift_detector.detected_change():
                # Reset window size
                self._reset_window_size()
                if self.update_strategy == self._REPLACE_STRATEGY:
                    self._model_idx = 0

    def _adjust_window_size(self):
        """Double the dynamic window, capped at ``max_window_size``."""
        if self._dynamic_window_size < self.max_window_size:
            self._dynamic_window_size *= 2
            if self._dynamic_window_size > self.max_window_size:
                self.window_size = self.max_window_size
            else:
                self.window_size = self._dynamic_window_size

    def _reset_window_size(self):
        """Set the window back to ``min_window_size`` (or max if unset)."""
        if self.min_window_size:
            self._dynamic_window_size = self.min_window_size
        else:
            self._dynamic_window_size = self.max_window_size
        self.window_size = self._dynamic_window_size

    def _train_on_mini_batch(self, X, y):
        """Train one booster on the mini-batch and add it to the ensemble."""
        if self.update_strategy == self._REPLACE_STRATEGY:
            booster = self._train_booster(X, y, self._model_idx)
            # Update ensemble
            self._ensemble[self._model_idx] = booster
            self._samples_seen += X.shape[0]
            self._update_model_idx()
        else:   # self.update_strategy == self._PUSH_STRATEGY
            booster = self._train_booster(X, y, len(self._ensemble))
            # Update ensemble: evict the oldest member once the queue is full
            if len(self._ensemble) == self.n_estimators:
                self._ensemble.pop(0)
            self._ensemble.append(booster)
            self._samples_seen += X.shape[0]

    def _train_booster(self, X: np.ndarray, y: np.ndarray, last_model_idx: int):
        """Fit a single boosting round on top of the first ``last_model_idx`` members.

        The margins predicted by ensemble members ``0 .. last_model_idx - 1``
        are summed and set as base margin of the training matrix.
        """
        d_mini_batch_train = xgb.DMatrix(X, y.astype(int))
        # Get margins from trees in the ensemble
        margins = np.asarray([self._init_margin] * d_mini_batch_train.num_row())
        for j in range(last_model_idx):
            margins = np.add(margins,
                             self._ensemble[j].predict(d_mini_batch_train, output_margin=True))
        d_mini_batch_train.set_base_margin(margin=margins)
        booster = xgb.train(params=self._boosting_params,
                            dtrain=d_mini_batch_train,
                            num_boost_round=1,
                            verbose_eval=False)
        return booster

    def _update_model_idx(self):
        """Advance the 'replace' write position, wrapping at ``n_estimators``."""
        self._model_idx += 1
        if self._model_idx == self.n_estimators:
            self._model_idx = 0

    def predict(self, X):
        """
        Predict the class label for sample X

        Parameters
        ----------
        X: numpy.ndarray
            An array of shape (n_samples, n_features) with the samples to
            predict the class label for.

        Returns
        -------
        numpy.ndarray
            A 1D array of shape (, n_samples), containing the
            predicted class labels for all instances in X.

        """
        if self._ensemble:
            if self.update_strategy == self._REPLACE_STRATEGY:
                # Unfilled slots are None and do not count as members
                trees_in_ensemble = sum(i is not None for i in self._ensemble)
            else:   # self.update_strategy == self._PUSH_STRATEGY
                trees_in_ensemble = len(self._ensemble)
            if trees_in_ensemble > 0:
                d_test = xgb.DMatrix(X)
                # Chain the members: each margin output becomes the base
                # margin seen by the next member.
                for i in range(trees_in_ensemble - 1):
                    margins = self._ensemble[i].predict(d_test, output_margin=True)
                    d_test.set_base_margin(margin=margins)
                predicted = self._ensemble[trees_in_ensemble - 1].predict(d_test)
                return np.array(predicted > 0.5).astype(int)
        # Ensemble is empty, return default values (0)
        return np.zeros(get_dimensions(X)[0])

    def predict_proba(self, X):
        """
        Not implemented for this method.
        """
        raise NotImplementedError("predict_proba is not implemented for this method.")

In [23]:
from skmultiflow.data import ConceptDriftStream
from skmultiflow.evaluation import EvaluatePrequential


# Hyper-parameters shared by both Adaptive XGBoost variants
n_estimators = 30       # ensemble size
learning_rate = 0.3     # eta
max_depth = 6           # depth limit for each tree
max_window_size = 1000  # upper bound of the training window
min_window_size = 1     # a truthy value activates the dynamic window strategy
detect_drift = False    # switch for drift detection

# Collect the common keyword arguments once so the two variants cannot
# drift apart; only the update strategy differs between them.
_axgb_kwargs = dict(n_estimators=n_estimators,
                    learning_rate=learning_rate,
                    max_depth=max_depth,
                    max_window_size=max_window_size,
                    min_window_size=min_window_size,
                    detect_drift=detect_drift)

AXGBp = AdaptiveXGBoostClassifier(update_strategy='push', **_axgb_kwargs)
AXGBr = AdaptiveXGBoostClassifier(update_strategy='replace', **_axgb_kwargs)

# NOTE(review): the data-loading cell indexes `data` as data["ALL"]; confirm
# that `data` exposes `.shape` when this cell is run on a fresh kernel.
print(data.shape)

(46564, 167)


In [6]:
# display(train_data)
# display(test_data)

# Baseline: a single XGBoost model fitted once on train_data
# (every column except the last as features, "class" as target).
model = xgb.XGBClassifier()
model.fit(train_data.iloc[:, :-1], train_data["class"])

# F1 over the whole of test_data in one go
y_pred = model.predict(test_data.iloc[:, :-1])
ts_f1 = round(f1_score(test_data["class"], y_pred, average='binary'), 3)
print(ts_f1)

# Per-timestep evaluation; labels and predictions are kept so the pooled
# metrics below can be computed over all evaluated timesteps.
true_test, f1_scores, predictions_test = [], [], []
timestep_range = np.arange(35, 50)
for timestep in timestep_range:
    test = test_data[test_data["ts"] == timestep]
    test_X, test_y = test.iloc[:, :-1], test["class"]

    y_pred = model.predict(test_X)
    ts_f1 = round(f1_score(test_y, y_pred, average='binary'), 3)
    print("Timestep: {} | F1-Score: {}".format(timestep, ts_f1))

    f1_scores.append(ts_f1)
    true_test.append(test_y)
    predictions_test.append(y_pred)

# Pool the per-timestep results once and reuse them for every summary metric
y_true_all = np.concatenate(true_test, axis=0)
y_pred_all = np.concatenate(predictions_test, axis=0)

f1_score_test = f1_score(y_true_all, y_pred_all, average='binary')
print("F1-Score on test set: {}".format(round(f1_score_test, 3)))

recall_score_test = recall_score(y_true_all, y_pred_all, average='binary')
print("Recall on test set: {}".format(round(recall_score_test, 3)))

precision_score_test = precision_score(y_true_all, y_pred_all, average='binary')
print("Precision on test set: {}".format(round(precision_score_test, 3)))

confusion_matrix_test = confusion_matrix(y_true_all, y_pred_all)
print("Confusion_matrix: {}".format(confusion_matrix_test))

0.803
Timestep: 35 | F1-Score: 0.964
Timestep: 36 | F1-Score: 0.892
Timestep: 37 | F1-Score: 0.769
Timestep: 38 | F1-Score: 0.935
Timestep: 39 | F1-Score: 0.949
Timestep: 40 | F1-Score: 0.772
Timestep: 41 | F1-Score: 0.947
Timestep: 42 | F1-Score: 0.862
Timestep: 43 | F1-Score: 0.0
Timestep: 44 | F1-Score: 0.048
Timestep: 45 | F1-Score: 0.0
Timestep: 46 | F1-Score: 0.143
Timestep: 47 | F1-Score: 0.0
Timestep: 48 | F1-Score: 0.053
Timestep: 49 | F1-Score: 0.032
F1-Score on test set: 0.803
Recall on test set: 0.723
Precision on test set: 0.902
Confusion_matrix: [[15502    85]
 [  300   783]]


In [22]:
# display(train_data)
# display(test_data)

# ncr_X, ncr_y = ncr.fit_resample(train_data.iloc[:,:-1], train_data["class"])
# sm = SMOTE()
# X, y = sm.fit_resample(ncr_X, ncr_y)

# ncr = NeighbourhoodCleaningRule(n_neighbors=3, threshold_cleaning=0.5)
# ncr_X, ncr_y = ncr.fit_resample(train_data.iloc[:,:-1], train_data["class"])

# Starting point: an XGBoost model fitted on train_data
# (every column except the last as features, "class" as target).
model = xgb.XGBClassifier()
model.fit(train_data.iloc[:, :-1], train_data["class"])

max_timestep = test_data["ts"].max()
true_test, predictions_test = [], []
timestep_range = np.arange(35, max_timestep)

# One additional model per processed timestep; the earlier ones are refitted
# on every later timestep inside the loop below.
ensembles = []

for timestep in timestep_range:

    # Rows of the current timestep are what the models get updated with
    train = test_data[test_data["ts"] == timestep]
    train_X, train_y = train.iloc[:, :-1], train["class"]

    if timestep == 35:
        # Score the first timestep before the model is updated with it
        y_pred = model.predict(train_X)
        ts_f1 = round(f1_score(train_y, y_pred, average='binary'), 3)
        print("Timestep: {} | F1-Score: {}".format(timestep, ts_f1))
        true_test.append(train_y)
        predictions_test.append(y_pred)

    # Refit the main model on this timestep, handing its current booster
    # to `fit` through `xgb_model`.
    # (A NeighbourhoodCleaningRule resampling step was tried here and is
    # currently disabled.)
    booster = model.get_booster()
    model.fit(train_X, train_y, xgb_model=booster)

    # Evaluate the updated model on the following timestep
    test = test_data[test_data["ts"] == timestep + 1]
    test_X, test_y = test.iloc[:, :-1], test["class"]

    y_pred = model.predict(test_X)
    ts_f1 = round(f1_score(test_y, y_pred, average='binary'), 3)
    print("Timestep: {} | F1-Score: {}".format(timestep + 1, ts_f1))
    true_test.append(test_y)
    predictions_test.append(y_pred)

    # Fresh model fitted on the current timestep only
    tmp_model = xgb.XGBClassifier()
    tmp_model.fit(train_X, train_y)
    ensembles.append(tmp_model)

    print("--------------------")
    # Refit every previously added model (all but the one just appended) on
    # the current timestep and print its F1 on the next timestep.
    for i, tmp in enumerate(ensembles[:-1]):
        tmp_booster = tmp.get_booster()
        tmp.fit(train_X, train_y, xgb_model=tmp_booster)
        y_pred = tmp.predict(test_X)
        f1_tmp = round(f1_score(test_y, y_pred, average='binary'), 3)
        print(f1_tmp)
    print("--------------------")


# Pool the per-timestep results once and reuse them for every summary metric
y_true_all = np.concatenate(true_test, axis=0)
y_pred_all = np.concatenate(predictions_test, axis=0)

f1_score_test = f1_score(y_true_all, y_pred_all, average='binary')
print("F1-Score on test set: {}".format(round(f1_score_test, 3)))

recall_score_test = recall_score(y_true_all, y_pred_all, average='binary')
print("Recall on test set: {}".format(round(recall_score_test, 3)))

precision_score_test = precision_score(y_true_all, y_pred_all, average='binary')
print("Precision on test set: {}".format(round(precision_score_test, 3)))

confusion_matrix_test = confusion_matrix(y_true_all, y_pred_all)
print("Confusion_matrix: {}".format(confusion_matrix_test))

Timestep: 35 | F1-Score: 0.964
Timestep: 36 | F1-Score: 1.0
--------------------
--------------------
Timestep: 37 | F1-Score: 0.824
--------------------
0.75
--------------------
Timestep: 38 | F1-Score: 0.894
--------------------
0.902
0.821
--------------------
Timestep: 39 | F1-Score: 0.91
--------------------
0.908
0.893
0.93
--------------------
Timestep: 40 | F1-Score: 0.718
--------------------
0.712
0.712
0.711
0.753
--------------------
Timestep: 41 | F1-Score: 0.917
--------------------
0.921
0.943
0.918
0.934
0.946
--------------------
Timestep: 42 | F1-Score: 0.844
--------------------
0.855
0.85
0.85
0.862
0.849
0.874
--------------------
Timestep: 43 | F1-Score: 0.174
--------------------
0.31
0.292
0.14
0.178
0.208
0.186
0.054
--------------------
Timestep: 44 | F1-Score: 0.185
--------------------
0.348
0.4
0.294
0.316
0.125
0.207
0.148
0.286
--------------------
Timestep: 45 | F1-Score: 0.0
--------------------
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
--------------------
