In [1]:
import numpy as np
import xgboost as xgb
import cryptoaml.datareader as cdr
from sklearn.metrics import f1_score

In [3]:
# Load the Elliptic data set and split it into train / test partitions.
# The keyword arguments are forwarded verbatim to cryptoaml's splitter.
elliptic = cdr.get_data("elliptic")
data = elliptic.train_test_split(train_size=0.7,
                                 feat_set="AF",
                                 inc_meta=False,
                                 inc_unknown=False)

# Work on copies: attaching the label column to `data.train_X` / `data.test_X`
# directly would write "class" into the split object's own feature frames, so
# any later use of `data.train_X` as a feature matrix would silently contain
# the target. The copies keep `data` pristine and make this cell idempotent.
train_data = data.train_X.copy()
train_data["class"] = data.train_y
test_data = data.test_X.copy()
test_data["class"] = data.test_y

In [28]:
class AdaptiveStackedBoostClassifier():
    """Streaming stacked ensemble of XGBoost classifiers with an XGBoost meta learner.

    Samples handed to ``partial_fit`` are buffered and consumed in mini batches
    of ``window_size`` rows. For every mini batch the classifier

    * continues training each model already in the ensemble,
    * trains one new model and stores it in the next ensemble slot
      (``ensemble_size % n_estimators``, so the oldest slot is overwritten once
      ``n_estimators`` models exist), and
    * continues training the meta learner on the class probabilities that the
      base models predict for the held-out tail of the mini batch.

    The stacked feature matrix always has ``n_estimators * 2`` columns: slot
    ``k`` owns columns ``2k`` and ``2k + 1`` and slots without a model stay
    zero. This layout assumes every base model's ``predict_proba`` returns
    exactly two columns (binary classification).

    Parameters
    ----------
    n_estimators : int, default 20
        Maximum number of base models kept in the ensemble.
    window_size : int, default 1000
        Number of buffered rows that make up one mini batch.
    verbose : bool, default True
        When true, print the stacked features and the meta learner's feature
        importances after every mini batch, and the ensemble capacity on
        ``predict``.

    Raises
    ------
    ValueError
        If ``n_estimators`` or ``window_size`` is smaller than 1.
    """

    # Number of probability columns each base model contributes.
    _N_CLASSES = 2
    # Leading share of a mini batch used to fit base models before they
    # produce the meta learner's training features on the remaining rows.
    _BASE_FRACTION = 0.7

    def __init__(self,
                 n_estimators=20,
                 window_size=1000,
                 verbose=True):
        # n_estimators == 0 would make the slot computation divide by zero and
        # window_size <= 0 would keep the partial_fit loop from terminating.
        if n_estimators < 1:
            raise ValueError("n_estimators must be >= 1, got {}".format(n_estimators))
        if window_size < 1:
            raise ValueError("window_size must be >= 1, got {}".format(window_size))

        self._n_estimators = n_estimators
        self._window_size = window_size
        self._ensemble = [None] * n_estimators
        self._first_run = True
        self._first_run_meta = True
        self._X_buffer = np.array([])
        self._y_buffer = np.array([])
        # Total number of models trained so far (keeps growing past n_estimators).
        self._ensemble_size = 0
        self._meta_learner = xgb.XGBClassifier()
        self._verbose = verbose

    def partial_fit(self, X, y):
        """Buffer ``X`` / ``y`` and train on every complete mini batch.

        Parameters
        ----------
        X : 2-D array
            Feature rows; ``X.shape[1]`` fixes the buffer width on first call.
        y : 1-D array
            Labels aligned with the rows of ``X``.

        Rows that do not fill a whole window stay buffered until a later call
        supplies enough data.
        """
        if self._first_run:
            # The feature count is only known once the first batch arrives.
            self._X_buffer = np.array([]).reshape(0, X.shape[1])
            self._y_buffer = np.array([])
            self._ensemble = [None] * self._n_estimators
            self._first_run = False

        self._X_buffer = np.concatenate((self._X_buffer, X))
        self._y_buffer = np.concatenate((self._y_buffer, y))

        window = self._window_size
        while self._X_buffer.shape[0] >= window:
            self._train_on_mini_batch(X=self._X_buffer[:window, :],
                                      y=self._y_buffer[:window])
            # Drop the consumed window from the front of the buffers.
            self._X_buffer = self._X_buffer[window:]
            self._y_buffer = self._y_buffer[window:]

    def _n_active_models(self):
        """Return how many ensemble slots currently hold a trained model."""
        return min(self._ensemble_size, self._n_estimators)

    def _stack_features(self, probas_by_slot):
        """Lay out per-slot class probabilities as the meta learner's input.

        ``probas_by_slot`` maps an ensemble slot index to that model's
        ``predict_proba`` output. The result has one row per sample and
        ``n_estimators * 2`` columns; columns of slots missing from the
        mapping are left at zero.
        """
        width = self._N_CLASSES
        n_rows = next(iter(probas_by_slot.values())).shape[0]
        stacked = np.zeros((n_rows, self._n_estimators * width))
        for slot, proba in probas_by_slot.items():
            stacked[:, slot * width:(slot + 1) * width] = proba
        return stacked

    def _train_on_mini_batch(self, X, y):
        """Update the ensemble and the meta learner with one mini batch.

        The first 70% of the rows fit the base models; their predicted
        probabilities on the remaining rows become the meta learner's training
        features. Afterwards every base model is also fitted on those
        remaining rows, so it has seen the whole mini batch.
        """
        n_first = int(X.shape[0] * self._BASE_FRACTION)
        X_first, y_first = X[:n_first, :], y[:n_first]
        X_second, y_second = X[n_first:, :], y[n_first:]

        # Slot the new model will occupy. Once the ensemble is full this is
        # the oldest member, which gets evicted.
        new_slot = self._ensemble_size % self._n_estimators
        probas_by_slot = {}

        for slot in range(self._n_active_models()):
            if slot == new_slot:
                # About to be replaced: retraining it would be wasted work.
                continue
            model = self._ensemble[slot]
            if model is None:
                continue

            # Continue training the existing model on the first split ...
            model.fit(X_first, y_first, xgb_model=model.get_booster())
            # ... collect its held-out probabilities for the meta learner ...
            probas_by_slot[slot] = model.predict_proba(X_second)
            # ... then let it see the rest of the mini batch as well.
            model.fit(X_second, y_second, xgb_model=model.get_booster())

        # Train a fresh model the same way and add it to the ensemble.
        new_instance = xgb.XGBClassifier()
        new_instance.fit(X_first, y_first)
        probas_by_slot[new_slot] = new_instance.predict_proba(X_second)
        new_instance.fit(X_second, y_second, xgb_model=new_instance.get_booster())
        self._ensemble[new_slot] = new_instance

        # Fixed-width features: slots that hold no model yet stay zero.
        meta_model_X = self._stack_features(probas_by_slot)

        if self._verbose:
            print("STACKED FEATURES")
            print(meta_model_X)

        # Fit the meta learner, continuing from its booster after the first batch.
        if self._first_run_meta:
            self._meta_learner.fit(meta_model_X, y_second)
            self._first_run_meta = False
        else:
            tmp_meta_learner_booster = self._meta_learner.get_booster()
            self._meta_learner.fit(meta_model_X, y_second, xgb_model=tmp_meta_learner_booster)

        if self._verbose:
            print("FEATURE IMPORTANCE")
            print(self._meta_learner.feature_importances_)

        self._ensemble_size += 1

    def predict(self, X):
        """Predict class labels for ``X``.

        With a single trained model its own prediction is returned; otherwise
        the meta learner predicts from the stacked class probabilities of all
        models currently in the ensemble.

        Raises
        ------
        RuntimeError
            If no complete mini batch has been trained on yet.
        """
        if self._ensemble_size == 0:
            raise RuntimeError(
                "predict() called before any mini batch was trained; "
                "partial_fit() needs at least window_size buffered samples")

        # Only one model in the ensemble: use its predictions directly.
        if self._ensemble_size == 1:
            return self._ensemble[0].predict(X)

        if self._verbose:
            print(len(self._ensemble))

        # Construct the stacked features from every model in the ensemble,
        # using the same slot layout as during training.
        probas_by_slot = {
            slot: self._ensemble[slot].predict_proba(X)
            for slot in range(self._n_active_models())
        }
        meta_model_X = self._stack_features(probas_by_slot)

        return self._meta_learner.predict(meta_model_X)
        
# Fit the streaming classifier on every row with ts <= 10. The last column of
# the frame is the "class" label, so everything before it is the feature matrix.
train_data_tmp = train_data.loc[train_data["ts"] <= 10]
adpBoost = AdaptiveStackedBoostClassifier()
adpBoost.partial_fit(
    train_data_tmp.iloc[:, :-1].values,
    train_data_tmp["class"].values,
)

# Evaluate on the rows with ts == 11, taken from the same frame.
test_data_tmp = train_data.loc[train_data["ts"] == 11]
y_pred = adpBoost.predict(test_data_tmp.iloc[:, :-1].values)

# Binary F1 for that time step, rounded to three decimals.
ts_f1 = round(
    f1_score(y_true=test_data_tmp["class"].values, y_pred=y_pred, average='binary'),
    3,
)
print(ts_f1)


STACKED FEATURES
[[9.99406934e-01 5.93082339e-04 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [9.98221993e-01 1.77798513e-03 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [9.99366343e-01 6.33668737e-04 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 ...
 [9.99483407e-01 5.16601838e-04 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [9.99446630e-01 5.53357473e-04 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [9.93553638e-01 6.44634338e-03 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]]
FEATURE IMPORTANCE
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
STACKED FEATURES
[[9.99267459e-01 7.32566114e-04 9.99337971e-01 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [9.99131382e-01 8.68645380e-04 9.98934448e-01 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [9.94216204e-01 5.7837711

STACKED FEATURES
[[9.99977350e-01 2.26772609e-05 9.99926686e-01 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [9.99992371e-01 7.60321291e-06 9.99996603e-01 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [9.81991291e-01 1.80087201e-02 9.94732320e-01 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 ...
 [9.95308340e-01 4.69167205e-03 7.66673684e-01 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [9.98871744e-01 1.12826598e-03 9.99647081e-01 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [9.99986827e-01 1.31781590e-05 9.99999642e-01 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]]
FEATURE IMPORTANCE
[0.07089154 0.         0.22327468 0.         0.05503808 0.
 0.04741685 0.         0.06631372 0.         0.07175239 0.
 0.02564141 0.         0.3397493  0.         0.099922   0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.       

In [None]:
# timestep_range = np.arange(0, 35)
# for timestep in timestep_range:
# #     print(timestep)