# xStream (row streaming) vs static results

In [48]:
import gzip
import numpy as np
from sklearn.utils import shuffle
from sklearn.metrics import average_precision_score, roc_auc_score

from Chains import Chains

### Load datasets

In [54]:
# Load the gzip-compressed CSV: every row is a comma-separated list of floats
# whose last column is the label and whose remaining columns are features.
# The context manager guarantees the file handle is closed even if parsing
# fails (the previous version left the gzip handle open).
with gzip.open("data/telescope.gz", "rt", encoding="utf-8") as fh:
    # ndmin=2 keeps the result two-dimensional even for a single-row file.
    table = np.loadtxt(fh, delimiter=",", ndmin=2)

X = table[:, :-1]  # feature matrix: all columns except the last
y = table[:, -1]   # label vector: last column

In [55]:
# Replicate the static (non row-streaming) results shown in the paper.
# Each run shuffles the data with a different seed, fits a fresh Chains model
# on all rows, scores the same rows, and evaluates the ranking against y.

k = 100
n_chains = 100
depth = 15
# Number of shuffled repetitions.
# NOTE(review): the summary message used to claim 10 runs while the loop only
# executed 2; set this to 10 if that is the intended protocol.
n_runs = 2

aps = []
aucs = []

for run_seed in range(n_runs):
    X_shuffled, y_shuffled = shuffle(X, y, random_state=run_seed)

    model = Chains(k=k, nchains=n_chains, depth=depth)

    # The model is unsupervised: labels are never passed to fit/score and are
    # used only for evaluation below.
    model.fit(X_shuffled)
    # The score is negated before ranking; presumably Chains.score returns
    # lower values for anomalies — TODO confirm against the Chains module.
    anomalyscores = -model.score(X_shuffled)
    ap = average_precision_score(y_shuffled, anomalyscores)
    auc = roc_auc_score(y_shuffled, anomalyscores)
    print("xstream: AP =", ap, "AUC =", auc)
    aps.append(ap)
    aucs.append(auc)

# Report the number of runs actually completed instead of a hard-coded count.
if aps:
    print(f"Average ap over {len(aps)} runs: ", sum(aps) / len(aps))
    print(f"Average auc over {len(aucs)} runs: ", sum(aucs) / len(aucs))
else:
    print("No completed runs to average.")

Fitting...: 100%|██████████| 100/100 [04:14<00:00,  2.55s/it]
Scoring...: 100%|██████████| 100/100 [03:44<00:00,  2.25s/it]


xstream: AP = 0.3590784683191133 AUC = 0.7636916498432945


Fitting...: 100%|██████████| 100/100 [04:14<00:00,  2.54s/it]
Scoring...: 100%|██████████| 100/100 [03:41<00:00,  2.22s/it]


xstream: AP = 0.36338391526192265 AUC = 0.7514101191944018


Fitting...: 100%|██████████| 100/100 [04:13<00:00,  2.54s/it]
Scoring...: 100%|██████████| 100/100 [03:43<00:00,  2.24s/it]


xstream: AP = 0.3460506906656493 AUC = 0.7395799972236746


Fitting...: 100%|██████████| 100/100 [04:12<00:00,  2.53s/it]
Scoring...: 100%|██████████| 100/100 [03:41<00:00,  2.22s/it]


xstream: AP = 0.36258231807558583 AUC = 0.7620152387520451


Fitting...: 100%|██████████| 100/100 [04:13<00:00,  2.54s/it]
Scoring...:  86%|████████▌ | 86/100 [03:13<00:31,  2.24s/it]


KeyboardInterrupt: 

In [56]:
# Summarise whatever runs finished (the experiment cell may have been
# interrupted), reporting the true run count rather than a fixed "10".
if aps and aucs:
    print(f"Average ap over {len(aps)} runs: ", sum(aps) / len(aps))
    print(f"Average auc over {len(aucs)} runs: ", sum(aucs) / len(aucs))
else:
    # Avoid ZeroDivisionError when no run completed before an interrupt.
    print("No completed runs to average.")

Average ap over 10 runs:  0.3577738480805678
Average auc over 10 runs:  0.754174251253354
