# Demo - xStream for row-streaming datasets
This notebook demonstrates our Python row-streaming implementation of [xStream](https://github.com/arielramos97/xStream). It shows how to run the algorithm on the spam-sms dataset and how to evaluate the resulting anomaly scores.

# Set up environment

In [1]:
from models.XStream_River import xStream
import tqdm
from sklearn.metrics import average_precision_score, roc_auc_score
from sklearn.datasets import load_svmlight_file

# Load the data

In [3]:
# Read the spam-sms dataset, stored in svmlight format.
data = load_svmlight_file("data/Row-streaming/spam-sms")

# Second element of the loader's result: the labels.
y = data[1]

# First element: the feature matrix, converted to a dense matrix so that
# each row can later be flattened with `.A1` in the streaming loop.
X = data[0].todense()

# Run the algorithm

In [5]:
# Hyper-parameters of the detector.
depth = 15
n_chains = 10 #change to 100
k = 10 #change to 100
# The window covers 5% of the stream.
window_size = int(0.05*len(y))

cf = xStream(
  num_components=k,
  n_chains=n_chains,
  depth=depth,
  window_size=window_size,
)

# Anomaly scores of every sample seen from index `window_size` onwards.
all_scores = []

for i, sample in enumerate(tqdm.tqdm(X)):
  # Each row is a 1xN matrix; `.A1` flattens it to a 1-D array.
  features = sample.A1
  cf.learn_one(features)
  if i < window_size:
    # The first `window_size` samples are only learned, never scored.
    continue
  # The prediction is negated before being stored as the anomaly score.
  anomalyscore = -cf.predict_one(features)
  all_scores.append(anomalyscore[0])

  4%|▎         | 205/5574 [00:10<04:45, 18.78it/s]


KeyboardInterrupt: 

In [8]:
# Labels of exactly the samples that were scored: skip the first
# `window_size` labels, then keep one label per collected score.
y_adjusted = y[window_size:][:len(all_scores)]

In [9]:
# Computation for Mean Average Precision
#
# Scores and labels are split into consecutive windows of `window_size`
# samples, and the average precision is computed separately per window.
# `AP_window` holds one value per window and is averaged later into MAP.

chunks = [all_scores[x:x+window_size] for x in range(0, len(all_scores), window_size)]
y_chunks = [y_adjusted[x:x+window_size] for x in range(0, len(y_adjusted), window_size)]

# Only complete windows are evaluated; a shorter trailing window is skipped.
# Always dropping the last chunk (as `range(len(y_chunks)-1)` did) would also
# discard a full window whenever the number of scores is an exact multiple
# of `window_size`.
AP_window = [
  average_precision_score(y_win, score_win)
  for y_win, score_win in zip(y_chunks, chunks)
  if len(y_win) == window_size
]

# Print results

In [10]:
# Overall metrics over every scored sample.
AUC = roc_auc_score(y_adjusted, all_scores)
OAP = average_precision_score(y_adjusted, all_scores)

# Mean of the per-window average precisions.
MAP = sum(AP_window)/len(AP_window)

# Pieces are joined by print's default single-space separator.
report = [
  "XStream: OAP =", OAP, "\n\t",
  "MAP =", MAP, "\n\t",
  "AUC =", AUC,
]
print(*report)

XStream: OAP = 0.3730751125550796 
	 MAP = 0.404928224231042 
	 AUC = 0.855161384824829
