In [3]:
import pandas as pd, numpy as np, json, time
from sklearn.neighbors import KDTree

import sys
sys.path.insert(0,'../../../Architecture/')

from mtree import MTree

In [4]:
%%time
df = pd.read_csv('extended_bgp_testbed_5.csv.gz')
df = df.drop(['Unnamed: 0'], axis=1)
df = df.dropna()
df = df.astype('float64')
df.shape

CPU times: user 5.64 s, sys: 242 ms, total: 5.88 s
Wall time: 5.91 s


(185984, 82)

In [5]:
samples_to_use_list = list(range(1000, 10000, 1000))
samples_to_use_list = samples_to_use_list + list(range(10000, 110000, 10000))

In [6]:
def exactSTORM(data, window=20, R=0.5, k=5):

	data = tuple(map(tuple, data.values))

	output = np.zeros(len(data))

	mtree = MTree()
	structure = {}
	indexes = {}

	for now, instance in enumerate(data):

		structure[now] = {'count_after':0,
		                  'nn_before':[],
		                  'data': instance}

		indexes[instance] = now

		neighbors_list = mtree.get_nearest(instance, R, k)

		for neighbor in neighbors_list:
			neighbor_index = indexes[neighbor[0]]
			structure[neighbor_index]['count_after'] += 1
			structure[now]['nn_before'].append(neighbor_index)
		    
		mtree.add(instance)
		    
		if now-window in structure:
			departing_instance = structure[now-window]['data']
			del indexes[departing_instance]
			del structure[now-window]
			mtree.remove(departing_instance)
		    
		for query_index in structure:
			prec_neighbors = structure[query_index]['nn_before']
			succ_neighbors = structure[query_index]['count_after']
			if len(prec_neighbors) + succ_neighbors < k:
				output[query_index] = 1
	return output

In [7]:
results = {}
results['algorithm'] = 'exactSTORM'
results['times'] = []

for sample_to_use in samples_to_use_list:
            
    print('exactSTORM: {}'.format(sample_to_use))
    results_samples = []
    dfNormalized = df[:sample_to_use]
        
    num_runs = 10        
        
    for measurement in range(num_runs):
        sampleSkip = 30
        bufferDF = dfNormalized[0:sampleSkip]
        testDF = dfNormalized[sampleSkip:]

        start = time.time()
        
        exactSTORM(pd.DataFrame(testDF.values))
        
        end = time.time()
        time_interval = end-start
        results_samples.append(time_interval)
        print('{} - {}'.format(sample_to_use, time_interval))
    
    results['times'].append(results_samples)
    with open('Results/exactSTORM_extended.json', 'w') as f:
        json.dump(results, f, indent=2)    

exactSTORM: 1000
1000 - 0.4636881351470947
1000 - 0.3921520709991455
1000 - 0.322129487991333
1000 - 0.3228771686553955
1000 - 0.3222830295562744
1000 - 0.32338428497314453
1000 - 0.3300611972808838
1000 - 0.3308877944946289
1000 - 0.3253049850463867
1000 - 0.3283684253692627
exactSTORM: 2000
2000 - 0.6564514636993408
2000 - 0.6607987880706787
2000 - 0.6623406410217285
2000 - 0.655674934387207
2000 - 0.6542203426361084
2000 - 0.6436371803283691
2000 - 0.6536562442779541
2000 - 0.6663439273834229
2000 - 0.6674244403839111
2000 - 0.643132209777832
exactSTORM: 3000
3000 - 1.0044710636138916
3000 - 1.0071415901184082
3000 - 1.0050077438354492
3000 - 1.0035171508789062
3000 - 1.4598281383514404
3000 - 1.001004695892334
3000 - 0.9966487884521484
3000 - 0.9835050106048584
3000 - 1.0434455871582031
3000 - 1.0131585597991943
exactSTORM: 4000
4000 - 1.302825927734375
4000 - 1.319737434387207
4000 - 1.3091211318969727
4000 - 1.3167059421539307
4000 - 1.291372299194336
4000 - 1.3114089965820312
40