In [1]:
import json
import os
import sys
import time

import numpy as np
import pandas as pd
from sklearn.neighbors import KDTree

# Make the project-local `mtree` package importable before importing from it.
sys.path.insert(0,'../../../Architecture/')

from mtree import MTree

In [2]:
%%time
# Load the testbed capture, drop the serialized index column and any rows
# with missing values, then cast every remaining column to float64.
df = (
    pd.read_csv('extended_bgp_testbed_5.csv.gz')
    .drop(columns=['Unnamed: 0'])
    .dropna()
    .astype('float64')
)
df.shape

CPU times: user 5.28 s, sys: 207 ms, total: 5.49 s
Wall time: 5.51 s


(185984, 82)

In [3]:
# Benchmark sizes: 1000..9000 in steps of 1000, then 10000..100000 in steps of 10000.
samples_to_use_list = [
    *range(1000, 10000, 1000),
    *range(10000, 110000, 10000),
]

In [4]:
def exactSTORM(data, window=20, R=0.5, k=5):
    """Flag distance-based outliers over a sliding window (exact-STORM style).

    The rows of ``data`` are consumed in order as a stream. Every arriving row
    is range-queried against the rows currently held in an ``MTree``; each hit
    gets its ``count_after`` incremented and is recorded in the newcomer's
    ``nn_before`` list. After each arrival, every row still in the window is
    re-evaluated: it is reported as an outlier when the number of its
    preceding neighbors that are still inside the window plus its
    ``count_after`` is below ``k``.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per stream object; only ``data.values`` is read.
    window : int
        Once row ``now`` has been inserted, row ``now - window`` is evicted,
        so at most ``window`` rows are held at query time.
    R : float
        Radius passed to ``MTree.get_nearest``.
    k : int
        Neighbor threshold for the outlier test; also passed to
        ``MTree.get_nearest`` as its third argument.

    Returns
    -------
    numpy.ndarray
        Float array of length ``len(data)``. Entry ``i`` is 1 if row ``i``
        was reported as an outlier by at least one query while it was in the
        window, otherwise 0. A flag is never cleared once set.
    """
    rows = tuple(map(tuple, data.values))
    output = np.zeros(len(rows))

    mtree = MTree()
    structure = {}  # arrival index -> {'count_after', 'nn_before', 'data'}
    indexes = {}    # row tuple -> arrival index of its most recent occurrence

    for now, instance in enumerate(rows):
        node = {'count_after': 0,
                'nn_before': [],
                'data': instance}
        structure[now] = node

        # NOTE(review): the query is capped at k results, so count_after of
        # in-range rows beyond the k returned ones is not incremented.
        # Presumably the third argument is a result limit -- confirm against
        # the MTree implementation before relaxing it.
        for neighbor in mtree.get_nearest(instance, R, k):
            neighbor_index = indexes[neighbor[0]]
            structure[neighbor_index]['count_after'] += 1
            node['nn_before'].append(neighbor_index)

        # Register the newcomer only after the query: if an identical row is
        # already stored, the lookup above must resolve to that earlier row
        # and not to the row being inserted.
        indexes[instance] = now
        mtree.add(instance)

        expired = now - window
        if expired in structure:
            departing_instance = structure[expired]['data']
            # Drop the value -> index entry only if it still belongs to the
            # departing row; a newer identical row may have taken it over.
            if indexes.get(departing_instance) == expired:
                del indexes[departing_instance]
            del structure[expired]
            mtree.remove(departing_instance)

        for query_index, entry in structure.items():
            # Only preceding neighbors that have not been evicted count
            # towards the support of a row.
            live_before = sum(1 for idx in entry['nn_before'] if idx in structure)
            if live_before + entry['count_after'] < k:
                output[query_index] = 1
    return output

In [None]:
# Time exactSTORM on growing prefixes of `df` and store the raw timings as JSON.
# results['times'][i] holds the `num_runs` measurements (in seconds) for
# samples_to_use_list[i].
results = {}
results['algorithm'] = 'exactSTORM'
results['times'] = []

num_runs = 10     # timing repetitions per sample size
sampleSkip = 30   # leading rows split off into bufferDF and excluded from the timed run

# Create the output folder up front so the first json.dump cannot fail on a
# missing directory.
os.makedirs('Results', exist_ok=True)

for sample_to_use in samples_to_use_list:

    print('exactSTORM: {}'.format(sample_to_use))
    results_samples = []
    dfNormalized = df[:sample_to_use]

    # The split does not depend on the repetition, so it is done once per
    # sample size instead of once per measurement.
    bufferDF = dfNormalized[0:sampleSkip]  # not passed to exactSTORM
    testDF = dfNormalized[sampleSkip:]

    for measurement in range(num_runs):
        # perf_counter is a monotonic, high-resolution clock meant for
        # measuring short durations; time.time() can jump when the system
        # clock is adjusted.
        start = time.perf_counter()

        exactSTORM(pd.DataFrame(testDF.values))

        end = time.perf_counter()
        time_interval = end-start
        results_samples.append(time_interval)
        print('{} - {}'.format(sample_to_use, time_interval))

    results['times'].append(results_samples)
    # The file is rewritten after every sample size, so the timings gathered
    # so far are always on disk.
    with open('Results/exactSTORM_extended.json', 'w') as f:
        json.dump(results, f, indent=2)

exactSTORM: 1000
1000 - 0.34806299209594727
1000 - 0.3464207649230957
1000 - 0.3515012264251709
1000 - 0.34610462188720703
1000 - 0.3473238945007324
1000 - 0.3486952781677246
1000 - 0.35041213035583496
1000 - 0.3457920551300049
1000 - 0.34792065620422363
1000 - 0.3474247455596924
exactSTORM: 2000
2000 - 0.7122225761413574
2000 - 0.6832382678985596
2000 - 0.6856083869934082
2000 - 0.6915061473846436
2000 - 0.6917736530303955
2000 - 0.7000019550323486
2000 - 0.7151567935943604
2000 - 0.7122681140899658
2000 - 0.7026667594909668
2000 - 0.6927061080932617
exactSTORM: 3000
3000 - 1.0955750942230225
3000 - 1.0633635520935059
3000 - 1.1084957122802734
3000 - 1.081160545349121
3000 - 1.083306074142456
3000 - 1.0506494045257568
3000 - 1.069659948348999
3000 - 1.0710175037384033
3000 - 1.0520975589752197
3000 - 1.1124300956726074
exactSTORM: 4000
4000 - 1.4304218292236328
4000 - 1.419724464416504
4000 - 1.4232375621795654
4000 - 1.4067304134368896
4000 - 1.452218770980835
4000 - 1.42594218254089