In [1]:
import numpy as np
import pandas as pd
from skmultiflow.data import TemporalDataStream
from exos.streams import run_exos_simulator


In [2]:
def run_basic_experiment(n_streams=3, size=200,
                         window_size=100, n_attrs=2,
                         multiplier=10, round_flag=False):
    """Run the EXOS simulator on randomly generated streams.

    Each of the ``n_streams`` streams is a ``TemporalDataStream`` wrapping a
    ``(size, n_attrs)`` array drawn uniformly from [5, 20) and a random 0/1
    label vector of length ``size``.

    Parameters
    ----------
    n_streams : int
        Number of synthetic streams to generate.
    size : int
        Number of rows generated for every stream.
    window_size : int
        Forwarded unchanged to ``run_exos_simulator``.
    n_attrs : int
        Number of attributes (columns) per stream; they are named
        ``A0 .. A{n_attrs-1}`` in every stream.
    multiplier, round_flag
        Forwarded unchanged to ``run_exos_simulator``. Earlier versions of
        this function ignored both and always passed ``multiplier=10,
        round_flag=True``; callers that relied on rounding must now pass
        ``round_flag=True`` explicitly.

    Returns
    -------
    dict
        Whatever ``run_exos_simulator`` returns. Observed layout::

            results.keys() -> ['output', 'simulator_time']
            results['output'].keys() -> ['window_0', 'window_1', ...]
            results['output']['window_0'].keys() -> [0, 1, 2, 'est_time']
            results['output']['window_0'][0].keys() ->
                ['out_attrs', 'outlier_indices',
                 'temporal_neighbor_time', 'out_attrs_time']

        * ``out_attrs``: list of dicts mapping feature name to contribution
          value, one dict per outlier found in that window of that stream.
        * ``outlier_indices``: dict keyed by stream id; only the entry of
          the stream itself is of interest.
        * ``temporal_neighbor_time``: running time of the temporal
          neighbor process for that stream.
        * ``out_attrs_time``: running time of the outlying attributes
          process for that stream.
        * ``results['simulator_time']``: running time of the whole run
          over all windows.
    """
    sources = []
    attributes = []
    feature_names = {}
    offset = 0  # starting column of the current stream among all attributes
    for stream_id in range(n_streams):
        X = np.random.uniform(low=5, high=20, size=(size, n_attrs))
        y = np.random.randint(2, size=size)
        sources.append(TemporalDataStream(X, y))
        feature_names[stream_id] = [f'A{j}' for j in range(n_attrs)]
        attributes.append(offset)
        offset += X.shape[1]
    d = n_streams * n_attrs  # total number of attributes over all streams
    k = d
    # Forward the caller's multiplier / round_flag instead of hard-coded values.
    return run_exos_simulator(sources, d, k, attributes, feature_names,
                              window_size, n_clusters=(), n_init_data=(),
                              multiplier=multiplier, round_flag=round_flag)

In [3]:
def exos_output_df(results):
    """Placeholder for turning simulator results into a DataFrame.

    Not implemented yet: ``results`` is ignored and ``None`` is returned.
    """
    return None

In [4]:
def exos_running_time_df(results):
    """Summarise per-window running times of a simulator run.

    Parameters
    ----------
    results : dict
        Simulator result. ``results['output']`` maps a window name to a
        dict holding one entry per stream id plus an ``'est_time'`` entry.
        Every stream entry must provide ``'temporal_neighbor_time'`` and
        ``'out_attrs_time'``.

    Returns
    -------
    pandas.DataFrame
        One row per window with the columns ``windows``, ``est_times``,
        ``neigh_times`` (maximum ``temporal_neighbor_time`` over the
        streams) and ``out_attrs_times`` (maximum ``out_attrs_time`` over
        the streams).

    Notes
    -----
    ``results`` is only read, never modified, so the function can be called
    repeatedly on the same object.
    """
    output = results['output']
    # Materialise the keys: a dict_keys view is not a reliable column value.
    windows = list(output)
    est_times = []
    neigh_times = []
    out_attrs_times = []
    for window in windows:
        window_output = output[window]
        est_times.append(window_output['est_time'])
        # Every key except 'est_time' is a stream id; skip it rather than
        # deleting it from the caller's dictionary.
        stream_outputs = [entry for key, entry in window_output.items()
                          if key != 'est_time']
        neigh_times.append(
            max(entry['temporal_neighbor_time'] for entry in stream_outputs))
        out_attrs_times.append(
            max(entry['out_attrs_time'] for entry in stream_outputs))
    df_dict = {
        'windows': windows,
        'est_times': est_times,
        'neigh_times': neigh_times,
        'out_attrs_times': out_attrs_times,
    }
    return pd.DataFrame.from_dict(df_dict)

In [5]:
# Hand-made smoke test of the simulator: two tiny streams of 10 rows x 2
# columns with easily recognisable values.
data1 = np.array([[1,2],[3,4],[5,6],[7,8], [9,10],
                  [21, 22], [23, 24], [25, 26], [27, 28], [29, 30]])
data2 = np.array([[111,112],[113,114],[115,116],[117,118], [119,120],
                  [221, 222], [223, 224], [225, 226], [227, 228], [229, 230]])

# One 0/1 value per row; the same array is handed to both streams.
y = np.array([0, 0, 1, 1, 0, 0, 0, 0, 0, 1])
ts1 = TemporalDataStream(data1, y)
ts2 = TemporalDataStream(data2, y)
# 10 rows with a window of 5; the recorded run reports window_0 and window_1.
window_size = 5
sources = (ts1, ts2)
# presumably the starting column of each stream among all attributes
# (run_basic_experiment builds its list the same way) — TODO confirm against
# run_exos_simulator
attributes = (0,2)
# Feature names keyed by stream index.
feature_names = {0: ('A1', 'A2'), 1:('B1', 'B2')}
# 2 streams x 2 attributes each.
d = 4
k = d
results = run_exos_simulator(sources, d, k, attributes, feature_names, 
                                 window_size, n_clusters = (), n_init_data = (), 
                                 multiplier = 10, round_flag=True)
# Dump the raw nested result dictionary for inspection.
print(results)

Start exos simulator
Initializing Queues
producer 1 / 22718 exit
producer 0 / 22711 exit
customer 22732 exit
Run estimator

Run temporal neighbor 0

Run temporal neighbor 1

produser at main 0 done

Generating outlying attributes at 0

produser at main 1 done

Generating outlying attributes at 1

consumer at main done

value is 2

Ready to waking up temporal neighbor

Temporal neighbor 1 woke
Temporal neighbor 0 woke
estimator --> temporal neighbor woken

Run temporal neighbor 1

Run temporal neighbor 0

Run estimator

Generating outlying attributes at 0

Generating outlying attributes at 1

Ready to waking up temporal neighbor

Temporal neighbor 0 woke
Temporal neighbor 1 woke
estimator --> temporal neighbor woken

Run temporal neighbor 1

Run temporal neighbor 0

Temporal neighbor 1 DONE
Temporal neighbor 0 DONE
estimator done

Temporal neighbor 1 / 22774 exit
OA 1 DONE

estimator 22746 exit

Temporal neighbor 0 / 22761 exit
OA 0 DONE

OA 0 / 22791 exit

OA 1 / 22807 exit

temporal n

{'output': {'window_0': {0: {'out_attrs': [{'A1': 1.0, 'A2': 0.0}, {'A2': 0.5468302093335781, 'A1': 0.4531697906664218}], 'outlier_indices': {0: [0, 1], 1: [0, 1]}, 'temporal_neighbor_time': 0.016307723000863916, 'out_attrs_time': 0.02032844700079295}, 1: {'out_attrs': [{'B1': 0.5016092190355884, 'B2': 0.4983907809644116}, {'B1': 0.5041529459658665, 'B2': 0.49584705403413354}], 'outlier_indices': {0: [0, 1], 1: [0, 1]}, 'temporal_neighbor_time': 0.014231253999241744, 'out_attrs_time': 0.01745491499968921}, 'est_time': 0.01677435199962929}, 'window_1': {0: {'out_attrs': [{'A1': 1.0, 'A2': 0.0}], 'outlier_indices': {0: [0], 1: [0]}, 'temporal_neighbor_time': 0.0019343370004207827, 'out_attrs_time': 0.0157613310002489}, 1: {'out_attrs': [{'B1': 0.5031065188138317, 'B2': 0.49689348118616833}], 'outlier_indices': {0: [0], 1: [0]}, 'temporal_neighbor_time': 0.002104826000504545, 'out_attrs_time': 0.0157512730002054}, 'est_time': 0.007093661000908469}}, 'simulator_time': 0.35616659600054845}
