In [1]:
import numpy as np
import pandas as pd
from skmultiflow.data import TemporalDataStream
from exos.streams import run_exos_simulator


In [2]:
def run_basic_experiment(n_streams=3, size=200,
                         window_size=100, n_attrs=2,
                         multiplier=10, round_flag=False):
    """Run the EXOS simulator on randomly generated streams.

    Builds ``n_streams`` ``TemporalDataStream`` sources. Each source holds
    ``size`` rows of ``n_attrs`` attributes drawn uniformly from [5, 20) and a
    random 0/1 label per row. The sources are then handed to
    ``run_exos_simulator``. The random data is NOT seeded here; call
    ``np.random.seed(...)`` beforehand if a reproducible run is needed.

    Parameters
    ----------
    n_streams : int
        Number of synthetic streams to create.
    size : int
        Number of rows generated per stream.
    window_size : int
        Forwarded to ``run_exos_simulator`` as the window size.
    n_attrs : int
        Number of attributes (columns) per stream; named ``A0 .. A{n_attrs-1}``.
    multiplier : int
        Forwarded to ``run_exos_simulator``. (Previously this argument was
        ignored and 10 was always sent.)
    round_flag : bool
        Forwarded to ``run_exos_simulator``. (Previously this argument was
        ignored and ``True`` was always sent; pass ``round_flag=True``
        explicitly to reproduce the old behaviour.)

    Returns
    -------
    dict
        Whatever ``run_exos_simulator`` returns. Layout as recorded by the
        original author::

            results.keys()
                ['output', 'simulator_time']
            results['output'].keys()
                ['window_0', 'window_1', ...]
            results['output']['window_0'].keys()
                [0, 1, 2, ..., 'est_time']        # one key per stream id
            results['output']['window_0'][0].keys()
                ['out_attrs', 'outlier_indices',
                 'temporal_neighbor_time', 'out_attrs_time']

        * ``out_attrs``: list of dicts mapping feature name to its
          contribution value; one entry per outlier of that stream in that
          window.
        * ``outlier_indices``: dict keyed by stream id; only the entry of
          the stream itself is needed.
        * ``temporal_neighbor_time``: running time of the temporal-neighbor
          step for that stream.
        * ``out_attrs_time``: running time of the outlying-attributes step
          for that stream.
        * ``results['simulator_time']``: running time over all windows.
    """
    sources = []
    attributes = []
    feature_names = {}
    for i in range(n_streams):
        X = np.random.uniform(low=5, high=20, size=(size, n_attrs))
        y = np.random.randint(2, size=size)
        sources.append(TemporalDataStream(X, y))
        feature_names[i] = [f'A{j}' for j in range(n_attrs)]
        # Every stream has exactly n_attrs columns, so stream i starts at
        # offset i * n_attrs in the combined attribute space.
        attributes.append(i * n_attrs)
    d = n_streams * n_attrs  # total number of attributes over all streams
    k = d
    return run_exos_simulator(sources, d, k, attributes, feature_names,
                              window_size, n_clusters=(), n_init_data=(),
                              multiplier=multiplier, round_flag=round_flag)

In [3]:
def exos_output_df(results):
    """Placeholder for turning simulator ``results`` into a DataFrame.

    Not implemented yet: ``results`` is ignored and ``None`` is returned.
    """
    return None

In [4]:
def exos_running_time_df(results):
    """Summarise per-window running times of a simulator run as a DataFrame.

    Parameters
    ----------
    results : dict
        Simulator output. ``results['output']`` maps a window label to a dict
        that holds one entry per stream id plus an ``'est_time'`` entry. Each
        stream entry must provide ``'temporal_neighbor_time'`` and
        ``'out_attrs_time'``.

    Returns
    -------
    pandas.DataFrame
        One row per window, with columns:

        * ``windows``: the window label,
        * ``est_times``: the window's ``'est_time'`` value,
        * ``neigh_times``: maximum ``'temporal_neighbor_time'`` over the
          window's streams,
        * ``out_attrs_times``: maximum ``'out_attrs_time'`` over the
          window's streams.

    Notes
    -----
    ``results`` is left untouched, so the function can be called repeatedly
    on the same object (the notebook cell can be re-run safely).
    """
    columns = ['windows', 'est_times', 'neigh_times', 'out_attrs_times']
    rows = []
    for window, window_result in results['output'].items():
        # Everything except 'est_time' is a per-stream result. Filter instead
        # of deleting the key so the caller's dict is not modified.
        stream_results = [entry for key, entry in window_result.items()
                          if key != 'est_time']
        rows.append({
            'windows': window,
            'est_times': window_result['est_time'],
            'neigh_times': max(s['temporal_neighbor_time'] for s in stream_results),
            'out_attrs_times': max(s['out_attrs_time'] for s in stream_results),
        })
    # Build from a list of records with explicit columns: avoids passing a
    # dict_keys view to pandas and keeps the column order stable even when
    # there are no windows.
    return pd.DataFrame(rows, columns=columns)

In [5]:
# Smoke run of the simulator on two small hand-written streams
# (10 rows x 2 attributes each).
# Rows such as [5,600], [700,8] and [200,300] sit far from their neighbours;
# presumably they are the planted outliers -- confirm with the author.
data1 = np.array([[1,2],[3,4],[5,600],[700,8], [9,10],
                  [21, 22], [23, 24], [25, 26], [27, 28], [200, 300]])
data2 = np.array([[111,112],[113,114],[5,116],[117,8], [119,120],
                  [221, 222], [223, 224], [225, 226], [227, 228], [1000,1001]])

# One label per row, passed as the second argument of TemporalDataStream.
# NOTE(review): presumably 1 marks an outlier row -- confirm.
y = np.array([0, 0, 1, 1, 0, 0, 0, 0, 0, 1])
y2 = np.array([0, 0, 0, 1, 0, 0, 0, 0, 0, 1])
ts1 = TemporalDataStream(data1, y)
ts2 = TemporalDataStream(data2, y2)
# The recorded run with this window size produced two windows
# ('window_0' and 'window_1').
window_size = 5
sources = (ts1, ts2)
# NOTE(review): looks like the starting attribute offset of each stream in the
# combined attribute space (stream 0 -> 0, stream 1 -> 2) -- confirm against
# run_exos_simulator.
attributes = (0,2)
# Feature names per stream id; these names are the keys of the 'out_attrs'
# dicts in the printed result.
feature_names = {0: ('A1', 'A2'), 1:('B1', 'B2')}
# 2 streams x 2 attributes; run_basic_experiment computes d the same way.
d = 4
k = d
# `multiplier` is not passed here, so the simulator's own default applies.
results = run_exos_simulator(sources, d, k, attributes, feature_names, 
                                 window_size, n_clusters = (), n_init_data = (), 
                                 round_flag=True)
# Prints the whole nested result dict as plain text.
print(results)

Start exos simulator
Initializing Queues
producer 1 / 30985 exit
producer 0 / 30980 exit
customer 30993 exit
Run estimator

Run temporal neighbor 0

Run temporal neighbor 1

Generating outlying attributes at 0

Generating outlying attributes at 1



hyperplane weights are [-0.02647631  0.00123104]

hyperplane weights are [ 0.00117681 -0.00488398]

outlier_indices <class 'list'>
hyperplane weights are [0. 0.]



produser at main 0 done

produser at main 1 done

  attr_contributions = abs_weights/np.sum(abs_weights)
consumer at main done



outlier_indices <class 'list'>

value is 1






Ready to waking up temporal neighbor

estimator --> temporal neighbor woken

Temporal neighbor 1 woke
Temporal neighbor 0 woke
Run temporal neighbor 1

Run temporal neighbor 0

Run estimator

Generating outlying attributes at 0

Generating outlying attributes at 1



hyperplane weights are [0. 0.]
hyperplane weights are [0. 0.]




  attr_contributions = abs_weights/np.sum(abs_weights)


outlier_indices <class 'list'>

  attr_contributions = abs_weights/np.sum(abs_weights)



outlier_indices <class 'list'>


Ready to waking up temporal neighbor

estimator --> temporal neighbor woken

Temporal neighbor 1 woke
Temporal neighbor 0 woke
Run temporal neighbor 1

Run temporal neighbor 0

Temporal neighbor 1 DONE
estimator done

Temporal neighbor 0 DONE
Temporal neighbor 1 / 31032 exit
estimator 31005 exit

OA 1 DONE

Temporal neighbor 0 / 31021 exit
OA 1 / 31052 exit

OA 0 DONE

OA 0 / 31042 exit

temporal neighbor at main 0 done

temporal neighbor at main 1 done

value is -1

estimator at main done

value is -1

OA at main 0 done

OA at main 1 done

value is -1

Terminating processes
Done


{'output': {'window_0': {0: {'out_attrs': [{'A1': 0.9555700206688228, 'A2': 0.044429979331177316}, {'A1': 0.19416833672225858, 'A2': 0.8058316632777414}], 'outlier_indices': {0: [2, 3], 1: [2, 3]}, 'temporal_neighbor_time': 0.015239926000504056, 'out_attrs_time': 0.03249625399985234}, 1: {'out_attrs': [{}], 'outlier_indices': {0: [2, 3], 1: [2, 3]}, 'temporal_neighbor_time': 0.015281623000191757, 'out_attrs_time': 0.025290081999628455}, 'est_time': 0.025495214000329725}, 'window_1': {0: {'out_attrs': [{}], 'outlier_indices': {0: [4], 1: [4]}, 'temporal_neighbor_time': 0.003679180001199711, 'out_attrs_time': 0.024588078000306268}, 1: {'out_attrs': [{}], 'outlier_indices': {0: [4], 1: [4]}, 'temporal_neighbor_time': 0.0035289459992782213, 'out_attrs_time': 0.016368747999877087}, 'est_time': 0.005715556999348337}}, 'simulator_time': 0.3930263679994823}


In [6]:
# NOTE(review): this import lives in the middle of the notebook; consider
# moving it to the imports cell at the top so dependencies are visible up front.
import json
# Pretty-print the nested result dict as JSON for easier reading.
# json.dumps turns the integer stream-id keys into strings ("0", "1"), so this
# text does not round-trip back to the original key types.
json_object = json.dumps(results, indent = 2) 
print(json_object)

{
  "output": {
    "window_0": {
      "0": {
        "out_attrs": [
          {
            "A1": 0.9555700206688228,
            "A2": 0.044429979331177316
          },
          {
            "A1": 0.19416833672225858,
            "A2": 0.8058316632777414
          }
        ],
        "outlier_indices": {
          "0": [
            2,
            3
          ],
          "1": [
            2,
            3
          ]
        },
        "temporal_neighbor_time": 0.015239926000504056,
        "out_attrs_time": 0.03249625399985234
      },
      "1": {
        "out_attrs": [
          {}
        ],
        "outlier_indices": {
          "0": [
            2,
            3
          ],
          "1": [
            2,
            3
          ]
        },
        "temporal_neighbor_time": 0.015281623000191757,
        "out_attrs_time": 0.025290081999628455
      },
      "est_time": 0.025495214000329725
    },
    "window_1": {
      "0": {
        "out_attrs": [
          {}
       

In [8]:
# Show the window labels present in the simulator output.
results['output'].keys()

dict_keys(['window_0', 'window_1'])