In [None]:
import os
import numpy as np
import pandas as pd
import networkx as nx
from matplotlib import pyplot as plt
from dask.distributed import Client, progress, wait
from IPython.display import display, HTML
from sklearn.metrics import roc_auc_score
import project_path
from src.util.generate_connected_graph import generate_connected_graph
from src.util.generate_lr_data import generate_low_rank_data
from src.util.generate_anomaly import generate_spatio_temporal_anomaly
from src.algos.lr_stss import lr_stss

In [None]:
# Create the Dask client used by the rest of the notebook, requesting
# 14 workers with a single thread each.
client = Client(n_workers=14, threads_per_worker=1)
# Leave the client as the cell's last expression so its summary is displayed.
client

In [None]:
## Control variables (held fixed for the whole experiment)
gt = 'grid'                      # graph type
graph_param = (8, 5)             # parameter handed to the graph generator
dtn = 4                          # anomaly duration
rad = 1                          # anomaly radius
amp = 1                          # anomaly amplitude
window = 'boxcar'
distribution = 'bernoulli'
local_distribution = 'constant'
dims = (40, 24, 7, 4)            # shape of the generated data tensor
ranks = (8, 8, 5, 4)             # ranks used for the low-rank data generator
NoG = 1                          # number of graphs
NoT = 5                          # number of trials
time_m = 2                       # time mode
local_m = 1                      # local mode
lda_2 = 100
psi = 20
maxit = 300
maxit2 = 40

## Independent variables
### Hyperparameters
# Explicit list of anomaly counts. The original call,
# `np.logspace([50, 100, 150, 250, 500])`, raised TypeError because
# np.logspace requires both `start` and `stop`; the literal values show that a
# plain array of counts was intended.
NoA = np.array([50, 100, 150, 250, 500])
# Five logarithmically spaced values from 10**1 down to 10**-3.
lda_1 = np.logspace(1, -3, 5)
lda_l = np.logspace(1, -3, 5)
lda_t = np.logspace(1, -3, 5)

# Experiment description stored alongside the results.
metadata = {'graph_type': gt,
            'graph_param': graph_param,
            'number_of_anomalies': NoA,
            'anomaly_duration': dtn,
            'anomaly_radius': rad,
            'anomaly_amplitude': amp,
            'window': window,
            'distribution': distribution,
            'local_distribution': local_distribution,
            'number_of_graphs': NoG,
            'number_of_trials': NoT,
            'ranks': ranks,
            'dims': dims,
            'time_mode': time_m,
            'local_mode': local_m,
            'lda_2': lda_2,
            'lda_1': lda_1,
            'lda_l': lda_l,
            'lda_t': lda_t,
            'psi': psi,
            'maxit': maxit,
            'maxit2': maxit2,
            }

# Define experiment function
def run_exp(X, Y, an_m, inputs):
    """Run one `lr_stss` fit and summarise it as a flat result record.

    Parameters
    ----------
    X : array
        Anomaly-free data. Used only when `inputs` has no 'X' entry.
    Y : array
        Observed data. Used only when `inputs` has no 'Y' entry.
    an_m : array
        Anomaly mask (ground-truth labels for the AUC). Used only when
        `inputs` has no 'an_m' entry.
    inputs : dict
        Per-task settings. Required keys: 'G' (networkx graph), 'psi',
        'lda_1', 'lda_2', 'lda_l', 'lda_t', 'graph_seed', 'anomaly_seed'.
        Optional keys: 'X', 'Y', 'an_m' (take precedence over the positional
        arguments, matching the historical behaviour), 'graph_type' and
        'number_of_anomalies' (default to the notebook globals `gt`/`NoA`).

    Returns
    -------
    dict
        One row of results: detection AUC, relative reconstruction error,
        the hyperparameters used and the experiment settings.
    """
    G = inputs['G']
    # Symmetrically normalised adjacency: D^{-1/2} A D^{-1/2}.
    A = nx.adjacency_matrix(G)
    Deg = np.diag(np.asarray(np.sum(A, axis=1)).ravel())
    Dsq = np.linalg.inv(np.sqrt(Deg))
    An = Dsq @ A @ Dsq

    # Entries of `inputs` win; the positional arguments are the fallback so
    # they are no longer silently discarded.
    X = inputs.get('X', X)
    Y = inputs.get('Y', Y)
    an_m = inputs.get('an_m', an_m)
    psi = inputs['psi']

    # Wrap Y in a masked array with nothing masked; size the mask from Y
    # itself instead of relying on the global `dims`.
    Y = np.ma.masked_array(Y, mask=np.zeros(np.shape(Y), dtype=bool))

    # Use the configured iteration limits so the values recorded in the
    # result below are the ones actually applied.
    res = lr_stss(Y, An, time_m, local_m, verbose=0,
                  max_it2=maxit2, max_it=maxit,
                  lda2=inputs['lda_2'], lda1=inputs['lda_1'],
                  lda_t=inputs['lda_t'], lda_loc=inputs['lda_l'],
                  psis=[psi, psi, psi, psi])

    result = {'graph_type': inputs.get('graph_type', gt),
              # Score each entry by |S| and compare against the anomaly mask.
              'auc': roc_auc_score(an_m.ravel(), np.abs(res['S']).ravel()),
              # Relative error of the recovered X against the clean data.
              'rec_err': np.linalg.norm(res['X'] - X) / np.linalg.norm(X),
              'anomaly_cardinality': an_m.sum(),
              'number_of_anomalies': inputs.get('number_of_anomalies', NoA),
              'anomaly_duration': dtn,
              'anomaly_radius': rad,
              'graph_seed': inputs['graph_seed'],
              'anomaly_seed': inputs['anomaly_seed'],
              'anomaly_amplitude': amp,
              'lda_1': inputs['lda_1'],
              'lda_2': inputs['lda_2'],
              'lda_l': inputs['lda_l'],
              'lda_t': inputs['lda_t'],
              'psi_1': psi, 'psi_2': psi,
              'psi_3': psi, 'psi_4': psi,
              'maxit': maxit, 'maxit2': maxit2,
              'it': res['it']}
    return result

In [None]:
# Start from an empty task list. Rebinding the name is enough to drop the
# futures of a previous run; the former `try: del futures / finally: pass`
# re-raised NameError on a fresh kernel because `finally` does not catch.
futures = []
seed = 984854251
for _ in range(NoG):
    G, sd = generate_connected_graph(graph_param, gt, radius=graph_param, seed=seed)
    graph_inputs = {'G': G, 'graph_type': gt, 'graph_seed': sd}
    # Continue the seed sequence from the seed returned by the generator.
    seed = sd
    for _ in range(NoT):
        # Generate low rank normal data, scaled to unit standard deviation.
        X = generate_low_rank_data(dims, ranks, seed)
        X = X / np.std(X)
        # NoA may be a single count or a collection of counts; sweep over
        # every value. A scalar NoA yields exactly one pass, as before.
        for n_anom in np.atleast_1d(NoA).tolist():
            # Generate anomaly
            an, an_m = generate_spatio_temporal_anomaly(
                dims, G, n_anom, duration=dtn, radius=rad, seed=seed,
                time_m=time_m, local_m=local_m, window_type=window,
                amplitude=amp, distribution=distribution,
                local_dist=local_distribution)
            Y = X + an
            trial_inputs = dict(graph_inputs, X=X, Y=Y, an_m=an_m,
                                anomaly_seed=seed,
                                number_of_anomalies=n_anom)
            for reg_1 in lda_1:
                for reg_l in lda_l:
                    for reg_t in lda_t:
                        # Give every task its own dict. The original mutated
                        # one shared dict between submissions, so a task could
                        # observe values written for a later task.
                        task_inputs = dict(trial_inputs,
                                           lda_1=reg_1, lda_l=reg_l,
                                           lda_t=reg_t, lda_2=lda_2, psi=psi)
                        futures.append(
                            client.submit(run_exp, X, Y, an_m, task_inputs))
        seed += 1

In [None]:
# Track completion of the submitted tasks using dask.distributed's `progress`.
progress(futures)

In [None]:
# Persist every task result, plus the experiment metadata, to an HDF5 store.
fname = 'lr_stss_exp1.hdf5'
key = 'grid'
cwd = os.getcwd()
results_dir = os.path.join(cwd, 'results')
# HDFStore creates the file but not its parent directory.
os.makedirs(results_dir, exist_ok=True)
fname = os.path.join(results_dir, fname)

# Open the store once instead of re-opening it for every row.
# NOTE: rows are appended, so re-running this cell against an existing file
# adds the same results again under `key`.
with pd.HDFStore(fname) as storedata:
    for i, future in enumerate(futures):
        # `result()` blocks until the task has finished (and re-raises its
        # error, if any); rows appended before that point stay in the store.
        df = pd.DataFrame(future.result(), index=[i])
        storedata.append(key, df)
    # Attach the experiment description to the stored table.
    storedata.get_storer(key).attrs.metadata = metadata

In [None]:
# Read the stored results table back from disk and preview the first rows.
key = 'grid'
cwd = os.getcwd()
fname = os.path.join(cwd, 'results', 'lr_stss_exp1.hdf5')
with pd.HDFStore(fname) as storedata:
    df = storedata.get(key)
df.head()