In [1]:
import os
import math
import numpy as np

from sklearn.neighbors import LocalOutlierFactor
from sklearn.metrics import roc_curve, auc

from src.utils import parse_filename, normalize

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Flatten
from tensorflow.keras.models import Model

import matplotlib as mpl
from matplotlib import gridspec
import matplotlib.pyplot as plt
# Render all figure text through LaTeX with the Libertine package and a serif family.
mpl.rcParams.update({
    'text.usetex': True,
    'text.latex.preamble': r'\usepackage{libertine}',
})
mpl.rc('font', family='serif')

## Configuration

In [2]:
# Both arrays live under <cwd>/data/synth; the "_d" file holds the samples and
# the "_o" file the matching outlier flags.
synth_dir = os.path.join(os.getcwd(), "data", "synth")

data_name = "100_100_100_0.2_0.05_0.03_0.5_0.5_d.npy"
out_name = "100_100_100_0.2_0.05_0.03_0.5_0.5_o.npy"

data = np.load(os.path.join(synth_dir, data_name))
is_outlier = np.load(os.path.join(synth_dir, out_name))

# The generation parameters are encoded in the file name; parse them back into a dict.
params = parse_filename(data_name)
params

{'num_devices': 100,
 'n': 100,
 'dims': 100,
 'subspace_frac': 0.2,
 'frac_outlying_devices': 0.05,
 'frac_outlying_data': 0.03,
 'gamma': 0.5,
 'delta': 0.5}

In [3]:
# Unpack the generation parameters that were parsed from the data file name.
num_devices = params["num_devices"]
dims = params["dims"]
num_data = params["n"]
# Look the value up in `params`; the previous code bound the literal list
# ["subspace_frac"] to this name instead of the parsed fraction.
subspace_frac = params["subspace_frac"]
frac_outlying_devices = params["frac_outlying_devices"]
frac_outlying_data = params["frac_outlying_data"]

# Time windows: the num_data samples per device are cut into (T_start + T)
# equally sized windows of n samples each. The training loop iterates over
# window indices T_start..T inclusive.
T_start = 1
T = 9
n = int(num_data/(T_start+T))

## Model Creation

In [4]:
# Local Outlier Factor detector. The contamination passed to it is the product
# of the outlying-device fraction and the per-device outlying-data fraction.
n_neighbors = 10
contamination = frac_outlying_devices * frac_outlying_data
detector = LocalOutlierFactor(
    n_neighbors=n_neighbors,
    contamination=contamination,
)

In [13]:
# LOF Training
#
# For every time window t in T_start..T the window's samples from all devices
# are pooled ("sent to the central server"), LOF is refit on that pool alone,
# and the per-sample outlier decisions are recorded. Per-window results are
# collected in lists and concatenated once after the loop, so the accumulation
# no longer depends on a hard-coded first window index (previously `t == 1`)
# and does not re-copy the growing arrays on every iteration.
prediction_chunks = []
data_chunks = []
label_chunks = []
for t in range(T_start, T+1):
    print("t = {}".format(t))
    start = t*n
    end = (t+1)*n
    # send data to central server: merge the first two axes of the window into
    # one sample axis and keep the last axis as features
    # (assumes `data` is 3-D, presumably (device, sample, feature) — TODO confirm)
    cdata = data[:, start:end]
    cdata = cdata.reshape(cdata.shape[0]*cdata.shape[1], cdata.shape[2])
    # a sample is labelled an outlier if any entry along its last axis is flagged;
    # flatten() uses the same row-major order as the reshape above
    cout = is_outlier[:, start:end].any(axis=-1).flatten()
    print(cout.shape)

    # train lof on central data with fixed window size
    # (fit_predict returns -1 for outliers, so the comparison yields a boolean mask)
    new_predictions = detector.fit_predict(cdata) == -1
    prediction_chunks.append(new_predictions)
    data_chunks.append(cdata)
    label_chunks.append(cout)

if prediction_chunks:
    predictions = np.concatenate(prediction_chunks)
    central_data = np.concatenate(data_chunks)
    labels = np.concatenate(label_chunks)
else:
    # No window was processed (T < T_start): keep the empty placeholders.
    predictions = np.array([])
    central_data = np.array([])
    labels = np.array([])
predictions

t = 1
(1000,)
t = 2
(1000,)
t = 3
(1000,)
t = 4
(1000,)
t = 5
(1000,)
t = 6
(1000,)
t = 7
(1000,)
t = 8
(1000,)
t = 9
(1000,)


array([False, False, False, ..., False, False, False])

In [17]:
# p0: fraction of samples where the LOF decision equals the ground-truth label.
# pc: fraction of samples whose label is "not outlier".
num_labels = len(labels)
p0 = np.sum(predictions == labels) / num_labels
pc = np.sum(~labels) / num_labels
print("p0 = {}, pc = {}".format(p0, pc))

p0 = 0.999, pc = 0.9983333333333333


In [18]:
# Improvement of the observed agreement p0 over the baseline pc, scaled by the
# largest improvement that is still possible (1 - pc).
# NOTE(review): pc is the share of non-outlier labels, not the chance-agreement
# term of Cohen's kappa (which also uses the prediction marginals) — confirm
# this is the intended statistic. The ratio is undefined when pc == 1.
headroom = 1 - pc
kappa = (p0 - pc) / headroom
kappa

0.40000000000001334