In [None]:
from tensorflow import keras
from time import time
import pandas as pd
import numpy as np
from sklearn import preprocessing
import os
import scipy.stats as ss
from tensorflow.keras.optimizers import Adam

In [None]:
# Week bounds of the train/test split for electricity meters.
# NOTE(review): none of these FIRST_/LAST_WEEK_* names is referenced in the
# cells visible here — presumably used elsewhere; confirm before removing.
FIRST_WEEK_TRAINING_ELECTRICITY, LAST_WEEK_TRAINING_ELECTRICITY = 0, 60
FIRST_WEEK_TESTING_ELECTRICITY, LAST_WEEK_TESTING_ELECTRICITY = 61, 75

# Week bounds of the train/test split for gas meters.
FIRST_WEEK_TRAINING_GAS, LAST_WEEK_TRAINING_GAS = 0, 60
FIRST_WEEK_TESTING_GAS, LAST_WEEK_TESTING_GAS = 61, 77

WINDOW_LEN = 48   # rows per window; 48 = one day window
WINDOW_SIZE = 5   # how many consecutive windows are combined into one model input

In [None]:
def get_training_dataset(meterID):
    """Load every training CSV of ``meterID`` from the current directory.

    A file is picked up when its name starts with ``"<meterID>_"`` and ends
    with ``"50.csv"``. Matching files are read in sorted file-name order.

    Args:
        meterID: meter identifier; converted with ``str`` to build the prefix.

    Returns:
        list of pandas.DataFrame, one per matching file (empty if none match).
        The original author's note says element 0 is the "normal" scenario;
        that relies on the normal file sorting first and is not enforced here.
    """
    d_path = "./"
    wanted_prefix = str(meterID) + "_"

    # Sorting the directory listing makes the order of the returned frames
    # (and therefore the class indices derived from it) deterministic.
    return [
        pd.read_csv(d_path + name)
        for name in sorted(os.listdir(d_path))
        if name.startswith(wanted_prefix) and name.endswith("50.csv")
    ]

In [None]:
def build_model(training_dataset):
    """Build and fit the dense classifier on windowed usage statistics.

    Every DataFrame in ``training_dataset`` is one behaviour class. Each frame
    is cut into consecutive chunks of ``WINDOW_LEN`` rows, and every run of
    ``WINDOW_SIZE`` consecutive chunks (newest first) becomes one training
    sample via ``generate_input``, labelled with the one-hot vector of its
    class index.

    Args:
        training_dataset: sequence of DataFrames, one per class
            (index 0 -> normal behaviour, per the original author's note).

    Returns:
        tuple ``(model, elapsed_seconds)``: the fitted ``keras.Sequential``
        model and the wall-clock seconds spent building and fitting it.

    Raises:
        ValueError: if no frame is long enough to yield a single sample.
    """
    t0 = time()

    number_of_classes = len(training_dataset)

    model = keras.Sequential()
    model.add(keras.layers.Dense(units=50, activation='relu'))
    model.add(keras.layers.Dense(units=number_of_classes, activation='softmax'))  # one neuron for each class
    # Softmax output + one-hot labels is a single-label multi-class problem,
    # which calls for categorical (not binary) cross-entropy.
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    x = []
    y = []

    # For each kind of behaviour
    for class_index, class_df in enumerate(training_dataset):   # 0 -> normal

        # split into consecutive windows of WINDOW_LEN rows
        windows = [class_df[w:w + WINDOW_LEN] for w in range(0, class_df.shape[0], WINDOW_LEN)]
        label = generate_label(class_index, number_of_classes=number_of_classes)

        # One sample per position that has WINDOW_SIZE - 1 predecessors;
        # windows are passed newest first.
        for j in range(WINDOW_SIZE - 1, len(windows)):
            recent_windows = [windows[j - k] for k in range(WINDOW_SIZE)]
            x.append(generate_input(*recent_windows))
            y.append(label)

    if not x:
        raise ValueError(
            "No training samples: every dataset needs at least "
            "WINDOW_SIZE * WINDOW_LEN rows worth of windows"
        )

    x = np.array(x)
    y = np.array(y)
    print("Training set shape:", x.shape, y.shape)

    # Keras carves the validation split from the *end* of the arrays before
    # shuffling. The samples above are grouped by class, so without this
    # permutation the validation set would contain only the last class(es).
    # A fixed seed keeps the split reproducible.
    order = np.random.default_rng(0).permutation(len(x))
    x = x[order]
    y = y[order]

    model.fit(x=x, y=y, epochs=20, verbose=1, batch_size=448, validation_split=0.2)

    return model, time() - t0

In [None]:
def generate_input(*dfs):
    """Turn a sequence of windows into one standardized feature vector.

    For every window, 15 statistics of its ``'Usage'`` column are computed
    (mean and its powers, std, mode, range, coefficient of variation,
    skewness, quartiles, IQR, last-minus-first). The statistics of all windows
    are concatenated in argument order and the resulting flat vector is passed
    through ``preprocessing.scale`` as a whole.

    Args:
        *dfs: DataFrames with a ``'Usage'`` column. Callers in this notebook
            pass the newest window first, then progressively older ones.

    Returns:
        list of float with ``15 * len(dfs)`` entries.
    """
    features = {}

    for lag, window in enumerate(dfs):
        prefix = f'(t-{lag})' if lag else ''
        usage = window['Usage']

        mean = usage.mean()
        std = usage.std()
        # Small epsilon avoids a division by zero for an all-zero window.
        cv = std / (mean + 0.00001)
        q1 = usage.quantile(0.25)
        q2 = usage.quantile(0.5)
        q3 = usage.quantile(0.75)

        features.update({
            f'mean {prefix}': mean,
            f'mean² {prefix}': mean ** 2,
            f'mean³ {prefix}': mean ** 3,
            f'std {prefix}': std,
            f'std² {prefix}': std ** 2,
            # mode() yields a Series; building the one-row frame below with
            # index=[0] keeps only its entry labelled 0 (the first mode).
            f'mode {prefix}': usage.mode(),
            f'range {prefix}': usage.max() - usage.min(),
            f'cv {prefix}': cv,
            f'cv² {prefix}': cv ** 2,
            f'skew {prefix}': ss.skew(usage),
            f'q1 {prefix}': q1,
            f'q2 {prefix}': q2,
            f'q3 {prefix}': q3,
            f'iqr {prefix}': q3 - q1,
            f'last_minus_first {prefix}': usage.iloc[-1] - usage.iloc[0],
        })

    single_row = pd.DataFrame(features, index=[0])
    flat_values = single_row.values.flatten().tolist()
    return preprocessing.scale(flat_values).tolist()

In [None]:
def generate_label(kind, number_of_classes):
    """Return the one-hot integer vector for class ``kind``.

    Args:
        kind: index of the class to flag.
        number_of_classes: length of the returned vector.

    Returns:
        numpy.ndarray of ``int`` with a 1 at position ``kind`` and 0 elsewhere.
    """
    one_hot = np.full(shape=number_of_classes, fill_value=0, dtype=int)
    one_hot[kind] = 1
    return one_hot

In [None]:
def compute_outliers(testing_len, predictions, is_attack_behavior):
    """Convert a detection count into confusion-matrix counts.

    Args:
        testing_len: total number of evaluated observations.
        predictions: number of observations flagged as attacks.
        is_attack_behavior: truthy when the evaluated data is attack data.

    Returns:
        tuple ``(n_tp, n_tn, n_fp, n_fn)``. On attack data the flagged
        observations are true positives and the rest false negatives; on
        non-attack data they are false positives and the rest true negatives.
    """
    not_flagged = testing_len - predictions

    if is_attack_behavior:
        # Every detection on attack data is correct; every miss is a false negative.
        return predictions, 0, 0, not_flagged

    # Every detection on clean data is a false alarm.
    return 0, not_flagged, predictions, 0


def predict(testing_dataset, model, num_class):
    """Classify every sliding window of ``testing_dataset`` and count detections.

    The frame is cut into consecutive chunks of ``WINDOW_LEN`` rows; each run
    of ``WINDOW_SIZE`` consecutive chunks (newest first) is converted with
    ``generate_input`` into one observation. All observations are scored with
    a single ``model.predict`` call instead of one call per window.

    Args:
        testing_dataset: DataFrame to evaluate.
        model: fitted classifier whose ``predict`` returns one score per class
            for every input row.
        num_class: class index the data is expected to belong to. When it is
            ``>= 1`` an observation is counted only if its arg-max class equals
            ``num_class``; otherwise it is counted if its arg-max class is
            any class ``>= 1``.

    Returns:
        tuple ``(n_attacks, obs, elapsed_seconds)``: number of counted
        observations, number of evaluated observations, wall-clock seconds.
        ``(0, 0, elapsed)`` when the frame is too short for a single observation.
    """
    t0 = time()

    window_size = int(WINDOW_SIZE)
    windows = [testing_dataset[w:w + WINDOW_LEN] for w in range(0, testing_dataset.shape[0], WINDOW_LEN)]

    inputs = [
        generate_input(*[windows[j - k] for k in range(window_size)])
        for j in range(window_size - 1, len(windows))
    ]
    obs = len(inputs)
    if obs == 0:
        # Nothing to score; avoid handing an empty batch to the model.
        return 0, 0, time() - t0

    # One batched call: same per-row scores, without paying the per-call
    # overhead of model.predict once for every window.
    all_scores = model.predict(np.array(inputs))

    n_attacks = 0
    for scores in all_scores:
        predicted_class = np.argmax(scores)

        # Debug trace kept from the original, only emitted for classes 2 and 3.
        if num_class == 2 or num_class == 3:
            print("La prediccion de clase es la clase numero " + str(predicted_class))
            print(scores)

        if num_class >= 1:  # If we are looking for a specific attack class
            if predicted_class == num_class:
                n_attacks += 1
        elif predicted_class >= 1:  # Normal data: any attack class is a detection
            n_attacks += 1

    return n_attacks, obs, time() - t0

def print_metrics(meterID, detector, attack, time_model_creation, time_model_prediction, n_tp, n_tn, n_fp, n_fn):
    """Print a human-readable evaluation report for one meter/attack pair.

    Args:
        meterID: identifier of the evaluated meter.
        detector: name of the detector (free text).
        attack: name of the attack scenario (falsy for un-attacked data).
        time_model_creation: seconds spent creating the model.
        time_model_prediction: seconds spent predicting.
        n_tp, n_tn, n_fp, n_fn: confusion-matrix counts.

    Accuracy is reported as ``nan`` when all four counts are zero, instead of
    raising ``ZeroDivisionError``.
    """
    total = n_tp + n_tn + n_fp + n_fn
    # No observations at all (e.g. a test set shorter than one sample).
    accuracy = (n_tp + n_tn) / total if total else float("nan")

    print("\n\nMeterID:\t\t\t", meterID)
    print("Detector:\t\t\t", detector)
    print("Attack:\t\t\t\t", attack)
    print("Exec. time of model creation:\t", time_model_creation, "seconds")
    print("Exec. time of model prediction:\t", time_model_prediction, "seconds")
    print("Accuracy:\t\t\t", accuracy)
    print("Number of true positives:\t", n_tp)
    print("Number of false negatives:\t", n_fn)
    print("Number of true negatives:\t", n_tn)
    print("Number of false positives:\t", n_fp)
    # Confusion matrix: rows = predicted (attack, normal), columns = actual.
    print("[", n_tp, n_fp, "]")
    print("[", n_fn, n_tn, "]\n\n")

In [None]:
def get_testing_dataset(attack, meterID):
    """
    Returns the testing dataset for the meterID passed

    The CSV is read from the current directory. Its name is
    ``<meterID>_<attack>_51_101.csv`` when ``attack`` is truthy, and
    ``<meterID>_51_101.csv`` otherwise.

    Args:
        attack: attack name (str) or a falsy value for un-attacked data.
        meterID: meter identifier; converted with ``str``.

    Returns:
        pandas.DataFrame with the contents of the selected file.
    """
    name_parts = [str(meterID)]
    if attack:
        name_parts.append(attack)
    name_parts += [str(51), str(101)]

    return pd.read_csv("./" + "_".join(name_parts) + ".csv")

In [None]:
# Driver cell: train one classifier for the selected meter, then evaluate it
# on the un-attacked test set and on each attack scenario.
#
# Alternative scenario set kept by the original author:
#tuple_of_attacks = (False, "Avg", "FDI10", "FDI30", "RSA_0.25_1.1", "RSA_0.5_3", "Swap")
# `False` stands for the un-attacked test set (get_testing_dataset omits the
# attack part of the file name for a falsy value).
# NOTE(review): the position of each entry is passed to predict() as the
# expected class index, so this order presumably has to match the class order
# produced by get_training_dataset (sorted file names) — confirm.
tuple_of_attacks = (False, "Percentile",  "RSA_0.5_3", "Rating")
# Alternative meter kept by the original author:
#meterID = 1014
meterID = 1

training_dataset = get_training_dataset(meterID)
model, time_model_creation = build_model(training_dataset)

# `index` is the position of the current scenario in tuple_of_attacks.
index=0
for attack in tuple_of_attacks:
    # Position 0 is the un-attacked data; every later position is an attack.
    is_attack = index > 0
    testing_dataset = get_testing_dataset(attack, meterID)
    predictions, obs, time_model_prediction = predict(testing_dataset, model, index)
    index += 1
    # Turn the detection count into TP/TN/FP/FN and print the report.
    n_tp, n_tn, n_fp, n_fn = compute_outliers(obs, predictions, is_attack)
    print_metrics(meterID, "NN", attack, time_model_creation, time_model_prediction, n_tp, n_tn, n_fp, n_fn)

[[-0.42847147522814394, -0.5627085256577585, -0.5823966263874354, -0.2945215783240355, -0.5066878441158904, -0.5857805187003485, 0.3902505919340119, 1.3999405028866778, 3.090547027292494, 1.4240064181707077, -0.5857805187003485, -0.5857805187003485, -0.44634750289543995, -0.44634750289543995, -0.5857805187003485, -0.46400008021689476, -0.5719533647475398, -0.5842105605952901, -0.3628364096887016, -0.5394390563910887, -0.5857805187003485, 0.26154319272948084, 1.3775910068458517, 3.008257839140922, 1.4163262384570572, -0.5857805187003485, -0.5857805187003485, -0.46779873609619516, -0.46779873609619516, -0.5857805187003485, -0.5084666990392934, -0.5802074808664476, -0.5853787955564882, -0.4513805806628071, -0.5689392087621776, -0.5857805187003485, -0.09240215508297954, 1.278468668060445, 2.6545227289618225, 1.1446548220844914, -0.5857805187003485, -0.5857805187003485, -0.4651173319461007, -0.4651173319461007, -0.5857805187003485, -0.39428357231444033, -0.5515903347310313, -0.5796761462708