In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix
# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
# Compare the major version numerically: a plain string comparison would
# wrongly reject e.g. "10.0" because the character "1" sorts before "2".
assert int(tf.__version__.split(".")[0]) >= 2


In [None]:
def maeScores(originalDF, reducedDF):
    """Return a min-max normalised mean-absolute-error score for every row.

    Parameters
    ----------
    originalDF : array-like, 2-D
        Reference values (converted to a NumPy array).
    reducedDF : array-like, 2-D
        Values compared against ``originalDF``; must be broadcast-compatible
        with it.

    Returns
    -------
    pd.Series
        One score per row, on a fresh ``RangeIndex``. Scores are rescaled so
        the smallest row error maps to 0 and the largest to 1. When every row
        has the same error the scores are all ``0.0``.
    """
    abs_err = np.abs(np.asarray(originalDF) - np.asarray(reducedDF))
    loss = pd.Series(data=np.mean(abs_err, axis=1))

    lowest = np.min(loss)
    spread = np.max(loss) - lowest
    if spread == 0:
        # All rows share the same error: (loss - min) / 0 would produce NaN
        # for every row, which in turn makes any percentile threshold NaN.
        return pd.Series(np.zeros(len(loss)), index=loss.index)

    return (loss - lowest) / spread

In [None]:
def anomScores_Snorkel(yp, trans_lhud_test, df_lhud_test, thres):
    """Score model output against the reference data and flag high scores.

    Parameters
    ----------
    yp : array-like
        Model output. A 3-D array (e.g. from a recurrent model) is flattened
        to 2-D by merging its first two axes.
    trans_lhud_test : array-like
        Reference data handed to ``mapeScores`` together with ``yp``.
    df_lhud_test : pd.DataFrame
        Frame whose index is applied to the scores and to which the
        ``scores`` / ``anom`` columns are appended.
    thres : float
        Percentile passed to ``np.percentile`` to derive the cut-off.

    Returns
    -------
    tuple
        ``(frame, density_threshold)`` where ``frame`` is ``df_lhud_test``
        with two extra columns: ``scores`` and ``anom`` (``-1`` when the
        score is strictly above the threshold, ``1`` otherwise).

    Notes
    -----
    ``mapeScores`` is not defined in the visible part of this notebook; it is
    assumed to return an unnamed ``pd.Series`` -- TODO confirm.
    """
    # Recurrent models emit 3-D output; collapse it to 2-D first
    dims = yp.shape
    if len(dims) == 3:
        n_outer, n_inner, n_features = dims
        yp = np.reshape(yp, (n_outer * n_inner, n_features))

    # Compute the anomaly scores and the percentile-based cut-off
    raw_scores = mapeScores(trans_lhud_test, yp)
    density_threshold = np.percentile(raw_scores, thres)

    # Re-index the scores so they line up with the rows of df_lhud_test
    scored = (
        raw_scores.to_frame()
        .reset_index(drop=True)
        .set_index(df_lhud_test.index)
    )
    above_cutoff = scored[0] > density_threshold
    scored['anom'] = np.where(above_cutoff, -1, 1)
    scored.columns = ['scores', 'anom']

    combined = pd.concat([df_lhud_test, scored], axis=1)
    return combined, density_threshold

In [None]:
### Retorna os scores de erro e se é anomalia de acordo com threshold
def dnn_tf_anomScores(y_pred, trans_lhud_test, df_lhud_test, thres):
    """Compute per-row MAE scores and label rows above a percentile cut-off.

    Parameters
    ----------
    y_pred : array-like
        Model output. A 3-D array (e.g. from a recurrent model) is flattened
        to 2-D by merging its first two axes.
    trans_lhud_test : array-like
        Reference data compared with ``y_pred`` by ``maeScores``.
    df_lhud_test : pd.DataFrame
        Frame whose index is applied to the scores and to which the
        ``scores`` / ``anom`` columns are appended.
    thres : float
        Percentile passed to ``np.percentile`` to derive the cut-off.

    Returns
    -------
    tuple
        ``(frame, density_threshold)`` where ``frame`` is ``df_lhud_test``
        with two extra columns: ``scores`` and ``anom`` (``-1`` when the
        score is strictly above the threshold, ``1`` otherwise).
    """
    # Recurrent models emit 3-D output; collapse it to 2-D first
    dims = y_pred.shape
    if len(dims) == 3:
        n_outer, n_inner, n_features = dims
        y_pred = np.reshape(y_pred, (n_outer * n_inner, n_features))

    # Compute the anomaly scores and the percentile-based cut-off
    raw_scores = maeScores(trans_lhud_test, y_pred)
    density_threshold = np.percentile(raw_scores, thres)

    # Re-index the scores so they line up with the rows of df_lhud_test
    scored = (
        raw_scores.to_frame()
        .reset_index(drop=True)
        .set_index(df_lhud_test.index)
    )
    above_cutoff = scored[0] > density_threshold
    scored['anom'] = np.where(above_cutoff, -1, 1)
    scored.columns = ['scores', 'anom']

    combined = pd.concat([df_lhud_test, scored], axis=1)
    return combined, density_threshold


In [None]:
def benchmark_snorkel(gold_anom, pred_anom):
    """Compare predicted anomaly labels against gold labels.

    The last column of each frame is read as the label (``-1`` is treated as
    the positive class); the second-to-last column of ``pred_anom`` is used
    as the score for the ROC curve. Rows are matched by position, not index.

    Parameters
    ----------
    gold_anom : pd.DataFrame
        Frame whose last column holds the reference labels.
    pred_anom : pd.DataFrame
        Frame whose last two columns hold the score and the predicted label.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1, cm, auc_score)`` where ``cm`` is
        the confusion matrix with label order ``[1, -1]``.
    """
    y_true = gold_anom.iloc[:, -1].values
    y_hat = pred_anom.iloc[:, -1].values

    # Accuracy: share of rows whose predicted label matches the gold label
    hits = sum(y_hat == y_true)
    accuracy = hits / len(gold_anom)

    # Confusion matrix with the normal class (1) first
    cm = confusion_matrix(y_true, y_hat, labels=[1, -1])

    # Precision, recall and F1 all treat -1 (anomaly) as the positive class
    precision, recall, f1 = (
        metric(y_true, y_hat, pos_label=-1)
        for metric in (precision_score, recall_score, f1_score)
    )

    # ROC / AUC are computed from the score column; thresholds are not needed
    y_score = pred_anom.iloc[:, -2].values
    fpr, tpr, _ = roc_curve(y_true, y_score, pos_label=-1)
    auc_score = auc(fpr, tpr)

    return accuracy, precision, recall, f1, cm, auc_score

In [None]:
def benchmark_user(user_anom, anomSc):
    """Benchmark the predictions for a single user against labelled data.

    The user is taken as the most frequent value of ``user_anom['user']``;
    the rows of ``anomSc`` belonging to that user are then compared with
    ``user_anom``. The last column of each frame is read as the label
    (``-1`` is the positive class) and the second-to-last column of
    ``anomSc`` as the score for the ROC curve.

    Rows are matched by position. The two frames therefore do not need to
    share an index, but they must contain the same number of rows for the
    selected user.

    Parameters
    ----------
    user_anom : pd.DataFrame
        Labelled frame with a ``user`` column and the label in its last column.
    anomSc : pd.DataFrame
        Scored frame with a ``user`` column, the score in its second-to-last
        column and the predicted label in its last column.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1, cm, auc_score)`` where ``cm`` is
        the confusion matrix with label order ``[1, -1]``.

    Raises
    ------
    ValueError
        If the number of predicted rows for the user differs from the number
        of labelled rows.
    """
    # Identify the user from the labelled anomaly dataset
    user = user_anom['user'].value_counts().index.tolist()[0]

    # Predicted anomalies for that user
    pred_anom = anomSc[anomSc['user'] == user]

    # Work on plain arrays: comparing two Series aligns on the index and
    # raises when the labels differ, which is the normal case once anomSc
    # has been filtered.
    y_true = user_anom.iloc[:, -1].values
    y_pred = pred_anom.iloc[:, -1].values
    y_score = pred_anom.iloc[:, -2].values

    if len(y_true) != len(y_pred):
        raise ValueError(
            "user_anom has %d rows but anomSc has %d rows for user %r"
            % (len(y_true), len(y_pred), user)
        )

    # Accuracy
    n_correct = int(np.count_nonzero(y_pred == y_true))
    accuracy = n_correct / len(user_anom)

    # Confusion matrix with the normal class (1) first
    cm = confusion_matrix(y_true, y_pred, labels=[1, -1])

    precision = precision_score(y_true, y_pred, pos_label=-1)
    recall = recall_score(y_true, y_pred, pos_label=-1)
    f1 = f1_score(y_true, y_pred, pos_label=-1)

    # ROC / AUC from the score column; the thresholds are not needed
    fpr, tpr, _ = roc_curve(y_true, y_score, pos_label=-1)
    auc_score = auc(fpr, tpr)

    return accuracy, precision, recall, f1, cm, auc_score


In [None]:
def convert_sparse_matrix_to_sparse_tensor(X):
    """Build a ``tf.SparseTensor`` from a sparse matrix.

    Parameters
    ----------
    X : sparse matrix
        Any object exposing ``tocoo()`` whose result has ``row``, ``col``,
        ``data`` and ``shape`` attributes (presumably a SciPy sparse matrix
        -- confirm against callers).

    Returns
    -------
    tf.SparseTensor
        Tensor with one ``[row, col]`` index pair per stored entry, the
        stored values, and the dense shape of ``X``.

    Notes
    -----
    Entries are emitted in the order produced by ``tocoo()``.
    NOTE(review): some TensorFlow sparse ops expect row-major ordering;
    apply ``tf.sparse.reorder`` at the call site if that is not guaranteed.
    """
    coo = X.tocoo()
    # (nnz, 2) array of [row, col] pairs. A plain ndarray replaces np.mat
    # (removed in NumPy 2.0); int64 is the index dtype SparseTensor uses.
    indices = np.array([coo.row, coo.col], dtype=np.int64).transpose()
    return tf.SparseTensor(indices, coo.data, coo.shape)