# Helper Utils

This notebook contains miscellaneous helper functions that aid various processes in the library.

In [None]:
%run "imports.ipynb" #import external libraries

In [None]:
def my_contrastive_loss(labels, logits, margin=1):
    """
    A function defining contrastive loss, used as the loss function for our siamese neural networks.

    Pairs labelled 1 are penalised by the square of their predicted value,
    pairs labelled 0 by the square of how far that value falls short of
    ``margin`` (nothing once it reaches the margin).

    Params:
        labels. The true labels for a dataset.
        logits. The predictions of a model for the same dataset.
        number: margin. The value beyond which 0-labelled pairs stop
            contributing to the loss. Defaults to 1, the value that was
            previously hard-coded, so existing two-argument calls are unaffected.
    Returns:
        loss. The mean contrastive loss, as produced by K.mean.
    """
    # contribution of the pairs labelled 1
    similar_term = labels * K.square(logits)
    # contribution of the pairs labelled 0, clipped at zero beyond the margin
    dissimilar_term = (1 - labels) * K.square(K.maximum(margin - logits, 0))
    return K.mean(similar_term + dissimilar_term)

def compute_accuracy(predictions, labels,threshold=0.5): # legacy function
    """
    Legacy helper: mean of the labels whose prediction is below the threshold.

    Despite its name, this does not count correct predictions; it averages
    the true labels over the entries selected by ``prediction < threshold``.

    Params:
        np_array: predictions. Model outputs; flattened before comparison.
        np_array: labels. The true labels, indexable by a boolean mask.
        float: threshold. The decision threshold.
    Returns:
        The mean of the selected labels (NumPy yields nan when nothing is selected).
    """
    flat_scores = predictions.ravel()
    below_threshold = flat_scores < threshold
    selected_labels = labels[below_threshold]
    return selected_labels.mean()


    
def siamese_model_evaluate(model,x,y,threshold=0.5):
    """
    Evaluate a siamese neural network model on a given dataset.

    The model output for a pair is compared against ``threshold``: an output
    below the threshold is treated as a positive prediction (label 1),
    anything else as a negative prediction (label 0).

    Params:
        tensorflow model: model
        np_array: x. The paired dataset; x[:, 0] and x[:, 1] are passed as the model's two inputs.
        np_array: y. The dataset's labels.
        float: threshold. The decision threshold.
    Returns:
        float: accuracy.
        float: precision. Mean true label among the positive predictions; 0 when there are none.
        float: auc. Area under curve, computed on the thresholded predictions.
        float: loss. Contrastive loss with a margin of 1.
    """

    # make predictions
    pred = model.predict([x[:, 0], x[:, 1]])

    # Work on 1-D views of both arrays. Without this, labels of shape (n,)
    # multiplied with predictions of shape (n, 1) broadcast to an (n, n)
    # matrix and the loss is averaged over every label/prediction combination.
    scores = np.asarray(pred).ravel()
    y_flat = np.asarray(y).ravel()

    pred_labels = scores < threshold # generate prediction labels

    # precision: share of true positives among the positive predictions
    precision = y_flat[pred_labels].mean() if pred_labels.any() else 0.0
    if np.isnan(precision):
        precision = 0.0 # if the precision is nan, which can happen with adversarial examples, set to 0

    accuracy = accuracy_score(y_flat, pred_labels)

    # NOTE(review): the AUC is computed from the thresholded labels rather
    # than from the continuous scores, so it depends on `threshold`. Kept
    # as-is so previously reported numbers stay comparable; confirm intent.
    auc = roc_auc_score(y_flat, pred_labels)

    # contrastive loss, same formula as the training loss with margin 1
    margin = 1
    y_true = y_flat.astype(scores.dtype)
    loss = K.mean(y_true * K.square(scores) + (1 - y_true) * K.square(K.maximum(margin - scores, 0)))
    loss = loss.numpy()

    return accuracy, precision, auc, loss

In [None]:
def contrastive_loss(y, preds, margin=1):
    """
    Contrastive loss between true labels and predictions.

    Same formula as my_contrastive_loss, with the labels first cast to the
    dtype of the predictions and a configurable margin.

    Params:
        y. The true labels.
        preds. The predictions of a model for the same dataset.
        number: margin. The value beyond which 0-labelled pairs stop contributing.
    Returns:
        loss. The mean contrastive loss, as produced by K.mean.
    """
    # align the label dtype with the predictions so the arithmetic below
    # never mixes two tensor dtypes
    labels = tf.cast(y, preds.dtype)
    # term for the pairs labelled 1
    similar_term = labels * K.square(preds)
    # term for the pairs labelled 0: shortfall to the margin, clipped at zero
    shortfall = K.maximum(margin - preds, 0)
    dissimilar_term = (1 - labels) * K.square(shortfall)
    return K.mean(similar_term + dissimilar_term)

def euclidean_distance(vectors):
    """
    Calculate the euclidean distance between two sets of feature vectors.
    Used as the similarity metric for our siamese neural networks.

    Params:
        list: vectors. A pair (first, second) of feature tensors.
    Returns:
        The distance, reduced over axis 1 with that axis kept.
    """
    first, second = vectors
    difference = first - second
    # sum of squared differences along axis 1
    squared_norm = K.sum(K.square(difference), axis=1, keepdims=True)
    # floor the value at K.epsilon() so the square root never receives zero
    floored = K.maximum(squared_norm, K.epsilon())
    return K.sqrt(floored)

In [None]:
def plot_training(H, plot_path):
    """
    Plot the training and validation loss from a training history and save the figure.

    Params:
        H. Object whose ``history`` mapping holds "loss" and "val_loss" series.
        plot_path. Destination handed to plt.savefig.
    """
    plt.style.use("ggplot")
    plt.figure()
    # (history key, legend label) for each curve, drawn in this order
    curves = (("loss", "train_loss"), ("val_loss", "val_loss"))
    for history_key, legend_label in curves:
        plt.plot(H.history[history_key], label=legend_label)
    plt.title("Training Loss")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss")
    plt.legend(loc="lower left")
    plt.savefig(plot_path)

In [None]:
"""The following four functions get either the mean accuracy or all accuracy values from a set of evaluations,
either for a standard neural network or for a siamese neural network."""

def get_mean_accuracy(trained_evals): #pass in the evals from attack_model_variations and get the mean accuracy
    """
    Return the mean accuracy over a nested collection of evaluations.

    Params:
        trained_evals. An iterable of per-model collections; each inner item
            is one evaluation whose element at index 1 is taken as the accuracy.
    Returns:
        The mean of all collected accuracies, as computed by ``mean``.
    """
    # index the evaluation directly; slicing it first only made a throwaway copy
    accs = [evals[1] for model_evals in trained_evals for evals in model_evals]
    return mean(accs)

def get_accuracies(trained_evals):
    """
    Return every accuracy from a nested collection of evaluations.

    Params:
        trained_evals. An iterable of per-model collections; each inner item
            is one evaluation whose element at index 1 is taken as the accuracy.
    Returns:
        list: the accuracies, in iteration order (empty when there are no evaluations).
    """
    # index the evaluation directly; slicing it first only made a throwaway copy
    return [evals[1] for model_evals in trained_evals for evals in model_evals]

def get_mean_accuracy_siamese(model_evals):
    """
    Return the mean of the index-1 value across a flat collection of evaluations.

    NOTE(review): siamese_model_evaluate in this file returns
    (accuracy, precision, auc, loss), so index 1 would be the precision if
    the evaluations come from it. Confirm which producer feeds this function.

    Params:
        model_evals. An iterable of evaluations, each indexable at position 1.
    Returns:
        The mean of the collected values, as computed by ``mean``.
    """
    collected = [single_eval[1] for single_eval in model_evals]
    return mean(collected)

def get_accuracies_siamese(model_evals):
    """
    Return the index-1 value of every evaluation in a flat collection.

    NOTE(review): siamese_model_evaluate in this file returns
    (accuracy, precision, auc, loss), so index 1 would be the precision if
    the evaluations come from it. Confirm which producer feeds this function.

    Params:
        model_evals. An iterable of evaluations, each indexable at position 1.
    Returns:
        list: the collected values, in iteration order.
    """
    return [single_eval[1] for single_eval in model_evals]

In [1]:
def cliffsdelta(x,y):
    """
    We use this function to measure the effect size between our neural networks' accuracies, aiding evaluation.
    The function takes two arrays as input and returns a float between -1 and 1.

    Every value of x is compared with every value of y: a pair scores +1
    when the x value is larger, -1 when it is smaller and 0 on a tie. The
    result is the total score divided by the number of pairs.

    Params:
        x. A sized, re-iterable collection of comparable values.
        y. A sized, re-iterable collection of comparable values.
    Returns:
        float: Cliff's delta; 1 when every x exceeds every y, -1 for the opposite.
    Raises:
        ZeroDivisionError: if either input is empty.
    """
    n = len(x)
    m = len(y)
    # stream the pairwise comparisons instead of materialising n*m tuples
    dominance = sum(
        1 if xi > yi else -1 if xi < yi else 0
        for xi in x
        for yi in y
    )
    return dominance / (n*m)