# **Evaluation**

In [1]:
#import pandas as pd
import numpy as np

In [50]:
gt = np.genfromtxt("evaluate.csv", delimiter=",").astype(int)
results = np.genfromtxt("results.csv", delimiter=",").astype(int)
decision = gt[:, -1]

In [35]:
accuracy = np.sum(decision == results) / len(gt)

In [36]:
accuracy

0.9825096899224807

In [51]:
def is_fake(results):
    results = results==1
    return np.all(results) or np.all(~np.array(results))

def precision(gt, result):
    tp = np.sum(np.logical_and((decision == result), decision == True))
    fp = np.sum(np.logical_and((decision != result), results == True))
    return tp/(tp+fp) if tp+fp>0 else 0

def recall(gt, result):
    tp = np.sum(np.logical_and((decision == result), decision == True))
    fn = np.sum(np.logical_and((decision != result), results == False))
    return tp/(tp+fn) if tp+fn>0 else 0

def f2_score(gt, result):
    prec = precision(gt, result)
    reca = recall(gt, result)
    return 2*(prec*reca)/(prec+reca) if prec+reca > 0 else 0

f2_score(gt, results)

0.9544824107930905

In [52]:
is_fake(results)

False

In [53]:
precision(gt, results), recall(gt, results)

(0.9989443124835049, 0.9138097537421536)

In [9]:
np.sum(gt["decision"] == results[0])

20640

In [10]:
np.logical_and(gt["decision"] == results[0], gt["decision"] == True)

0        False
1        False
2         True
3        False
4        False
         ...  
20635    False
20636     True
20637    False
20638    False
20639    False
Length: 20640, dtype: bool

## Entropy evaluator

In [54]:
n_above = np.random.randint(1, 100)
n_below = np.random.randint(1, 100)

In [55]:
def get_entropy(n_above, n_below):
    if n_above == 0 or n_below == 0: return 0.0
    p_above = n_above/(n_above+n_below)
    p_below = n_below/(n_above+n_below)
    return -(p_above*np.log2(p_above))-(p_below*np.log2(p_below))

In [56]:
gt_entropy = get_entropy(n_above, n_below)

In [57]:
import os.path,subprocess
from subprocess import STDOUT,PIPE

def compile_java(java_file):
    subprocess.check_call(['javac', java_file])

def execute_java(java_file, inputs):
    java_class,ext = os.path.splitext(java_file)
    cmd = ['java', java_class]
    for inp in inputs:
        cmd.append(inp)
    proc = subprocess.run(cmd, capture_output=True, text=True)
    return proc.stdout

compile_java('EvalEntropy.java')
entropy_result = float(execute_java('EvalEntropy.java', [str(n_above), str(n_below)]))

In [58]:
gt_entropy-entropy_result

0.0

In [59]:
np.abs(gt_entropy-entropy_result)<1e-5

True

## Separator evaluator

In [63]:
np.random.shuffle(gt)
eval_df = gt[:100]
features = eval_df[:, :-1]
features_f = features.flatten()
labels = eval_df[:, -1]

In [64]:
def eval_separation(features, labels, separation):
    features = np.array(features)
    labels = np.array(labels).reshape((len(labels), 1))
    data = np.hstack([features, labels])
    
    gains = {}
    best_gain = 0.0
    
    parent_entropy = get_entropy(np.sum(labels), len(labels)-np.sum(labels))    
    for j in range(features.shape[1]):
        uniques = np.unique(features[:, j])
        for u in uniques:
            sepa = "%d@%d"%(j,u)
            #print(sepa)
            split = np.logical_and(data.T[-1],data.T[1]<=u)
            #print(split)
            entropy = get_entropy(np.sum(split), len(split)-np.sum(split))
            gain = parent_entropy-entropy
            gains[sepa] = gain
            if gain>best_gain:
                best_gain = gain
    
    return not(best_gain > gains[separation])

In [65]:
#features = [[3,5],
#            [2,3],
#            [1,5],
#            [0,3]]
#features_f = np.array(features).flatten()
#labels = [1, 1, 0, 0]

compile_java('EvalSeparation.java')
command = [str(features.shape[0]),
           str(features.shape[1])]
for f in features_f:
    command.append(str(f))
for l in labels:
    command.append('0' if l==False else '1')
sep_result = execute_java('EvalSeparation.java', command)[:-1]

In [66]:
sep_result

'1@3'

In [67]:
eval_separation(features, labels, sep_result)

True