/
scores.py
64 lines (53 loc) · 1.46 KB
/
scores.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from __future__ import print_function, division
__author__ = 'amrit'
import sys
# Ask the interpreter not to write .pyc files for modules imported from here on.
sys.dont_write_bytecode = True
from sklearn.metrics import auc
def subtotal(x):
    """Return the running (cumulative) sums of the items of ``x`` as a list.

    The k-th element of the result is the sum of the first k+1 items of
    ``x``; an empty input yields an empty list.
    """
    running = 0
    totals = []
    for value in x:
        running = running + value
        totals.append(running)
    return totals
def get_recall(true):
    """Return the cumulative recall after each item of ``true``.

    ``true`` is a sequence of labels in inspection order; items equal to 1
    count as positives. Element k of the result is the fraction of all
    positives found among the first k+1 items. When ``true`` contains no
    positives every entry is 0.0, and an empty input yields an empty list.
    """
    total_true = float(sum(1 for label in true if label == 1))
    hit = 0.0
    recall = []
    # Iterate the labels directly instead of ``xrange(len(true))``: ``xrange``
    # does not exist on Python 3, and this form runs on both major versions.
    for label in true:
        if label == 1:
            hit += 1
        recall.append(hit / total_true if total_true else 0.0)
    return recall
def _effort_auc(loc_share, labels, effort):
    """Area under the recall-vs-cumulative-effort curve, cut at ``effort``.

    ``loc_share`` holds each row's share of the total ``loc`` and ``labels``
    the matching bug labels, both in inspection order. Only the points whose
    cumulative share is <= ``effort`` are integrated. The result is rounded
    to 3 decimals. ``auc`` raises ``ValueError`` when fewer than two points
    remain; that error is left for the caller to handle.
    """
    cum_effort = subtotal(loc_share)
    recall = get_recall(labels)
    kept = [share for share in cum_effort if share <= effort]
    return round(auc(kept, recall[:len(kept)]), 3)


# pass the whole testing data with label
def get_popt(data, effort=0.2):
    """Score a prediction ranking against the optimal and worst rankings.

    Three recall-vs-effort curves are built, where effort is the cumulative
    share of ``loc``, and the area under each is taken up to ``effort``:

    * optimal    -- rows ordered by ``bug`` descending, then ``loc`` ascending
    * worst      -- the optimal ordering reversed
    * prediction -- rows ordered by ``prediction`` descending, then ``loc``
      ascending

    The result is ``(prediction - worst) / (optimal - worst)``.

    Parameters:
        data: the whole labelled testing set, a DataFrame with ``bug``,
            ``loc`` and ``prediction`` columns. NOTE: it is sorted IN PLACE,
            so on return the caller's frame is ordered by prediction.
        effort: fraction of total ``loc`` up to which the curves are
            integrated. The default, 0.2, is the cutoff this function has
            always used.

    Returns:
        The score rounded to 2 decimals. 0 is returned when the prediction
        curve has too few points to integrate, or when the optimal and worst
        areas coincide so the normalisation is undefined.

    Raises:
        ValueError: propagated from ``auc`` when the optimal curve has fewer
            than two points within ``effort``.
    """
    data.sort_values(by=["bug", "loc"], ascending=[False, True], inplace=True)
    x_sum = float(sum(data['loc']))
    loc_share = data['loc'].apply(lambda t: t / x_sum)

    # get AUC_optimal
    s_opt = _effort_auc(loc_share, data['bug'].values, effort)

    # get AUC_worst
    try:
        s_wst = _effort_auc(loc_share[::-1], data['bug'][::-1].values, effort)
    except ValueError:
        # Too few points within the cutoff: treat the worst-case area as 0.
        s_wst = 0

    # get AUC_prediction
    data.sort_values(by=["prediction", "loc"], ascending=[False, True], inplace=True)
    # Shares must be recomputed because the rows are now in a new order.
    loc_share = data['loc'].apply(lambda t: t / x_sum)
    try:
        s_m = _effort_auc(loc_share, data['bug'].values, effort)
    except ValueError:
        return 0

    spread = s_opt - s_wst
    if spread == 0:
        # Optimal and worst areas coincide, so the ratio is undefined; use
        # the same 0 fallback as the other failure path.
        return 0
    return round((s_m - s_wst) / spread, 2)