/
plot.py
159 lines (137 loc) · 5.69 KB
/
plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import os

import matplotlib.pyplot as plt

from helpers import subtotal, get_recall, auc
# Metric offsets, used as NEGATIVE indices into a performance row
# (e.g. ``row[-FPR]`` and ``row[-REC]`` give the values plotted on the
# "False Alarm Rates" and "Recall" axes).
# NOTE(review): the remaining names presumably abbreviate precision,
# specificity, NPV, accuracy and F1 -- confirm against the code that builds
# the performance rows.
PRE = 7
REC = 6
SPEC = 5
FPR = 4
NPV = 3
ACC = 2
F1 = 1

# Shared colour palette (hex RGB strings), indexed by position.
COLORS = [
    "#800000",
    "#6B8E23",
    "#0000CD",
    "#FFFF00",
    "#8A2BE2",
    "#00FF00",
    "#00FFFF",
    "#FF00FF",
]

# Shared matplotlib marker specs, indexed by position; note that the second
# entry is an integer marker code, not a string.
MARKERS = ["v", 2, ",", "h", ">", "s", "*", "p", "8"]
def plotROC(fft, soa, img_path="~/tmp"):
    """Plot the ROC points of every tree in ``fft`` next to reference learners.

    Each tree becomes one point at (``row[-FPR]``, ``row[-REC]``) taken from
    ``fft.performance_on_test``; the tree at index ``fft.best`` is drawn in
    red and annotated "B_FFT".  The first four entries of ``soa.names`` /
    ``soa.performances`` are plotted with the same two offsets.

    Side effect: ``fft.roc`` is replaced by a list with one ``[x, y]`` pair
    per tree (``plot_compare`` reads that attribute).

    Args:
        fft: object exposing ``criteria``, ``data_name``, ``tree_cnt``,
            ``best`` and ``performance_on_test``.
        soa: object exposing ``names`` and ``performances`` with at least
            four entries each.
        img_path: where the figure is saved.  A leading ``~`` in a string or
            path-like value is expanded to the user's home directory.

    Raises:
        KeyError: if ``fft.criteria`` is not one of the known criteria.
    """
    fig, ax = plt.subplots()
    ax.set_title('ROC: FFT splits on ' + fft.criteria + ' | Data ' + fft.data_name)
    ax.set_xlabel("False Alarm Rates")
    ax.set_ylabel("Recall")
    ax.set_xlim(-0.05, 1.05)
    ax.set_ylim(-0.05, 1.05)

    # Dotted diagonal drawn as 1000 small scatter points.
    diagonal = [0.001 * i for i in range(1000)]
    ax.scatter(diagonal, diagonal, s=4)

    # The split criterion selects the colour shared by all non-best trees.
    # "InfoGain" is accepted as well because plot_compare() expects FFTs
    # built with it, and those get their ``roc`` attribute from here.
    criteria_index = {
        "Accuracy": 0,
        "Dist2Heaven": 1,
        "Gini": 2,
        "LOC_AUC": 3,
        "InfoGain": 3,
    }
    k = criteria_index[fft.criteria]
    best = fft.best

    # Plot FFT performances and cache them on the fft object.
    fft.roc = [None] * fft.tree_cnt
    roc = fft.roc
    # Only the first non-best tree carries the legend label, so the legend
    # gets exactly one "FFT" entry and a single-tree FFT does not fail.
    tree_label = "FFT"
    for i in range(fft.tree_cnt):
        metric = fft.performance_on_test[i][:11]
        roc[i] = [metric[-FPR], metric[-REC]]
        if i == best:
            continue
        ax.scatter(roc[i][0], roc[i][1], c=COLORS[k], s=100, label=tree_label)
        tree_label = None
        ax.annotate(i, (roc[i][0], roc[i][1]))

    # Plot the best FFT in red.
    ax.scatter(roc[best][0], roc[best][1], c='r', marker=MARKERS[0], s=100, label="Best_FFT")
    ax.annotate("B_FFT", (roc[best][0], roc[best][1]))

    # Plot state-of-the-art performances; they use the upper half of the
    # shared colour/marker tables so they never clash with the FFT colours.
    for i in range(4):
        point = (soa.performances[i][-FPR], soa.performances[i][-REC])
        ax.scatter(point[0], point[1], s=120,
                   c=COLORS[i + 4], marker=MARKERS[i + 4], label=soa.names[i])
        ax.annotate(soa.names[i], point)

    legend = ax.legend(loc='lower right', shadow=True, fontsize='small')
    # Put a nicer background color on the legend.
    legend.get_frame().set_facecolor('#CEE5DD')

    # savefig() does not expand "~" itself, so do it here; file-like targets
    # are passed through untouched.
    if isinstance(img_path, (str, os.PathLike)):
        img_path = os.path.expanduser(img_path)
    fig.savefig(img_path)
    plt.close(fig)
def _auc_within_effort(xx, yy, cutoff=0.2):
    """Return ``auc`` over the ``xx`` values <= ``cutoff`` and as many leading ``yy`` values, rounded to 3 places."""
    kept = [v for v in xx if v <= cutoff]
    return round(auc(kept, yy[:len(kept)]), 3)


def plotLOC(data, learners, names, img_path="~/tmp"):
    """Plot "%code" versus "%bug detection" curves for several learners.

    Three kinds of curves are drawn: an "Optimal" ordering (rows sorted by
    ``bug`` descending, then ``loc`` ascending), the "Worst" ordering (the
    exact reverse), and one curve per learner with rows sorted by that
    learner's prediction.  For every learner the area under the first 20%
    of the x axis is rescaled as ``(s - worst) / (optimal - worst)``.

    ``data`` is modified in place: it is re-sorted several times and gains a
    ``prediction`` column.  Learners whose name does not start with "FFT"
    are given ``data.iloc[:, :-2]`` as features.
    NOTE(review): that slice only strips both the label and the
    ``prediction`` column once ``prediction`` exists, i.e. it appears to
    assume an FFT learner comes first -- confirm with callers.

    Args:
        data: pandas DataFrame with at least ``loc`` and ``bug`` columns.
        learners: fitted models with a ``predict`` method; ``learners[0]``
            must expose ``data_name`` (used in the title).
        names: display names, parallel to ``learners``.
        img_path: where the figure is saved.  A leading ``~`` in a string or
            path-like value is expanded to the user's home directory.

    Returns:
        dict mapping each learner name to its rescaled score, plus a
        ``'data'`` entry holding the ``data_name`` of the FFT learner(s).
    """
    fig, ax = plt.subplots()
    ax.set_title('LOC: Data ' + learners[0].data_name)
    ax.set_xlabel("%code")
    ax.set_ylabel("%bug detection")
    ax.set_xlim(-0.05, 1.05)
    ax.set_ylim(-0.05, 1.05)

    # Dotted diagonal drawn as 1000 small scatter points.
    diagonal = [0.001 * i for i in range(1000)]
    ax.scatter(diagonal, diagonal, s=4)

    data.sort_values(by=["bug", "loc"], ascending=[False, True], inplace=True)
    x_sum = float(sum(data['loc']))
    x = data['loc'].apply(lambda t: t / x_sum)
    markers = ['*', '.', 'o', '+', 'x', '_', '|']
    base = 5  # style slot (counted from the end) used by the optimal curve

    # Plot optimal.
    xx = subtotal(x)
    yy = get_recall(data['bug'].values)
    ax.plot(xx, yy, markersize=10, color=COLORS[-base], marker=markers[-base], label="Optimal")
    s_opt = _auc_within_effort(xx, yy)

    # Plot worst: same rows in reverse order.
    xx = subtotal(x[::-1])
    yy = get_recall(data['bug'][::-1].values)
    ax.plot(xx, yy, markersize=10, color=COLORS[-base - 1], marker=markers[-base - 1], label="Worst")
    s_wst = _auc_within_effort(xx, yy)

    # NOTE(review): if the optimal and worst areas coincide the rescaling
    # below divides by zero -- decide what callers should get in that case.
    scores = {}
    for i, clf in enumerate(learners):
        if names[i].startswith("FFT"):
            y = clf.predict(data)
            scores.update({'data': clf.data_name})
        else:
            y = clf.predict(data.iloc[:, :-2]).tolist()
        data['prediction'] = y
        data.sort_values(by=["prediction", "loc"], ascending=[False, True], inplace=True)
        x = data['loc'].apply(lambda t: t / x_sum)
        xx = subtotal(x)
        yy = get_recall(data['bug'].values)
        # Wrap the style index so long learner lists cycle through the
        # tables instead of raising IndexError; for every index that used to
        # be valid this picks the same entry as the plain negative index.
        color = COLORS[(-i) % len(COLORS)]
        marker = markers[(-i) % len(markers)]
        ax.plot(xx, yy, markersize=10, color=color, marker=marker, label=names[i])
        s_m = _auc_within_effort(xx, yy)
        scores[names[i]] = (s_m - s_wst) / (s_opt - s_wst)

    legend = ax.legend(loc='lower right', shadow=True, fontsize='small')
    # Put a nicer background color on the legend.
    legend.get_frame().set_facecolor('#CEE5DD')

    # savefig() does not expand "~" itself, so do it here; file-like targets
    # are passed through untouched.
    if isinstance(img_path, (str, os.PathLike)):
        img_path = os.path.expanduser(img_path)
    fig.savefig(img_path)
    plt.close(fig)
    return scores
def plot_compare(fft1, fft2, img_path="~/tmp"):
    """Overlay the ROC points of two FFTs on one figure.

    Both objects must already carry a ``roc`` attribute (a list of
    ``[x, y]`` pairs, one per tree); in this file ``plotROC`` is what sets
    it.  The first FFT is drawn with large '*' markers and the second with
    small 'o' markers, so the two stay distinguishable even when they were
    built with the same criterion.  The visual style is chosen by position
    (first/second) rather than by criterion name: the style tables only
    have two slots, so any criterion index above 1 would overflow the
    marker table and produce a negative marker size.

    Args:
        fft1: first FFT; needs ``roc``, ``best``, ``tree_cnt``, ``criteria``
            and ``data_name`` (the latter is used in the title).
        fft2: second FFT; needs ``roc``, ``best``, ``tree_cnt`` and
            ``criteria``.
        img_path: where the figure is saved.  A leading ``~`` in a string or
            path-like value is expanded to the user's home directory.
    """
    fig, ax = plt.subplots()
    ax.set_title('FFT Comparison | Data: ' + fft1.data_name)
    ax.set_xlabel("False Alarm Rates")
    ax.set_ylabel("Recall")
    ax.set_xlim(-0.05, 1.05)
    ax.set_ylim(-0.05, 1.05)

    # Dotted diagonal drawn as 1000 small scatter points.
    diagonal = [0.001 * i for i in range(1000)]
    ax.scatter(diagonal, diagonal, s=4)

    markers = ['*', 'o']
    # colors[k] is used for ordinary trees, colors[-k - 1] for the best one.
    colors = ['#800000', '#6B8E23', '#65ff00', '#ff0000']

    for k, fft in enumerate([fft1, fft2]):
        roc = fft.roc
        best = fft.best
        size = 400 - k * 300  # 400 for the first FFT, 100 for the second

        # Only the first non-best tree carries the legend label, so each FFT
        # contributes exactly one entry and a single-tree FFT does not fail.
        tree_label = "FFT(" + fft.criteria + ")"
        for i in range(fft.tree_cnt):
            if i == best:
                continue
            ax.scatter(roc[i][0], roc[i][1], marker=markers[k], c=colors[k],
                       s=size, label=tree_label)
            tree_label = None
            ax.annotate(i, (roc[i][0], roc[i][1]))

        # Plot the best FFT in its own colour.
        ax.scatter(roc[best][0], roc[best][1], c=colors[-k - 1],
                   marker=markers[k], s=size, label="Best_FFT(" + fft.criteria + ")")
        ax.annotate("B_FFT(" + fft.criteria[0] + ")", (roc[best][0], roc[best][1]))

    legend = ax.legend(loc='lower right', shadow=True, fontsize='small')
    # Put a nicer background color on the legend.
    legend.get_frame().set_facecolor('#CEE5DD')

    # savefig() does not expand "~" itself, so do it here; file-like targets
    # are passed through untouched.
    if isinstance(img_path, (str, os.PathLike)):
        img_path = os.path.expanduser(img_path)
    fig.savefig(img_path)
    plt.close(fig)