eval.py
"""Evaluation utilities: rank fusion, precision/recall/F1 at depth, and plotting."""
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.font_manager as fm

def find_mix(repo_name, weighted_methods, use_rank=True, bsorted=True):
    """Fuse several similarity methods into one ranking.

    weighted_methods is a list of (func, weight) pairs, where each func maps
    a repo name to a list of (repo, score) pairs. Set bsorted=False if a
    method does not already return its results sorted by descending score.
    """
    repo_sims_dict = {}
    for f, w in weighted_methods:
        sims = f(repo_name)
        if not bsorted:
            sims = sorted(sims, key=lambda x: -x[1])
        # optionally replace raw scores with 1-based ranks
        if use_rank:
            sims = [(r, i + 1) for i, (r, s) in enumerate(sims)]
        for r, s in sims:
            repo_sims_dict[r] = repo_sims_dict.get(r, 0) + w * s
    # with ranks, lower is better (ascending); with scores, higher is better (descending)
    repo_score_list = sorted(repo_sims_dict.items(), key=lambda x: x[1] if use_rank else -x[1])
    repo_ordered_list = [r for r, s in repo_score_list]
    return repo_ordered_list
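
# Usage sketch (f1 and f2 are hypothetical similarity methods, weighted 70/30;
# each must return a list of (repo, score) pairs):
#   ranking = find_mix('owner/repo', [(f1, 0.7), (f2, 0.3)], use_rank=True, bsorted=False)
#   ranking[0] is then the repo with the best (lowest) weighted rank sum.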

# return (precision, recall, F1 score) at cutoff depth k
def eval(k, our_repos, showcase_repos):
    # cast to sets for intersection
    showcase_repos = set(showcase_repos)
    our_repos = set(our_repos[:k])
    # number of repos the two lists have in common
    num_commons = float(len(our_repos.intersection(showcase_repos)))
    # precision: repos in common / number of repos we returned
    precision = num_commons / k
    # recall: repos in common / number of showcase repos
    recall = num_commons / len(showcase_repos)
    # F1 score: harmonic mean of precision and recall (0 when both are 0)
    F1 = 2 * precision * recall / (precision + recall) if precision + recall != 0 else 0
    return precision, recall, F1
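
# Worked example: eval(3, ['a', 'b', 'c', 'd'], ['b', 'c', 'e'])
# -> our top-3 is {a, b, c}, its overlap with {b, c, e} is {b, c}, so
#    precision = 2/3, recall = 2/3, and F1 = 2/3.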

def plot_precision_recall(precision_list, recall_list, title=""):
    # precision vs. recall, one point per cutoff depth
    fig = plt.figure()
    plt.plot(recall_list, precision_list, 'b.-', label='precision-recall')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title(title)
    plt.legend(loc='upper right')

def plot_f1score(f1score_list, precision_list, recall_list, title=""):
    # precision, recall, and F1 score as functions of the cutoff depth
    depths = range(1, len(f1score_list) + 1)
    fig = plt.figure()
    plt.plot(depths, precision_list, 'b-', label="Precision")
    plt.plot(depths, recall_list, 'r-', label="Recall")
    plt.plot(depths, f1score_list, 'm-', label="F1 score")
    plt.xlabel('Depth')
    plt.ylabel('Scores')
    plt.title(title)
    plt.legend(loc='upper right')

def plot_comparison(eval_funs, methods_short=[], methods_long=[], title=""):
    # bar chart of the best F1 score achieved by each evaluation function
    length = len(eval_funs)
    max_f1score_list = []
    for f in eval_funs:
        plist, rlist, f1list = f()
        f1score_max = max(f1list) if len(f1list) > 0 else 0
        max_f1score_list.append(f1score_max)
    bar_width = 0.7
    index = np.arange(length)
    color = cm.rainbow(np.linspace(0, 1, length))
    fig = plt.figure()
    # align='edge' keeps the tick positions below correct across matplotlib versions
    barlist = plt.bar(index + bar_width, max_f1score_list, width=bar_width,
                      color=color, align='edge')
    # place each tick at the center of its bar
    plt.xticks(index + 1.5 * bar_width, methods_short)
    plt.xlabel('Methods')
    plt.ylabel('Best F1 Scores')
    plt.title(title)
    fontP = fm.FontProperties()
    fontP.set_size('small')
    lgd = plt.legend(barlist, [": ".join(t) for t in zip(methods_short, methods_long)],
                     loc='upper center', bbox_to_anchor=(0.5, 1.25),
                     ncol=2, fancybox=True, shadow=True, prop=fontP)
    fig.savefig('image_output.png', dpi=300, format='png',
                bbox_extra_artists=(lgd,), bbox_inches='tight')
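
# Usage sketch: each entry of eval_funs is a zero-argument callable returning
# (precision_list, recall_list, f1score_list); for example (an assumption, not
# part of this module): functools.partial(eval_group_repos, group_repos, method).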

# test_repo_name = 'lalala/lalala'
# compared_repos = ['repo1', 'repo2', ...]
# find_similar_repos_func maps a repo name to a ranked list of repo names,
# e.g. a fusion built with find_mix over [(f1, 0.5), (f2, 0.5)]
# returns three lists: precision_list, recall_list, f1score_list (one entry per depth)
def eval_single_repo(test_repo_name, compared_repos, find_similar_repos_func):
    precision_list = []
    recall_list = []
    f1score_list = []
    # ranked_similar_repos: list of repo names in sorted order
    ranked_similar_repos = find_similar_repos_func(test_repo_name)
    print(test_repo_name)
    # evaluate at every cutoff depth from 1 up to the full ranking
    for depth in range(1, len(ranked_similar_repos) + 1):
        precision, recall, f1score = eval(depth, ranked_similar_repos, compared_repos)
        precision_list.append(precision)
        recall_list.append(recall)
        f1score_list.append(f1score)
    return precision_list, recall_list, f1score_list

# memoize results keyed by (group, method), since group evaluation is expensive
cache = {}
def eval_group_repos(group_repos, find_similar_repos_func):
    cache_key = str(group_repos) + str(find_similar_repos_func)
    if cache_key in cache:
        return cache[cache_key]
    sum_precision_list = []
    sum_recall_list = []
    sum_f1score_list = []
    empty_sum_lists = True
    # each repo in the group is scored against the rest of the group
    for current_repo in group_repos:
        precision_list, recall_list, f1score_list = eval_single_repo(
            current_repo, group_repos, find_similar_repos_func)
        if empty_sum_lists:
            sum_precision_list = precision_list
            sum_recall_list = recall_list
            sum_f1score_list = f1score_list
            empty_sum_lists = False
        else:
            sum_precision_list = [a + b for a, b in zip(sum_precision_list, precision_list)]
            sum_recall_list = [a + b for a, b in zip(sum_recall_list, recall_list)]
            sum_f1score_list = [a + b for a, b in zip(sum_f1score_list, f1score_list)]
    # average the per-depth scores over the group
    group_size = len(group_repos)
    mean_precision_list = [x / group_size for x in sum_precision_list]
    mean_recall_list = [x / group_size for x in sum_recall_list]
    mean_f1score_list = [x / group_size for x in sum_f1score_list]
    cache[cache_key] = (mean_precision_list, mean_recall_list, mean_f1score_list)
    return mean_precision_list, mean_recall_list, mean_f1score_list

if __name__ == "__main__":
pass
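    # Minimal smoke test, a sketch only: the toy group and both similarity
    # methods below are assumptions, not part of the original pipeline.
    toy_group = ['u/a', 'u/b', 'u/c', 'u/d']

    def sim_forward(repo_name):
        # hypothetical method: higher score for repos earlier in the group list
        others = [r for r in toy_group if r != repo_name]
        return [(r, float(len(others) - i)) for i, r in enumerate(others)]

    def sim_backward(repo_name):
        # hypothetical method: higher score for repos later in the group list
        others = [r for r in toy_group if r != repo_name]
        return [(r, float(i + 1)) for i, r in enumerate(others)]

    def mixed(repo_name):
        # 50/50 rank fusion; bsorted=False because sim_backward is unsorted
        return find_mix(repo_name, [(sim_forward, 0.5), (sim_backward, 0.5)],
                        use_rank=True, bsorted=False)

    p_list, r_list, f1_list = eval_group_repos(toy_group, mixed)
    plot_f1score(f1_list, p_list, r_list, title="Toy 50/50 mix")
    plt.show()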