-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor main C++ function to avoid use "constant" memory and avoid new/delete #55
Changes from 1 commit
f5444a3
5d5338f
88e3625
a705de8
cff1cb6
603b6d4
de33a67
a458ed0
7d2e66c
9666eae
6fe3663
66d9b6e
05246f2
166f6e9
9cbc243
cf26901
c986021
ff3bf51
c8c11c0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -139,14 +139,17 @@ extern "C" | |
const uint64_t *comp1 = (const uint64_t *) one; | ||
const uint64_t *comp2 = (const uint64_t *) many; | ||
|
||
// TODO: Given that k is 10 by default, often 5 in practice, | ||
// and probably never ever more than 20 or so, the use of a | ||
// priority_queue here is expensive overkill. Better to just | ||
// store the scores in an array and do a linear search every | ||
// time. | ||
std::priority_queue<Node, std::vector<Node>, score_cmp> max_k_scores; | ||
|
||
uint32_t count_one = builtin_popcnt_unrolled_errata_manual(comp1); | ||
|
||
uint64_t combined[KEYWORDS]; | ||
|
||
double *all_scores = new double[n]; | ||
|
||
uint32_t max_popcnt_delta = 1024; | ||
if(threshold > 0) { | ||
max_popcnt_delta = calculate_max_difference(count_one, threshold); | ||
|
@@ -155,39 +158,32 @@ extern "C" | |
|
||
for (int j = 0; j < n; j++) { | ||
const uint64_t *current = comp2 + j * KEYWORDS; | ||
const uint32_t counts_many_j = counts_many[j]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It always suprises me what you have to help the compiler with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, this can be more or less important depending on the context. Here it's more about showing intent to fellow programmers than to the compiler. |
||
|
||
if(count_one > counts_many[j]){ | ||
current_delta = count_one - counts_many[j]; | ||
if (count_one > counts_many_j) { | ||
current_delta = count_one - counts_many_j; | ||
} else { | ||
current_delta = counts_many[j] - count_one; | ||
current_delta = counts_many_j - count_one; | ||
} | ||
|
||
if(current_delta <= max_popcnt_delta){ | ||
for (unsigned int i = 0 ; i < KEYWORDS; i++ ) { | ||
if (current_delta <= max_popcnt_delta) { | ||
for (int i = 0; i < (int)KEYWORDS; i++) { | ||
combined[i] = current[i] & comp1[i]; | ||
} | ||
|
||
uint32_t count_curr = builtin_popcnt_unrolled_errata_manual(combined); | ||
|
||
double score = 2 * count_curr / (double) (count_one + counts_many[j]); | ||
all_scores[j] = score; | ||
} else { | ||
// Skipping because popcount difference too large | ||
all_scores[j] = -1; | ||
} | ||
} | ||
|
||
for (int j = 0; j < n; j++) { | ||
|
||
if(all_scores[j] >= threshold) { | ||
max_k_scores.push(Node(j, all_scores[j])); | ||
} | ||
|
||
if(max_k_scores.size() > k) max_k_scores.pop(); | ||
// TODO: double precision is overkill for this | ||
// problem; just use float. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good |
||
double score = 2 * count_curr / (double) (count_one + counts_many_j); | ||
if (score >= threshold) { | ||
max_k_scores.push(Node(j, score)); | ||
if (max_k_scores.size() > k) | ||
max_k_scores.pop(); | ||
} | ||
} // else skip because popcount difference too large | ||
} | ||
|
||
delete[] all_scores; | ||
|
||
int i = 0; | ||
while (!max_k_scores.empty()) { | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This statement isn't (well shouldn't be) correct -
anonlink
needs to support computing all raw similarity scores.