forked from lintool/c-bfscan
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Scan1.c
92 lines (73 loc) · 2.23 KB
/
Scan1.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <string.h>
#include "heap.h"
#include "topics2011.h"
#include "topics2011_time.h"
// #include "topics_1000.h"
// #include "topics_1000_time.h"
#include "constants.h"
// Defined in another translation unit. main() calls it exactly once, before
// the timed region begins, so its cost is not included in the reported timings.
// NOTE(review): presumably populates the tf[] table read by the scoring loop -- confirm.
extern void init_tf();
/*
 * Brute-force scan over the whole collection for every topic.
 *
 * For each of the NUM_TOPICS topics, every document whose tweet id is not
 * greater than the topic's cutoff (topics2011_time[n]) is scored against the
 * topic's terms. The best TOP_K positively-scored documents are kept in a
 * min-heap (lowest kept score at the root) and printed, lowest score first,
 * one line per document. After all topics, wall-clock style timing based on
 * clock() is printed.
 *
 * Data layout as used below:
 *   - topics2011[n][1] is the number of query terms; the terms themselves
 *     occupy topics2011[n][2 .. 2 + topics2011[n][1] - 1].
 *   - collection_tf[] and tf[] are flat, parallel arrays; document i owns the
 *     doclengths_ordered[i] consecutive slots starting at `base`, where
 *     `base` is the running sum of doclengths_ordered[0 .. i-1].
 *
 * NOTE(review): the scoring expression looks like Dirichlet-smoothed query
 * likelihood with parameter MU -- confirm against the project docs.
 * NOTE(review): the heap is created with a NULL comparator, so key ordering
 * is whatever heap.h does by default -- confirm it orders float keys correctly.
 *
 * argc/argv are unused. Returns 0 on success; exits with EXIT_FAILURE if a
 * heap entry cannot be allocated.
 */
int main(int argc, const char* argv[]) {
    (void)argc;
    (void)argv;

    init_tf();

    clock_t begin, end;
    double time_spent;

    begin = clock();

    for (int n = 0; n < NUM_TOPICS; n++) {
        // printf("Processing topic %d...\n", topics2011[n][0]);
        heap h;
        heap_create(&h, 0, NULL);

        /* Out-parameters for heap_min / heap_delmin. */
        float *min_key;
        int *min_val;

        /* Offset of the current document's first slot in collection_tf[] / tf[]. */
        int base = 0;

        for (int i = 0; i < NUM_DOCS; i++) {
            /* Documents newer than the topic's cutoff are not scored, but
             * their slots must still be stepped over. */
            if (tweetids[i] > topics2011_time[n]) {
                base += doclengths_ordered[i];
                continue;
            }

            float score = 0;
            for (int j = 0; j < doclengths_ordered[i]; j++) {
                for (int t = 2; t < 2 + topics2011[n][1]; t++) {
                    if (collection_tf[base+j] == topics2011[n][t]) {
                        score += log(1 + tf[base+j]/(MU * (cf[collection_tf[base+j]] + 1) / (TOTAL_TERMS + 1))) + log(MU / (doclengths[i] + MU));
                    }
                }
            }

            if (score > 0) {
                int keep = 1;

                if (heap_size(&h) >= TOP_K) {
                    /* Heap is full: only a score that beats the current
                     * minimum may displace it. */
                    heap_min(&h, (void**)&min_key, (void**)&min_val);
                    if (score > *min_key) {
                        heap_delmin(&h, (void**)&min_key, (void**)&min_val);
                        /* The evicted entry was malloc'd by us and is no
                         * longer referenced by the heap. */
                        free(min_key);
                        free(min_val);
                    } else {
                        keep = 0;
                    }
                }

                if (keep) {
                    /* The heap stores pointers, so each entry needs its own
                     * heap-allocated key (score) and value (document index). */
                    int *docid = malloc(sizeof *docid);
                    float *scorez = malloc(sizeof *scorez);
                    if (docid == NULL || scorez == NULL) {
                        fprintf(stderr, "Scan1: out of memory\n");
                        exit(EXIT_FAILURE);
                    }
                    *docid = i;
                    *scorez = score;
                    heap_insert(&h, scorez, docid);
                }
            }

            base += doclengths_ordered[i];
        }

        /* Entries come out lowest score first, so the first one printed gets
         * the largest rank. Start from the number of entries actually held
         * (not TOP_K) so that the best document is always rank 1, even when
         * fewer than TOP_K documents matched. */
        int rank = heap_size(&h);
        while (heap_delmin(&h, (void**)&min_key, (void**)&min_val)) {
            printf("MB%02d Q0 %ld %d %f Scan1\n", (n+1), tweetids[*min_val], rank, *min_key);
            rank--;
            free(min_key);
            free(min_val);
        }

        heap_destroy(&h);
    }

    end = clock();
    time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
    printf("Total time = %f ms\n", time_spent * 1000);
    printf("Time per query = %f ms\n", (time_spent * 1000)/NUM_TOPICS);
    printf("Throughput: %f qps\n", NUM_TOPICS/time_spent);

    return 0;
}