Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 349 lines (300 sloc) 8.218 kB
001a679 @abhinav-upadhyay apropos.c: Implemented a ranking function, picked it up from Sqlite's…
authored
1 #include <assert.h>
598e8f8 @abhinav-upadhyay Implemented tf-idf ranking
authored
2 #include <math.h>
85f6136 @abhinav-upadhyay apropos.c: Implemented remove_stopwords to remove stopwords from the …
authored
3 #include <search.h>
e653ec9 @abhinav-upadhyay apropos.c: Wrote a bare bones implementation of apropos.
authored
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7
8 #include "sqlite3.h"
9
dbd309f @abhinav-upadhyay apropos.c: Small refactoring. Now using macro DBPATH to represent pat…
authored
10 #define DBPATH "./apropos.db"
11
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
12 static double get_tf(int, const char *);
13 static double get_idf(const char *);
001a679 @abhinav-upadhyay apropos.c: Implemented a ranking function, picked it up from Sqlite's…
authored
14 static void rank_func(sqlite3_context *, int, sqlite3_value **);
85f6136 @abhinav-upadhyay apropos.c: Implemented remove_stopwords to remove stopwords from the …
authored
15 static void remove_stopwords(char **);
e653ec9 @abhinav-upadhyay apropos.c: Wrote a bare bones implementation of apropos.
authored
16 static int search(const char *);
17 static void usage(void);
18
/*
 * main --
 *	Takes the user query from argv[1], strips stop words from it and runs
 *	the search. Returns 0 on success and -1 if the search failed; exits
 *	with status 1 when the query consists only of stop words.
 */
int
main(int argc, char *argv[])
{
	char *scratch = NULL;	/* private, writable copy of argv[1] */
	char *query = NULL;	/* the query finally handed to search() */
	int rc;

	if (argc < 2)
		usage();

	/*
	 * remove_stopwords() may modify the string it is given, so hand it a
	 * private copy. argv[1] has to stay intact because it is the fallback
	 * query when stop word removal fails.
	 */
	scratch = strdup(argv[1]);
	query = scratch;
	if (query != NULL)
		remove_stopwords(&query);

	/*
	 * If any error occurred while copying or in remove_stopwords, we
	 * continue with the initial query input by the user.
	 */
	if (query == NULL)
		query = argv[1];
	else if (!strcmp(query, "")) {
		fprintf(stderr, "Try specifying more relevant keywords to get some matches\n");
		exit(1);
	}

	rc = search(query);

	/* Release the filtered query if remove_stopwords() allocated one. */
	if (query != argv[1] && query != scratch)
		free(query);
	free(scratch);

	if (rc < 0)
		return -1;
	return 0;
}
45
46 /*
47 * search --
48 * Opens apropos.db and performs the search for the keywords entered by the user
49 */
50 static int
51 search(const char *query)
52 {
53 sqlite3 *db = NULL;
54 int rc = 0;
55 int idx = -1;
56 char *sqlstr = NULL;
57 char *name = NULL;
87216ed @abhinav-upadhyay makemandb.c: Added function get_section() which extracts the section …
authored
58 char *section = NULL;
e653ec9 @abhinav-upadhyay apropos.c: Wrote a bare bones implementation of apropos.
authored
59 char *snippet = NULL;
60 sqlite3_stmt *stmt = NULL;
61
62 sqlite3_initialize();
dbd309f @abhinav-upadhyay apropos.c: Small refactoring. Now using macro DBPATH to represent pat…
authored
63 rc = sqlite3_open_v2(DBPATH, &db, SQLITE_OPEN_READONLY, NULL);
e653ec9 @abhinav-upadhyay apropos.c: Wrote a bare bones implementation of apropos.
authored
64 if (rc != SQLITE_OK) {
65 fprintf(stderr, "Database does not exist. Try running makemandb and "
66 "then try again\n");
67 sqlite3_close(db);
68 sqlite3_shutdown();
69 return -1;
70 }
71
72 sqlite3_extended_result_codes(db, 1);
73
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
74 rc = sqlite3_create_function(db, "rank_func", 2, SQLITE_ANY, NULL,
001a679 @abhinav-upadhyay apropos.c: Implemented a ranking function, picked it up from Sqlite's…
authored
75 rank_func, NULL, NULL);
76 if (rc != SQLITE_OK) {
77 fprintf(stderr, "Not able to register function\n");
78 sqlite3_close(db);
79 sqlite3_shutdown();
80 exit(-1);
81 }
82
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
83 sqlstr = "select docid, section, name, snippet(mandb, \"\033[1m\", \"\033[0m\", \"...\" ) "
84 "from mandb where mandb match :query order by rank_func(docid, :query) limit 10 OFFSET 0";
e653ec9 @abhinav-upadhyay apropos.c: Wrote a bare bones implementation of apropos.
authored
85
86 rc = sqlite3_prepare_v2(db, sqlstr, -1, &stmt, NULL);
87 if (rc != SQLITE_OK) {
88 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
89 sqlite3_close(db);
90 sqlite3_shutdown();
91 return -1;
92 }
93
94 idx = sqlite3_bind_parameter_index(stmt, ":query");
95 rc = sqlite3_bind_text(stmt, idx, query, -1, NULL);
96 if (rc != SQLITE_OK) {
97 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
98 sqlite3_finalize(stmt);
99 sqlite3_close(db);
100 sqlite3_shutdown();
101 return -1;
102 }
103
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
104 idx = sqlite3_bind_parameter_index(stmt, ":query");
105 rc = sqlite3_bind_text(stmt, idx, query, -1, NULL);
106 if (rc != SQLITE_OK) {
107 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
108 sqlite3_finalize(stmt);
109 sqlite3_close(db);
110 sqlite3_shutdown();
111 return -1;
112 }
113
e653ec9 @abhinav-upadhyay apropos.c: Wrote a bare bones implementation of apropos.
authored
114 while (sqlite3_step(stmt) == SQLITE_ROW) {
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
115 section = (char *) sqlite3_column_text(stmt, 1);
116 name = (char *) sqlite3_column_text(stmt, 2);
117 snippet = (char *) sqlite3_column_text(stmt, 3);
87216ed @abhinav-upadhyay makemandb.c: Added function get_section() which extracts the section …
authored
118 printf("%s(%s)\n%s\n\n", name, section, snippet);
e653ec9 @abhinav-upadhyay apropos.c: Wrote a bare bones implementation of apropos.
authored
119 }
120
121 sqlite3_finalize(stmt);
122 sqlite3_close(db);
123 sqlite3_shutdown();
124
125 return 0;
126
127 }
85f6136 @abhinav-upadhyay apropos.c: Implemented remove_stopwords to remove stopwords from the …
authored
/*
 * is_stopword --
 *	Returns 1 if the len bytes starting at word spell one of the stop
 *	words, 0 otherwise. The word does not need to be NUL terminated.
 */
static int
is_stopword(const char *word, size_t len)
{
	static const char *const stopwords[] = {
		"a", "about", "also", "all", "an", "another", "and", "are", "be",
		"how", "is", "new", "or", "the", "to", "what", "when", "which",
		"why"
	};
	size_t i;

	for (i = 0; i < sizeof(stopwords) / sizeof(stopwords[0]); i++) {
		if (strlen(stopwords[i]) == len &&
		    memcmp(stopwords[i], word, len) == 0)
			return 1;
	}
	return 0;
}

/*
 * remove_stopwords --
 *	Scans the query and removes any stop words from it.
 *	It walks the query word by word (words are separated by spaces) and
 *	checks each word against the list of stop words. In the end only the
 *	relevant keywords are left, joined by single spaces.
 *
 *	The string passed in is never modified. On success *query is replaced
 *	by a newly allocated string which the caller must free().
 *
 *	Error Cases:
 *	1. In case of any error, it will set the query to NULL.
 *	2. In case the query is found to be consisting only of stop words, it
 *	   will set the query to a blank string "" (also newly allocated).
 */
static void
remove_stopwords(char **query)
{
	const char *src;
	char *out;
	size_t outlen = 0;
	size_t toklen;

	if (query == NULL || *query == NULL)
		return;

	src = *query;

	/*
	 * The filtered query can never be longer than the input: every kept
	 * word is copied once and every separator written replaces at least
	 * one space of the input.
	 */
	out = malloc(strlen(src) + 1);
	if (out == NULL) {
		*query = NULL;
		return;
	}

	for (;;) {
		src += strspn(src, " ");	/* skip the separators */
		toklen = strcspn(src, " ");	/* length of the next word */
		if (toklen == 0)
			break;

		if (!is_stopword(src, toklen)) {
			if (outlen > 0)
				out[outlen++] = ' ';
			memcpy(out + outlen, src, toklen);
			outlen += toklen;
		}
		src += toklen;
	}

	out[outlen] = '\0';
	*query = out;
}
e653ec9 @abhinav-upadhyay apropos.c: Wrote a bare bones implementation of apropos.
authored
192
/*
 * usage --
 *	Write the one line synopsis to stderr and terminate with status 1.
 */
static void
usage(void)
{
	const char *progname = getprogname();

	(void)fprintf(stderr, "usage: %s query\n", progname);
	exit(1);
}
001a679 @abhinav-upadhyay apropos.c: Implemented a ranking function, picked it up from Sqlite's…
authored
205
206 /*
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
207 * rank_func
208 * Sqlite user defined function for ranking the documents.
209 * For each phrase of the query, it fetches the tf and idf from the db and adds them over.
210 * It computes the final rank, by multiplying tf and idf together.
001a679 @abhinav-upadhyay apropos.c: Implemented a ranking function, picked it up from Sqlite's…
authored
211 */
212 static void
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
213 rank_func(sqlite3_context *pCtx, int nVal, sqlite3_value **apval)
001a679 @abhinav-upadhyay apropos.c: Implemented a ranking function, picked it up from Sqlite's…
authored
214 {
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
215 int docid;
216 char *query, *temp;
217 double tf = 0.0;
6b8d3bd @abhinav-upadhyay Tweaked the ranking function a bit. Also updated the code of the rank…
authored
218 double idf = 0.0;
219 double score = 0.0;
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
220
6b8d3bd @abhinav-upadhyay Tweaked the ranking function a bit. Also updated the code of the rank…
authored
221 /* Check that the number of arguments passed to this function is correct.
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
222 ** If not, jump to wrong_number_args.
6b8d3bd @abhinav-upadhyay Tweaked the ranking function a bit. Also updated the code of the rank…
authored
223 */
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
224 if( nVal != 2 ) {
225 fprintf(stderr, "nval != ncol\n");
6b8d3bd @abhinav-upadhyay Tweaked the ranking function a bit. Also updated the code of the rank…
authored
226 goto wrong_number_args;
227 }
228
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
229 docid = (int)sqlite3_value_blob(apval[0]);
230 query = strdup((char *) sqlite3_value_blob(apval[1]));
6b8d3bd @abhinav-upadhyay Tweaked the ranking function a bit. Also updated the code of the rank…
authored
231
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
232 for (temp = strtok(query, " "); temp; temp = strtok(NULL, " ")) {
233 tf += get_tf(docid, temp);
234 idf += get_idf(temp);
6b8d3bd @abhinav-upadhyay Tweaked the ranking function a bit. Also updated the code of the rank…
authored
235 }
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
236
237 score = tf * idf;
001a679 @abhinav-upadhyay apropos.c: Implemented a ranking function, picked it up from Sqlite's…
authored
238
6b8d3bd @abhinav-upadhyay Tweaked the ranking function a bit. Also updated the code of the rank…
authored
239 sqlite3_result_double(pCtx, score);
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
240 free(query);
6b8d3bd @abhinav-upadhyay Tweaked the ranking function a bit. Also updated the code of the rank…
authored
241 return;
001a679 @abhinav-upadhyay apropos.c: Implemented a ranking function, picked it up from Sqlite's…
authored
242
6b8d3bd @abhinav-upadhyay Tweaked the ranking function a bit. Also updated the code of the rank…
authored
243 /* Jump here if the wrong number of arguments are passed to this function */
001a679 @abhinav-upadhyay apropos.c: Implemented a ranking function, picked it up from Sqlite's…
authored
244 wrong_number_args:
6b8d3bd @abhinav-upadhyay Tweaked the ranking function a bit. Also updated the code of the rank…
authored
245 sqlite3_result_error(pCtx, "wrong number of arguments to function rank()", -1);
001a679 @abhinav-upadhyay apropos.c: Implemented a ranking function, picked it up from Sqlite's…
authored
246 }
598e8f8 @abhinav-upadhyay Implemented tf-idf ranking
authored
247
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
248 static double
249 get_tf(int docid, const char *term)
598e8f8 @abhinav-upadhyay Implemented tf-idf ranking
authored
250 {
251 sqlite3 *db = NULL;
252 int rc = 0;
253 int idx = -1;
254 char *sqlstr = NULL;
255 sqlite3_stmt *stmt = NULL;
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
256 double ret_val = 0.0;
598e8f8 @abhinav-upadhyay Implemented tf-idf ranking
authored
257
258 sqlite3_initialize();
259 rc = sqlite3_open_v2(DBPATH, &db, SQLITE_OPEN_READONLY, NULL);
260 if (rc != SQLITE_OK) {
261 sqlite3_close(db);
262 sqlite3_shutdown();
263 return 0;
264 }
265
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
266 sqlstr = "select tf from mandb_tf where docid = :docid and term = :term";
598e8f8 @abhinav-upadhyay Implemented tf-idf ranking
authored
267 rc = sqlite3_prepare_v2(db, sqlstr, -1, &stmt, NULL);
268 if (rc != SQLITE_OK) {
269 sqlite3_close(db);
270 sqlite3_shutdown();
271 return 0;
272 }
273
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
274 idx = sqlite3_bind_parameter_index(stmt, ":docid");
275 rc = sqlite3_bind_int(stmt, idx, docid);
276 if (rc != SQLITE_OK) {
277 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
278 sqlite3_finalize(stmt);
279 sqlite3_close(db);
280 sqlite3_shutdown();
281 return -1;
282 }
283
284 idx = sqlite3_bind_parameter_index(stmt, ":term");
285 rc = sqlite3_bind_text(stmt, idx, term, -1, NULL);
286 if (rc != SQLITE_OK) {
287 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
288 sqlite3_finalize(stmt);
289 sqlite3_close(db);
290 sqlite3_shutdown();
291 return 0.0;
292 }
293
598e8f8 @abhinav-upadhyay Implemented tf-idf ranking
authored
294 if (sqlite3_step(stmt) == SQLITE_ROW) {
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
295 ret_val = (double) sqlite3_column_double(stmt, 0);
598e8f8 @abhinav-upadhyay Implemented tf-idf ranking
authored
296 }
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
297
298 sqlite3_finalize(stmt);
299 sqlite3_close(db);
300 sqlite3_shutdown();
301 return ret_val;
302 }
598e8f8 @abhinav-upadhyay Implemented tf-idf ranking
authored
303
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
304 static double
305 get_idf(const char *term)
306 {
307 sqlite3 *db = NULL;
308 int rc = 0;
309 int idx = -1;
310 char *sqlstr = NULL;
311 sqlite3_stmt *stmt = NULL;
312 double ret_val = 0.0;
313
314 sqlite3_initialize();
315 rc = sqlite3_open_v2(DBPATH, &db, SQLITE_OPEN_READONLY, NULL);
316 if (rc != SQLITE_OK) {
317 sqlite3_close(db);
318 sqlite3_shutdown();
319 return 0.0;
320 }
321
322 sqlstr = "select idf from mandb_idf where term = :term";
323 rc = sqlite3_prepare_v2(db, sqlstr, -1, &stmt, NULL);
324 if (rc != SQLITE_OK) {
325 sqlite3_close(db);
326 sqlite3_shutdown();
327 return 0.0;
328 }
329
330 idx = sqlite3_bind_parameter_index(stmt, ":term");
331 rc = sqlite3_bind_text(stmt, idx, term, -1, NULL);
332 if (rc != SQLITE_OK) {
333 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
334 sqlite3_finalize(stmt);
335 sqlite3_close(db);
336 sqlite3_shutdown();
337 return 0.0;
338 }
339
340 if (sqlite3_step(stmt) == SQLITE_ROW) {
341 ret_val = (double) sqlite3_column_double(stmt, 0);
342 }
343
344 sqlite3_finalize(stmt);
598e8f8 @abhinav-upadhyay Implemented tf-idf ranking
authored
345 sqlite3_close(db);
346 sqlite3_shutdown();
76817d1 @abhinav-upadhyay Modified the ranking function to fetch the tf and idf from the databa…
authored
347 return ret_val;
598e8f8 @abhinav-upadhyay Implemented tf-idf ranking
authored
348 }
Something went wrong with that request. Please try again.