Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
AJ ONeal
committed
Jul 21, 2012
0 parents
commit 84b7eff
Showing
6 changed files
with
196 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,3 @@ | |||
*.sqlext | |||
*.sqlite3 | |||
*.sw* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,13 @@ | |||
all: linux | |||
#Linux | |||
|
|||
linux: | |||
gcc -shared -fPIC -Isqlite3 -o fts4-rank.sqlext fts4-rank.c | |||
#Using GCC on Mac OSX | |||
|
|||
darwin: | |||
gcc -bundle -fPIC -Isqlite3 -o fts4-rank.sqlext fts4-rank.c | |||
|
|||
win32: | |||
#Microsoft Tools on Windows | |||
cl /Gd fts4-rank.c /I sqlite3 /DDLL /LD /link /export:sqlite3_extension_init /out:fts4-rank.sqlext |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,20 @@ | |||
Loadable Extensions in SQLite | |||
=== | |||
|
|||
There is a wonderful [example of a rank extension](http://www.sqlite.org/fts3.html#appendix_a) provided in the sqlite3 documentation for full-text search. | |||
However, the example requires building an extension which is not explicitly detailed in the example. | |||
This repository is for the sole purpose of building the un-adulterated example from the docs. | |||
|
|||
git clone http://github.com/coolaj86/sqlite3-fts4-rank.git | |||
cd sqlite3-fts4-rank | |||
make linux # or darwin (OSX) or win32 (Visual Studio) | |||
|
|||
sqlite3 --init test-fts4.sql ':memory:' | |||
|
|||
See Also | |||
=== | |||
|
|||
* <http://www.sqlite.org/cvstrac/wiki?p=LoadableExtensions> | |||
* <http://www.sqlite.org/c3ref/create_function.html> | |||
* <http://www.sqlite.org/fts3.html#appendix_a> | |||
* <http://www.sqlite.org/c3ref/load_extension.html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,103 @@ | |||
#include <sqlite3ext.h> | |||
#include <assert.h> | |||
SQLITE_EXTENSION_INIT1 | |||
|
|||
/* | |||
** SQLite user defined function to use with matchinfo() to calculate the | |||
** relevancy of an FTS match. The value returned is the relevancy score | |||
** (a real value greater than or equal to zero). A larger value indicates | |||
** a more relevant document. | |||
** | |||
** The overall relevancy returned is the sum of the relevancies of each | |||
** column value in the FTS table. The relevancy of a column value is the | |||
** sum of the following for each reportable phrase in the FTS query: | |||
** | |||
** (<hit count> / <global hit count>) * <column weight> | |||
** | |||
** where <hit count> is the number of instances of the phrase in the | |||
** column value of the current row and <global hit count> is the number | |||
** of instances of the phrase in the same column of all rows in the FTS | |||
** table. The <column weight> is a weighting factor assigned to each | |||
** column by the caller (see below). | |||
** | |||
** The first argument to this function must be the return value of the FTS | |||
** matchinfo() function. Following this must be one argument for each column | |||
** of the FTS table containing a numeric weight factor for the corresponding | |||
** column. Example: | |||
** | |||
** CREATE VIRTUAL TABLE documents USING fts3(title, content) | |||
** | |||
** The following query returns the docids of documents that match the full-text | |||
** query <query> sorted from most to least relevant. When calculating | |||
** relevance, query term instances in the 'title' column are given twice the | |||
** weighting of those in the 'content' column. | |||
** | |||
** SELECT docid FROM documents | |||
** WHERE documents MATCH <query> | |||
** ORDER BY rank(matchinfo(documents), 1.0, 0.5) DESC | |||
*/ | |||
static void rankfunc(sqlite3_context *pCtx, int nVal, sqlite3_value **apVal){ | |||
int *aMatchinfo; /* Return value of matchinfo() */ | |||
int nCol; /* Number of columns in the table */ | |||
int nPhrase; /* Number of phrases in the query */ | |||
int iPhrase; /* Current phrase */ | |||
double score = 0.0; /* Value to return */ | |||
|
|||
assert( sizeof(int)==4 ); | |||
|
|||
/* Check that the number of arguments passed to this function is correct. | |||
** If not, jump to wrong_number_args. Set aMatchinfo to point to the array | |||
** of unsigned integer values returned by FTS function matchinfo. Set | |||
** nPhrase to contain the number of reportable phrases in the users full-text | |||
** query, and nCol to the number of columns in the table. | |||
*/ | |||
if( nVal<1 ) goto wrong_number_args; | |||
aMatchinfo = (unsigned int *)sqlite3_value_blob(apVal[0]); | |||
nPhrase = aMatchinfo[0]; | |||
nCol = aMatchinfo[1]; | |||
if( nVal!=(1+nCol) ) goto wrong_number_args; | |||
|
|||
/* Iterate through each phrase in the users query. */ | |||
for(iPhrase=0; iPhrase<nPhrase; iPhrase++){ | |||
int iCol; /* Current column */ | |||
|
|||
/* Now iterate through each column in the users query. For each column, | |||
** increment the relevancy score by: | |||
** | |||
** (<hit count> / <global hit count>) * <column weight> | |||
** | |||
** aPhraseinfo[] points to the start of the data for phrase iPhrase. So | |||
** the hit count and global hit counts for each column are found in | |||
** aPhraseinfo[iCol*3] and aPhraseinfo[iCol*3+1], respectively. | |||
*/ | |||
int *aPhraseinfo = &aMatchinfo[2 + iPhrase*nCol*3]; | |||
for(iCol=0; iCol<nCol; iCol++){ | |||
int nHitCount = aPhraseinfo[3*iCol]; | |||
int nGlobalHitCount = aPhraseinfo[3*iCol+1]; | |||
double weight = sqlite3_value_double(apVal[iCol+1]); | |||
if( nHitCount>0 ){ | |||
score += ((double)nHitCount / (double)nGlobalHitCount) * weight; | |||
} | |||
} | |||
} | |||
|
|||
sqlite3_result_double(pCtx, score); | |||
return; | |||
|
|||
/* Jump here if the wrong number of arguments are passed to this function */ | |||
wrong_number_args: | |||
sqlite3_result_error(pCtx, "wrong number of arguments to function rank()", -1); | |||
} | |||
|
|||
/* SQLite invokes this routine once when it loads the extension. | |||
** Create new functions, collating sequences, and virtual table | |||
** modules here. This is usually the only exported symbol in | |||
** the shared library. | |||
*/ | |||
int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi) { | |||
SQLITE_EXTENSION_INIT2(pApi) | |||
|
|||
// rank call | |||
sqlite3_create_function(db, "rank", 3, SQLITE_ANY, 0, rankfunc, 0, 0); | |||
return 0; | |||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,18 @@ | |||
.load fts4-rank.sqlext | |||
|
|||
-- This example (and all others in this section) assumes the following schema | |||
CREATE VIRTUAL TABLE documents USING fts4(title, content); | |||
|
|||
INSERT INTO mail VALUES('hello world', 'This message is a hello world message.'); | |||
INSERT INTO mail VALUES('urgent: serious', 'This mail is seen as a more serious mail'); | |||
|
|||
SELECT title FROM documents WHERE documents MATCH 'effect* known'; | |||
|
|||
SELECT title FROM documents JOIN ( | |||
SELECT docid, rank(matchinfo(documents)) AS rank | |||
FROM documents | |||
WHERE documents MATCH 'effect* known once' | |||
ORDER BY rank DESC | |||
LIMIT 10 OFFSET 0 | |||
) AS ranktable USING(docid) | |||
ORDER BY ranktable.rank DESC; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,39 @@ | |||
-- Create and populate an FTS4 table with two columns: | |||
CREATE VIRTUAL TABLE t1 USING fts4(a, b); | |||
INSERT INTO t1 VALUES('transaction default models default', 'Non transaction reads'); | |||
INSERT INTO t1 VALUES('the default transaction', 'these semantics present'); | |||
INSERT INTO t1 VALUES('single request', 'default data'); | |||
|
|||
-- In the following query, no format string is specified and so it defaults | |||
-- to "pcx". It therefore returns a single row consisting of a single blob | |||
-- value 80 bytes in size (20 32-bit integers - 1 for "p", 1 for "c" and | |||
-- 3*2*3 for "x"). If each block of 4 bytes in the blob is interpreted | |||
-- as an unsigned integer in machine byte-order, the values will be: | |||
-- | |||
-- 3 2 1 3 2 0 1 1 1 2 2 0 1 1 0 0 0 1 1 1 | |||
-- | |||
-- The row returned corresponds to the second entry inserted into table t1. | |||
-- The first two integers in the blob show that the query contained three | |||
-- phrases and the table being queried has two columns. The next block of | |||
-- three integers describes column 0 (in this case column "a") and phrase | |||
-- 0 (in this case "default"). The current row contains 1 hit for "default" | |||
-- in column 0, of a total of 3 hits for "default" that occur in column | |||
-- 0 of any table row. The 3 hits are spread across 2 different rows. | |||
-- | |||
-- The next set of three integers (0 1 1) pertain to the hits for "default" | |||
-- in column 1 of the table (0 in this row, 1 in all rows, spread across | |||
-- 1 rows). | |||
-- | |||
SELECT matchinfo(t1) FROM t1 WHERE t1 MATCH 'default transaction "these semantics"'; | |||
|
|||
-- The format string for this query is "ns". The output array will therefore | |||
-- contain 3 integer values - 1 for "n" and 2 for "s". The query returns | |||
-- two rows (the first two rows in the table match). The values returned are: | |||
-- | |||
-- 3 1 1 | |||
-- 3 2 0 | |||
-- | |||
-- The first value in the matchinfo array returned for both rows is 3 (the | |||
-- number of rows in the table). The following two values are the lengths | |||
-- of the longest common subsequence of phrase matches in each column. | |||
SELECT matchinfo(t1, 'ns') FROM t1 WHERE t1 MATCH 'default transaction'; |