/
index.js
111 lines (85 loc) · 2.75 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/**
 * In-memory inverted index: maps each term to a record holding its
 * `documentFrequency` and its `postings` array of documents.
 *
 * @constructor
 * @param {Object} [options]
 * @param {function(*, *): number} [options.compareDocument] Comparator for two
 *   documents. A result of 0 means "same document"; a negative result means
 *   the first argument sorts before the second. Defaults to numeric
 *   subtraction, which suits numeric document ids.
 */
function InvertedIndex(options) {
  options = options || {};
  // Prototype-less dictionary: terms are arbitrary strings, so words such as
  // "constructor", "toString" or "__proto__" must not collide with members
  // inherited from Object.prototype.
  this._terms = Object.create(null);
  this._dcmp = options.compareDocument || function(l, r) { return l - r; };
}
/**
 * Appends `doc` to the end of a term's postings unless the comparator says it
 * equals the current last posting. Only the last posting is inspected.
 *
 * @param {*} doc Document being posted.
 * @param {{documentFrequency: number, postings: Array}} stats Term record,
 *   updated in place.
 */
InvertedIndex.prototype._pushTerm = function(doc, stats) {
  var postings = stats.postings;
  var lastPosted = postings[postings.length - 1];
  if (this._dcmp(doc, lastPosted) === 0) {
    // Same document as the most recent posting: nothing to record.
    return;
  }
  stats.documentFrequency++;
  postings.push(doc);
};
/**
 * Inserts `doc` into a term's postings at its sorted position, doing nothing
 * if an equal document (comparator result 0) is already present.
 *
 * The position is found by binary search, so `stats.postings` must already be
 * ordered according to the index's document comparator.
 *
 * @param {*} doc Document being posted.
 * @param {{documentFrequency: number, postings: Array}} stats Term record,
 *   updated in place.
 */
InvertedIndex.prototype._spliceTerm = function(doc, stats) {
  var ps = stats.postings,
      lo = 0,
      hi = ps.length;
  // Invariant: every posting before `lo` sorts before `doc`, and every
  // posting at or after `hi` sorts after it.
  while (lo < hi) {
    var mid = (lo + hi) >>> 1;
    var d = this._dcmp(doc, ps[mid]);
    if (d === 0) {
      // Already posted for this term.
      return;
    } else if (d < 0) {
      hi = mid;
    } else {
      lo = mid + 1;
    }
  }
  stats.documentFrequency++;
  // `lo` may equal ps.length, in which case splice appends.
  ps.splice(lo, 0, doc);
};
/**
 * Registers `term` with a fresh record whose only posting is `doc`.
 * Any record already stored under `term` is replaced.
 *
 * @param {string} term Term to register.
 * @param {*} doc First document posted for the term.
 */
InvertedIndex.prototype._newTerm = function(term, doc) {
  var stats = {};
  stats.documentFrequency = 1;
  stats.postings = [doc];
  this._terms[term] = stats;
};
/**
 * Posts `doc` under every term in `terms`.
 *
 * Existing terms are updated through `_pushTerm`, which only checks the last
 * posting for a duplicate; callers are therefore expected to post documents
 * in comparator order. Use `postDocumentUnsorted` when that cannot be
 * guaranteed.
 *
 * @param {*} doc Document to post.
 * @param {Array<string>} terms Terms occurring in the document.
 * @param {function()} [cb] Called with no arguments on the next tick.
 */
InvertedIndex.prototype.postDocument = function(doc, terms, cb) {
  var hasOwn = Object.prototype.hasOwnProperty;
  for (var i = 0, l = terms.length; i < l; ++i) {
    var t = terms[i];
    // Own-property check: a plain truthiness test on this._terms[t] would
    // mistake inherited members (e.g. the term "constructor") for term
    // records and crash in _pushTerm.
    if (hasOwn.call(this._terms, t)) {
      this._pushTerm(doc, this._terms[t]);
    } else {
      this._newTerm(t, doc);
    }
  }
  cb && process.nextTick(cb);
};
/**
 * Posts `doc` under every term in `terms`, without requiring documents to be
 * posted in comparator order: existing terms are updated through
 * `_spliceTerm`, which places the document at its sorted position.
 *
 * @param {*} doc Document to post.
 * @param {Array<string>} terms Terms occurring in the document.
 * @param {function()} [cb] Called with no arguments on the next tick.
 */
InvertedIndex.prototype.postDocumentUnsorted = function(doc, terms, cb) {
  var hasOwn = Object.prototype.hasOwnProperty;
  for (var i = 0, l = terms.length; i < l; ++i) {
    var t = terms[i];
    // Own-property check: a plain truthiness test on this._terms[t] would
    // mistake inherited members (e.g. the term "constructor") for term
    // records and crash in _spliceTerm.
    if (hasOwn.call(this._terms, t)) {
      this._spliceTerm(doc, this._terms[t]);
    } else {
      this._newTerm(t, doc);
    }
  }
  cb && process.nextTick(cb);
};
/**
 * Posts `doc` under a single term by delegating to `postDocument` with a
 * one-element term list.
 *
 * @param {*} doc Document to post.
 * @param {string} term Term occurring in the document.
 * @param {function()} [cb] Forwarded unchanged to `postDocument`.
 * @returns {*} Whatever `postDocument` returns.
 */
InvertedIndex.prototype.postTerm = function(doc, term, cb) {
  var singleTerm = [term];
  return this.postDocument(doc, singleTerm, cb);
};
/**
 * Posts `doc` under a single term by delegating to `postDocumentUnsorted`
 * with a one-element term list.
 *
 * @param {*} doc Document to post.
 * @param {string} term Term occurring in the document.
 * @param {function()} [cb] Forwarded unchanged to `postDocumentUnsorted`.
 * @returns {*} Whatever `postDocumentUnsorted` returns.
 */
InvertedIndex.prototype.postTermUnsorted = function(doc, term, cb) {
  var singleTerm = [term];
  return this.postDocumentUnsorted(doc, singleTerm, cb);
};
/**
 * Reports whether any document has been posted under `term`.
 *
 * @param {string} term Term to look up.
 * @param {function(boolean)} cb Called on the next tick with the answer.
 */
InvertedIndex.prototype.containsTerm = function(term, cb) {
  // Own-property check rather than `in`, so names inherited from
  // Object.prototype (e.g. "toString") are not reported as indexed terms.
  var result = Object.prototype.hasOwnProperty.call(this._terms, term);
  process.nextTick(function() { cb(result); });
};
/**
 * Looks up the recorded document frequency of `term`.
 *
 * @param {string} term Term to look up.
 * @param {function(number)} cb Called on the next tick with the term's
 *   `documentFrequency`, or 0 when the term is not in the index.
 */
InvertedIndex.prototype.documentFrequencyOfTerm = function(term, cb) {
  // Own-property check rather than `in`: an inherited name such as
  // "toString" would otherwise yield `undefined` instead of 0.
  var known = Object.prototype.hasOwnProperty.call(this._terms, term);
  var result = known ? this._terms[term].documentFrequency : 0;
  process.nextTick(function() { cb(result); });
};
/**
 * Looks up the postings recorded for `term`.
 *
 * For a known term the callback receives the index's own postings array, not
 * a copy, so mutating it changes the index.
 *
 * @param {string} term Term to look up.
 * @param {function(Array)} cb Called on the next tick with the term's
 *   postings, or a new empty array when the term is not in the index.
 */
InvertedIndex.prototype.postingsForTerm = function(term, cb) {
  // Own-property check rather than `in`: an inherited name such as
  // "toString" would otherwise yield `undefined` instead of [].
  var known = Object.prototype.hasOwnProperty.call(this._terms, term);
  var result = known ? this._terms[term].postings : [];
  process.nextTick(function() { cb(result); });
};
module.exports = function(options) {
return new InvertedIndex(options);
}