Skip to content

Commit

Permalink
fix and tidy up
Browse files Browse the repository at this point in the history
  • Loading branch information
fergiemcdowall committed Mar 5, 2015
1 parent 46008e4 commit 51f20c2
Showing 1 changed file with 173 additions and 165 deletions.
338 changes: 173 additions & 165 deletions lib/search/searcher.js
Expand Up @@ -3,130 +3,29 @@ var searchIndexLogger = require('../logger/searchIndexLogger');
var stopwords = require('natural').stopwords;
var totalDocs = 0;

/**
 * Array#sort comparator: orders entries by their `value` property,
 * largest first.
 *
 * @param {{value: *}} a first entry
 * @param {{value: *}} b second entry
 * @returns {number} -1 when a sorts before b, 1 when after, 0 when tied
 */
var sortOnValueDesc = function (a, b) {
  if (a.value > b.value) {
    return -1;
  }
  return a.value < b.value ? 1 : 0;
};
/**
 * Array#sort comparator: orders entries by their `value` property,
 * smallest first.
 *
 * @param {{value: *}} a first entry
 * @param {{value: *}} b second entry
 * @returns {number} -1 when a sorts before b, 1 when after, 0 when tied
 */
var sortOnValueAsc = function (a, b) {
  if (a.value < b.value) {
    return -1;
  }
  return a.value > b.value ? 1 : 0;
};
/**
 * Array#sort comparator: orders entries by their `key` property,
 * highest key first.
 *
 * @param {{key: *}} a first entry
 * @param {{key: *}} b second entry
 * @returns {number} -1 when a sorts before b, 1 when after, 0 when tied
 */
var sortOnKeyDesc = function (a, b) {
  if (a.key > b.key) {
    return -1;
  }
  return a.key < b.key ? 1 : 0;
};
/**
 * Array#sort comparator: orders entries by their `key` property,
 * lowest key first.
 *
 * @param {{key: *}} a first entry
 * @param {{key: *}} b second entry
 * @returns {number} -1 when a sorts before b, 1 when after, 0 when tied
 */
var sortOnKeyAsc = function (a, b) {
  if (a.key < b.key) {
    return -1;
  }
  return a.key > b.key ? 1 : 0;
};



//used on startup
exports.setTotalDocs = function (td) {
totalDocs = td;
}

/**
 * Reads the index entries for each facet key in turn, accumulates the
 * doc-id set of every facet value, and finally reports per-category facet
 * counts through `callback`.
 *
 * One key is streamed per invocation; the stream's 'end' handler recurses
 * for the next key until `facetks` is exhausted.
 *
 * @param {string[]} facetks     '~'-delimited facet keys; field 3 is the facet category
 * @param {Object}   facetsSoFar accumulator, category -> facet name -> id set (mutated);
 *                               must already hold an object for every category read
 * @param {Object}   indexes     store exposing createReadStream(options)
 * @param {Array[]}  filterks    id sets that every facet set is intersected with
 * @param {number}   counter     index into `facetks` of the key to stream now
 * @param {Object}   q           query; `filter`, `facetSort` and `facetLength` are read
 * @param {Function} callback    called once with { category: [{key, value[, active]}] }
 */
var getFacetSets = function (facetks, facetsSoFar, indexes, filterks, counter, q, callback) {
  var thisFacetCat = facetks[counter].split('~')[3];
  indexes.createReadStream({
    valueEncoding: 'json',
    limit: -1,
    start: facetks[counter],
    end: facetks[counter] + '~'
  })
    .on('data', function (data) {
      var thisFacetName = data.key.split('~')[4];
      var bucket = facetsSoFar[thisFacetCat];

      // Starts out unfiltered; narrowed by each filter set below.
      var filteredFacetSet = data.value;
      for (var i = 0; i < filterks.length; i++) {
        // Work on a copy: intersectionDestructive presumably consumes its
        // arguments (as its name suggests) and the filter is reused per entry.
        var filters = filterks[i].slice(0);
        filteredFacetSet = intersectionDestructive(filteredFacetSet, filters);
      }

      if (!bucket[thisFacetName]) {
        if (filteredFacetSet.length > 0) {
          bucket[thisFacetName] = filteredFacetSet;
        }
        return;
      }

      // Facet value already seen for an earlier key: keep only shared ids
      // (multi-word queries).
      var intersection = intersectionDestructive(bucket[thisFacetName], filteredFacetSet);
      if (intersection.length > 0) {
        bucket[thisFacetName] = intersection;
      } else {
        delete bucket[thisFacetName];
      }
    })
    .on('end', function () {
      if (++counter < facetks.length) {
        // Continue with the same key list and keep forwarding the callback;
        // the previous code referenced an out-of-scope `facetKeySet` and
        // dropped `callback`, so multi-key requests never completed.
        getFacetSets(facetks, facetsSoFar, indexes, filterks, counter, q, callback);
        return;
      }

      // All keys read: flatten the id sets to counts.
      var finalFacets = {};
      var facetLength = q.facetLength || 10;
      for (var cat in facetsSoFar) {
        var entries = [];
        for (var facetName in facetsSoFar[cat]) {
          var thisFacetEntry = { 'key': facetName, 'value': facetsSoFar[cat][facetName].length };
          // Flag facet values that are currently selected as filters.
          if (q.filter && q.filter[cat] && q.filter[cat].indexOf(facetName) !== -1) {
            thisFacetEntry.active = true;
          }
          entries.push(thisFacetEntry);
        }

        if (!q.facetSort) {
          entries.sort(sortOnValueDesc);
        } else if (q.facetSort === 'keyAsc') {
          entries.sort(sortOnKeyAsc);
        } else if (q.facetSort === 'keyDesc') {
          entries.sort(sortOnKeyDesc);
        } else if (q.facetSort === 'valueAsc') {
          entries.sort(sortOnValueAsc);
        } else if (q.facetSort === 'valueDesc') {
          entries.sort(sortOnValueDesc);
        }
        // Any other facetSort value leaves the entries in insertion order.

        finalFacets[cat] = entries.slice(0, facetLength);
      }
      callback(finalFacets);
    });
};


/**
 * Looks up every key of `keySet` and reports, per returned key, how many
 * entries it holds, together with the intersection of all non-empty sets.
 *
 * @param {Object}   indexesMultiply store exposing get(keys, cb(err, data)),
 *                                   where `data` maps key -> array or null
 * @param {string[]} keySet          keys to look up
 * @param {Function} callback        called as callback(err, result) with
 *        result.docFreqs (key -> count, 0 for missing keys) and
 *        result.allDocsIDsInResultSet (intersection, or undefined when no
 *        key was found). `err` is truthy when the lookup failed or no key
 *        yielded a set.
 */
var getDocumentFreqencies = function (indexesMultiply, keySet, callback) {
  indexesMultiply.get(keySet, function (err, data) {
    var docFreqs = {};
    var tfsets = [];

    for (var k in data) {
      if (data[k] == null) {
        // Missing key: record a zero frequency under that key. (The old
        // code indexed keySet with an unassigned loop counter and stored
        // the zero under "undefined".)
        docFreqs[k] = 0;
        searchIndexLogger.debug('KEY ' + k + ' NOT FOUND IN DICTIONARY');
      } else {
        docFreqs[k] = data[k].length;
        tfsets.push(data[k]);
      }
    }

    var intersection = tfsets[0];
    for (var i = 1; i < tfsets.length; i++) {
      intersection = intersectionDestructive(intersection, tfsets[i]);
    }

    // Nothing to intersect means no result; keep a real lookup error if
    // there is one instead of overwriting it.
    if (intersection === undefined && !err) {
      err = true;
    }
    callback(err, { docFreqs: docFreqs, allDocsIDsInResultSet: intersection });
  });
};

exports.search = function (indexes, indexesMultiply, q, callback) {
var cleanQuery = {};
var canSearch = true;
var keySet = [];

var cleanQuery = getCleanQuery(q);
var filterKeySet = getFilterKeySet(q);
var keySet = getKeySet(q, cleanQuery);

var getFacets = function(q, indexes, callback) {
//if no facet request callback and return
if (!q.facets){callback({});return;}

var facetLength = 10;
if (q.facetLength) facetLength = q.facetLength;

var sortOnValueDesc = function(a,b) {if (a.value < b.value) return 1; if (a.value > b.value) return -1; return 0;}
var sortOnValueAsc = function(a,b) {if (a.value < b.value) return -1; if (a.value > b.value) return 1; return 0;}
var sortOnKeyDesc = function(a,b) {if (a.key < b.key) return 1; if (a.key > b.key) return -1; return 0;}
var sortOnKeyAsc = function(a,b) {if (a.key < b.key) return -1; if (a.key > b.key) return 1; return 0;}

var facetKeySet = [];
var filterKeySet = [];
for (var searchField in q.query) {
Expand Down Expand Up @@ -156,70 +55,88 @@ exports.search = function (indexes, indexesMultiply, q, callback) {
var facetsSoFar = {};
for (var i = 0; i < q.facets.length; i++)
facetsSoFar[q.facets[i]] = {};


//could be speeded up by always taking the least frequent keys first?
var getFacetSets = function(facetks, filterks, counter) {
var thisFacetCat = facetks[counter].split('~')[3];
indexes.createReadStream({
valueEncoding: 'json',
limit: -1,
start: facetks[counter],
end: facetks[counter] + '~'})
.on('data', function (data) {
var thisFacetName = data.key.split('~')[4];

var filteredFacetSet = data.value; //its actually unfiltered at this point
//handle intersections for filters
for (var i = 0; i < filterks.length; i++) {
var filters = filterks[i].slice(0);
filteredFacetSet = intersectionDestructive(filteredFacetSet, filters);
}
if (!facetsSoFar[thisFacetCat][thisFacetName]) {
if (filteredFacetSet.length > 0)
facetsSoFar[thisFacetCat][thisFacetName] = filteredFacetSet;
}
else {
//handle intersections for multiword queries
var intersection = intersectionDestructive(facetsSoFar[thisFacetCat][thisFacetName], filteredFacetSet);
if (intersection.length > 0)
facetsSoFar[thisFacetCat][thisFacetName] = intersection;
else
delete facetsSoFar[thisFacetCat][thisFacetName];
}
})
.on('end', function () {
if (++counter < facetks.length) {getFacetSets(facetKeySet, filterks, counter);}
else {
//flatten sets to counts
var finalFacets = {};
for (k in facetsSoFar) {
finalFacets[k] = [];
for (kk in facetsSoFar[k]) {

//if facet is active add flag
var thisFacetEntry = {'key':kk,'value':facetsSoFar[k][kk].length};
if (q['filter']) if (q['filter'][k]) if (q['filter'][k].indexOf(kk) != -1)
thisFacetEntry['active'] = true;

finalFacets[k].push(thisFacetEntry);
}
if (q.facetSort) {
if (q.facetSort == 'keyAsc') finalFacets[k].sort(sortOnKeyAsc);
else if (q.facetSort == 'keyDesc') finalFacets[k].sort(sortOnKeyDesc);
else if (q.facetSort == 'valueAsc') finalFacets[k].sort(sortOnValueAsc);
else if (q.facetSort == 'valueDesc') finalFacets[k].sort(sortOnValueDesc);
}
else {
finalFacets[k].sort(sortOnValueDesc);
}
finalFacets[k] = finalFacets[k].slice(0, facetLength);
}
//finished!
callback(finalFacets);
}
});
}



indexesMultiply.get(filterKeySet, function (err, data) {
var filters = [];
for (var i in data)
filters.push(data[i]);
getFacetSets(facetKeySet, facetsSoFar, indexes, filters, 0, q, function(msg){
callback(msg);
});
getFacetSets(facetKeySet, filters, 0);
});
};

//remove stopwords
cleanQuery['query'] = {};
for (var searchField in q['query']) {
var cleanQueryFieldToken = searchField;
cleanQuery['query'][cleanQueryFieldToken] = [];
for (var k = 0; k < q['query'][searchField].length; k++) {
cleanQuery['query'][cleanQueryFieldToken].push(q['query'][searchField][k]);
}
//TODO: does this make sense?
if (cleanQuery['query'][cleanQueryFieldToken].length == 0) {
canSearch = false;
}
}

if (q.weight) {
cleanQuery['weight'] = q.weight;
}

var filterKeySet = [];
for (k in q.filter) {
for (var i = 0; i < q.filter[k].length; i++) {
filterKeySet.push(k + '~' + q.filter[k][i]);
}
}
};

//generate keyset
// var RIKeySet = [];
for (var queryField in cleanQuery.query) {
for (var j = 0; j < cleanQuery.query[queryField].length; j++) {
if (q.filter) {
for (k in q.filter) {
for (var i = 0; i < q.filter[k].length; i++) {
keySet.push('TF~' + queryField + '~' + cleanQuery.query[queryField][j] + '~'
+ k + '~' + q.filter[k][i]);
}
}
}
else {
keySet.push('TF~' + queryField + '~' + cleanQuery.query[queryField][j] + '~~');
// RIKeySet.push('RI~' + queryField + '~' + cleanQuery.query[queryField][j] + '~~');
}
}
}


var getEmptyResultSet = function() {
var resultSet = {};
resultSet['hits'] = [];
return resultSet;
}


getDocumentFreqencies(indexesMultiply, keySet, function(err, frequencies) {
getDocumentFreqencies(indexesMultiply, q, keySet, function(err, frequencies) {
if (err) return callback(getEmptyResultSet());
getFacets(q, indexes, function(facets) {
var response = {};
Expand Down Expand Up @@ -367,3 +284,94 @@ function intersectionDestructive(a, b) {
}
return result;
}


/**
 * Builds a fresh query object from `q`: every field of `q.query` gets a new
 * array holding the same tokens, and `q.weight` is carried over (by
 * reference) when truthy.
 *
 * Note: despite the historical "remove stopwords" remark, no token is
 * filtered out here; tokens are copied as-is.
 *
 * @param {Object} q incoming query; `query` and `weight` are read
 * @returns {Object} { query: {field: tokens[]} [, weight] }
 */
var getCleanQuery = function (q) {
  var cleaned = { query: {} };
  var sourceFields = q.query;

  for (var fieldName in sourceFields) {
    var tokens = sourceFields[fieldName];
    var copied = [];
    var idx = 0;
    while (idx < tokens.length) {
      copied.push(tokens[idx]);
      idx += 1;
    }
    cleaned.query[fieldName] = copied;
  }

  if (q.weight) {
    cleaned.weight = q.weight;
  }
  return cleaned;
};

/**
 * Flattens `q.filter` into a list of "<field>~<value>" keys, one per
 * filter value, in the object's enumeration order.
 *
 * @param {Object} q query; `filter` (field -> array of values) is read
 * @returns {string[]} filter keys; empty when `q.filter` is absent
 */
var getFilterKeySet = function (q) {
  var filterKeySet = [];
  // The loop variable is declared locally; it used to be an implicit
  // global, which leaks across calls and throws in strict mode.
  for (var field in q.filter) {
    var values = q.filter[field];
    for (var i = 0; i < values.length; i++) {
      filterKeySet.push(field + '~' + values[i]);
    }
  }
  return filterKeySet;
};

/**
 * Generates the 'TF~...' lookup keys for a query.
 *
 * Without `q.filter`, each (field, token) pair yields
 * 'TF~<field>~<token>~~'. With `q.filter`, each pair yields one key per
 * filter value: 'TF~<field>~<token>~<filterField>~<filterValue>'.
 *
 * @param {Object} q          original query; only `filter` is read
 * @param {Object} cleanQuery object whose `query` maps field -> token array
 * @returns {string[]} generated keys, in field/token/filter order
 */
var getKeySet = function (q, cleanQuery) {
  var keySet = [];
  for (var queryField in cleanQuery.query) {
    var tokens = cleanQuery.query[queryField];
    for (var j = 0; j < tokens.length; j++) {
      var prefix = 'TF~' + queryField + '~' + tokens[j] + '~';
      if (q.filter) {
        // Loop variable declared locally; it used to be an implicit
        // global, which leaks across calls and throws in strict mode.
        for (var filterField in q.filter) {
          var filterValues = q.filter[filterField];
          for (var i = 0; i < filterValues.length; i++) {
            keySet.push(prefix + filterField + '~' + filterValues[i]);
          }
        }
      } else {
        keySet.push(prefix + '~');
      }
    }
  }
  return keySet;
};

/**
 * Creates the result object returned when a search cannot produce hits.
 *
 * @returns {{hits: Array}} a new object with an empty `hits` array
 */
var getEmptyResultSet = function () {
  return { hits: [] };
};

/**
 * Applies paging defaults to `q`, then looks up every key of `keySet` and
 * reports, per returned key, how many entries it holds, together with the
 * intersection of all non-empty sets.
 *
 * Side effect: `q.pageSize` is set to 100 and `q.offset` to 0 when they
 * are falsy.
 *
 * @param {Object}   indexesMultiply store exposing get(keys, cb(err, data)),
 *                                   where `data` maps key -> array or null
 * @param {Object}   q               query object (mutated, see above)
 * @param {string[]} keySet          keys to look up
 * @param {Function} callback        called as callback(err, result) with
 *        result.docFreqs (key -> count, 0 for missing keys) and
 *        result.allDocsIDsInResultSet (intersection, or undefined when no
 *        key was found). `err` is truthy when the lookup failed or no key
 *        yielded a set.
 */
var getDocumentFreqencies = function (indexesMultiply, q, keySet, callback) {
  if (!q.pageSize) {
    q.pageSize = 100; //default
  }
  if (!q.offset) {
    q.offset = 0; //default
  }

  indexesMultiply.get(keySet, function (err, data) {
    var docFreqs = {};
    var tfsets = [];

    for (var k in data) {
      if (data[k] == null) {
        // Missing key: record a zero frequency under that key. (The old
        // code indexed keySet with an unassigned loop counter and stored
        // the zero under "undefined".)
        docFreqs[k] = 0;
        searchIndexLogger.debug('KEY ' + k + ' NOT FOUND IN DICTIONARY');
      } else {
        docFreqs[k] = data[k].length;
        tfsets.push(data[k]);
      }
    }

    var intersection = tfsets[0];
    for (var i = 1; i < tfsets.length; i++) {
      intersection = intersectionDestructive(intersection, tfsets[i]);
    }

    // Nothing to intersect means no result; keep a real lookup error if
    // there is one instead of overwriting it.
    if (intersection === undefined && !err) {
      err = true;
    }
    callback(err, { docFreqs: docFreqs, allDocsIDsInResultSet: intersection });
  });
};

0 comments on commit 51f20c2

Please sign in to comment.