diff --git a/package.json b/package.json index 5157fd8..7d8c935 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "fergies-inverted-index", - "version": "13.0.0-rc.3", + "version": "13.0.0-rc.4", "description": "An inverted index that allows javascript objects to be easily serialised and retrieved using promises and map-reduce", "browser": "src/entrypoints/browser.js", "main": "src/entrypoints/node.js", diff --git a/src/main.js b/src/main.js index 9bfabc5..b0477de 100644 --- a/src/main.js +++ b/src/main.js @@ -17,12 +17,13 @@ export class Main { keyEncoding: charwise, valueEncoding: 'json' }), - tokenParser: new TokenParser(), ...ops } - const r = read(ops) - const w = write(ops) + const tokenParser = new TokenParser(ops.caseSensitive) + + const r = read(ops, tokenParser) + const w = write(ops, tokenParser) // timestamp with time of creation (if not created already) // note: async, so this is "fire and forget" @@ -58,7 +59,7 @@ export class Main { this.SORT = r.SORT this.STORE = ops.db this.TIMESTAMP_LAST_UPDATED = w.TIMESTAMP_LAST_UPDATED - this.TOKEN_PARSER = ops.tokenParser + this.TOKEN_PARSER = tokenParser } flattenMatchArrayInResults (results) { diff --git a/src/parseToken.js b/src/parseToken.js index 2918418..95cdcaa 100644 --- a/src/parseToken.js +++ b/src/parseToken.js @@ -6,14 +6,29 @@ charwise.HI = undefined // :. Turn key into json object that is of the // format {FIELD: ..., VALUE: {GTE: ..., LTE ...}} export class TokenParser { - constructor (availableFields = []) { - this.setAvailableFields(availableFields) + availableFields = [] + #caseSensitive + + constructor (caseSensitive) { + this.#caseSensitive = caseSensitive } - setAvailableFields (availableFields) { + setAvailableFields = availableFields => { this.availableFields = availableFields } + #setCaseSensitivity = token => { + const setCase = str => + this.#caseSensitive || typeof str !== 'string' ? str : str.toLowerCase() + return { + FIELD: token.FIELD.map(setCase), + VALUE: { + GTE: setCase(token.VALUE.GTE), + LTE: setCase(token.VALUE.LTE) + } + } + } + parse (token) { // case: // case: : @@ -30,23 +45,23 @@ export class TokenParser { // a part of the value. This accounts for occasions where the value itself // has a ':'. if (token.indexOf(':') === -1) { - return { + return this.#setCaseSensitivity({ FIELD: this.availableFields, VALUE: { GTE: token, LTE: token } - } + }) } const [field, ...value] = token.split(':') - return { + return this.#setCaseSensitivity({ FIELD: [field], VALUE: { GTE: value.join(':'), LTE: value.join(':') } - } + }) } if (typeof token === 'number') { @@ -103,14 +118,14 @@ export class TokenParser { // parse object FIELD if (typeof token.FIELD === 'undefined') { - return { + return this.#setCaseSensitivity({ FIELD: this.availableFields, ...token - } + }) } // Allow FIELD to be an array or a string token.FIELD = [token.FIELD].flat() - return token + return this.#setCaseSensitivity(token) } } diff --git a/src/read.js b/src/read.js index 103995b..e7f54ea 100644 --- a/src/read.js +++ b/src/read.js @@ -4,7 +4,7 @@ import charwise from 'charwise' charwise.LO = null charwise.HI = undefined -export default function (ops) { +export default function (ops, tokenParser) { const isString = s => typeof s === 'string' const queryReplace = token => { @@ -32,18 +32,6 @@ export default function (ops) { return token } - const setCaseSensitivity = token => { - const setCase = str => - ops.caseSensitive || typeof str !== 'string' ? str : str.toLowerCase() - return { - FIELD: token.FIELD.map(setCase), - VALUE: { - GTE: setCase(token.VALUE.GTE), - LTE: setCase(token.VALUE.LTE) - } - } - } - // If this token is a stopword then return 'undefined' const removeStopwords = token => token.VALUE.GTE === token.VALUE.LTE && @@ -67,9 +55,10 @@ export default function (ops) { try { testForBreak(token) - token = ops.tokenParser.parse(token) + token = tokenParser.parse(token) + // testForBreak(token) // ? - token = await setCaseSensitivity(token) + // token = await setCaseSensitivity(token) // testForBreak(token) // ? token = await removeStopwords(token) // testForBreak(token) // ? @@ -216,7 +205,7 @@ export default function (ops) { // return a bucket of IDs. Key is an object like this: // {gte:..., lte:...} (gte/lte == greater/less than or equal) const BUCKET = token => { - token = ops.tokenParser.parse(token) + token = tokenParser.parse(token) return GET(token).then(result => ({ _id: [...result.reduce((acc, cur) => acc.add(cur._id), new Set())].sort(), VALUE: token.VALUE, @@ -245,7 +234,7 @@ export default function (ops) { const BOUNDING_VALUE = (token, reverse) => RANGE({ - ...ops.tokenParser.parse(token), + ...tokenParser.parse(token), LIMIT: 1, REVERSE: reverse }).then(max => @@ -265,7 +254,7 @@ export default function (ops) { ) const DIST = token => { - token = ops.tokenParser.parse(token) + token = tokenParser.parse(token) return Promise.all( token.FIELD.map(field => { let lte = token.VALUE.LTE @@ -313,7 +302,7 @@ export default function (ops) { ) const FACET = token => { - token = ops.tokenParser.parse(token) + token = tokenParser.parse(token) return Promise.all( token.FIELD.map(field => getRange({ diff --git a/src/write.js b/src/write.js index 7a158bb..c2ac382 100644 --- a/src/write.js +++ b/src/write.js @@ -1,7 +1,7 @@ import trav from 'traverse' import reader from './read.js' -export default function (ops) { +export default function (ops, tokenParser) { // TODO: set reset this to the max value every time the DB is restarted let incrementalId = 0 @@ -32,7 +32,13 @@ export default function (ops) { if (!ops.stopwords.includes(this.node)) { const key = JSON.stringify([ fieldName, - [this.node].flat(Infinity) + [this.node] + .flat(Infinity) + .map(item => + typeof item === 'string' && !ops.caseSensitive + ? item.toLowerCase() + : item + ) ]) // bump to lower case if not case sensitive keys.push(ops.caseSensitive ? key : key.toLowerCase()) @@ -207,7 +213,7 @@ export default function (ops) { ) ) .then(() => reader(ops).FIELDS()) - .then(fields => ops.tokenParser.setAvailableFields(fields)) + .then(fields => tokenParser.setAvailableFields(fields)) const PUT = (docs, putOptions = {}) => writer( @@ -223,7 +229,7 @@ export default function (ops) { .then(TIMESTAMP_LAST_UPDATED) .then(async passThrough => { // TODO: reader should not be inited here - ops.tokenParser.setAvailableFields(await reader(ops).FIELDS()) + tokenParser.setAvailableFields(await reader(ops).FIELDS()) return passThrough }) diff --git a/test/src/DISTINCT-test.js b/test/src/DISTINCT-test.js index 30e07a6..105af36 100644 --- a/test/src/DISTINCT-test.js +++ b/test/src/DISTINCT-test.js @@ -101,7 +101,13 @@ const data = [ test('create index', t => { t.plan(1) - t.ok((global[indexName] = new InvertedIndex({ name: indexName })), !undefined) + t.ok( + (global[indexName] = new InvertedIndex({ + name: indexName, + caseSensitive: false + })), + !undefined + ) }) test('can add some data', t => { @@ -117,10 +123,10 @@ test('get DISTINCT values for one field', t => { }) .then(result => t.deepEqual(result, [ - { FIELD: 'drivetrain', VALUE: 'Diesel' }, - { FIELD: 'drivetrain', VALUE: 'Electric' }, - { FIELD: 'drivetrain', VALUE: 'Hybrid' }, - { FIELD: 'drivetrain', VALUE: 'Petrol' } + { FIELD: 'drivetrain', VALUE: 'diesel' }, + { FIELD: 'drivetrain', VALUE: 'electric' }, + { FIELD: 'drivetrain', VALUE: 'hybrid' }, + { FIELD: 'drivetrain', VALUE: 'petrol' } ]) ) }) @@ -133,13 +139,13 @@ test('get DISTINCT values for two fields', t => { }) .then(result => t.deepEqual(result, [ - { FIELD: 'drivetrain', VALUE: 'Diesel' }, - { FIELD: 'drivetrain', VALUE: 'Electric' }, - { FIELD: 'drivetrain', VALUE: 'Hybrid' }, - { FIELD: 'drivetrain', VALUE: 'Petrol' }, - { FIELD: 'make', VALUE: 'BMW' }, - { FIELD: 'make', VALUE: 'Tesla' }, - { FIELD: 'make', VALUE: 'Volvo' } + { FIELD: 'drivetrain', VALUE: 'diesel' }, + { FIELD: 'drivetrain', VALUE: 'electric' }, + { FIELD: 'drivetrain', VALUE: 'hybrid' }, + { FIELD: 'drivetrain', VALUE: 'petrol' }, + { FIELD: 'make', VALUE: 'bmw' }, + { FIELD: 'make', VALUE: 'tesla' }, + { FIELD: 'make', VALUE: 'volvo' } ]) ) }) @@ -155,10 +161,29 @@ test('get DISTINCT values for two fields with GTE', t => { }) .then(result => t.deepEqual(result, [ - { FIELD: 'drivetrain', VALUE: 'Hybrid' }, - { FIELD: 'drivetrain', VALUE: 'Petrol' }, - { FIELD: 'make', VALUE: 'Tesla' }, - { FIELD: 'make', VALUE: 'Volvo' } + { FIELD: 'drivetrain', VALUE: 'hybrid' }, + { FIELD: 'drivetrain', VALUE: 'petrol' }, + { FIELD: 'make', VALUE: 'tesla' }, + { FIELD: 'make', VALUE: 'volvo' } + ]) + ) +}) + +test('get DISTINCT values for two fields with GTE (case insensitive)', t => { + t.plan(1) + global[indexName] + .DISTINCT({ + FIELD: ['drivetrain', 'make'], + VALUE: { + GTE: 'f' + } + }) + .then(result => + t.deepEqual(result, [ + { FIELD: 'drivetrain', VALUE: 'hybrid' }, + { FIELD: 'drivetrain', VALUE: 'petrol' }, + { FIELD: 'make', VALUE: 'tesla' }, + { FIELD: 'make', VALUE: 'volvo' } ]) ) }) @@ -176,13 +201,13 @@ test('get DISTINCT values with two clauses', t => { ) .then(result => t.deepEqual(result, [ - { FIELD: 'drivetrain', VALUE: 'Diesel' }, - { FIELD: 'drivetrain', VALUE: 'Electric' }, - { FIELD: 'drivetrain', VALUE: 'Hybrid' }, - { FIELD: 'drivetrain', VALUE: 'Petrol' }, - { FIELD: 'make', VALUE: 'BMW' }, - { FIELD: 'make', VALUE: 'Tesla' }, - { FIELD: 'make', VALUE: 'Volvo' } + { FIELD: 'drivetrain', VALUE: 'diesel' }, + { FIELD: 'drivetrain', VALUE: 'electric' }, + { FIELD: 'drivetrain', VALUE: 'hybrid' }, + { FIELD: 'drivetrain', VALUE: 'petrol' }, + { FIELD: 'make', VALUE: 'bmw' }, + { FIELD: 'make', VALUE: 'tesla' }, + { FIELD: 'make', VALUE: 'volvo' } ]) ) }) @@ -201,11 +226,11 @@ test('get DISTINCT values with two clauses', t => { ) .then(result => t.deepEqual(result, [ - { FIELD: 'drivetrain', VALUE: 'Diesel' }, - { FIELD: 'drivetrain', VALUE: 'Electric' }, - { FIELD: 'make', VALUE: 'BMW' }, - { FIELD: 'make', VALUE: 'Tesla' }, - { FIELD: 'make', VALUE: 'Volvo' } + { FIELD: 'drivetrain', VALUE: 'diesel' }, + { FIELD: 'drivetrain', VALUE: 'electric' }, + { FIELD: 'make', VALUE: 'bmw' }, + { FIELD: 'make', VALUE: 'tesla' }, + { FIELD: 'make', VALUE: 'volvo' } ]) ) }) @@ -223,10 +248,10 @@ test('get DISTINCT values with two identical clauses', t => { ) .then(result => t.deepEqual(result, [ - { FIELD: 'drivetrain', VALUE: 'Diesel' }, - { FIELD: 'drivetrain', VALUE: 'Electric' }, - { FIELD: 'drivetrain', VALUE: 'Hybrid' }, - { FIELD: 'drivetrain', VALUE: 'Petrol' } + { FIELD: 'drivetrain', VALUE: 'diesel' }, + { FIELD: 'drivetrain', VALUE: 'electric' }, + { FIELD: 'drivetrain', VALUE: 'hybrid' }, + { FIELD: 'drivetrain', VALUE: 'petrol' } ]) ) }) @@ -239,18 +264,18 @@ test('get DISTINCT values for three fields', t => { }) .then(result => t.deepEqual(result, [ - { FIELD: 'drivetrain', VALUE: 'Diesel' }, - { FIELD: 'drivetrain', VALUE: 'Electric' }, - { FIELD: 'drivetrain', VALUE: 'Hybrid' }, - { FIELD: 'drivetrain', VALUE: 'Petrol' }, - { FIELD: 'make', VALUE: 'BMW' }, - { FIELD: 'make', VALUE: 'Tesla' }, - { FIELD: 'make', VALUE: 'Volvo' }, - { FIELD: 'colour', VALUE: 'Black' }, - { FIELD: 'colour', VALUE: 'Blue' }, - { FIELD: 'colour', VALUE: 'Red' }, - { FIELD: 'colour', VALUE: 'Silver' }, - { FIELD: 'colour', VALUE: 'White' } + { FIELD: 'drivetrain', VALUE: 'diesel' }, + { FIELD: 'drivetrain', VALUE: 'electric' }, + { FIELD: 'drivetrain', VALUE: 'hybrid' }, + { FIELD: 'drivetrain', VALUE: 'petrol' }, + { FIELD: 'make', VALUE: 'bmw' }, + { FIELD: 'make', VALUE: 'tesla' }, + { FIELD: 'make', VALUE: 'volvo' }, + { FIELD: 'colour', VALUE: 'black' }, + { FIELD: 'colour', VALUE: 'blue' }, + { FIELD: 'colour', VALUE: 'red' }, + { FIELD: 'colour', VALUE: 'silver' }, + { FIELD: 'colour', VALUE: 'white' } ]) ) }) @@ -259,24 +284,24 @@ test('get DISTINCT values for ALL fields using {}', t => { t.plan(1) global[indexName].DISTINCT({}).then(result => t.deepEqual(result, [ - { FIELD: 'colour', VALUE: 'Black' }, - { FIELD: 'colour', VALUE: 'Blue' }, - { FIELD: 'colour', VALUE: 'Red' }, - { FIELD: 'colour', VALUE: 'Silver' }, - { FIELD: 'colour', VALUE: 'White' }, - { FIELD: 'drivetrain', VALUE: 'Diesel' }, - { FIELD: 'drivetrain', VALUE: 'Electric' }, - { FIELD: 'drivetrain', VALUE: 'Hybrid' }, - { FIELD: 'drivetrain', VALUE: 'Petrol' }, - { FIELD: 'make', VALUE: 'BMW' }, - { FIELD: 'make', VALUE: 'Tesla' }, - { FIELD: 'make', VALUE: 'Volvo' }, + { FIELD: 'colour', VALUE: 'black' }, + { FIELD: 'colour', VALUE: 'blue' }, + { FIELD: 'colour', VALUE: 'red' }, + { FIELD: 'colour', VALUE: 'silver' }, + { FIELD: 'colour', VALUE: 'white' }, + { FIELD: 'drivetrain', VALUE: 'diesel' }, + { FIELD: 'drivetrain', VALUE: 'electric' }, + { FIELD: 'drivetrain', VALUE: 'hybrid' }, + { FIELD: 'drivetrain', VALUE: 'petrol' }, + { FIELD: 'make', VALUE: 'bmw' }, + { FIELD: 'make', VALUE: 'tesla' }, + { FIELD: 'make', VALUE: 'volvo' }, { FIELD: 'model', VALUE: '3-series' }, { FIELD: 'model', VALUE: '5-series' }, - { FIELD: 'model', VALUE: 'S' }, - { FIELD: 'model', VALUE: 'X' }, - { FIELD: 'model', VALUE: 'XC60' }, - { FIELD: 'model', VALUE: 'XC90' }, + { FIELD: 'model', VALUE: 's' }, + { FIELD: 'model', VALUE: 'x' }, + { FIELD: 'model', VALUE: 'xc60' }, + { FIELD: 'model', VALUE: 'xc90' }, { FIELD: 'price', VALUE: 33114 }, { FIELD: 'price', VALUE: 37512 }, { FIELD: 'price', VALUE: 44274 }, @@ -305,24 +330,24 @@ test('get DISTINCT values for ALL fields using no param (DISTINCT())', t => { t.plan(1) global[indexName].DISTINCT().then(result => t.deepEqual(result, [ - { FIELD: 'colour', VALUE: 'Black' }, - { FIELD: 'colour', VALUE: 'Blue' }, - { FIELD: 'colour', VALUE: 'Red' }, - { FIELD: 'colour', VALUE: 'Silver' }, - { FIELD: 'colour', VALUE: 'White' }, - { FIELD: 'drivetrain', VALUE: 'Diesel' }, - { FIELD: 'drivetrain', VALUE: 'Electric' }, - { FIELD: 'drivetrain', VALUE: 'Hybrid' }, - { FIELD: 'drivetrain', VALUE: 'Petrol' }, - { FIELD: 'make', VALUE: 'BMW' }, - { FIELD: 'make', VALUE: 'Tesla' }, - { FIELD: 'make', VALUE: 'Volvo' }, + { FIELD: 'colour', VALUE: 'black' }, + { FIELD: 'colour', VALUE: 'blue' }, + { FIELD: 'colour', VALUE: 'red' }, + { FIELD: 'colour', VALUE: 'silver' }, + { FIELD: 'colour', VALUE: 'white' }, + { FIELD: 'drivetrain', VALUE: 'diesel' }, + { FIELD: 'drivetrain', VALUE: 'electric' }, + { FIELD: 'drivetrain', VALUE: 'hybrid' }, + { FIELD: 'drivetrain', VALUE: 'petrol' }, + { FIELD: 'make', VALUE: 'bmw' }, + { FIELD: 'make', VALUE: 'tesla' }, + { FIELD: 'make', VALUE: 'volvo' }, { FIELD: 'model', VALUE: '3-series' }, { FIELD: 'model', VALUE: '5-series' }, - { FIELD: 'model', VALUE: 'S' }, - { FIELD: 'model', VALUE: 'X' }, - { FIELD: 'model', VALUE: 'XC60' }, - { FIELD: 'model', VALUE: 'XC90' }, + { FIELD: 'model', VALUE: 's' }, + { FIELD: 'model', VALUE: 'x' }, + { FIELD: 'model', VALUE: 'xc60' }, + { FIELD: 'model', VALUE: 'xc90' }, { FIELD: 'price', VALUE: 33114 }, { FIELD: 'price', VALUE: 37512 }, { FIELD: 'price', VALUE: 44274 },