update initialisation to use synchronous 'new' syntax

fergiemcdowall · Aug 11, 2023 · 527ef9d · 527ef9d
1 parent 5fdca45
commit 527ef9d
Show file tree

Hide file tree

Showing 29 changed files with 275 additions and 278 deletions.
diff --git a/README.md b/README.md
@@ -13,11 +13,11 @@ This lib will work in node and also in the browser
 ### Initialise and populate an index
 
 ```javascript
-import fii from 'fergies-inverted-index'
+import { InvertedIndex } from 'fergies-inverted-index'
 
-const db = fii()
+const { PUT, AND, BUCKETS, FACETS, OR, NOT, OBJECT, GET, MAX } = new InvertedIndex(ops)
 
-db.PUT([ /* my array of objects to be searched */ ]).then(doStuff)
+PUT([ /* my array of objects to be searched */ ]).then(doStuff)
 
 ```
 
@@ -28,10 +28,10 @@ db.PUT([ /* my array of objects to be searched */ ]).then(doStuff)
 // (given objects that contain: { land: <land>, colour: <colour>, population: <number> ... })
 
 // get all object IDs where land=SCOTLAND and colour=GREEN
-db.AND(|'land:SCOTLAND', 'colour:GREEN']).then(result)
+AND(['land:SCOTLAND', 'colour:GREEN']).then(result)
 
 // the query strings above can alternatively be expressed using JSON objects
-db.AND([
+AND([
   {
     FIELD: 'land',
     VALUE: 'SCOTLAND'
@@ -42,43 +42,65 @@ db.AND([
 ]).then(result)
 
 // as above, but return whole objects
-db.AND(['land:SCOTLAND', 'colour:GREEN']).then(db.OBJECT).then(result)
+AND(['land:SCOTLAND', 'colour:GREEN']).then(OBJECT).then(result)
 
 // Get all object IDs where land=SCOTLAND, and those where land=IRELAND
-db.OR(['land:SCOTLAND', 'land:IRELAND']).then(result)
+OR(['land:SCOTLAND', 'land:IRELAND']).then(result)
 
 // queries can be embedded within each other
-db.AND([
+AND([
   'land:SCOTLAND',
-  db.OR(['colour:GREEN', 'colour:BLUE'])
+  OR(['colour:GREEN', 'colour:BLUE'])
 ]).then(result)
 
 // get all object IDs where land=SCOTLAND and colour is NOT GREEN
-db.NOT(
-  db.GET('land:SCOTLAND'),                 // everything in this set
-  db.GET('colour:GREEN', 'colour:RED').    // minus everything in this set
+NOT(
+  GET('land:SCOTLAND'),                 // everything in this set
+  GET('colour:GREEN', 'colour:RED')     // minus everything in this set
 ).then(result)
 
 // Get max population
-db.MAX('population').then(result)
+MAX('population').then(result)
+
+// Aggregate
+BUCKETS(
+  {
+    FIELD: ['year'],
+    VALUE: {
+      LTE: 2010
+    }
+  },
+  {
+    FIELD: ['year'],
+    VALUE: {
+      GTE: 2010
+    }
+  }
+).then(result)
 
+FACETS({
+  FIELD: 'year'
+}).then(result)
+
+//(see also AGGREGATION_FILTER)
 ```
 
 (See the [tests](https://github.com/fergiemcdowall/fergies-inverted-index/tree/master/test) for more examples.)
 
 
 ## API
 
-- <a href="#open"><code><b>fii()</b></code></a>
+- <a href="#InvertedIndex"><code><b>new InvertedIndex(ops)</b></code></a>
 - <a href="#AGGREGATION_FILTER"><code>db.<b>AGGREGATION_FILTER()</b></code></a>
 - <a href="#AND"><code>db.<b>AND()</b></code></a>
+- <a href="#BUCKET"><code>db.<b>BUCKET()</b></code></a>
 - <a href="#BUCKETS"><code>db.<b>BUCKETS()</b></code></a>
 - <a href="#CREATED"><code>db.<b>CREATED()</b></code></a>
 - <a href="#DELETE"><code>db.<b>DELETE()</b></code></a>
 - <a href="#DISTINCT"><code>db.<b>DISTINCT()</b></code></a>
 - <a href="#EXIST"><code>db.<b>EXIST()</b></code></a>
 - <a href="#EXPORT"><code>db.<b>EXPORT()</b></code></a>
-- <a href="#FACET"><code>db.<b>FACET()</b></code></a>
+- <a href="#FACETS"><code>db.<b>FACETS()</b></code></a>
 - <a href="#FIELDS"><code>db.<b>FIELDS()</b></code></a>
 - <a href="#GET"><code>db.<b>GET()</b></code></a>
 - <a href="#IMPORT"><code>db.<b>IMPORT()</b></code></a>
@@ -94,31 +116,27 @@ db.MAX('population').then(result)
 - <a href="#TIMESTAMP_LAST_UPDATED"><code>db.<b>TIMESTAMP_LAST_UPDATED</b></code></a>
 
 
-<a name="fii"></a>
+<a name="InvertedIndex"></a>
 
-### `fii(options)`
+### `new InvertedIndex(options)`
 
-Returns a promise
+Returns an `InvertedIndex` instance
 
 ```javascript
-import fii from 'fergies-inverted-index'
+import { InvertedIndex } from 'fergies-inverted-index'
 
-// creates a DB called "myDB" using levelDB (node.js), or indexedDB (browser)
-const db = await fii({ name: 'myDB' })
+const ii = new InvertedIndex({ name: 'myIndex' })
 ```
 
-In some cases you will want to start operating on the database
-instentaneously. In these cases you can wait for the callback:
-
-```javascript
-import fii from 'fergies-inverted-index'
-
-// creates a DB called "myDB" using levelDB (node.js), or indexedDB (browser)
-fii({ name: 'myDB' }, (err, db) => {
-  // db is guaranteed to be open and available
-})
-```
+#### `options`
 
+| options | default value | notes |
+| ------- | ------------- | ------------- |
+| `caseSensitive` | `true` | |
+| `stopwords` | `[]` | [stopwords](https://en.wikipedia.org/wiki/Stop_word) |
+| `doNotIndexField` | `[]` | All field names specified in this array will not be indexed. They will however still be present in the retrieved objects |
+| `storeVectors` |  `true` | Used for, among other things, deletion. Set to `false` if your index is read-only |
+| `Level` | Defaults to `ClassicLevel` for node and `BrowserLevel` for web | Specify any [`abstract-level`](https://www.npmjs.com/package/abstract-level?activeTab=dependents) compatible backend for your index. The defaults provide LevelDB for node environments and IndexedDB for browsers |
 
 <a name="AGGREGATION_FILTER"></a>
 
@@ -154,6 +172,23 @@ db.AND([ 'land:scotland', 'year:1975', 'color:blue' ]).then(result)
 ```
 
 
+<a name="BUCKET"></a>
+
+### `db.BUCKET( token ).then(result)`
+
+Bucket returns all object ids for objects that contain the given token
+
+```javascript
+BUCKET(
+  {
+    FIELD: ['year'],
+    VALUE: {
+      LTE: 2010
+    }
+  }).then(result)
+```
+
+
 <a name="BUCKETS"></a>
 
 ### `db.BUCKETS( ...token ).then(result)`

diff --git a/src/main.js b/src/main.js
@@ -3,84 +3,82 @@ import read from './read.js'
 import write from './write.js'
 import { TokenParser } from './parseToken.js'
 
-// _match is nested by default so that AND and OR work correctly under
-// the bonnet. Flatten array before presenting to consumer
-const flattenMatchArrayInResults = results =>
-  typeof results === 'undefined'
-    ? undefined
-    : results.map(result => {
-      // Sort _match consistently (FIELD -> VALUE -> SCORE)
-      result._match = result._match
-        .flat(Infinity)
-        .map(m => (typeof m === 'string' ? JSON.parse(m) : m))
-        .sort((a, b) => {
-          if (a.FIELD < b.FIELD) return -1
-          if (a.FIELD > b.FIELD) return 1
-          if (a.VALUE < b.VALUE) return -1
-          if (a.VALUE > b.VALUE) return 1
-          if (a.SCORE < b.SCORE) return -1
-          if (a.SCORE > b.SCORE) return 1
-          return 0
-        })
-      return result
-    })
+export class Main {
+  constructor (ops = {}) {
+    ops = {
+      caseSensitive: true,
+      isLeaf: item => typeof item === 'string' || typeof item === 'number',
+      stopwords: [],
+      doNotIndexField: [],
+      storeVectors: true,
+      docExistsSpace: 'DOC', // the field used to verify that doc exists
+      // with the new *Levels, this doesn't need to be async
+      db: new ops.Level(ops.name, {
+        keyEncoding: charwise,
+        valueEncoding: 'json'
+      }),
+      tokenParser: new TokenParser(),
+      ...ops
+    }
 
-const init = async (ops = {}) => {
-  ops = {
-    caseSensitive: true,
-    isLeaf: item => typeof item === 'string' || typeof item === 'number',
-    stopwords: [],
-    doNotIndexField: [],
-    storeVectors: true,
-    docExistsSpace: 'DOC', // the field used to verify that doc exists
-    db: await new ops.Level(ops.name, {
-      keyEncoding: charwise,
-      valueEncoding: 'json'
-    }),
-    tokenParser: new TokenParser(),
-    ...ops
-  }
+    const r = read(ops)
+    const w = write(ops)
 
-  const r = read(ops)
-  const w = write(ops)
+    // timestamp with time of creation (if not created already)
+    // note: async, so this is "fire and forget"
+    w.TIMESTAMP()
 
-  return w.TIMESTAMP_CREATED().then(() => ({
-    AGGREGATION_FILTER: r.AGGREGATION_FILTER,
-    AND: (tokens, pipeline) =>
-      r.INTERSECTION(tokens, pipeline).then(flattenMatchArrayInResults),
-    BUCKET: r.BUCKET,
-    BUCKETS: r.BUCKETS,
-    CREATED: r.CREATED,
-    DELETE: w.DELETE,
-    DISTINCT: r.DISTINCT,
-    EXIST: r.EXIST,
-    EXPORT: r.EXPORT,
-    FACETS: r.FACETS,
-    FIELDS: r.FIELDS,
-    GET: (tokens, pipeline) =>
-      r.GET(tokens, pipeline).then(flattenMatchArrayInResults),
-    IMPORT: w.IMPORT,
-    LAST_UPDATED: r.LAST_UPDATED,
-    MAX: r.MAX,
-    MIN: r.MIN,
-    NOT: (...keys) =>
-      r.SET_SUBTRACTION(...keys).then(flattenMatchArrayInResults),
-    OBJECT: r.OBJECT,
-    OR: (tokens, pipeline) =>
+    this.AGGREGATION_FILTER = r.AGGREGATION_FILTER
+    this.AND = (tokens, pipeline) =>
+      r.INTERSECTION(tokens, pipeline).then(this.flattenMatchArrayInResults)
+    this.BUCKET = r.BUCKET
+    this.BUCKETS = r.BUCKETS
+    this.CREATED = r.CREATED
+    this.DELETE = w.DELETE
+    this.DISTINCT = r.DISTINCT
+    this.EXIST = r.EXIST
+    this.EXPORT = r.EXPORT
+    this.FACETS = r.FACETS
+    this.FIELDS = r.FIELDS
+    this.GET = (tokens, pipeline) =>
+      r.GET(tokens, pipeline).then(this.flattenMatchArrayInResults)
+    this.IMPORT = w.IMPORT
+    this.LAST_UPDATED = r.LAST_UPDATED
+    this.MAX = r.MAX
+    this.MIN = r.MIN
+    this.NOT = (...keys) =>
+      r.SET_SUBTRACTION(...keys).then(this.flattenMatchArrayInResults)
+    this.OBJECT = r.OBJECT
+    this.OR = (tokens, pipeline) =>
       r
         .UNION(tokens, pipeline)
         .then(result => result.union)
-        .then(flattenMatchArrayInResults),
-    PUT: w.PUT,
-    SORT: r.SORT,
-    STORE: ops.db,
-    TIMESTAMP_LAST_UPDATED: w.TIMESTAMP_LAST_UPDATED,
-    TOKEN_PARSER: ops.tokenParser
-  }))
-}
+        .then(this.flattenMatchArrayInResults)
+    this.PUT = w.PUT
+    this.SORT = r.SORT
+    this.STORE = ops.db
+    this.TIMESTAMP_LAST_UPDATED = w.TIMESTAMP_LAST_UPDATED
+    this.TOKEN_PARSER = ops.tokenParser
+  }
 
-export class Main {
-  constructor (ops) {
-    return init(ops)
+  flattenMatchArrayInResults (results) {
+    return typeof results === 'undefined'
+      ? undefined
+      : results.map(result => {
+        // Sort _match consistently (FIELD -> VALUE -> SCORE)
+        result._match = result._match
+          .flat(Infinity)
+          .map(m => (typeof m === 'string' ? JSON.parse(m) : m))
+          .sort((a, b) => {
+            if (a.FIELD < b.FIELD) return -1
+            if (a.FIELD > b.FIELD) return 1
+            if (a.VALUE < b.VALUE) return -1
+            if (a.VALUE > b.VALUE) return 1
+            if (a.SCORE < b.SCORE) return -1
+            if (a.SCORE > b.SCORE) return 1
+            return 0
+          })
+        return result
+      })
   }
 }
diff --git a/src/write.js b/src/write.js
@@ -228,7 +228,7 @@ export default function (ops) {
   const TIMESTAMP_LAST_UPDATED = passThrough =>
     ops.db.put(['~LAST_UPDATED'], Date.now()).then(() => passThrough)
 
-  const TIMESTAMP_CREATED = () =>
+  const TIMESTAMP = () =>
     ops.db
       .get(['~CREATED'])
       .then(/* already created- do nothing */)
@@ -240,7 +240,7 @@ export default function (ops) {
     DELETE,
     IMPORT,
     PUT,
-    TIMESTAMP_CREATED,
+    TIMESTAMP,
     TIMESTAMP_LAST_UPDATED
   }
 }
diff --git a/test/src/AGGREGATION_FILTER-test.js b/test/src/AGGREGATION_FILTER-test.js
@@ -101,10 +101,7 @@ const data = [
 
 test('create index', t => {
   t.plan(1)
-  new InvertedIndex({ name: indexName }).then(db => {
-    global[indexName] = db
-    t.ok(db, !undefined)
-  })
+  t.ok((global[indexName] = new InvertedIndex({ name: indexName })), !undefined)
 })
 
 test('can add some data', t => {

diff --git a/test/src/AND-test.js b/test/src/AND-test.js
@@ -101,10 +101,7 @@ const data = [
 
 test('create index', t => {
   t.plan(1)
-  new InvertedIndex({ name: indexName }).then(db => {
-    global[indexName] = db
-    t.ok(db, !undefined)
-  })
+  t.ok((global[indexName] = new InvertedIndex({ name: indexName })), !undefined)
 })
 
 test('can add some data', t => {