From 7ecb2f04099785591d00b2438d4b65c19acb08e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Ekeberg?= Date: Sun, 5 Feb 2023 22:00:40 +0100 Subject: [PATCH 01/19] Update Babel config --- .babelrc | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/.babelrc b/.babelrc index 7a0c5a7..738dfbe 100644 --- a/.babelrc +++ b/.babelrc @@ -1,21 +1,15 @@ { - "env": { - "development": { - "presets": [ - ["@babel/env"] - ], - "plugins": [ - "add-module-exports" - ] - }, - "production": { - "presets": [ - ["@babel/env"], - "minify" - ], - "plugins": [ - "add-module-exports" - ] - } - } + "env": { + "test": { + "plugins": ["@babel/plugin-transform-modules-commonjs"] + }, + "development": { + "presets": [["@babel/env"]], + "plugins": ["add-module-exports"] + }, + "production": { + "presets": [["@babel/env"], "minify"], + "plugins": ["add-module-exports"] + } + } } From 441ee4df8524f8954fae8f4b4f1482ab8219e409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Ekeberg?= Date: Sun, 5 Feb 2023 22:01:01 +0100 Subject: [PATCH 02/19] Update ESLint config --- .eslintrc | 19 ------------------- .eslintrc.js | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 19 deletions(-) delete mode 100644 .eslintrc create mode 100755 .eslintrc.js diff --git a/.eslintrc b/.eslintrc deleted file mode 100644 index d199a23..0000000 --- a/.eslintrc +++ /dev/null @@ -1,19 +0,0 @@ -{ - "parserOptions": { - "ecmaVersion": 9, - "sourceType": "module" - }, - "rules": { - "semi": ["warn", "never"], - "no-mixed-spaces-and-tabs": "warn", - "indent": [ - "warn" - ], - "max-statements-per-line": [ - "warn", - { - "max": 2 - } - ] - } -} diff --git a/.eslintrc.js b/.eslintrc.js new file mode 100755 index 0000000..5a000bb --- /dev/null +++ b/.eslintrc.js @@ -0,0 +1,21 @@ +module.exports = { + env: { + browser: true, + node: true, + es2021: true, + jest: true + }, + extends: ['eslint:recommended'], + parserOptions: { + ecmaVersion: 12, + sourceType: 'module' + }, + rules: { + quotes: ['error', 'single', { avoidEscape: true }], + semi: ['error', 'never'], + indent: 'off', + 'no-mixed-spaces-and-tabs': ['warn', 'smart-tabs'], + 'linebreak-style': ['error', 'unix'], + 'no-unused-vars': 'warn' + } +} From 59ddb6bed88c378ff250565628975fe709dbdb81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Ekeberg?= Date: Sun, 5 Feb 2023 22:01:18 +0100 Subject: [PATCH 03/19] Update gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 670be12..d5505e5 100644 --- a/.gitignore +++ b/.gitignore @@ -14,5 +14,5 @@ jspm_packages .idea lib package-lock.json -yarn.lock .DS_Store +Thumbs.db From 5ea10314c389da26ddd9cb1eb185cbc357ffa38a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Ekeberg?= Date: Sun, 5 Feb 2023 22:12:11 +0100 Subject: [PATCH 04/19] Fix conflicting EditorConfig settings --- .editorconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.editorconfig b/.editorconfig index 9b9a53d..c1a910e 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,8 +8,7 @@ end_of_line = lf charset = utf-8 trim_trailing_whitespace = true insert_final_newline = true -indent_style = space -indent_size = 4 +indent_style = tab [*.md] trim_trailing_whitespace = false From fe6fd92ab94184626924dcf7eac2abb7b923e0d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Ekeberg?= Date: Sun, 5 Feb 2023 22:12:20 +0100 Subject: [PATCH 05/19] Update npmignore --- .npmignore | 11 ++++++++--- 1 file changed, 8 
insertions(+), 3 deletions(-) diff --git a/.npmignore b/.npmignore index 4058c52..da0fa57 100644 --- a/.npmignore +++ b/.npmignore @@ -1,15 +1,20 @@ *.log npm-debug.log* coverage +docs +.vscode .nyc_output node_modules package-lock.json yarn.lock src test -CHANGELOG.md -.travis.yml +CONTRIBUTING.md .editorconfig -.eslintrc +.eslintrc.js +.vscode .babelrc +webpack.config.js .gitignore +.DS_Store +Thumb.db From b043c27706f32b1263516f9dc8629cc2fb4d7ce5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Ekeberg?= Date: Sun, 5 Feb 2023 22:12:41 +0100 Subject: [PATCH 06/19] Add Prettier config --- .prettierrc | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100755 .prettierrc diff --git a/.prettierrc b/.prettierrc new file mode 100755 index 0000000..67d0eb5 --- /dev/null +++ b/.prettierrc @@ -0,0 +1,7 @@ +{ + "trailingComma": "none", + "tabWidth": 4, + "useTabs": true, + "semi": false, + "singleQuote": true +} From 1391769af0d7d65b32e1a0d61624e8673654b764 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Ekeberg?= Date: Sun, 5 Feb 2023 22:12:54 +0100 Subject: [PATCH 07/19] Add VSCode config --- .vscode/extensions.json | 3 +++ .vscode/settings.json | 6 ++++++ 2 files changed, 9 insertions(+) create mode 100644 .vscode/extensions.json create mode 100755 .vscode/settings.json diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..64dbfec --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,3 @@ +{ + "recommendations": ["esbenp.prettier-vscode"] +} diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100755 index 0000000..0c4a68e --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "editor.formatOnSave": true, + "files.insertFinalNewline": true, + "editor.defaultFormatter": "esbenp.prettier-vscode", + "prettier.useTabs": true +} From 679778980d193ae4a9ad4cad7fff8c9668772ef2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Ekeberg?= Date: Sun, 5 Feb 2023 22:13:27 +0100 Subject: [PATCH 08/19] Remove unused Travis config --- .travis.yml | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 24b8730..0000000 --- a/.travis.yml +++ /dev/null @@ -1,10 +0,0 @@ -language: node_js -node_js: - - '8' - - '6' -script: - - npm run test - - npm run build -branches: - only: - - master From 706c8af0c28da4c89a811fb2a2d071fc8164c71d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Ekeberg?= Date: Sun, 5 Feb 2023 22:14:30 +0100 Subject: [PATCH 09/19] Update package dependencies and scripts --- package.json | 171 +++++++++++++++++++++++++-------------------------- 1 file changed, 83 insertions(+), 88 deletions(-) diff --git a/package.json b/package.json index 7b2c717..dff5988 100644 --- a/package.json +++ b/package.json @@ -1,90 +1,85 @@ { - "name": "ml-classify-text", - "version": "2.0.0", - "description": "Text classification using n-grams and cosine similarity", - "module": "./lib", - "main": "./lib", - "scripts": { - "clean": "rimraf lib", - "test": "npm run lint && npm run cover", - "test:prod": "cross-env BABEL_ENV=production npm run test", - "test:only": "mocha --require @babel/register --require @babel/core --recursive", - "test:watch": "npm test -- --watch", - "cover": "nyc --check-coverage npm run test:only", - "lint": "eslint src test", - "build": "webpack --mode=production --no-progress --hide-modules --config=webpack.config.js", - "prepublish": "npm run clean && npm run lint && npm run test && npm run build" - }, 
- "files": [ - "lib", - "src" - ], - "repository": { - "type": "git", - "url": "git+https://github.com/andreekeberg/ml-classify-text-js.git" - }, - "keywords": [ - "text classification", - "classification", - "classify", - "classifier", - "machine learning", - "machine", - "learning", - "ai", - "artificial intelligence", - "artificial", - "intelligence", - "n-gram", - "n-grams", - "cosine similarity", - "cosine", - "similarity", - "confidence", - "predict", - "prediction", - "model", - "train" - ], - "author": "André Ekeberg (https://andreekeberg.se/en/)", - "license": "MIT", - "bugs": { - "url": "https://github.com/andreekeberg/ml-classify-text-js/issues" - }, - "homepage": "https://github.com/andreekeberg/ml-classify-text-js", - "devDependencies": { - "@babel/core": "^7.10.4", - "@babel/plugin-transform-modules-amd": "^7.10.5", - "@babel/plugin-transform-modules-commonjs": "^7.6.0", - "@babel/plugin-transform-runtime": "^7.6.2", - "@babel/polyfill": "^7.10.4", - "@babel/preset-env": "^7.8.3", - "@babel/register": "^7.10.4", - "@babel/runtime": "^7.6.2", - "@babel/runtime-corejs3": "^7.6.2", - "babel-cli": "^6.26.0", - "babel-eslint": "^10.0.1", - "babel-loader": "^8.0.6", - "babel-plugin-add-module-exports": "^1.0.2", - "babel-polyfill": "^6.26.0", - "babel-preset-env": "^1.6.1", - "babel-preset-minify": "^0.5.0", - "babel-runtime": "^6.26.0", - "chai": "^4.1.2", - "core-js": "^3.2.1", - "cross-env": "^5.2.1", - "eslint": "^5.16.0", - "eslint-config-standard": "^14.1.1", - "eslint-plugin-node": "^11.1.0", - "jsdoc": "^3.6.5", - "jsdoc-to-markdown": "^6.0.1", - "mocha": "^6.1.3", - "nyc": "^13.3.0", - "rimraf": "^2.6.2", - "webpack": "^4.40.2", - "webpack-cli": "^3.3.9" - }, - "dependencies": { - "xregexp": "^4.3.0" - } + "name": "ml-classify-text", + "version": "2.0.0", + "description": "Text classification using n-grams and cosine similarity", + "module": "./lib", + "main": "./lib", + "scripts": { + "clean": "rimraf lib", + "test": "jest --coverage", + "test:watch": "jest --watchAll", + "test:prod": "cross-env BABEL_ENV=production npm run test", + "lint": "eslint src test", + "build": "webpack --mode=production --config=webpack.config.js", + "prepublish": "npm run clean && npm run lint && npm run test && npm run build" + }, + "files": [ + "lib" + ], + "repository": { + "type": "git", + "url": "git+https://github.com/andreekeberg/ml-classify-text-js.git" + }, + "keywords": [ + "text classification", + "classification", + "classify", + "classifier", + "machine learning", + "machine", + "learning", + "ai", + "artificial intelligence", + "artificial", + "intelligence", + "n-gram", + "n-grams", + "cosine similarity", + "cosine", + "similarity", + "confidence", + "predict", + "prediction", + "model", + "train" + ], + "author": "André Ekeberg (https://andreekeberg.se/en/)", + "license": "MIT", + "bugs": { + "url": "https://github.com/andreekeberg/ml-classify-text-js/issues" + }, + "homepage": "https://github.com/andreekeberg/ml-classify-text-js", + "devDependencies": { + "@babel/core": "^7.20.12", + "@babel/plugin-transform-modules-amd": "^7.20.11", + "@babel/plugin-transform-modules-commonjs": "^7.20.11", + "@babel/plugin-transform-runtime": "^7.19.6", + "@babel/polyfill": "^7.12.1", + "@babel/preset-env": "^7.20.2", + "@babel/register": "^7.18.9", + "@babel/runtime": "^7.20.13", + "@babel/runtime-corejs3": "^7.20.13", + "babel-cli": "^6.26.0", + "babel-eslint": "^10.1.0", + "babel-loader": "^9.1.2", + "babel-plugin-add-module-exports": "^1.0.4", + "babel-polyfill": "^6.26.0", + 
"babel-preset-env": "^1.7.0", + "babel-preset-minify": "^0.5.2", + "babel-runtime": "^6.26.0", + "core-js": "^3.27.2", + "cross-env": "^7.0.3", + "eslint": "^8.33.0", + "eslint-config-standard": "^17.0.0", + "eslint-plugin-node": "^11.1.0", + "jest": "^29.4.1", + "jsdoc": "^4.0.0", + "jsdoc-to-markdown": "^8.0.0", + "rimraf": "^4.1.2", + "webpack": "^5.75.0", + "webpack-cli": "^5.0.1" + }, + "dependencies": { + "xregexp": "^5.1.1" + } } From 758e8ef1275c6585a2c2db553fa1e0f110461d01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Ekeberg?= Date: Sun, 5 Feb 2023 22:16:06 +0100 Subject: [PATCH 10/19] Fix several bugs and migrate tests to Jest --- src/classifier.js | 612 +++++++++++++++++++++------------------- src/index.js | 6 +- src/model.js | 265 ++++++++--------- src/prediction.js | 96 +++---- src/vocabulary.js | 218 +++++++------- test/Classifier.test.js | 427 ++++++++++++++++++++++++++++ test/Model.test.js | 206 ++++++++++++++ test/Prediction.test.js | 73 +++++ test/Vocabulary.test.js | 177 ++++++++++++ test/classifier.js | 363 ------------------------ test/model.js | 186 ------------ test/prediction.js | 74 ----- test/vocabulary.js | 166 ----------- 13 files changed, 1507 insertions(+), 1362 deletions(-) create mode 100644 test/Classifier.test.js create mode 100644 test/Model.test.js create mode 100644 test/Prediction.test.js create mode 100644 test/Vocabulary.test.js delete mode 100644 test/classifier.js delete mode 100644 test/model.js delete mode 100644 test/prediction.js delete mode 100644 test/vocabulary.js diff --git a/src/classifier.js b/src/classifier.js index 073f74a..6c50012 100644 --- a/src/classifier.js +++ b/src/classifier.js @@ -1,6 +1,7 @@ import XRegExp from 'xregexp' -import Model from './model' -import Prediction from './prediction' +import Model from './Model' +import Prediction from './Prediction' +import Vocabulary from './Vocabulary' /** * @param {(Model|Object)} [model] @@ -11,290 +12,329 @@ import Prediction from './prediction' * @constructor */ class Classifier { - constructor(model = {}) { - if (!(model instanceof Model)) { - model = new Model(model) - } - - this._model = model - } - - /** - * Model instance - * - * @type {Model} - */ - get model() { - return this._model - } - - set model(model) { - if (!(model instanceof Model)) { - model = new Model(model) - } - - this._model = model - } - - /** - * Train the current model using an input string (or array of strings) and a corresponding label - * - * @param {(string|string[])} input - String, or an array of strings - * @param {string} label - Corresponding label - * @return {this} - */ - train(input, label) { - if (typeof input !== 'string' && !(input instanceof Array)) { - throw new Error('input must be either a string or Array') - } - - if (typeof label !== 'string') { - throw new Error('label must be a string') - } - - // If input isn't an array, convert to a single item array - if (!(input instanceof Array)) { - input = [input] - } - - input.forEach(string => { - // Convert the string to a tokenized object - let tokens = this.tokenize(string) - - // If we're using a vocabulary, convert the tokens to a vector where all - // indexes reference vocabulary terms (all terms not already in the - // vocabulary are automatically added) - if (this._model.vocabulary !== false) { - tokens = this.vectorize(tokens) - } - - // Set up an empty entry for the label if it does not exist - if (typeof this._model.data[label] === 'undefined') { - this._model.data[label] = {} - } - - // Add all occurrences to our model 
entry - Object.keys(tokens).forEach(index => { - let occurrences = tokens[index] - - if (typeof this._model.data[label][index] === 'undefined') { - this._model.data[label][index] = 0 - } - - this._model.data[label][index] += occurrences - }) - }) - - return this - } - - /** - * Return an array of one or more Prediction instances - * - * @param {string} input - Input string to make a prediction from - * @param {int} [maxMatches=1] Maximum number of predictions to return - * @param {float} [minimumConfidence=0.2] Minimum confidence required to include a prediction - * @return {Array} - */ - predict(input, maxMatches = 1, minimumConfidence = 0.2) { - if (typeof input !== 'string') { - throw new Error('input must be a string') - } - - if (typeof minimumConfidence !== 'number') { - throw new Error('minimumConfidence must be a number') - } - - if (minimumConfidence < 0) { - throw new Error('minimumConfidence can not be lower than 0') - } - - if (minimumConfidence > 1) { - throw new Error('minimumConfidence can not be higher than 1') - } - - let tokens = this.tokenize(input) - - if (this.vocabulary !== false) { - tokens = this.vectorize(tokens) - } - - let predictions = [] - - Object.keys(this._model.data).forEach(label => { - let entry = this._model.data[label] - - let confidence = this.cosineSimilarity(tokens, entry) - - if (confidence >= minimumConfidence) { - predictions.push(new Prediction({ - label, - confidence - })) - } - }) - - /* istanbul ignore next */ - predictions.sort((a, b) => { - if (a.confidence === b.confidence) { - return 0 - } - - return a.confidence > b.confidence ? -1 : 1 - }) - - return predictions.slice(0, Math.min(predictions.length, maxMatches)) - } - - /** - * Split a string into an array of lowercase words, with all non-letter characters removed - * - * @param {string} input - * @return {Array} - */ - splitWords(input) { - if (typeof input !== 'string') { - throw new Error('input must be a string') - } - - // Remove all apostrophes and dashes to keep words intact - input = input.replace(/'|´|’|-/g, '') - - // Lowercase all letters and replace all non-letter characters with a space - input = XRegExp.replace(input.toLocaleLowerCase(), XRegExp('\\P{L}+', 'g'), ' ').trim() - - return input.split(' ') - } - - /** - * Create an object literal of unique tokens (n-grams) as keys, and their - * respective occurrences as values based on an input string, or array of words - * - * @param {(string|string[])} input - * @return {Object} - */ - tokenize(input) { - let words = typeof input === 'string' ? this.splitWords(input) : input - - if (!(words instanceof Array)) { - throw new Error('input must be either a string or Array') - } - - if (this._model.nGramMax < this._model.nGramMin) { - throw new Error('Invalid nGramMin/nGramMax combination in model config') - } - - let tokens = {} - - // Generate a list of n-grams along with their respective occurrences - // based on the models configured min/max values - words.forEach((word, index) => { - let sequence = '' - - words.slice(index).forEach(nextWord => { - sequence += sequence ? 
(' ' + nextWord) : nextWord - let tokenCount = sequence.split(' ').length - - if (tokenCount < this._model.nGramMin || tokenCount > this._model.nGramMax) { - return - } - - if (typeof tokens[sequence] === 'undefined') { - tokens[sequence] = 0 - } - - ++tokens[sequence] - }) - }) - - return tokens - } - - /** - * Convert a tokenized object into a new object with all keys (terms) - * translated to their index in the vocabulary (adding all terms to - * the vocabulary that do not already exist) - * - * @param {Object} tokens - * @return {Object} - */ - vectorize(tokens) { - if (!(tokens instanceof Object) || tokens.constructor !== Object) { - throw new Error('tokens must be an object literal') - } - - /* istanbul ignore next */ - if (this._model.vocabulary === false) { - throw new Error('Cannot vectorize tokens when vocabulary is false') - } - - let vector = {} - - Object.keys(tokens).forEach(token => { - let vocabularyIndex = this._model.vocabulary.indexOf(token) - - if (vocabularyIndex === -1) { - this._model.vocabulary.add(token) - - vocabularyIndex = this._model.vocabulary.size - 1 - } - - vector[vocabularyIndex] = tokens[token] - }) - - return vector - } - - /** - * Return the cosine similarity between two vectors - * - * @param {Object} v1 - * @param {Object} v2 - * @return {float} - */ - cosineSimilarity(v1, v2) { - if (!(v1 instanceof Object) || v1.constructor !== Object) { - throw new Error('v1 must be an object literal') - } - if (!(v2 instanceof Object) || v2.constructor !== Object) { - throw new Error('v2 must be an object literal') - } - - let prod = 0.0 - let v1Norm = 0.0 - - Object.keys(v1).forEach(i => { - let xi = v1[i] - - if (typeof v2[i] !== 'undefined') { - prod += xi * v2[i] - } - - v1Norm += xi * xi - }) - - v1Norm = Math.sqrt(v1Norm) - - if (v1Norm === 0) { - return 0 - } - - let v2Norm = 0.0 - - Object.keys(v2).forEach(i => { - let xi = v2[i] - - v2Norm += xi * xi - }) - - v2Norm = Math.sqrt(v2Norm) - - if (v2Norm === 0) { - return 0 - } - - return prod / (v1Norm * v2Norm) - } + constructor(model = {}) { + if (!(model instanceof Model)) { + model = new Model(model) + } + + this._model = model + } + + /** + * Model instance + * + * @type {Model} + */ + get model() { + return this._model + } + + set model(model) { + if (!(model instanceof Model)) { + model = new Model(model) + } + + this._model = model + } + + /** + * Train the current model using an input string (or array of strings) and a corresponding label + * + * @param {(string|string[])} input - String, or an array of strings + * @param {string} label - Corresponding label + * @return {this} + */ + train(input, label) { + if (typeof input !== 'string' && !(input instanceof Array)) { + throw new Error('input must be either a string or Array') + } + + if (typeof label !== 'string') { + throw new Error('label must be a string') + } + + // If input isn't an array, convert to a single item array + if (!(input instanceof Array)) { + input = [input] + } + + input.forEach((string) => { + // Convert the string to a tokenized object + let tokens = this.tokenize(string) + + if (this._model.vocabulary !== false) { + // If we're using a vocabulary, convert the tokens to a vector where all + // indexes reference vocabulary terms + const { vector, vocabulary } = this.vectorize(tokens) + + // Overwrite the tokens object with our new vectorized object + tokens = vector + + // Update the model vocabulary + this._model.vocabulary = vocabulary + } + + // Set up an empty entry for the label if it does not exist + if ( + 
!Object.prototype.hasOwnProperty.call(this._model.data, label) + ) { + this._model.data[label] = {} + } + + // Add all occurrences to our model entry + Object.keys(tokens).forEach((index) => { + let occurrences = tokens[index] + + if ( + !Object.prototype.hasOwnProperty.call( + this._model.data[label], + index + ) + ) { + this._model.data[label][index] = 0 + } + + this._model.data[label][index] += occurrences + }) + }) + + return this + } + + /** + * Return an array of one or more Prediction instances + * + * @param {string} input - Input string to make a prediction from + * @param {int} [maxMatches=1] Maximum number of predictions to return + * @param {float} [minimumConfidence=0.2] Minimum confidence required to include a prediction + * @return {Array} + */ + predict(input, maxMatches = 1, minimumConfidence = 0.2) { + if (typeof input !== 'string') { + throw new Error('input must be a string') + } + + if (!['number', 'undefined'].includes(typeof maxMatches)) { + throw new Error('maxMatches must be either a number or undefined') + } + + if (!['number', 'undefined'].includes(typeof minimumConfidence)) { + throw new Error( + 'minimumConfidence must be either a number or undefined' + ) + } + + if (minimumConfidence < 0) { + throw new Error('minimumConfidence can not be lower than 0') + } + + if (minimumConfidence > 1) { + throw new Error('minimumConfidence can not be higher than 1') + } + + // Convert the string to a tokenized object + let tokens = this.tokenize(input) + + if (this.vocabulary !== false) { + // If we're using a vocabulary, convert the tokens to a vector where all + // indexes reference vocabulary terms + const { vector } = this.vectorize(tokens) + + // Overwrite the tokens object with our new vectorized object + tokens = vector + } + + const predictions = [] + + Object.keys(this._model.data).forEach((label) => { + let entry = this._model.data[label] + + let confidence = this.cosineSimilarity(tokens, entry) + + if (confidence >= minimumConfidence) { + predictions.push( + new Prediction({ + label, + confidence + }) + ) + } + }) + + /* istanbul ignore next */ + predictions.sort((a, b) => { + if (a.confidence === b.confidence) { + return 0 + } + + return a.confidence > b.confidence ? -1 : 1 + }) + + return predictions.slice(0, Math.min(predictions.length, maxMatches)) + } + + /** + * Split a string into an array of lowercase words, with all non-letter characters removed + * + * @param {string} input + * @return {Array} + */ + splitWords(input) { + if (typeof input !== 'string') { + throw new Error('input must be a string') + } + + // Remove all apostrophes and dashes to keep words intact + input = input.replace(/'|´|’|-/g, '') + + // Lowercase all letters and replace all non-letter characters with a space + input = XRegExp.replace( + input.toLocaleLowerCase(), + XRegExp('\\P{L}+', 'g'), + ' ' + ).trim() + + return input.split(' ') + } + + /** + * Create an object literal of unique tokens (n-grams) as keys, and their + * respective occurrences as values based on an input string, or array of words + * + * @param {(string|string[])} input + * @return {Object} + */ + tokenize(input) { + let words = typeof input === 'string' ? 
this.splitWords(input) : input + + if (!(words instanceof Array)) { + throw new Error('input must be either a string or Array') + } + + if (this._model.nGramMax < this._model.nGramMin) { + throw new Error( + 'Invalid nGramMin/nGramMax combination in model config' + ) + } + + let tokens = {} + + // Generate a list of n-grams along with their respective occurrences + // based on the models configured min/max values + words.forEach((word, index) => { + let sequence = '' + + words.slice(index).forEach((nextWord) => { + sequence += sequence ? ' ' + nextWord : nextWord + let tokenCount = sequence.split(' ').length + + if ( + tokenCount < this._model.nGramMin || + tokenCount > this._model.nGramMax + ) { + return + } + + if (!Object.prototype.hasOwnProperty.call(tokens, sequence)) { + tokens[sequence] = 0 + } + + ++tokens[sequence] + }) + }) + + return tokens + } + + /** + * Convert a tokenized object into a new object with all keys (terms) + * translated to their index in the returned vocabulary (which is also + * returned along with the object, with any new terms added to the end) + * + * @param {Object} tokens + * @return {Object} + */ + vectorize(tokens) { + if (Object.getPrototypeOf(tokens) !== Object.prototype) { + throw new Error('tokens must be an object literal') + } + + /* istanbul ignore next */ + if (this._model.vocabulary === false) { + throw new Error('Cannot vectorize tokens when vocabulary is false') + } + + const vector = {} + const vocabulary = new Vocabulary(this._model.vocabulary.terms) + + Object.keys(tokens).forEach((token) => { + let vocabularyIndex = vocabulary.indexOf(token) + + if (vocabularyIndex === -1) { + vocabulary.add(token) + + vocabularyIndex = vocabulary.size - 1 + } + + vector[vocabularyIndex] = tokens[token] + }) + + return { + vector, + vocabulary + } + } + + /** + * Return the cosine similarity between two vectors + * + * @param {Object} v1 + * @param {Object} v2 + * @return {float} + */ + cosineSimilarity(v1, v2) { + if (Object.getPrototypeOf(v1) !== Object.prototype) { + throw new Error('v1 must be an object literal') + } + if (Object.getPrototypeOf(v2) !== Object.prototype) { + throw new Error('v2 must be an object literal') + } + + let prod = 0.0 + let v1Norm = 0.0 + + Object.keys(v1).forEach((i) => { + let xi = v1[i] + + if (Object.prototype.hasOwnProperty.call(v2, i)) { + prod += xi * v2[i] + } + + v1Norm += xi * xi + }) + + v1Norm = Math.sqrt(v1Norm) + + if (v1Norm === 0) { + return 0 + } + + let v2Norm = 0.0 + + Object.keys(v2).forEach((i) => { + let xi = v2[i] + + v2Norm += xi * xi + }) + + v2Norm = Math.sqrt(v2Norm) + + if (v2Norm === 0) { + return 0 + } + + return prod / (v1Norm * v2Norm) + } } export default Classifier diff --git a/src/index.js b/src/index.js index 8fdb968..4332d7a 100644 --- a/src/index.js +++ b/src/index.js @@ -1,7 +1,7 @@ -import Classifier from './classifier' +import Classifier from './Classifier' -export { default as Model } from './model' -export { default as Cocabulary } from './vocabulary' +export { default as Model } from './Model' +export { default as Vocabulary } from './Vocabulary' export { Classifier as Classifier } export default Classifier diff --git a/src/model.js b/src/model.js index d9badbc..a27dbb6 100644 --- a/src/model.js +++ b/src/model.js @@ -1,4 +1,4 @@ -import Vocabulary from './vocabulary' +import Vocabulary from './Vocabulary' /** * @param {Object} [config] @@ -9,136 +9,139 @@ import Vocabulary from './vocabulary' * @constructor */ class Model { - constructor(config = {}) { - if (!(config instanceof 
Object) || config.constructor !== Object) { - throw new Error('config must be an object literal') - } - - config = { - nGramMin: 1, - nGramMax: 1, - vocabulary: [], - data: {}, - ...config - } - - if (config.nGramMin !== parseInt(config.nGramMin, 10)) { - throw new Error('Config value nGramMin must be an integer') - } - - if (config.nGramMax !== parseInt(config.nGramMax, 10)) { - throw new Error('Config value nGramMax must be an integer') - } - - if (config.nGramMin < 1) { - throw new Error('Config value nGramMin must be at least 1') - } - - if (config.nGramMax < 1) { - throw new Error('Config value nGramMax must be at least 1') - } - - if (config.nGramMax < config.nGramMin) { - throw new Error('Invalid nGramMin/nGramMax combination in config') - } - - if (config.vocabulary !== false && !(config.vocabulary instanceof Vocabulary)) { - config.vocabulary = new Vocabulary(config.vocabulary) - } - - if (!(config.data instanceof Object) || config.data.constructor !== Object) { - throw new Error('Config value data must be an object literal') - } - - this._nGramMin = config.nGramMin - this._nGramMax = config.nGramMax - this._vocabulary = config.vocabulary - this._data = {...config.data} - } - - /** - * Minimum n-gram size - * - * @type {int} - */ - get nGramMin() { - return this._nGramMin - } - - set nGramMin(size) { - if (size !== parseInt(size, 10)) { - throw new Error('nGramMin must be an integer') - } - - this._nGramMin = size - } - - /** - * Maximum n-gram size - * - * @type {int} - */ - get nGramMax() { - return this._nGramMax - } - - set nGramMax(size) { - if (size !== parseInt(size, 10)) { - throw new Error('nGramMax must be an integer') - } - - this._nGramMax = size - } - - /** - * Vocabulary instance - * - * @type {(Vocabulary|false)} - */ - get vocabulary() { - return this._vocabulary - } - - set vocabulary(vocabulary) { - if (vocabulary !== false && !(vocabulary instanceof Vocabulary)) { - vocabulary = new Vocabulary(vocabulary) - } - - this._vocabulary = vocabulary - } - - /** - * Model data - * - * @type {Object} - */ - get data() { - return this._data - } - - set data(data) { - if (!(data instanceof Object) || data.constructor !== Object) { - throw new Error('data must be an object literal') - } - - this._data = {...data} - } - - /** - * Return the model in its current state an an object literal, including the - * configured n-gram min/max values, the vocabulary as an array (if any, - * otherwise false), and an object literal with all the training data - * - * @return {Object} - */ - serialize() { - return { - nGramMin: this._nGramMin, - nGramMax: this._nGramMax, - vocabulary: Array.from(this._vocabulary.terms), - data: this._data - } - } + constructor(config = {}) { + if (Object.getPrototypeOf(config) !== Object.prototype) { + throw new Error('config must be an object literal') + } + + config = { + nGramMin: 1, + nGramMax: 1, + vocabulary: [], + data: {}, + ...config + } + + if (config.nGramMin !== parseInt(config.nGramMin, 10)) { + throw new Error('Config value nGramMin must be an integer') + } + + if (config.nGramMax !== parseInt(config.nGramMax, 10)) { + throw new Error('Config value nGramMax must be an integer') + } + + if (config.nGramMin < 1) { + throw new Error('Config value nGramMin must be at least 1') + } + + if (config.nGramMax < 1) { + throw new Error('Config value nGramMax must be at least 1') + } + + if (config.nGramMax < config.nGramMin) { + throw new Error('Invalid nGramMin/nGramMax combination in config') + } + + if ( + config.vocabulary !== false && + 
!(config.vocabulary instanceof Vocabulary) + ) { + config.vocabulary = new Vocabulary(config.vocabulary) + } + + if (Object.getPrototypeOf(config.data) !== Object.prototype) { + throw new Error('Config value data must be an object literal') + } + + this._nGramMin = config.nGramMin + this._nGramMax = config.nGramMax + this._vocabulary = config.vocabulary + this._data = { ...config.data } + } + + /** + * Minimum n-gram size + * + * @type {int} + */ + get nGramMin() { + return this._nGramMin + } + + set nGramMin(size) { + if (size !== parseInt(size, 10)) { + throw new Error('nGramMin must be an integer') + } + + this._nGramMin = size + } + + /** + * Maximum n-gram size + * + * @type {int} + */ + get nGramMax() { + return this._nGramMax + } + + set nGramMax(size) { + if (size !== parseInt(size, 10)) { + throw new Error('nGramMax must be an integer') + } + + this._nGramMax = size + } + + /** + * Vocabulary instance + * + * @type {(Vocabulary|false)} + */ + get vocabulary() { + return this._vocabulary + } + + set vocabulary(vocabulary) { + if (vocabulary !== false && !(vocabulary instanceof Vocabulary)) { + vocabulary = new Vocabulary(vocabulary) + } + + this._vocabulary = vocabulary + } + + /** + * Model data + * + * @type {Object} + */ + get data() { + return this._data + } + + set data(data) { + if (!(data instanceof Object) || data.constructor !== Object) { + throw new Error('data must be an object literal') + } + + this._data = { ...data } + } + + /** + * Return the model in its current state an an object literal, including the + * configured n-gram min/max values, the vocabulary as an array (if any, + * otherwise false), and an object literal with all the training data + * + * @return {Object} + */ + serialize() { + return { + nGramMin: this._nGramMin, + nGramMax: this._nGramMax, + vocabulary: Array.from(this._vocabulary.terms), + data: this._data + } + } } export default Model diff --git a/src/prediction.js b/src/prediction.js index f3045dc..6bd4db5 100644 --- a/src/prediction.js +++ b/src/prediction.js @@ -4,54 +4,54 @@ * @hideconstructor */ class Prediction { - constructor(prediction = {}) { - if (!(prediction instanceof Object) || prediction.constructor !== Object) { - throw new Error('prediction must be an object literal') - } - - prediction = { - label: '', - confidence: 0, - ...prediction - } - - this._label = prediction.label - this._confidence = prediction.confidence - } - - /** - * Label of the prediction - * - * @type {string} - */ - get label() { - return this._label - } - - set label(label) { - if (typeof label !== 'string') { - throw new Error('label must be a string') - } - - this._label = label - } - - /** - * Confidence of the prediction - * - * @type {number} - */ - get confidence() { - return this._confidence - } - - set confidence(confidence) { - if (typeof confidence !== 'number') { - throw new Error('confidence must be a number') - } - - this._confidence = confidence - } + constructor(prediction = {}) { + if (Object.getPrototypeOf(prediction) !== Object.prototype) { + throw new Error('prediction must be an object literal') + } + + prediction = { + label: '', + confidence: 0, + ...prediction + } + + this._label = prediction.label + this._confidence = prediction.confidence + } + + /** + * Label of the prediction + * + * @type {string} + */ + get label() { + return this._label + } + + set label(label) { + if (typeof label !== 'string') { + throw new Error('label must be a string') + } + + this._label = label + } + + /** + * Confidence of the prediction + * + * @type 
{number} + */ + get confidence() { + return this._confidence + } + + set confidence(confidence) { + if (typeof confidence !== 'number') { + throw new Error('confidence must be a number') + } + + this._confidence = confidence + } } export default Prediction diff --git a/src/vocabulary.js b/src/vocabulary.js index 10bfdfe..d322f7f 100644 --- a/src/vocabulary.js +++ b/src/vocabulary.js @@ -3,111 +3,119 @@ * @constructor */ class Vocabulary { - constructor(terms = []) { - if (!(terms instanceof Array) && !(terms instanceof Set)) { - throw new Error('terms must be either an Array or a Set') - } - - this._terms = new Set(terms) - } - - /** - * Vocabulary size - * - * @type {number} - */ - get size() { - return this._terms.size - } - - /** - * Vocabulary terms - * - * @type {(Array|Set)} - */ - get terms() { - return this._terms - } - - set terms(terms) { - if (!(terms instanceof Array) && !(terms instanceof Set)) { - throw new Error('terms must be either an Array or a Set') - } - - this._terms = new Set(terms) - } - - /** - * Add one or more terms to the vocabulary - * - * @param {(string|Array|Set)} terms - * @return {this} - */ - add(terms) { - if (typeof terms !== 'string' && !(terms instanceof Array) && !(terms instanceof Set)) { - throw new Error('terms must be either a string, Array or Set') - } - - if (typeof terms === 'string') { - terms = [terms] - } else if (terms instanceof Set) { - terms = Array.from(terms) - } - - terms.forEach(term => { - this._terms.add(term) - }) - - return this - } - - /** - * Remove one or more terms from the vocabulary - * - * @param {(string|Array|Set)} terms - * @return {this} - */ - remove(terms) { - if (typeof terms !== 'string' && !(terms instanceof Array) && !(terms instanceof Set)) { - throw new Error('terms must be either a string, Array or Set') - } - - if (typeof terms === 'string') { - terms = [terms] - } else if (terms instanceof Set) { - terms = Array.from(terms) - } - - terms.forEach(term => { - this._terms.delete(term) - }) - - return this - } - - /** - * Return whether the vocabulary contains a certain term - * - * @param {string} term - * @return {bool} - */ - has(term) { - return this._terms.has(term) - } - - /** - * Return the index of a term in the vocabulary (returns -1 if not found) - * - * @param {string} term - * @return {number} - */ - indexOf(term) { - if (!this._terms.has(term)) { - return -1 - } - - return Array.from(this._terms).indexOf(term) - } + constructor(terms = []) { + if (!(terms instanceof Array) && !(terms instanceof Set)) { + throw new Error('terms must be either an Array or a Set') + } + + this._terms = new Set(terms) + } + + /** + * Vocabulary size + * + * @type {number} + */ + get size() { + return this._terms.size + } + + /** + * Vocabulary terms + * + * @type {(Array|Set)} + */ + get terms() { + return this._terms + } + + set terms(terms) { + if (!(terms instanceof Array) && !(terms instanceof Set)) { + throw new Error('terms must be either an Array or a Set') + } + + this._terms = new Set(terms) + } + + /** + * Add one or more terms to the vocabulary + * + * @param {(string|Array|Set)} terms + * @return {this} + */ + add(terms) { + if ( + typeof terms !== 'string' && + !(terms instanceof Array) && + !(terms instanceof Set) + ) { + throw new Error('terms must be either a string, Array or Set') + } + + if (typeof terms === 'string') { + terms = [terms] + } else if (terms instanceof Set) { + terms = Array.from(terms) + } + + terms.forEach((term) => { + this._terms.add(term) + }) + + return this + } + + /** + * Remove 
one or more terms from the vocabulary + * + * @param {(string|Array|Set)} terms + * @return {this} + */ + remove(terms) { + if ( + typeof terms !== 'string' && + !(terms instanceof Array) && + !(terms instanceof Set) + ) { + throw new Error('terms must be either a string, Array or Set') + } + + if (typeof terms === 'string') { + terms = [terms] + } else if (terms instanceof Set) { + terms = Array.from(terms) + } + + terms.forEach((term) => { + this._terms.delete(term) + }) + + return this + } + + /** + * Return whether the vocabulary contains a certain term + * + * @param {string} term + * @return {bool} + */ + has(term) { + return this._terms.has(term) + } + + /** + * Return the index of a term in the vocabulary (returns -1 if not found) + * + * @param {string} term + * @return {number} + */ + indexOf(term) { + if (!this._terms.has(term)) { + return -1 + } + + return Array.from(this._terms).indexOf(term) + } } export default Vocabulary diff --git a/test/Classifier.test.js b/test/Classifier.test.js new file mode 100644 index 0000000..bb274b7 --- /dev/null +++ b/test/Classifier.test.js @@ -0,0 +1,427 @@ +import Classifier from '../src/Classifier' +import Model from '../src/Model' + +describe('Classifier', () => { + describe('constructor', () => { + test('should set the model when passed a model instance', () => { + const classifier = new Classifier( + new Model({ + nGramMax: 4 + }) + ) + + expect(classifier.model.nGramMax).toStrictEqual(4) + }) + + test('should set the model when passed an object literal', () => { + const classifier = new Classifier({ + nGramMax: 5 + }) + + expect(classifier.model.nGramMax).toStrictEqual(5) + }) + }) + + describe('model', () => { + test('should return a model instance', () => { + let classifier = new Classifier() + + expect(classifier.model).toBeInstanceOf(Model) + }) + + test('should set the current model when passed a model instance', () => { + let classifier = new Classifier() + + classifier.model = new Model({ + nGramMax: 3 + }) + + expect(classifier.model.nGramMax).toStrictEqual(3) + }) + + test('should set the current model to a new model instance when passed an object literal', () => { + let classifier = new Classifier() + + classifier.model = {} + + expect(classifier.model).toBeInstanceOf(Model) + }) + }) + + describe('splitWords', () => { + test('should throw an error if input is not a string', () => { + const classifier = new Classifier() + + expect(() => classifier.splitWords(1)).toThrow(Error) + }) + + test('should split a string into an array of words', () => { + const classifier = new Classifier() + + expect(classifier.splitWords('Hello world!')).toStrictEqual([ + 'hello', + 'world' + ]) + }) + }) + + describe('tokenize', () => { + test('should throw an error if input is neither a string or array', () => { + const classifier = new Classifier() + + expect(() => classifier.tokenize({})).toThrow(Error) + }) + + test('should throw an error if nGramMax is less than nGramMin in model config', () => { + const classifier = new Classifier() + + classifier.model.nGramMin = 2 + + expect(() => classifier.tokenize('Hello world!')).toThrow(Error) + }) + + test('should return an object literal of tokens and their occurrences from a string', () => { + const classifier = new Classifier() + + expect(classifier.tokenize('Hello world!')).toStrictEqual({ + hello: 1, + world: 1 + }) + }) + + test('should return an object literal of tokens and their occurrences from a string', () => { + const classifier = new Classifier() + + expect(classifier.tokenize('Hello 
world!')).toStrictEqual({ + hello: 1, + world: 1 + }) + }) + + test('should return an object literal of tokens and their occurrences from a array', () => { + const classifier = new Classifier() + + expect(classifier.tokenize(['hello', 'world'])).toStrictEqual({ + hello: 1, + world: 1 + }) + }) + + test('should return an object literal of bigrams when nGramMin/nGramMax is 2', () => { + const classifier = new Classifier({ + nGramMin: 2, + nGramMax: 2 + }) + + expect(classifier.tokenize('Hello world!')).toStrictEqual({ + 'hello world': 1 + }) + }) + + test('should return an object literal of unigrams and bigrams when nGramMin/nGramMax is 1/2', () => { + const classifier = new Classifier({ + nGramMin: 1, + nGramMax: 2 + }) + + expect(classifier.tokenize('Hello world!')).toStrictEqual({ + hello: 1, + 'hello world': 1, + world: 1 + }) + }) + + test('should increment the occurrence of the duplicate tokens', () => { + const classifier = new Classifier() + + expect(classifier.tokenize('Hello hello!')).toStrictEqual({ + hello: 2 + }) + }) + }) + + describe('vectorize', () => { + test('should throw an error if input is not an object literal', () => { + const classifier = new Classifier() + + expect(() => classifier.vectorize([])).toThrow(Error) + }) + + test('should throw an error if vocabulary config option is set to false', () => { + const classifier = new Classifier({ + vocabulary: false + }) + + expect(() => classifier.vectorize({ hello: 1 })).toThrow(Error) + }) + + test('should convert key to its corresponding vocabulary term index', () => { + const classifier = new Classifier() + const tokens = classifier.tokenize('Hello') + + const { vector } = classifier.vectorize(tokens) + + expect(vector).toStrictEqual({ 0: 1 }) + }) + + test('should use existing term index when token is already in vocabulary', () => { + const classifier = new Classifier({ + vocabulary: ['hello', 'world'] + }) + + const tokens = classifier.tokenize('world') + + const { vector } = classifier.vectorize(tokens) + + expect(vector).toStrictEqual({ 1: 1 }) + }) + + test('should return an updated copy of the vocabulary', () => { + const classifier = new Classifier() + + const tokens = classifier.tokenize('Hello world') + + const { vocabulary } = classifier.vectorize(tokens) + + const terms = vocabulary.terms + + expect(Array.from(terms)).toStrictEqual(['hello', 'world']) + }) + }) + + describe('train', () => { + test('should throw an error if input is not a string or array', () => { + const classifier = new Classifier() + + expect(() => classifier.train({}, 'test')).toThrow(Error) + }) + + test('should throw an error if label is not a string', () => { + const classifier = new Classifier() + + expect(() => classifier.train('test', [])).toThrow(Error) + }) + + test('should add tokens to the vocabulary (if not configured to false)', () => { + const classifier = new Classifier() + + classifier.train('hello world', 'test') + + const vocabulary = classifier.model.vocabulary + + expect(vocabulary.size).toStrictEqual(2) + }) + + test('should add tokens (and their occurrences) to the model from a string', () => { + const classifier = new Classifier() + + classifier.train('hello world', 'test') + + const model = classifier.model + + expect(model.data).toStrictEqual({ + test: { 0: 1, 1: 1 } + }) + }) + + test('should add tokens (and their occurrences) to the model from an array of strings', () => { + const classifier = new Classifier() + + classifier.train(['hello world', 'foo', 'bar'], 'test') + + const model = classifier.model + + 
expect(model.data).toStrictEqual({ + test: { 0: 1, 1: 1, 2: 1, 3: 1 } + }) + }) + + test('should increment the occurrence of an existing vocabulary term', () => { + const classifier = new Classifier() + + classifier.train(['hello world', 'foo', 'hello'], 'test') + + const model = classifier.model + + expect(model.data).toStrictEqual({ + test: { 0: 2, 1: 1, 2: 1 } + }) + }) + + test('should return classifier instance', () => { + const classifier = new Classifier() + + expect(classifier.train('hello world', 'test')).toStrictEqual( + classifier + ) + }) + }) + + describe('cosineSimilarity', () => { + test('should throw an error if v1 is not an object literal', () => { + const classifier = new Classifier() + + expect(() => classifier.cosineSimilarity(false, {})).toThrow(Error) + }) + + test('should throw an error if v2 is not an object literal', () => { + const classifier = new Classifier() + + expect(() => classifier.cosineSimilarity({}, false)).toThrow(Error) + }) + + test('should return 1 on identical object literals', () => { + const classifier = new Classifier() + + expect( + classifier.cosineSimilarity( + { + 0: 1 + }, + { + 0: 1 + } + ) + ).toStrictEqual(1) + }) + + test('should return 0 on object literals with no similarity', () => { + const classifier = new Classifier() + + expect( + classifier.cosineSimilarity( + { + 0: 1 + }, + { + 1: 1 + } + ) + ).toStrictEqual(0) + }) + + test('should return > 0 on similar object literals', () => { + const classifier = new Classifier() + + expect( + classifier.cosineSimilarity( + { + 0: 1, + 1: 1 + }, + { + 0: 1, + 2: 1 + } + ) + ).toBeGreaterThan(0) + }) + + test('should return 0 when sum of v1 is 0', () => { + const classifier = new Classifier() + + expect( + classifier.cosineSimilarity( + { + 0: 0 + }, + { + 0: 1 + } + ) + ).toStrictEqual(0) + }) + + test('should return 0 when sum of v2 is 0', () => { + const classifier = new Classifier() + + expect( + classifier.cosineSimilarity( + { + 0: 1 + }, + { + 0: 0 + } + ) + ).toStrictEqual(0) + }) + }) + + describe('predict', () => { + test('should throw an error if input is not a string', () => { + const classifier = new Classifier() + + expect(() => classifier.predict([])).toThrow(Error) + }) + + test('should throw an error if maxMatches is not a number', () => { + const classifier = new Classifier() + + expect(() => classifier.predict('', 'test')).toThrow(Error) + }) + + test('should throw an error if minimumConfidence is not a number', () => { + const classifier = new Classifier() + + expect(() => classifier.predict('', undefined, 'test')).toThrow( + Error + ) + }) + + test('should throw an error if minimumConfidence is lower than 0', () => { + const classifier = new Classifier() + + expect(() => classifier.predict('', undefined, -1)).toThrow(Error) + }) + + test('should throw an error if minimumConfidence is higher than 1', () => { + const classifier = new Classifier() + + expect(() => classifier.predict('', undefined, 2)).toThrow(Error) + }) + + test('should return an array', () => { + const classifier = new Classifier() + + expect(classifier.predict('test')).toBeInstanceOf(Array) + }) + + test('should return one prediction when trained with a sample', () => { + const classifier = new Classifier() + + classifier.train('hello world', 'test') + + expect(classifier.predict('hello world').length).toStrictEqual(1) + }) + + test('should not include predictions with a confidence below the configured minimumConfidence', () => { + const classifier = new Classifier() + + classifier.train('hello world', 
'test') + + const minimumConfidence = 0.8 + + const predictions = classifier.predict( + 'hello', + undefined, + minimumConfidence + ) + + expect( + predictions.filter((prediction) => { + return prediction.confidence < minimumConfidence + }).length + ).toStrictEqual(0) + }) + + test('should not update the model vocabulary', () => { + const classifier = new Classifier() + + classifier.train('hello world', 'test') + classifier.predict('hello foo world') + + expect(classifier.model.vocabulary.has('foo')).toStrictEqual(false) + }) + }) +}) diff --git a/test/Model.test.js b/test/Model.test.js new file mode 100644 index 0000000..33fff16 --- /dev/null +++ b/test/Model.test.js @@ -0,0 +1,206 @@ +import Model from '../src/Model' +import Vocabulary from '../src/Vocabulary' + +describe('Model', () => { + describe('constructor', () => { + test('should throw an error if config is not an object literal', () => { + expect(() => new Model([])).toThrow(Error) + }) + + test('should throw an error if config option nGramMin is not a number', () => { + expect( + () => + new Model({ + nGramMin: '' + }) + ).toThrow(Error) + }) + + test('should throw an error if config option nGramMax is not a number', () => { + expect( + () => + new Model({ + nGramMax: '' + }) + ).toThrow(Error) + }) + + test('should throw an error if config option nGramMin is less than 1', () => { + expect( + () => + new Model({ + nGramMin: 0 + }) + ).toThrow(Error) + }) + + test('should throw an error if config option nGramMax is less than 1', () => { + expect( + () => + new Model({ + nGramMax: 0 + }) + ).toThrow(Error) + }) + + test('should throw an error if config option nGramMax is less than nGramMin', () => { + expect( + () => + new Model({ + nGramMin: 2, + nGramMax: 1 + }) + ).toThrow(Error) + }) + + test('should throw an error if data is not an object literal', () => { + expect( + () => + new Model({ + data: [] + }) + ).toThrow(Error) + }) + }) + + describe('nGramMin', () => { + test('should return a number', () => { + const model = new Model() + + expect(typeof model.nGramMin).toStrictEqual('number') + }) + + test('should return the current nGramMin value', () => { + const model = new Model({ + nGramMin: 3, + nGramMax: 4 + }) + + expect(model.nGramMin).toStrictEqual(3) + }) + + test('should set the nGramMin value', () => { + const model = new Model() + + model.nGramMin = 2 + + expect(model.nGramMin).toStrictEqual(2) + }) + + test('should throw an error if size is not an integer', () => { + const model = new Model() + + expect(() => { + model.nGramMin = 1.1 + }).toThrow(Error) + }) + }) + + describe('nGramMax', () => { + test('should return a number', () => { + const model = new Model() + + expect(typeof model.nGramMax).toStrictEqual('number') + }) + + test('should return the current nGramMax value', () => { + const model = new Model({ + nGramMax: 2 + }) + + expect(model.nGramMax).toStrictEqual(2) + }) + + test('should set the nGramMax value', () => { + const model = new Model() + + model.nGramMax = 3 + + expect(model.nGramMax).toStrictEqual(3) + }) + + test('should throw an error if size is not an integer', () => { + const model = new Model() + + expect(() => { + model.nGramMax = 1.1 + }).toThrow(Error) + }) + }) + + describe('vocabulary', () => { + test('should return a vocabulary instance', () => { + const model = new Model() + + expect(model.vocabulary).toBeInstanceOf(Vocabulary) + }) + + test('should return false when vocabulary is configured to false', () => { + const model = new Model({ + vocabulary: false + }) + + 
expect(model.vocabulary).toStrictEqual(false) + }) + + test('should set the vocabulary value when passing an array', () => { + const model = new Model() + + model.vocabulary = ['hello', 'world'] + + expect(Array.from(model.vocabulary.terms)).toStrictEqual([ + 'hello', + 'world' + ]) + }) + + test('should set the vocabulary value when passing false', () => { + const model = new Model() + + model.vocabulary = false + + expect(model.vocabulary).toStrictEqual(false) + }) + }) + + describe('data', () => { + test('should return an object literal', () => { + const model = new Model() + + expect(model.data).toStrictEqual({}) + }) + + test('should set the model data', () => { + const model = new Model() + + model.data = { + test: { 0: 1 } + } + + expect(model.data).toStrictEqual({ + test: { 0: 1 } + }) + }) + + test('should throw an error if data is not an object literal', () => { + const model = new Model() + + expect(() => { + model.data = [] + }).toThrow(Error) + }) + }) + + describe('serialize', () => { + test('should return an object literal created from the current model', () => { + const model = new Model() + + expect(model.serialize()).toStrictEqual({ + nGramMin: 1, + nGramMax: 1, + vocabulary: [], + data: {} + }) + }) + }) +}) diff --git a/test/Prediction.test.js b/test/Prediction.test.js new file mode 100644 index 0000000..d2b3f84 --- /dev/null +++ b/test/Prediction.test.js @@ -0,0 +1,73 @@ +import Prediction from '../src/Prediction' + +describe('Prediction', () => { + describe('constructor', () => { + test('should throw an error if prediction is not an object literal', () => { + expect(() => new Prediction([])).toThrow(Error) + }) + }) + + describe('label', () => { + test('should throw an error if label is not a string', () => { + const prediction = new Prediction() + + expect(() => { + prediction.label = [] + }).toThrow(Error) + }) + + test('should return a string', () => { + const prediction = new Prediction() + + expect(typeof prediction.label).toStrictEqual('string') + }) + + test('should return the defined prediction label', () => { + const prediction = new Prediction({ + label: 'test' + }) + + expect(prediction.label).toStrictEqual('test') + }) + + test('should set the prediction label', () => { + const prediction = new Prediction() + + prediction.label = 'test' + + expect(prediction.label).toStrictEqual('test') + }) + }) + + describe('confidence', () => { + test('should throw an error if confidence is not a number', () => { + const prediction = new Prediction() + + expect(() => { + prediction.confidence = 'test' + }).toThrow(Error) + }) + + test('should return a number', () => { + const prediction = new Prediction() + + expect(typeof prediction.confidence).toStrictEqual('number') + }) + + test('should return the defined prediction confidence', () => { + const prediction = new Prediction({ + confidence: 0.5 + }) + + expect(prediction.confidence).toBeCloseTo(0.5) + }) + + test('should set the prediction confidence', () => { + const prediction = new Prediction() + + prediction.confidence = 1 + + expect(prediction.confidence).toStrictEqual(1) + }) + }) +}) diff --git a/test/Vocabulary.test.js b/test/Vocabulary.test.js new file mode 100644 index 0000000..95ed3f7 --- /dev/null +++ b/test/Vocabulary.test.js @@ -0,0 +1,177 @@ +import Vocabulary from '../src/Vocabulary' + +describe('Vocabulary', () => { + describe('constructor', () => { + test('should throw an error if terms is not an array or set', () => { + expect(() => new Vocabulary({})).toThrow(Error) + }) + }) + + describe('size', () 
=> { + test('should return a number', () => { + const vocabulary = new Vocabulary() + + expect(typeof vocabulary.size).toStrictEqual('number') + }) + + test('should return the vocabulary size', () => { + const vocabulary = new Vocabulary(['hello']) + + expect(vocabulary.size).toStrictEqual(1) + }) + }) + + describe('terms', () => { + test('should return a set instance', () => { + const vocabulary = new Vocabulary() + + expect(vocabulary.terms).toBeInstanceOf(Set) + }) + + test('should return the vocabulary terms', () => { + const vocabulary = new Vocabulary(['hello']) + + expect(Array.from(vocabulary.terms)).toStrictEqual(['hello']) + }) + + test('should set the vocabulary terms from an array', () => { + const vocabulary = new Vocabulary() + + vocabulary.terms = ['hello', 'world'] + + expect(Array.from(vocabulary.terms)).toStrictEqual([ + 'hello', + 'world' + ]) + }) + + test('should set the vocabulary terms from a set', () => { + const vocabulary = new Vocabulary() + + vocabulary.terms = new Set(['hello', 'world']) + + expect(Array.from(vocabulary.terms)).toStrictEqual([ + 'hello', + 'world' + ]) + }) + + test('should throw an error if terms is not an array or set', () => { + const vocabulary = new Vocabulary() + + expect(() => { + vocabulary.terms = {} + }).toThrow(Error) + }) + }) + + describe('add', () => { + test('should throw an error if terms is not a string, array or set', () => { + const vocabulary = new Vocabulary() + + expect(() => vocabulary.add({})).toThrow(Error) + }) + + test('should add a term to the vocabulary from a string', () => { + const vocabulary = new Vocabulary() + + vocabulary.add('test') + + expect(Array.from(vocabulary.terms)).toStrictEqual(['test']) + }) + + test('should add terms to the vocabulary from an array', () => { + const vocabulary = new Vocabulary() + + vocabulary.add(['hello', 'world']) + + expect(Array.from(vocabulary.terms)).toStrictEqual([ + 'hello', + 'world' + ]) + }) + + test('should add terms to the vocabulary from a set', () => { + const vocabulary = new Vocabulary() + + vocabulary.add(new Set(['hello', 'world'])) + + expect(Array.from(vocabulary.terms)).toStrictEqual([ + 'hello', + 'world' + ]) + }) + + test('should return vocabulary instance', () => { + const vocabulary = new Vocabulary() + + expect(vocabulary.add('test')).toBeInstanceOf(Vocabulary) + }) + }) + + describe('remove', () => { + test('should throw an error if terms is not a string, array or set', () => { + const vocabulary = new Vocabulary() + + expect(() => vocabulary.remove({})).toThrow(Error) + }) + + test('should remove a term to the vocabulary when called with a string', () => { + const vocabulary = new Vocabulary(['test']) + + vocabulary.remove('test') + + expect(Array.from(vocabulary.terms)).toStrictEqual([]) + }) + + test('should remove terms from the vocabulary when called with an array', () => { + const vocabulary = new Vocabulary(['hello', 'world']) + + vocabulary.remove(['world']) + + expect(Array.from(vocabulary.terms)).toStrictEqual(['hello']) + }) + + test('should remove terms from the vocabulary when called with a set', () => { + const vocabulary = new Vocabulary(['hello', 'world']) + + vocabulary.remove(new Set(['world'])) + + expect(Array.from(vocabulary.terms)).toStrictEqual(['hello']) + }) + + test('should return a vocabulary instance', () => { + const vocabulary = new Vocabulary(['test']) + + expect(vocabulary.remove('test')).toBeInstanceOf(Vocabulary) + }) + }) + + describe('has', () => { + test('should return a boolean', () => { + const vocabulary = new 
Vocabulary() + + expect(typeof vocabulary.has('test')).toStrictEqual('boolean') + }) + + test('should return whether a term exists in the vocabulary', () => { + const vocabulary = new Vocabulary(['test']) + + expect(vocabulary.has('test')).toStrictEqual(true) + }) + }) + + describe('indexOf', () => { + test('should return the index of an existing vocabulary term', () => { + const vocabulary = new Vocabulary(['test']) + + expect(vocabulary.indexOf('test')).toStrictEqual(0) + }) + + test('should return -1 for non-existing vocabulary terms', () => { + const vocabulary = new Vocabulary() + + expect(vocabulary.indexOf('test')).toStrictEqual(-1) + }) + }) +}) diff --git a/test/classifier.js b/test/classifier.js deleted file mode 100644 index a8b3791..0000000 --- a/test/classifier.js +++ /dev/null @@ -1,363 +0,0 @@ -import { assert, expect } from 'chai' -import Classifier from '../src/classifier' -import Model from '../src/model' - -describe('Classifier', () => { - describe('constructor', () => { - it('should set the model when passed a model instance', () => { - const classifier = new Classifier(new Model({ - nGramMax: 4 - })) - - expect(classifier.model.nGramMax).to.equal(4) - }) - - it('should set the model when passed an object literal', () => { - const classifier = new Classifier({ - nGramMax: 5 - }) - - expect(classifier.model.nGramMax).to.equal(5) - }) - }) - - describe('model', () => { - it('should return a model instance', () => { - let classifier = new Classifier() - - assert.instanceOf(classifier.model, Model) - }) - - it('should set the current model when passed a model instance', () => { - let classifier = new Classifier() - - classifier.model = new Model({ - nGramMax: 3 - }) - - expect(classifier.model.nGramMax).to.equal(3) - }) - - it('should set the current model to a new model instance when passed an object literal', () => { - let classifier = new Classifier() - - classifier.model = {} - - assert.instanceOf(classifier.model, Model) - }) - }) - - describe('splitWords', () => { - it('should throw an error if input is not a string', () => { - const classifier = new Classifier() - - expect(() => classifier.splitWords(1)).to.throw(Error) - }) - - it('should split a string into an array of words', () => { - const classifier = new Classifier() - - expect(classifier.splitWords('Hello world!')).to.eql( - ['hello', 'world'] - ) - }) - }) - - describe('tokenize', () => { - it('should throw an error if input is neither a string or array', () => { - const classifier = new Classifier() - - expect(() => classifier.tokenize({})).to.throw(Error) - }) - - it('should throw an error if nGramMax is less than nGramMin in model config', () => { - const classifier = new Classifier() - - classifier.model.nGramMin = 2 - - expect(() => classifier.tokenize('Hello world!')).to.throw(Error) - }) - - it('should return an object literal of tokens and their occurrences from a string', () => { - const classifier = new Classifier() - - expect(classifier.tokenize('Hello world!')).to.eql({ - hello: 1, - world: 1 - }) - }) - - it('should return an object literal of tokens and their occurrences from a string', () => { - const classifier = new Classifier() - - expect(classifier.tokenize('Hello world!')).to.eql({ - hello: 1, - world: 1 - }) - }) - - it('should return an object literal of tokens and their occurrences from a array', () => { - const classifier = new Classifier() - - expect(classifier.tokenize(['hello', 'world'])).to.eql({ - hello: 1, - world: 1 - }) - }) - - it('should return an object literal of bigrams 
when nGramMin/nGramMax is 2', () => { - const classifier = new Classifier({ - nGramMin: 2, - nGramMax: 2 - }) - - expect(classifier.tokenize('Hello world!')).to.eql({ - 'hello world': 1 - }) - }) - - it('should return an object literal of unigrams and bigrams when nGramMin/nGramMax is 1/2', () => { - const classifier = new Classifier({ - nGramMin: 1, - nGramMax: 2 - }) - - expect(classifier.tokenize('Hello world!')).to.eql({ - 'hello': 1, - 'hello world': 1, - 'world': 1 - }) - }) - - it('should increment the occurrence of the duplicate tokens', () => { - const classifier = new Classifier() - - expect(classifier.tokenize('Hello hello!')).to.eql({ - 'hello': 2 - }) - }) - }) - - describe('vectorize', () => { - it('should throw an error if input is not an object literal', () => { - const classifier = new Classifier() - - expect(() => classifier.vectorize([])).to.throw(Error) - }) - - it('should throw an error if vocabulary config option is set to false', () => { - const classifier = new Classifier({ - vocabulary: false - }) - - expect(() => classifier.vectorize('hello')).to.throw(Error) - }) - - it('should convert key to its corresponding vocabulary term index', () => { - const classifier = new Classifier() - const tokens = classifier.tokenize('Hello') - - expect(classifier.vectorize(tokens)).to.eql({ 0: 1 }) - }) - - it('should use existing term index when token is already in vocabulary', () => { - const classifier = new Classifier({ - vocabulary: ['hello', 'world'] - }) - - const tokens = classifier.tokenize('world') - - expect(classifier.vectorize(tokens)).to.eql({ 1: 1 }) - }) - - it('should add new tokens to the vocabulary', () => { - const classifier = new Classifier() - - const tokens = classifier.tokenize('Hello world') - - classifier.vectorize(tokens) - - const terms = classifier.model.vocabulary.terms - - expect(Array.from(terms)).to.eql(['hello', 'world']) - }) - }) - - describe('train', () => { - it('should throw an error if input is not a string or array', () => { - const classifier = new Classifier() - - expect(() => classifier.train({}, 'test')).to.throw(Error) - }) - - it('should throw an error if label is not a string', () => { - const classifier = new Classifier() - - expect(() => classifier.train('test', [])).to.throw(Error) - }) - - it('should add tokens to the vocabulary (if not configured to false)', () => { - const classifier = new Classifier() - - classifier.train('hello world', 'test') - - const vocabulary = classifier.model.vocabulary - - expect(vocabulary.size).to.equal(2) - }) - - it('should add tokens (and their occurrences) to the model from a string', () => { - const classifier = new Classifier() - - classifier.train('hello world', 'test') - - const model = classifier.model - - expect(model.data).to.eql({ - test: { 0: 1, 1: 1 } - }) - }) - - it('should add tokens (and their occurrences) to the model from an array of strings', () => { - const classifier = new Classifier() - - classifier.train([ - 'hello world', - 'foo', 'bar' - ], 'test') - - const model = classifier.model - - expect(model.data).to.eql({ - test: { 0: 1, 1: 1, 2: 1, 3: 1 } - }) - }) - - it('should increment the occurrence of an existing vocabulary term', () => { - const classifier = new Classifier() - - classifier.train([ - 'hello world', - 'foo', 'hello' - ], 'test') - - const model = classifier.model - - expect(model.data).to.eql({ - test: { 0: 2, 1: 1, 2: 1 } - }) - }) - - it('should return classifier instance', () => { - const classifier = new Classifier() - - expect(classifier.train('hello 
world', 'test')).to.equal(classifier) - }) - }) - - describe('cosineSimilarity', () => { - it('should throw an error if v1 is not an object literal', () => { - const classifier = new Classifier() - - expect(() => classifier.cosineSimilarity(false, {})).to.throw(Error) - }) - - it('should throw an error if v2 is not an object literal', () => { - const classifier = new Classifier() - - expect(() => classifier.cosineSimilarity({}, false)).to.throw(Error) - }) - - it('should return 1 on identical object literals', () => { - const classifier = new Classifier() - - expect(classifier.cosineSimilarity({ - 0: 1 - }, { - 0: 1 - })).to.equal(1) - }) - - it('should return 0 on object literals with no similarity', () => { - const classifier = new Classifier() - - expect(classifier.cosineSimilarity({ - 0: 1 - }, { - 1: 1 - })).to.equal(0) - }) - - it('should return > 0 on similar object literals', () => { - const classifier = new Classifier() - - assert.isAbove(classifier.cosineSimilarity({ - 0: 1, - 1: 1 - }, { - 0: 1, - 2: 1 - }), 0) - }) - - it('should return 0 when sum of v1 is 0', () => { - const classifier = new Classifier() - - expect(classifier.cosineSimilarity({ - 0: 0 - }, { - 0: 1 - })).to.equal(0) - }) - - it('should return 0 when sum of v2 is 0', () => { - const classifier = new Classifier() - - expect(classifier.cosineSimilarity({ - 0: 1 - }, { - 0: 0 - })).to.equal(0) - }) - }) - - describe('predict', () => { - it('should throw an error if input is not a string', () => { - const classifier = new Classifier() - - expect(() => classifier.predict([])).to.throw(Error) - }) - - it('should throw an error if minimumConfidence is not a number', () => { - const classifier = new Classifier() - - expect(() => classifier.predict('', null, '')).to.throw(Error) - }) - - it('should throw an error if minimumConfidence is lower than 0', () => { - const classifier = new Classifier() - - expect(() => classifier.predict('', null, -1)).to.throw(Error) - }) - - it('should throw an error if minimumConfidence is higher than 1', () => { - const classifier = new Classifier() - - expect(() => classifier.predict('', null, 2)).to.throw(Error) - }) - - it('should return an array', () => { - const classifier = new Classifier() - - assert.typeOf(classifier.predict('test'), 'array') - }) - - it('should return one prediction when trained with a sample', () => { - const classifier = new Classifier() - - classifier.train('hello world', 'test') - - assert.lengthOf(classifier.predict('hello world'), 1) - }) - }) -}) diff --git a/test/model.js b/test/model.js deleted file mode 100644 index 8de3f7a..0000000 --- a/test/model.js +++ /dev/null @@ -1,186 +0,0 @@ -import { assert, expect } from 'chai' -import Model from '../src/model' -import Vocabulary from '../src/vocabulary' - -describe('Model', () => { - describe('constructor', () => { - it('should throw an error if config is not an object literal', () => { - expect(() => new Model([])).to.throw(Error) - }) - - it('should throw an error if config option nGramMin is not a number', () => { - expect(() => new Model({ - nGramMin: '' - })).to.throw(Error) - }) - - it('should throw an error if config option nGramMax is not a number', () => { - expect(() => new Model({ - nGramMax: '' - })).to.throw(Error) - }) - - it('should throw an error if config option nGramMin is less than 1', () => { - expect(() => new Model({ - nGramMin: 0 - })).to.throw(Error) - }) - - it('should throw an error if config option nGramMax is less than 1', () => { - expect(() => new Model({ - nGramMax: 0 - 
})).to.throw(Error) - }) - - it('should throw an error if config option nGramMax is less than nGramMin', () => { - expect(() => new Model({ - nGramMin: 2, - nGramMax: 1 - })).to.throw(Error) - }) - - it('should throw an error if data is not an object literal', () => { - expect(() => new Model({ - data: [] - })).to.throw(Error) - }) - }) - - describe('nGramMin', () => { - it('should return a number', () => { - const model = new Model() - - expect(model.nGramMin).to.be.a('number') - }) - - it('should return the current nGramMin value', () => { - const model = new Model({ - nGramMin: 3, - nGramMax: 4 - }) - - expect(model.nGramMin).to.equal(3) - }) - - it('should set the nGramMin value', () => { - const model = new Model() - - model.nGramMin = 2 - - expect(model.nGramMin).to.equal(2) - }) - - it('should throw an error if size is not an integer', () => { - const model = new Model() - - expect(() => { - model.nGramMin = 1.1 - }).to.throw(Error) - }) - }) - - describe('nGramMax', () => { - it('should return a number', () => { - const model = new Model() - - expect(model.nGramMax).to.be.a('number') - }) - - it('should return the current nGramMax value', () => { - const model = new Model({ - nGramMax: 2 - }) - - expect(model.nGramMax).to.equal(2) - }) - - it('should set the nGramMax value', () => { - const model = new Model() - - model.nGramMax = 3 - - expect(model.nGramMax).to.equal(3) - }) - - it('should throw an error if size is not an integer', () => { - const model = new Model() - - expect(() => { - model.nGramMax = 1.1 - }).to.throw(Error) - }) - }) - - describe('vocabulary', () => { - it('should return a vocabulary instance', () => { - const model = new Model() - - assert.instanceOf(model.vocabulary, Vocabulary) - }) - - it('should return false when vocabulary is configured to false', () => { - const model = new Model({ - vocabulary: false - }) - - expect(model.vocabulary).to.equal(false) - }) - - it('should set the vocabulary value when passing an array', () => { - const model = new Model() - - model.vocabulary = ['hello', 'world'] - - expect(Array.from(model.vocabulary.terms)).to.eql(['hello', 'world']) - }) - - it('should set the vocabulary value when passing false', () => { - const model = new Model() - - model.vocabulary = false - - assert.isFalse(model.vocabulary) - }) - }) - - describe('data', () => { - it('should return an object literal', () => { - const model = new Model() - - expect(model.data).to.eql({}) - }) - - it('should set the model data', () => { - const model = new Model() - - model.data = { - test: { 0: 1 } - } - - expect(model.data).to.eql({ - test: { 0: 1 } - }) - }) - - it('should throw an error if data is not an object literal', () => { - const model = new Model() - - expect(() => { - model.data = [] - }).to.throw(Error) - }) - }) - - describe('serialize', () => { - it('should return an object literal created from the current model', () => { - const model = new Model() - - expect(model.serialize()).to.eql({ - nGramMin: 1, - nGramMax: 1, - vocabulary: [], - data: {} - }) - }) - }) -}) diff --git a/test/prediction.js b/test/prediction.js deleted file mode 100644 index 61be86d..0000000 --- a/test/prediction.js +++ /dev/null @@ -1,74 +0,0 @@ -import { assert, expect } from 'chai' -import Prediction from '../src/prediction' - -describe('Prediction', () => { - describe('constructor', () => { - it('should throw an error if prediction is not an object literal', () => { - expect(() => new Prediction([])).to.throw(Error) - }) - }) - - describe('label', () => { - it('should 
throw an error if label is not a string', () => { - const prediction = new Prediction() - - expect(() => { - prediction.label = [] - }).to.throw(Error) - }) - - it('should return a string', () => { - const prediction = new Prediction() - - expect(prediction.label).to.be.a('string') - }) - - it('should return the defined prediction label', () => { - const prediction = new Prediction({ - label: 'test' - }) - - expect(prediction.label).to.equal('test') - }) - - it('should set the prediction label', () => { - const prediction = new Prediction() - - prediction.label = 'test' - - expect(prediction.label).to.equal('test') - }) - }) - - describe('confidence', () => { - it('should throw an error if confidence is not a number', () => { - const prediction = new Prediction() - - expect(() => { - prediction.confidence = 'test' - }).to.throw(Error) - }) - - it('should return a number', () => { - const prediction = new Prediction() - - expect(prediction.confidence).to.be.a('number') - }) - - it('should return the defined prediction confidence', () => { - const prediction = new Prediction({ - confidence: 0.5 - }) - - expect(prediction.confidence).to.equal(0.5) - }) - - it('should set the prediction confidence', () => { - const prediction = new Prediction() - - prediction.confidence = 1 - - expect(prediction.confidence).to.equal(1) - }) - }) -}) diff --git a/test/vocabulary.js b/test/vocabulary.js deleted file mode 100644 index 46db994..0000000 --- a/test/vocabulary.js +++ /dev/null @@ -1,166 +0,0 @@ -import { assert, expect } from 'chai' -import Vocabulary from '../src/vocabulary' - -describe('Vocabulary', () => { - describe('constructor', () => { - it('should throw an error if terms is not an array or set', () => { - expect(() => new Vocabulary({})).to.throw(Error) - }) - }) - - describe('size', () => { - it('should return a number', () => { - const vocabulary = new Vocabulary() - - expect(vocabulary.size).to.be.a('number') - }) - - it('should return the vocabulary size', () => { - const vocabulary = new Vocabulary([ 'hello' ]) - - expect(vocabulary.size).to.equal(1) - }) - }) - - describe('terms', () => { - it('should return a set instance', () => { - const vocabulary = new Vocabulary() - - assert.instanceOf(vocabulary.terms, Set) - }) - - it('should return the vocabulary terms', () => { - const vocabulary = new Vocabulary(['hello']) - - expect(Array.from(vocabulary.terms)).to.eql(['hello']) - }) - - it('should set the vocabulary terms from an array', () => { - const vocabulary = new Vocabulary() - - vocabulary.terms = ['hello', 'world'] - - expect(Array.from(vocabulary.terms)).to.eql(['hello', 'world']) - }) - - it('should set the vocabulary terms from a set', () => { - const vocabulary = new Vocabulary() - - vocabulary.terms = new Set(['hello', 'world']) - - expect(Array.from(vocabulary.terms)).to.eql(['hello', 'world']) - }) - - it('should throw an error if terms is not an array or set', () => { - const vocabulary = new Vocabulary() - - expect(() => { - vocabulary.terms = {} - }).to.throw(Error) - }) - }) - - describe('add', () => { - it('should throw an error if terms is not a string, array or set', () => { - const vocabulary = new Vocabulary() - - expect(() => vocabulary.add({})).to.throw(Error) - }) - - it('should add a term to the vocabulary from a string', () => { - const vocabulary = new Vocabulary() - - vocabulary.add('test') - - expect(Array.from(vocabulary.terms)).to.eql(['test']) - }) - - it('should add terms to the vocabulary from an array', () => { - const vocabulary = new Vocabulary() - 
- vocabulary.add(['hello', 'world']) - - expect(Array.from(vocabulary.terms)).to.eql(['hello', 'world']) - }) - - it('should add terms to the vocabulary from a set', () => { - const vocabulary = new Vocabulary() - - vocabulary.add(new Set(['hello', 'world'])) - - expect(Array.from(vocabulary.terms)).to.eql(['hello', 'world']) - }) - - it('should return vocabulary instance', () => { - const vocabulary = new Vocabulary() - - assert.instanceOf(vocabulary.add('test'), Vocabulary) - }) - }) - - describe('remove', () => { - it('should throw an error if terms is not a string, array or set', () => { - const vocabulary = new Vocabulary() - - expect(() => vocabulary.remove({})).to.throw(Error) - }) - - it('should remove a term to the vocabulary when called with a string', () => { - const vocabulary = new Vocabulary(['test']) - - vocabulary.remove('test') - - expect(Array.from(vocabulary.terms)).to.eql([]) - }) - - it('should remove terms from the vocabulary when called with an array', () => { - const vocabulary = new Vocabulary(['hello', 'world']) - - vocabulary.remove(['world']) - - expect(Array.from(vocabulary.terms)).to.eql(['hello']) - }) - - it('should remove terms from the vocabulary when called with a set', () => { - const vocabulary = new Vocabulary(['hello', 'world']) - - vocabulary.remove(new Set(['world'])) - - expect(Array.from(vocabulary.terms)).to.eql(['hello']) - }) - - it('should return a vocabulary instance', () => { - const vocabulary = new Vocabulary(['test']) - - assert.instanceOf(vocabulary.remove('test'), Vocabulary) - }) - }) - - describe('has', () => { - it('should return a boolean', () => { - const vocabulary = new Vocabulary() - - assert.isBoolean(vocabulary.has('test')) - }) - - it('should return whether a term exists in the vocabulary', () => { - const vocabulary = new Vocabulary(['test']) - - assert.isTrue(vocabulary.has('test')) - }) - }) - - describe('indexOf', () => { - it('should return the index of an existing vocabulary term', () => { - const vocabulary = new Vocabulary(['test']) - - expect(vocabulary.indexOf('test')).to.equal(0) - }) - - it('should return -1 for non-existing vocabulary terms', () => { - const vocabulary = new Vocabulary() - - expect(vocabulary.indexOf('test')).to.equal(-1) - }) - }) -}) From c46753fc7bec52ff7567c59b18ec312e7fa9205c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Ekeberg?= Date: Sun, 5 Feb 2023 22:17:52 +0100 Subject: [PATCH 11/19] Fix casing in class filenames --- src/{classifier.js => Classifier.js} | 0 src/{model.js => Model.js} | 0 src/{prediction.js => Prediction.js} | 0 src/{vocabulary.js => Vocabulary.js} | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename src/{classifier.js => Classifier.js} (100%) rename src/{model.js => Model.js} (100%) rename src/{prediction.js => Prediction.js} (100%) rename src/{vocabulary.js => Vocabulary.js} (100%) diff --git a/src/classifier.js b/src/Classifier.js similarity index 100% rename from src/classifier.js rename to src/Classifier.js diff --git a/src/model.js b/src/Model.js similarity index 100% rename from src/model.js rename to src/Model.js diff --git a/src/prediction.js b/src/Prediction.js similarity index 100% rename from src/prediction.js rename to src/Prediction.js diff --git a/src/vocabulary.js b/src/Vocabulary.js similarity index 100% rename from src/vocabulary.js rename to src/Vocabulary.js From edc0bc55a046ec1c19491f302f982d31ac55c851 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Ekeberg?= Date: Sun, 5 Feb 2023 22:26:24 +0100 Subject: [PATCH 12/19] Add 
Prediction to exports to allow for type checks --- src/index.js | 1 + 1 file changed, 1 insertion(+) diff --git a/src/index.js b/src/index.js index 4332d7a..c995579 100644 --- a/src/index.js +++ b/src/index.js @@ -2,6 +2,7 @@ import Classifier from './Classifier' export { default as Model } from './Model' export { default as Vocabulary } from './Vocabulary' +export { default as Prediction } from './Prediction' export { Classifier as Classifier } export default Classifier From 9d1a34d51972bbbbd12f8429ad6feeafa0a53c94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Ekeberg?= Date: Sun, 5 Feb 2023 22:33:48 +0100 Subject: [PATCH 13/19] Update docs --- docs/classifier.md | 89 +++++++++++++++++++++++++--------------------- docs/model.md | 35 ++++++++++-------- docs/prediction.md | 8 +++-- docs/vocabulary.md | 55 +++++++++++++++------------- 4 files changed, 103 insertions(+), 84 deletions(-) diff --git a/docs/classifier.md b/docs/classifier.md index 90ae8e3..6c584ef 100644 --- a/docs/classifier.md +++ b/docs/classifier.md @@ -2,90 +2,97 @@ ## Classifier -* [Classifier](#Classifier) - * [new Classifier([model])](#new_Classifier_new) - * [.model](#Classifier+model) : Model - * [.train(input, label)](#Classifier+train) ⇒ this - * [.predict(input, [maxMatches], [minimumConfidence])](#Classifier+predict) ⇒ Array - * [.splitWords(input)](#Classifier+splitWords) ⇒ Array - * [.tokenize(input)](#Classifier+tokenize) ⇒ Object - * [.vectorize(tokens)](#Classifier+vectorize) ⇒ Object - * [.cosineSimilarity(v1, v2)](#Classifier+cosineSimilarity) ⇒ float - - +- [Classifier](#Classifier) + - [new Classifier([model])](#new_Classifier) + - [.model](#Classifier+model) : Model + - [.train(input, label)](#Classifier+train) ⇒ this + - [.predict(input, [maxMatches], [minimumConfidence])](#Classifier+predict) ⇒ Array + - [.splitWords(input)](#Classifier+splitWords) ⇒ Array + - [.tokenize(input)](#Classifier+tokenize) ⇒ Object + - [.vectorize(tokens)](#Classifier+vectorize) ⇒ Object + - [.cosineSimilarity(v1, v2)](#Classifier+cosineSimilarity) ⇒ float + + ### new Classifier([model]) -| Param | Type | Default | Description | -| --- | --- | --- | --- | -| [model] | `Model` \| `Object` | | | -| [model.nGramMin] | `int` | `1` | Minimum n-gram size | -| [model.nGramMax] | `int` | `1` | Maximum n-gram size | -| [model.vocabulary] | `Array` \| `Set` \| `false` | `[]` | Terms mapped to indexes in the model data, set to `false` to store terms directly in the data entries | -| [model.data] | `Object` | `{}` | Key-value store of labels and training data vectors | +| Param | Type | Default | Description | +| ------------------ | --------------------------- | ------- | ----------------------------------------------------------------------------------------------------- | +| [model] | `Model` \| `Object` | | | +| [model.nGramMin] | `int` | `1` | Minimum n-gram size | +| [model.nGramMax] | `int` | `1` | Maximum n-gram size | +| [model.vocabulary] | `Array` \| `Set` \| `false` | `[]` | Terms mapped to indexes in the model data, set to `false` to store terms directly in the data entries | +| [model.data] | `Object` | `{}` | Key-value store of labels and training data vectors | ### classifier.model : `Model` + Model instance ### classifier.train(input, label) ⇒ `this` + Train the current model using an input string (or array of strings) and a corresponding label -| Param | Type | Description | -| --- | --- | --- | +| Param | Type | Description | +| ----- | ------------------- | ------------------------------ | | input | `string` 
\| `Array` | String, or an array of strings | -| label | `string` | Corresponding label | +| label | `string` | Corresponding label | ### classifier.predict(input, [maxMatches], [minimumConfidence]) ⇒ `Array` + Return an array of one or more Prediction instances -| Param | Type | Default | Description | -| --- | --- | --- | --- | -| input | `string` | | Input string to make a prediction from | -| [maxMatches] | `int` | `1` | Maximum number of predictions to return | -| [minimumConfidence] | `float` | `0.2` | Minimum confidence required to include a prediction | +| Param | Type | Default | Description | +| ------------------- | -------- | ------- | --------------------------------------------------- | +| input | `string` | | Input string to make a prediction from | +| [maxMatches] | `int` | `1` | Maximum number of predictions to return | +| [minimumConfidence] | `float` | `0.2` | Minimum confidence required to include a prediction | ### classifier.splitWords(input) ⇒ `Array` + Split a string into an array of lowercase words, with all non-letter characters removed -| Param | Type | -| --- | --- | -| input | `string` | +| Param | Type | +| ----- | -------- | +| input | `string` | ### classifier.tokenize(input) ⇒ `Object` + Create an object literal of unique tokens (n-grams) as keys, and their respective occurrences as values based on an input string, or array of words -| Param | Type | -| --- | --- | -| input | `string` \| `Array` | +| Param | Type | +| ----- | ------------------- | +| input | `string` \| `Array` | ### classifier.vectorize(tokens) ⇒ `Object` + Convert a tokenized object into a new object with all keys (terms) -translated to their index in the vocabulary (adding all terms to -the vocabulary that do not already exist) +translated to their index in the returned vocabulary (which is also +returned along with the object, with any new terms added to the end) -| Param | Type | -| --- | --- | -| tokens | `Object` | +| Param | Type | +| ------ | -------- | +| tokens | `Object` | ### classifier.cosineSimilarity(v1, v2) ⇒ `float` + Return the cosine similarity between two vectors -| Param | Type | -| --- | --- | -| v1 | `Object` | -| v2 | `Object` | +| Param | Type | +| ----- | -------- | +| v1 | `Object` | +| v2 | `Object` | diff --git a/docs/model.md b/docs/model.md index 2c515f7..4c3e2fb 100644 --- a/docs/model.md +++ b/docs/model.md @@ -2,49 +2,54 @@ ## Model -* [Model](#Model) - * [new Model([config])](#new_Model_new) - * [.nGramMin](#Model+nGramMin) : `int` - * [.nGramMax](#Model+nGramMax) : `int` - * [.vocabulary](#Model+vocabulary) : `Vocabulary` \| `false` - * [.data](#Model+data) : `Object` - * [.serialize()](#Model+serialize) ⇒ `Object` +- [Model](#Model) + - [new Model([config])](#new_Model) + - [.nGramMin](#Model+nGramMin) : `int` + - [.nGramMax](#Model+nGramMax) : `int` + - [.vocabulary](#Model+vocabulary) : `Vocabulary` \| `false` + - [.data](#Model+data) : `Object` + - [.serialize()](#Model+serialize) ⇒ `Object` - + ### new Model([config]) -| Param | Type | Default | Description | -| --- | --- | --- | --- | -| [config] | `Object` | | | -| [config.nGramMin] | `int` | `1` | Minimum n-gram size | -| [config.nGramMax] | `int` | `1` | Maximum n-gram size | -| [config.vocabulary] | `Array` \| `Set` \| `false` | `[]` | Terms mapped to indexes in the model data entries, set to false to store terms directly in the data entries | -| [config.data] | `Object` | `{}` | Key-value store containing all training data | +| Param | Type | Default | Description | +| ------------------- | 
--------------------------- | ------- | ----------------------------------------------------------------------------------------------------------- | +| [config] | `Object` | | | +| [config.nGramMin] | `int` | `1` | Minimum n-gram size | +| [config.nGramMax] | `int` | `1` | Maximum n-gram size | +| [config.vocabulary] | `Array` \| `Set` \| `false` | `[]` | Terms mapped to indexes in the model data entries, set to false to store terms directly in the data entries | +| [config.data] | `Object` | `{}` | Key-value store containing all training data | ### model.nGramMin : `int` + Minimum n-gram size ### model.nGramMax : `int` + Maximum n-gram size ### model.vocabulary : `Vocabulary` \| `false` + Vocabulary instance ### model.data : `Object` + Model data ### model.serialize() ⇒ `Object` + Return the model in its current state an an object literal, including the configured n-gram min/max values, the vocabulary as an array (if any, otherwise false), and an object literal with all the training data diff --git a/docs/prediction.md b/docs/prediction.md index 10b80f8..7776fe8 100644 --- a/docs/prediction.md +++ b/docs/prediction.md @@ -2,16 +2,18 @@ ## Prediction -* [Prediction](#Prediction) - * [.label](#Prediction+label) : `string` - * [.confidence](#Prediction+confidence) : `number` +- [Prediction](#Prediction) + - [.label](#Prediction+label) : `string` + - [.confidence](#Prediction+confidence) : `number` ### prediction.label : `string` + Label of the prediction ### prediction.confidence : `number` + Confidence of the prediction diff --git a/docs/vocabulary.md b/docs/vocabulary.md index c607ca9..dfa317f 100644 --- a/docs/vocabulary.md +++ b/docs/vocabulary.md @@ -2,66 +2,71 @@ ## Vocabulary -* [Vocabulary](#Vocabulary) - * [new Vocabulary(terms)](#new_Vocabulary_new) - * [.size](#Vocabulary+size) : `number` - * [.terms](#Vocabulary+terms) : `Array` \| `Set` - * [.add(terms)](#Vocabulary+add) ⇒ `this` - * [.remove(terms)](#Vocabulary+remove) ⇒ `this` - * [.has(term)](#Vocabulary+has) ⇒ `bool` - * [.indexOf(term)](#Vocabulary+indexOf) ⇒ `number` +- [Vocabulary](#Vocabulary) + - [new Vocabulary(terms)](#new_Vocabulary) + - [.size](#Vocabulary+size) : `number` + - [.terms](#Vocabulary+terms) : `Array` \| `Set` + - [.add(terms)](#Vocabulary+add) ⇒ `this` + - [.remove(terms)](#Vocabulary+remove) ⇒ `this` + - [.has(term)](#Vocabulary+has) ⇒ `bool` + - [.indexOf(term)](#Vocabulary+indexOf) ⇒ `number` - + ### new Vocabulary(terms) -| Param | Type | -| --- | --- | -| terms | `Array` \| `Set` | +| Param | Type | +| ----- | ---------------- | +| terms | `Array` \| `Set` | ### vocabulary.size : `number` + Vocabulary size ### vocabulary.terms : `Array` \| `Set` + Vocabulary terms ### vocabulary.add(terms) ⇒ `this` + Add one or more terms to the vocabulary -| Param | Type | -| --- | --- | -| terms | `string` \| `Array` \| `Set` | +| Param | Type | +| ----- | ---------------------------- | +| terms | `string` \| `Array` \| `Set` | ### vocabulary.remove(terms) ⇒ `this` + Remove one or more terms from the vocabulary -| Param | Type | -| --- | --- | -| terms | `string` \| `Array` \| `Set` | +| Param | Type | +| ----- | ---------------------------- | +| terms | `string` \| `Array` \| `Set` | ### vocabulary.has(term) ⇒ `bool` + Return whether the vocabulary contains a certain term -| Param | Type | -| --- | --- | -| term | `string` | +| Param | Type | +| ----- | -------- | +| term | `string` | ### vocabulary.indexOf(term) ⇒ `number` -Return the index of a term in the vocabulary (returns -1 if not found) -| Param | Type 
| -| --- | --- | -| term | `string` | +Return the index of a term in the vocabulary (returns -1 if not found) +| Param | Type | +| ----- | -------- | +| term | `string` | From cf42a496cad7330da04dec7433d3173232e95308 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Ekeberg?= Date: Sun, 5 Feb 2023 22:34:49 +0100 Subject: [PATCH 14/19] Fix casing in docs filenames --- docs/{classifier.md => Classifier.md} | 0 docs/{model.md => Model.md} | 0 docs/{prediction.md => Prediction.md} | 0 docs/{vocabulary.md => Vocabulary.md} | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename docs/{classifier.md => Classifier.md} (100%) rename docs/{model.md => Model.md} (100%) rename docs/{prediction.md => Prediction.md} (100%) rename docs/{vocabulary.md => Vocabulary.md} (100%) diff --git a/docs/classifier.md b/docs/Classifier.md similarity index 100% rename from docs/classifier.md rename to docs/Classifier.md diff --git a/docs/model.md b/docs/Model.md similarity index 100% rename from docs/model.md rename to docs/Model.md diff --git a/docs/prediction.md b/docs/Prediction.md similarity index 100% rename from docs/prediction.md rename to docs/Prediction.md diff --git a/docs/vocabulary.md b/docs/Vocabulary.md similarity index 100% rename from docs/vocabulary.md rename to docs/Vocabulary.md From 387d718fe3f6f7e5d922cb8f64adfd74ae8fc383 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Ekeberg?= Date: Sun, 5 Feb 2023 22:36:06 +0100 Subject: [PATCH 15/19] Update README --- README.md | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index fec886a..bd02438 100644 --- a/README.md +++ b/README.md @@ -45,16 +45,16 @@ const classifier = new Classifier() ### Training a model ```javascript -let positive = [ - 'This is great, so cool!', - 'Wow, I love it!', - 'It really is amazing', +const positive = [ + 'This is great, so cool!', + 'Wow, I love it!', + 'It really is amazing' ] -let negative = [ - 'This is really bad', - 'I hate it with a passion', - 'Just terrible!', +const negative = [ + 'This is really bad', + 'I hate it with a passion', + 'Just terrible!' 
] classifier.train(positive, 'positive') @@ -64,10 +64,10 @@ classifier.train(negative, 'negative') ### Getting a prediction ```javascript -let predictions = classifier.predict('It sure is pretty great!') +const predictions = classifier.predict('It sure is pretty great!') if (predictions.length) { - predictions.forEach(prediction => { + predictions.forEach((prediction) => { console.log(`${prediction.label} (${prediction.confidence})`) }) } else { @@ -89,12 +89,12 @@ The following configuration options can be passed both directly to a new [Model] #### Options -| Property | Type | Default | Description | -| --- | --- | --- | --- | -| **nGramMin** | `int` | `1` | Minimum n-gram size | -| **nGramMax** | `int` | `1` | Maximum n-gram size | -| **vocabulary** | `Array` \| `Set` \| `false` | `[]` | Terms mapped to indexes in the model data, set to `false` to store terms directly in the data entries | -| **data** | `Object` | `{}` | Key-value store of labels and training data vectors | +| Property | Type | Default | Description | +| -------------- | --------------------------- | ------- | ----------------------------------------------------------------------------------------------------- | +| **nGramMin** | `int` | `1` | Minimum n-gram size | +| **nGramMax** | `int` | `1` | Maximum n-gram size | +| **vocabulary** | `Array` \| `Set` \| `false` | `[]` | Terms mapped to indexes in the model data, set to `false` to store terms directly in the data entries | +| **data** | `Object` | `{}` | Key-value store of labels and training data vectors | ### Using n-grams @@ -112,7 +112,7 @@ const classifier = new Classifier({ nGramMax: 2 }) -let tokens = classifier.tokenize('I really dont like it') +const tokens = classifier.tokenize('I really dont like it') console.log(tokens) ``` @@ -135,7 +135,7 @@ After training a model with large sets of data, you'll want to store all this da To do this, simply use the `serialize` method on your [Model](docs/model.md), and either save the data structure to a file, send it to a server, or store it in any other way you want. 
```javascript -let model = classifier.model +const model = classifier.model console.log(model.serialize()) ``` @@ -173,10 +173,10 @@ Returning: ## Documentation -* [Classifier](docs/classifier.md) -* [Model](docs/model.md) -* [Vocabulary](docs/vocabulary.md) -* [Prediction](docs/prediction.md) +- [Classifier](docs/Classifier.md) +- [Model](docs/Model.md) +- [Vocabulary](docs/Vocabulary.md) +- [Prediction](docs/Prediction.md) ## Contributing From 9f9d34986a0527bc23ba059ba082316f34bfa641 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Ekeberg?= Date: Sun, 5 Feb 2023 23:15:12 +0100 Subject: [PATCH 16/19] Updated package version --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index dff5988..429a9d9 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "ml-classify-text", - "version": "2.0.0", + "version": "2.0.1", "description": "Text classification using n-grams and cosine similarity", "module": "./lib", "main": "./lib", From 5d6cd6cf50d3d862475f8c6f42e16a5ace5c219e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Ekeberg?= Date: Sun, 5 Feb 2023 23:15:57 +0100 Subject: [PATCH 17/19] Fix casing in docs index --- docs/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/README.md b/docs/README.md index 5ec5aab..15dc4d9 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,7 +2,7 @@ Full documentation of all the available classes, properties and methods. -* [Classifier](classifier.md) -* [Model](model.md) -* [Vocabulary](vocabulary.md) -* [Prediction](prediction.md) +- [Classifier](Classifier.md) +- [Model](Model.md) +- [Vocabulary](Vocabulary.md) +- [Prediction](Prediction.md) From dfd60beb7e05f0f39a3f917aef225a13ff5ca5b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=CC=81=20Ekeberg?= Date: Sun, 5 Feb 2023 23:16:30 +0100 Subject: [PATCH 18/19] Update CONTRIBUTING.md --- CONTRIBUTING.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d6db632..6264c53 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,19 +2,19 @@ This document contains basic guidelines to make contributing to this project as easy and transparent as possible, whether it's: -- Reporting a bug -- Discussing the current state of the code -- Submitting a fix -- Proposing new features -- Becoming a maintainer +- Reporting a bug +- Discussing the current state of the code +- Submitting a fix +- Proposing new features +- Becoming a maintainer ## Pull requests are actively welcomed 1. Fork the repo and create your branch from `master`. 2. If you've added code that should be tested, add tests. 3. If you've changed APIs, update the documentation. -5. Make sure your code lints. -6. Issue your pull request. +4. Make sure your code lints. +5. Issue your pull request. ## Any contributions you make will be under the MIT Software License @@ -28,13 +28,13 @@ All bugs are tracked using GitHub issues to track public bugs. Report a bug by [ **Great bug reports** tend to have: -- A quick summary and/or background -- Steps to reproduce - - Be specific! - - Give sample code if you can. - - What you expected would happen - - What actually happens -- Notes (possibly including why you think this might be happening, or stuff you tried that didn't work) +- A quick summary and/or background +- Steps to reproduce + - Be specific! + - Give sample code if you can. 
+ - What you expected would happen
+ - What actually happens
+- Notes (possibly including why you think this might be happening, or stuff you tried that didn't work)
 
 ## License
 
 By contributing, you agree that your contributions will be licensed under its MIT License.

From ce7f82c4d047b7dd786c5541a614e55a9cc5b010 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andre=CC=81=20Ekeberg?=
Date: Sun, 5 Feb 2023 23:17:05 +0100
Subject: [PATCH 19/19] Update CHANGELOG.md

---
 CHANGELOG.md | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e6f3930..52aaa75 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,15 +2,24 @@
 
 All notable changes to this project will be documented in this file.
 
+## [2.0.1] - 2023-02-05
+
+### Changed
+
+- Fixed all instances of improper object literal type checks
+- Fixed bug where terms were added to the model vocabulary when making predictions
+- Migrated tests from Mocha/Chai to Jest
+
 ## [2.0.0] - 2020-08-28
 
 ### Breaking changes
 
-* Removed `minimumConfidence` from `Model`
+- Removed `minimumConfidence` from `Model`
 
 ## [1.0.0] - 2020-08-26
 
 Initial release
 
+[2.0.1]: https://github.com/andreekeberg/ml-classify-text-js/releases/tag/2.0.1
 [2.0.0]: https://github.com/andreekeberg/ml-classify-text-js/releases/tag/2.0.0
 [1.0.0]: https://github.com/andreekeberg/ml-classify-text-js/releases/tag/1.0.0