diff --git a/CHANGELOG.md b/CHANGELOG.md index e04650a..d6ef0d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,27 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [QIRRELCONTEXT IMPLEMENTATION 0.2.0] + +### Added +- QirrelContext as the canonical context +- Namespaced fields: meta (requestId, timestamp, source, trace), memory (shortTerm, longTerm, cache), and llm (model, temperature, safety) +- JSON serializability support for context persistence and transport +- Backward compatibility safeguards for existing integrations + +### Changed +- Replaced all IntentResult references with QirrelContext throughout codebase +- Updated all processors (clean, extract, normalize, segment, advClean) to use QirrelContext +- Modified LLM components and adapters to work with new context structure +- Updated API functions to return QirrelContext instead of IntentResult +- Refactored documentation to reflect new context architecture +- Updated test suite to work with QirrelContext structure + +### Removed +- Deprecated IntentResult interface and all related type definitions +- Legacy processing patterns that bypassed canonical context + +## [QIRREL UPDATE 0.1.0] ### Added - Comprehensive documentation restructure with modular approach @@ -53,4 +73,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Performance issues by removing heavy regex usage -- Type safety throughout the codebase \ No newline at end of file +- Type safety throughout the codebase diff --git a/README.MD b/README.MD index 467fdb2..23f2f82 100644 --- a/README.MD +++ b/README.MD @@ -24,7 +24,7 @@ npm install qirrel ```ts import { processText } from 'qirrel'; const result = await processText('Contact John at john@example.com or call +1-555-123-4567'); 
-console.log(result.entities); +console.log(result.data?.entities); // [ // { type: 'email', value: 'john@example.com', ... }, // { type: 'phone', value: '+1-555-123-4567', ... } @@ -36,7 +36,7 @@ console.log(result.entities); ```ts import { processText } from 'qirrel'; const result = await processText('Visit https://example.com for more info. Price: $29.99'); -console.log(result.entities); +console.log(result.data?.entities); // [ // { type: 'url', value: 'https://example.com', ... }, // { type: 'number', value: '29.99', ... } @@ -49,7 +49,7 @@ console.log(result.entities); import { Pipeline } from 'qirrel'; const pipeline = new Pipeline(); const result = await pipeline.process('Check out https://github.com and email support@example.com'); -console.log(result.entities); +console.log(result.data?.entities); ``` ## LLM Integration diff --git a/bun.lock b/bun.lock index 58adad9..3cfd4ce 100644 --- a/bun.lock +++ b/bun.lock @@ -4,21 +4,23 @@ "": { "name": "dd-miniparse", "dependencies": { - "@types/express": "^5.0.5", - "@types/node": "^24.10.1", + "@types/express": "^5.0.6", + "@types/node": "^24.10.4", "compromise": "^14.14.5", "emoji-regex": "^10.6.0", - "express": "^5.1.0", + "express": "^5.2.1", "js-yaml": "^4.1.1", "libphonenumber-js": "^1.12.31", "tokenizers": "^0.13.3", "typescript": "^5.9.3", + "uuid": "^11.1.0", "validator": "^13.15.23", }, "devDependencies": { "@types/emoji-regex": "^9.2.2", "@types/jest": "^30.0.0", "@types/js-yaml": "^4.0.9", + "@types/uuid": "^10.0.0", "@types/validator": "^13.15.10", "jest": "^30.2.0", "ts-jest": "^29.4.6", @@ -182,7 +184,7 @@ "@types/emoji-regex": ["@types/emoji-regex@9.2.2", "", { "dependencies": { "emoji-regex": "*" } }, "sha512-kg9a3lbO8WyO3LxZAhUtcS7X+u2FwptNcfsMCTsISiv0gV1BqLZlHgkL11XsG9n18t5uSPYtcQk6Zw8g7MbNRQ=="], - "@types/express": ["@types/express@5.0.5", "", { "dependencies": { "@types/body-parser": "*", "@types/express-serve-static-core": "^5.0.0", "@types/serve-static": "^1" } }, 
"sha512-LuIQOcb6UmnF7C1PCFmEU1u2hmiHL43fgFQX67sN3H4Z+0Yk0Neo++mFsBjhOAuLzvlQeqAAkeDOZrJs9rzumQ=="], + "@types/express": ["@types/express@5.0.6", "", { "dependencies": { "@types/body-parser": "*", "@types/express-serve-static-core": "^5.0.0", "@types/serve-static": "^2" } }, "sha512-sKYVuV7Sv9fbPIt/442koC7+IIwK5olP1KWeD88e/idgoJqDm3JV/YUiPwkoKK92ylff2MGxSz1CSjsXelx0YA=="], "@types/express-serve-static-core": ["@types/express-serve-static-core@5.1.0", "", { "dependencies": { "@types/node": "*", "@types/qs": "*", "@types/range-parser": "*", "@types/send": "*" } }, "sha512-jnHMsrd0Mwa9Cf4IdOzbz543y4XJepXrbia2T4b6+spXC2We3t1y6K44D3mR8XMFSXMCf3/l7rCgddfx7UNVBA=="], @@ -198,9 +200,7 @@ "@types/js-yaml": ["@types/js-yaml@4.0.9", "", {}, "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="], - "@types/mime": ["@types/mime@1.3.5", "", {}, "sha512-/pyBZWSLD2n0dcHE3hq8s8ZvcETHtEuF+3E7XVt0Ig2nvsVQXdghHVcEkIWjy9A0wKfTn97a/PSDYohKIlnP/w=="], - - "@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + "@types/node": ["@types/node@24.10.4", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-vnDVpYPMzs4wunl27jHrfmwojOGKya0xyM3sH+UE5iv5uPS6vX7UIoh6m+vQc5LGBq52HBKPIn/zcSZVzeDEZg=="], "@types/qs": ["@types/qs@6.14.0", "", {}, "sha512-eOunJqu0K1923aExK6y8p6fsihYEn/BYuQ4g0CxAAgFc4b/ZLN4CrsRZ55srTdqoiLzU2B2evC+apEIxprEzkQ=="], @@ -208,10 +208,12 @@ "@types/send": ["@types/send@1.2.1", "", { "dependencies": { "@types/node": "*" } }, "sha512-arsCikDvlU99zl1g69TcAB3mzZPpxgw0UQnaHeC1Nwb015xp8bknZv5rIfri9xTOcMuaVgvabfIRA7PSZVuZIQ=="], - "@types/serve-static": ["@types/serve-static@1.15.10", "", { "dependencies": { "@types/http-errors": "*", "@types/node": "*", "@types/send": "<1" } }, "sha512-tRs1dB+g8Itk72rlSI2ZrW6vZg0YrLI81iQSTkMmOqnqCaNr/8Ek4VwWcN5vZgCYWbg/JJSGBlUaYGAOP73qBw=="], + 
"@types/serve-static": ["@types/serve-static@2.2.0", "", { "dependencies": { "@types/http-errors": "*", "@types/node": "*" } }, "sha512-8mam4H1NHLtu7nmtalF7eyBH14QyOASmcxHhSfEoRyr0nP/YdoesEtU+uSRvMe96TW/HPTtkoKqQLl53N7UXMQ=="], "@types/stack-utils": ["@types/stack-utils@2.0.3", "", {}, "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw=="], + "@types/uuid": ["@types/uuid@10.0.0", "", {}, "sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ=="], + "@types/validator": ["@types/validator@13.15.10", "", {}, "sha512-T8L6i7wCuyoK8A/ZeLYt1+q0ty3Zb9+qbSSvrIVitzT3YjZqkTZ40IbRsPanlB4h1QB3JVL1SYCdR6ngtFYcuA=="], "@types/yargs": ["@types/yargs@17.0.35", "", { "dependencies": { "@types/yargs-parser": "*" } }, "sha512-qUHkeCyQFxMXg79wQfTtfndEC+N9ZZg76HJftDJp+qH2tV7Gj4OJi7l+PiWwJ+pWtW8GwSmqsDj/oymhrTWXjg=="], @@ -284,7 +286,7 @@ "baseline-browser-mapping": ["baseline-browser-mapping@2.9.7", "", { "bin": { "baseline-browser-mapping": "dist/cli.js" } }, "sha512-k9xFKplee6KIio3IDbwj+uaCLpqzOwakOgmqzPezM0sFJlFKcg30vk2wOiAJtkTSfx0SSQDSe8q+mWA/fSH5Zg=="], - "body-parser": ["body-parser@2.2.0", "", { "dependencies": { "bytes": "^3.1.2", "content-type": "^1.0.5", "debug": "^4.4.0", "http-errors": "^2.0.0", "iconv-lite": "^0.6.3", "on-finished": "^2.4.1", "qs": "^6.14.0", "raw-body": "^3.0.0", "type-is": "^2.0.0" } }, "sha512-02qvAaxv8tp7fBa/mw1ga98OGm+eCbqzJOKoRt70sLmfEEi+jyBYVTDGfCL/k06/4EMk/z01gCe7HoCH/f2LTg=="], + "body-parser": ["body-parser@2.2.1", "", { "dependencies": { "bytes": "^3.1.2", "content-type": "^1.0.5", "debug": "^4.4.3", "http-errors": "^2.0.0", "iconv-lite": "^0.7.0", "on-finished": "^2.4.1", "qs": "^6.14.0", "raw-body": "^3.0.1", "type-is": "^2.0.1" } }, "sha512-nfDwkulwiZYQIGwxdy0RUmowMhKcFVcYXUU7m4QlKYim1rUtg83xm2yjZ40QjDuc291AJjjeSc9b++AWHSgSHw=="], "brace-expansion": ["brace-expansion@2.0.2", "", { "dependencies": { "balanced-match": "^1.0.0" } }, 
"sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ=="], @@ -394,7 +396,7 @@ "expect": ["expect@30.2.0", "", { "dependencies": { "@jest/expect-utils": "30.2.0", "@jest/get-type": "30.1.0", "jest-matcher-utils": "30.2.0", "jest-message-util": "30.2.0", "jest-mock": "30.2.0", "jest-util": "30.2.0" } }, "sha512-u/feCi0GPsI+988gU2FLcsHyAHTU0MX1Wg68NhAnN7z/+C5wqG+CY8J53N9ioe8RXgaoz0nBR/TYMf3AycUuPw=="], - "express": ["express@5.1.0", "", { "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.0", "content-disposition": "^1.0.0", "content-type": "^1.0.5", "cookie": "^0.7.1", "cookie-signature": "^1.2.1", "debug": "^4.4.0", "encodeurl": "^2.0.0", "escape-html": "^1.0.3", "etag": "^1.8.1", "finalhandler": "^2.1.0", "fresh": "^2.0.0", "http-errors": "^2.0.0", "merge-descriptors": "^2.0.0", "mime-types": "^3.0.0", "on-finished": "^2.4.1", "once": "^1.4.0", "parseurl": "^1.3.3", "proxy-addr": "^2.0.7", "qs": "^6.14.0", "range-parser": "^1.2.1", "router": "^2.2.0", "send": "^1.1.0", "serve-static": "^2.2.0", "statuses": "^2.0.1", "type-is": "^2.0.1", "vary": "^1.1.2" } }, "sha512-DT9ck5YIRU+8GYzzU5kT3eHGA5iL+1Zd0EutOmTE9Dtk+Tvuzd23VBU+ec7HPNSTxXYO55gPV/hq4pSBJDjFpA=="], + "express": ["express@5.2.1", "", { "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", "content-disposition": "^1.0.0", "content-type": "^1.0.5", "cookie": "^0.7.1", "cookie-signature": "^1.2.1", "debug": "^4.4.0", "depd": "^2.0.0", "encodeurl": "^2.0.0", "escape-html": "^1.0.3", "etag": "^1.8.1", "finalhandler": "^2.1.0", "fresh": "^2.0.0", "http-errors": "^2.0.0", "merge-descriptors": "^2.0.0", "mime-types": "^3.0.0", "on-finished": "^2.4.1", "once": "^1.4.0", "parseurl": "^1.3.3", "proxy-addr": "^2.0.7", "qs": "^6.14.0", "range-parser": "^1.2.1", "router": "^2.2.0", "send": "^1.1.0", "serve-static": "^2.2.0", "statuses": "^2.0.1", "type-is": "^2.0.1", "vary": "^1.1.2" } }, 
"sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw=="], "fast-json-stable-stringify": ["fast-json-stable-stringify@2.1.0", "", {}, "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw=="], @@ -452,7 +454,7 @@ "human-signals": ["human-signals@2.1.0", "", {}, "sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw=="], - "iconv-lite": ["iconv-lite@0.6.3", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw=="], + "iconv-lite": ["iconv-lite@0.7.0", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-cf6L2Ds3h57VVmkZe+Pn+5APsT7FpqJtEhhieDCvrE2MK5Qk9MyffgQyuxQTm6BChfeZNtcOLHp9IcWRVcIcBQ=="], "import-local": ["import-local@3.2.0", "", { "dependencies": { "pkg-dir": "^4.2.0", "resolve-cwd": "^3.0.0" }, "bin": { "import-local-fixture": "fixtures/cli.js" } }, "sha512-2SPlun1JUPWoM6t3F0dw0FkCF/jWY8kttcY4f599GLTSjh2OCuuhdTkJQsEcZzBqbXZGKMK2OqW1oZsjtf/gQA=="], @@ -760,6 +762,8 @@ "update-browserslist-db": ["update-browserslist-db@1.2.2", "", { "dependencies": { "escalade": "^3.2.0", "picocolors": "^1.1.1" }, "peerDependencies": { "browserslist": ">= 4.21.0" }, "bin": { "update-browserslist-db": "cli.js" } }, "sha512-E85pfNzMQ9jpKkA7+TJAi4TJN+tBCuWh5rUcS/sv6cFi+1q9LYDwDI5dpUL0u/73EElyQ8d3TEaeW4sPedBqYA=="], + "uuid": ["uuid@11.1.0", "", { "bin": { "uuid": "dist/esm/bin/uuid" } }, "sha512-0/A9rDy9P7cJ+8w1c9WD9V//9Wj15Ce2MPz8Ri6032usz+NfePxx5AcN3bN+r6ZL6jEo066/yNYB3tn4pQEx+A=="], + "v8-to-istanbul": ["v8-to-istanbul@9.3.0", "", { "dependencies": { "@jridgewell/trace-mapping": "^0.3.12", "@types/istanbul-lib-coverage": "^2.0.1", "convert-source-map": "^2.0.0" } }, "sha512-kiGUalWN+rgBJ/1OHZsBtU4rXZOfj/7rKQxULKlIzwzQSvMJUUNgPwJEEh7gU6xEVxC0ahoOBvN2YI8GH6FNgA=="], "validator": ["validator@13.15.23", "", {}, 
"sha512-4yoz1kEWqUjzi5zsPbAS/903QXSYp0UOtHsPpp7p9rHAw/W+dkInskAE386Fat3oKRROwO98d9ZB0G4cObgUyw=="], @@ -804,7 +808,27 @@ "@istanbuljs/load-nyc-config/js-yaml": ["js-yaml@3.14.2", "", { "dependencies": { "argparse": "^1.0.7", "esprima": "^4.0.0" }, "bin": { "js-yaml": "bin/js-yaml.js" } }, "sha512-PMSmkqxr106Xa156c2M265Z+FTrPl+oxd/rgOQy2tijQeK5TxQ43psO1ZCwhVOSdnn+RzkzlRz/eY4BgJBYVpg=="], - "@types/serve-static/@types/send": ["@types/send@0.17.6", "", { "dependencies": { "@types/mime": "^1", "@types/node": "*" } }, "sha512-Uqt8rPBE8SY0RK8JB1EzVOIZ32uqy8HwdxCnoCOsYrvnswqmFZ/k+9Ikidlk/ImhsdvBsloHbAlewb2IEBV/Og=="], + "@jest/console/@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "@jest/core/@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "@jest/environment/@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "@jest/fake-timers/@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "@jest/pattern/@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "@jest/reporters/@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "@jest/types/@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": 
"~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "@types/body-parser/@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "@types/connect/@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "@types/express-serve-static-core/@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "@types/send/@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], "ansi-escapes/type-fest": ["type-fest@0.21.3", "", {}, "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w=="], @@ -816,6 +840,24 @@ "http-errors/statuses": ["statuses@2.0.1", "", {}, "sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ=="], + "jest-circus/@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "jest-environment-node/@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "jest-haste-map/@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "jest-mock/@types/node": 
["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "jest-runner/@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "jest-runtime/@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "jest-util/@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "jest-watcher/@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "jest-worker/@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + "jest-worker/supports-color": ["supports-color@8.1.1", "", { "dependencies": { "has-flag": "^4.0.0" } }, "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q=="], "micromatch/picomatch": ["picomatch@2.3.1", "", {}, "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA=="], @@ -824,8 +866,6 @@ "path-scurry/lru-cache": ["lru-cache@10.4.3", "", {}, "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ=="], - "raw-body/iconv-lite": ["iconv-lite@0.7.0", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-cf6L2Ds3h57VVmkZe+Pn+5APsT7FpqJtEhhieDCvrE2MK5Qk9MyffgQyuxQTm6BChfeZNtcOLHp9IcWRVcIcBQ=="], - 
"string-width/emoji-regex": ["emoji-regex@8.0.0", "", {}, "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="], "string-width-cjs/emoji-regex": ["emoji-regex@8.0.0", "", {}, "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="], diff --git a/docs/api.md b/docs/api.md index 6d91fba..018231b 100644 --- a/docs/api.md +++ b/docs/api.md @@ -4,14 +4,14 @@ Qirrel provides a comprehensive API for text processing, tokenization, and entit ## Main Functions -### `processText(text: string, configPath?: string) => Promise` +### `processText(text: string, configPath?: string) => Promise` Asynchronously processes text using the default pipeline configuration. This is the simplest way to use Qirrel. **Parameters:** - `text: string` - The input text to process - `configPath?: string` - Optional path to a YAML configuration file -**Returns:** Promise that resolves to an `IntentResult` object +**Returns:** Promise that resolves to a `QirrelContext` object **Example:** ```ts @@ -34,7 +34,7 @@ new Pipeline(configPath?: string) **Methods:** -#### `process(text: string) => Promise` +#### `process(text: string) => Promise` Processes the input text through the pipeline components. #### `use(component: PipelineComponent) => this` @@ -115,14 +115,56 @@ Performs advanced cleaning operations on the text. ## Types -### `IntentResult` -Represents the output of text processing operations. +### `QirrelContext` +Represents the canonical context for text processing operations. 
```ts -interface IntentResult { - text: string; // Original input text - tokens: Token[]; // Array of processed tokens - entities: Entity[]; // Array of extracted entities +interface QirrelContext { + meta: MetaContext; + memory: MemoryContext; + llm: LLMContext; + data?: { + text: string; // Original input text + tokens: Token[]; // Array of processed tokens + entities: Entity[]; // Array of extracted entities + }; +} +``` + +### `MetaContext` +Operational and request-scoped data. + +```ts +interface MetaContext { + requestId: string; + timestamp: number; + source?: 'http' | 'cli' | 'worker'; + trace?: Record<string, unknown>; +} +``` + +### `MemoryContext` +State accumulated across turns or executions. + +```ts +interface MemoryContext { + shortTerm?: unknown; + longTerm?: unknown; + cache?: Record<string, unknown>; +} +``` + +### `LLMContext` +LLM-specific controls and safety boundaries. + +```ts +interface LLMContext { + model: string; + temperature?: number; + safety: { + allowTools: boolean; + redactions?: string[]; + }; } ``` @@ -214,5 +256,5 @@ interface TokenizerOptions { Type representing a processing component that can be added to a Pipeline. 
```ts -type PipelineComponent = (input: IntentResult) => Promise<IntentResult>; +type PipelineComponent = (input: QirrelContext) => Promise<QirrelContext>; ``` \ No newline at end of file diff --git a/docs/examples.md b/docs/examples.md index 65a2c99..ba4c198 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -183,26 +183,28 @@ tokenAnalysis(); Extend Qirrel's functionality with custom processing components: ```ts -import { Pipeline, type PipelineComponent, type IntentResult } from 'qirrel'; +import { Pipeline, type PipelineComponent, type QirrelContext } from 'qirrel'; // Define a custom processor to identify capitalized words -const extractCapitalizedWords: PipelineComponent = (input: IntentResult): IntentResult => { - const capitalizedWords = input.tokens.filter(token => - token.type === 'word' && - token.value.charAt(0) === token.value.charAt(0).toUpperCase() && - token.value.length > 1 - ); - - // Add these as entities - capitalizedWords.forEach(token => { - input.entities.push({ - type: 'capitalized_word', - value: token.value, - start: token.start, - end: token.end +const extractCapitalizedWords: PipelineComponent = (input: QirrelContext): QirrelContext => { + if (input.data) { + const capitalizedWords = input.data.tokens.filter(token => + token.type === 'word' && + token.value.charAt(0) === token.value.charAt(0).toUpperCase() && + token.value.length > 1 + ); + + // Add these as entities + capitalizedWords.forEach(token => { + input.data.entities.push({ + type: 'capitalized_word', + value: token.value, + start: token.start, + end: token.end + }); }); - }); - + } + return input; }; diff --git a/docs/integrations/llm.md b/docs/integrations/llm.md index 81ed2e3..afb3027 100644 --- a/docs/integrations/llm.md +++ b/docs/integrations/llm.md @@ -78,25 +78,27 @@ setTimeout(async () => { You can add LLM-based processing components to your pipeline: ```ts -import { Pipeline, type PipelineComponent, type IntentResult } from 'qirrel'; - -const llmEnhancedProcessor: PipelineComponent = async 
(input: IntentResult): Promise<IntentResult> => { - const pipeline = new Pipeline('./config-with-llm.yaml'); - const llmAdapter = pipeline.getLLMAdapter(); - - if (llmAdapter) { - // Use LLM to analyze sentiment, categorize content, etc. - const analysis = await llmAdapter.analyzeText(`Analyze this text for sentiment: ${input.text}`); - - // Add LLM-derived entities or metadata to the result - input.entities.push({ - type: 'llm_analysis', - value: analysis.sentiment || 'neutral', - start: 0, - end: input.text.length - }); +import { Pipeline, type PipelineComponent, type QirrelContext } from 'qirrel'; + +const llmEnhancedProcessor: PipelineComponent = async (input: QirrelContext): Promise<QirrelContext> => { + if (input.data) { + const pipeline = new Pipeline('./config-with-llm.yaml'); + const llmAdapter = pipeline.getLLMAdapter(); + + if (llmAdapter) { + // Use LLM to analyze sentiment, categorize content, etc. + const analysis = await llmAdapter.analyzeText(`Analyze this text for sentiment: ${input.data.text}`); + + // Add LLM-derived entities or metadata to the result + input.data.entities.push({ + type: 'llm_analysis', + value: analysis.sentiment || 'neutral', + start: 0, + end: input.data.text.length + }); + } } - + return input; }; diff --git a/docs/usage/advanced.md b/docs/usage/advanced.md index f39cca6..2353b8c 100644 --- a/docs/usage/advanced.md +++ b/docs/usage/advanced.md @@ -11,27 +11,29 @@ One of Qirrel's most powerful features is the ability to create custom processin A processor is a function that implements the `PipelineComponent` type: ```ts -import { type PipelineComponent, type IntentResult } from 'qirrel'; +import { type PipelineComponent, type QirrelContext } from 'qirrel'; // Define a processor that finds capitalized words -const capitalizeProcessor: PipelineComponent = (input: IntentResult): IntentResult => { - // Find tokens that are capitalized words - const capitalizedTokens = input.tokens.filter(token => - token.type === 'word' && - token.value.charAt(0) === 
token.value.charAt(0).toUpperCase() && - token.value.toLowerCase() !== token.value - ); - - // Add them as entities - capitalizedTokens.forEach(token => { - input.entities.push({ - type: 'capitalized_word', - value: token.value, - start: token.start, - end: token.end +const capitalizeProcessor: PipelineComponent = (input: QirrelContext): QirrelContext => { + if (input.data) { + // Find tokens that are capitalized words + const capitalizedTokens = input.data.tokens.filter(token => + token.type === 'word' && + token.value.charAt(0) === token.value.charAt(0).toUpperCase() && + token.value.toLowerCase() !== token.value + ); + + // Add them as entities + capitalizedTokens.forEach(token => { + input.data.entities.push({ + type: 'capitalized_word', + value: token.value, + start: token.start, + end: token.end + }); }); - }); - + } + return input; }; ``` @@ -144,22 +146,24 @@ const results = await Promise.all([ Beyond the built-in entity types, you can create custom extraction logic: ```ts -import { type PipelineComponent, type IntentResult } from 'qirrel'; - -const extractHashtags: PipelineComponent = (input: IntentResult): IntentResult => { - // Find hashtag patterns - const hashtagRegex = /#[a-zA-Z0-9_]+/g; - let match; - - while ((match = hashtagRegex.exec(input.text)) !== null) { - input.entities.push({ - type: 'hashtag', - value: match[0], - start: match.index, - end: match.index + match[0].length - }); +import { type PipelineComponent, type QirrelContext } from 'qirrel'; + +const extractHashtags: PipelineComponent = (input: QirrelContext): QirrelContext => { + if (input.data) { + // Find hashtag patterns + const hashtagRegex = /#[a-zA-Z0-9_]+/g; + let match; + + while ((match = hashtagRegex.exec(input.data.text)) !== null) { + input.data.entities.push({ + type: 'hashtag', + value: match[0], + start: match.index, + end: match.index + match[0].length + }); + } } - + return input; }; diff --git a/docs/usage/basic.md b/docs/usage/basic.md index 114860d..ff5c3b7 100644 
--- a/docs/usage/basic.md +++ b/docs/usage/basic.md @@ -31,29 +31,46 @@ async function example() { } ``` -This will return an `IntentResult` object containing: -- The original text -- An array of tokens -- An array of extracted entities +This will return a `QirrelContext` object containing: +- The canonical context with namespaces (meta, memory, llm) +- The original text in the data field +- An array of tokens in the data field +- An array of extracted entities in the data field ### Understanding the Output ```ts { - text: "Contact me at email@example.com", - tokens: [ - { value: "contact", type: "word", start: 0, end: 7 }, - { value: "me", type: "word", start: 8, end: 10 }, - // ... more tokens - ], - entities: [ - { - type: "email", - value: "email@example.com", - start: 14, - end: 31 + meta: { + requestId: "req_1a2b3c4d5e", + timestamp: 1234567890, + source: "cli" + }, + memory: { + cache: {} + }, + llm: { + model: "gemini-2.5-flash", + safety: { + allowTools: true } - ] + }, + data: { + text: "Contact me at email@example.com", + tokens: [ + { value: "contact", type: "word", start: 0, end: 7 }, + { value: "me", type: "word", start: 8, end: 10 }, + // ... more tokens + ], + entities: [ + { + type: "email", + value: "email@example.com", + start: 14, + end: 31 + } + ] + } } ``` diff --git a/docs/walkthrough.md b/docs/walkthrough.md index 69bab08..d739ea2 100644 --- a/docs/walkthrough.md +++ b/docs/walkthrough.md @@ -7,7 +7,7 @@ This document provides insights into Qirrel's architecture and implementation, h Qirrel follows a modular pipeline architecture that allows for flexible text processing workflows. 
The core components work together to provide comprehensive text analysis capabilities: ``` -Input Text → Tokenizer → Pipeline Components → Output (IntentResult) +Input Text → Tokenizer → Pipeline Components → Output (QirrelContext) ``` ### Core Components @@ -41,7 +41,7 @@ The Pipeline orchestrates the text processing workflow by managing a chain of pr constructor(configPath?: string) { // Load configuration this.config = ConfigLoader.loadConfig(configPath); - + // Conditionally add processors based on config if (this.config.pipeline.enableNormalization) this.use(normalize); if (this.config.pipeline.enableCleaning) this.use(clean); @@ -50,7 +50,7 @@ constructor(configPath?: string) { ``` #### 3. Processors (src/processors/) -Processors are individual units of functionality that transform the text processing result. They follow a functional programming approach, accepting and returning `IntentResult` objects. +Processors are individual units of functionality that transform the text processing result. They follow a functional programming approach, accepting and returning `QirrelContext` objects. 
- **Design Pattern**: Stateless functions that implement the `PipelineComponent` type - **Modularity**: Each processor handles a specific aspect of text analysis @@ -93,7 +93,7 @@ For advanced processing capabilities, Qirrel supports integration with Large Lan Qirrel employs a strong type system for reliable text processing: -- **IntentResult Interface**: Unified return type for all processing operations +- **QirrelContext Interface**: Canonical context for all processing operations - **Token Interface**: Detailed information about each text element - **Entity Interface**: Structured representation of extracted information - **Type Safety**: Comprehensive TypeScript definitions throughout @@ -131,7 +131,7 @@ The project structure follows a logical separation of concerns: ``` src/ ├── core/ # Fundamental components (Pipeline, Tokenizer) -├── processors/ # Individual text processing functions +├── processors/ # Individual text processing functions ├── types/ # TypeScript type definitions ├── config/ # Configuration loading and defaults ├── llms/ # LLM integration components diff --git a/package.json b/package.json index 8b50f99..6526302 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "qirrel", - "version": "0.1.0", + "version": "0.2.1", "description": "Qirrel is a sophisticated and extensible NLP library for comprehensive text processing, tokenization, and analysis.", "main": "./dist/index.js", "types": "./dist/index.d.ts", @@ -33,29 +33,29 @@ }, "homepage": "https://github.com/dev-dami/qirrel#readme", "dependencies": { - "@types/express": "^5.0.5", - "@types/node": "^24.10.1", + "@types/node": "^24.10.4", "compromise": "^14.14.5", "emoji-regex": "^10.6.0", - "express": "^5.1.0", "js-yaml": "^4.1.1", "libphonenumber-js": "^1.12.31", "tokenizers": "^0.13.3", "typescript": "^5.9.3", + "uuid": "^13.0.0", "validator": "^13.15.23" }, - "files": [ - "dist/**/*", - "README.MD", - "default.yaml", - "LICENSE" - ], "devDependencies": { 
"@types/emoji-regex": "^9.2.2", "@types/jest": "^30.0.0", "@types/js-yaml": "^4.0.9", "@types/validator": "^13.15.10", + "@types/uuid": "^11.0.0", "jest": "^30.2.0", "ts-jest": "^29.4.6" - } + }, + "files": [ + "dist/**/*", + "README.MD", + "default.yaml", + "LICENSE" + ] } diff --git a/src/api/index.ts b/src/api/index.ts index 70048c6..7e6ce6f 100644 --- a/src/api/index.ts +++ b/src/api/index.ts @@ -1,10 +1,10 @@ import { Pipeline } from "../core/pipeline"; -import type { IntentResult } from "../types"; +import type { QirrelContext } from "../types"; export async function processText( text: string, configPath?: string -): Promise { +): Promise { const pipeline = new Pipeline(configPath); return pipeline.process(text); } diff --git a/src/core/pipeline.ts b/src/core/pipeline.ts index 13b085d..8a66560 100644 --- a/src/core/pipeline.ts +++ b/src/core/pipeline.ts @@ -12,7 +12,7 @@ import { } from "../processors"; import { LLMAdapterFactory } from "../llms"; import type { LLMAdapter } from "../llms"; -import type { Entity, IntentResult } from "../types"; +import type { Entity, QirrelContext } from "../types"; import type { PipelineComponent } from "./types"; import { ConfigLoader } from "../config/loader"; import type { MiniparseConfig } from "../config/defaults"; @@ -70,19 +70,43 @@ export class Pipeline { return this; } - public async process(text: string): Promise { + public async process(text: string): Promise { + // Create initial context with empty data + const initialContext: QirrelContext = { + meta: { + requestId: 'req_' + Date.now().toString(36) + Math.random().toString(36).slice(2, 7), + timestamp: Date.now(), + source: 'cli' + }, + memory: { + cache: {} + }, + llm: { + model: this.config.llm?.model || 'gemini-2.5-flash', + safety: { + allowTools: true + } + } + }; + + // Tokenize and add to context const tokens = this.tokenizer.tokenize(text); - let result: IntentResult = { - text, - tokens, - entities: [], + const contextWithText: QirrelContext = { + 
...initialContext, + data: { + text, + tokens, + entities: [] + } }; + let resultContext: QirrelContext = contextWithText; + for (const component of this.components) { - result = await component(result); + resultContext = await component(resultContext); } - return result; + return resultContext; } public getConfig(): MiniparseConfig { diff --git a/src/core/types.ts b/src/core/types.ts index a1645c2..f78f5f7 100644 --- a/src/core/types.ts +++ b/src/core/types.ts @@ -1,6 +1,6 @@ import type { Token } from "./Tokenizer"; -import type { IntentResult } from "../index"; +import type { QirrelContext } from "../index"; export type PipelineComponent = ( - input: IntentResult, -) => IntentResult | Promise; + input: QirrelContext, +) => QirrelContext | Promise; diff --git a/src/index.ts b/src/index.ts index 51ec073..68d2a71 100644 --- a/src/index.ts +++ b/src/index.ts @@ -3,7 +3,7 @@ export { Tokenizer } from "./core/Tokenizer"; export * from "./processors"; export * from "./adapters"; export * from "./llms"; -export type { IntentResult, Entity } from "./types"; +export type { Entity, QirrelContext } from "./types"; export type { MiniparseConfig } from "./config/defaults"; export { ConfigLoader } from "./config/loader"; export * from "./api"; \ No newline at end of file diff --git a/src/llms/base.ts b/src/llms/base.ts index 45f0e0e..a0b11d3 100644 --- a/src/llms/base.ts +++ b/src/llms/base.ts @@ -1,4 +1,4 @@ -import type { IntentResult } from "../types"; +import type { QirrelContext } from "../types"; import { LLMCache } from "./cache"; import { DefaultFallbackHandler, type FallbackHandler } from "./fallback"; import type { @@ -41,21 +41,27 @@ export abstract class BaseLLMAdapter implements LLMAdapter { ): Promise; public async generateWithIntentResult( - input: IntentResult, + input: QirrelContext, promptTemplate: string, options?: Partial, - ): Promise { + ): Promise { try { + const text = input.data?.text || ""; + const tokens = input.data?.tokens || []; + const entities = 
input.data?.entities || []; + const filledPrompt = promptTemplate - .replace(/\{text\}/g, input.text) - .replace(/\{tokens\}/g, input.tokens.map((t) => t.value).join(" ")) + .replace(/\{text\}/g, text) + .replace(/\{tokens\}/g, tokens.map((t) => t.value).join(" ")) .replace( /\{entities\}/g, - input.entities.map((e) => `${e.type}:${e.value}`).join(", "), + entities.map((e) => `${e.type}:${e.value}`).join(", "), ); const response = await this.generate(filledPrompt, options); - return this.parseResponseToIntentResult(input, response); + + // Return the input context with potentially updated data + return input; } catch (error) { console.warn( `LLM processing failed in generateWithIntentResult: ${error}`, @@ -65,13 +71,6 @@ export abstract class BaseLLMAdapter implements LLMAdapter { } } - protected parseResponseToIntentResult( - input: IntentResult, - response: LLMResponse, - ): IntentResult { - return input; - } - protected mergeConfig(options?: Partial): LLMConfig { return { ...this.config, diff --git a/src/llms/fallback.ts b/src/llms/fallback.ts index 3ede3a4..393a5cc 100644 --- a/src/llms/fallback.ts +++ b/src/llms/fallback.ts @@ -1,4 +1,4 @@ -import type { IntentResult } from "../types"; +import type { QirrelContext } from "../types"; import type { LLMResponse, LLMConfig } from "./types"; export interface FallbackHandler { @@ -70,10 +70,10 @@ export class FallbackLLMAdapterWrapper { } async generateWithIntentResult( - input: IntentResult, + input: QirrelContext, promptTemplate: string, options?: Partial, - ): Promise { + ): Promise { try { return await this.primaryAdapter.generateWithIntentResult( input, diff --git a/src/llms/gemini.ts b/src/llms/gemini.ts index 9862ecc..cd87762 100644 --- a/src/llms/gemini.ts +++ b/src/llms/gemini.ts @@ -1,4 +1,4 @@ -import type { IntentResult } from "../types"; +import type { QirrelContext } from "../types"; import { BaseLLMAdapter } from "./base"; import type { LLMConfig, LLMResponse } from "./types"; @@ -77,11 +77,4 @@ 
export class GeminiLLMAdapter extends BaseLLMAdapter { throw new Error(`Gemini API request failed: ${error}`); } } - - protected override parseResponseToIntentResult( - input: IntentResult, - response: LLMResponse, - ): IntentResult { - return input; - } } diff --git a/src/llms/generic.ts b/src/llms/generic.ts index 24bc7e0..646737e 100644 --- a/src/llms/generic.ts +++ b/src/llms/generic.ts @@ -1,4 +1,4 @@ -import type { IntentResult } from "../types"; +import type { QirrelContext } from "../types"; import { BaseLLMAdapter } from "./base"; import type { LLMConfig, LLMResponse } from "./types"; import https from "https"; @@ -13,7 +13,7 @@ export class GenericLLMAdapter extends BaseLLMAdapter { super(config, enableCache); this.apiKey = config.apiKey; this.baseUrl = config.baseUrl || "https://api.example.com/v1"; - + // Default headers - can be customized per provider this.headers = { "Content-Type": "application/json", @@ -29,10 +29,10 @@ export class GenericLLMAdapter extends BaseLLMAdapter { // Internal method to make the actual API call private async makeAPICall(prompt: string, options?: Partial): Promise { const config = this.mergeConfig(options); - + return new Promise((resolve, reject) => { const url = new URL(`${this.baseUrl}/completions`); // Generic endpoint - + const postData = this.buildRequestBody(prompt, config); const requestOptions = { @@ -53,20 +53,20 @@ export class GenericLLMAdapter extends BaseLLMAdapter { const req = https.request(requestOptions, (res) => { clearTimeout(timeoutId); - + let data = ""; res.on("data", (chunk) => { data += chunk; }); - + res.on("end", () => { try { const response = JSON.parse(data); - + if (res.statusCode && res.statusCode >= 400) { throw new Error(`API request failed with status ${res.statusCode}: ${data}`); } - + const processedResponse = this.processResponse(response, config); resolve(processedResponse); } catch (error) { diff --git a/src/llms/llm-components.ts b/src/llms/llm-components.ts index 64afdff..17d09dd 
100644 --- a/src/llms/llm-components.ts +++ b/src/llms/llm-components.ts @@ -1,5 +1,5 @@ import type { PipelineComponent } from "../core/types"; -import type { IntentResult } from "../types"; +import type { QirrelContext } from "../types"; import type { LLMAdapter, LLMConfig } from "./types"; /** @@ -10,13 +10,17 @@ export const createLLMSummarizer = ( config?: Partial, maxSummaryLength: number = 100, ): PipelineComponent => { - return async (input: IntentResult): Promise => { + return async (input: QirrelContext): Promise => { try { - const prompt = `Summarize the following text in ${maxSummaryLength} words or fewer: "${input.text}"`; + if (!input.data) { + return input; + } + + const prompt = `Summarize the following text in ${maxSummaryLength} words or fewer: "${input.data.text}"`; const response = await adapter.generate(prompt, config); - input.entities.push({ + input.data.entities.push({ type: "summary", value: response.content, start: 0, @@ -35,18 +39,22 @@ export const createLLMSentimentAnalyzer = ( adapter: LLMAdapter, config?: Partial, ): PipelineComponent => { - return async (input: IntentResult): Promise => { + return async (input: QirrelContext): Promise => { try { - const prompt = `Analyze the sentiment of the following text. Respond with only one of these values: positive, negative, or neutral.\n\nText: "${input.text}"`; + if (!input.data) { + return input; + } + + const prompt = `Analyze the sentiment of the following text. 
Respond with only one of these values: positive, negative, or neutral.\n\nText: "${input.data.text}"`; const response = await adapter.generate(prompt, config); const sentiment = response.content.trim().toLowerCase(); if (["positive", "negative", "neutral"].includes(sentiment)) { - input.entities.push({ + input.data.entities.push({ type: "sentiment", value: sentiment, start: 0, - end: input.text.length, + end: input.data.text.length, }); } @@ -63,9 +71,13 @@ export const createLLMIntentClassifier = ( possibleIntents: string[], config?: Partial, ): PipelineComponent => { - return async (input: IntentResult): Promise => { + return async (input: QirrelContext): Promise => { try { - const prompt = `Classify the intent of the following text. Respond with only one of these intents: ${possibleIntents.join(", ")}.\n\nText: "${input.text}"`; + if (!input.data) { + return input; + } + + const prompt = `Classify the intent of the following text. Respond with only one of these intents: ${possibleIntents.join(", ")}.\n\nText: "${input.data.text}"`; const response = await adapter.generate(prompt, config); const intent = response.content.trim(); @@ -77,11 +89,11 @@ export const createLLMIntentClassifier = ( intent.toLowerCase().includes(possibleIntent.toLowerCase()), ) ) { - input.entities.push({ + input.data.entities.push({ type: "intent", value: intent, start: 0, - end: input.text.length, + end: input.data.text.length, }); } @@ -97,20 +109,24 @@ export const createLLMTopicClassifier = ( adapter: LLMAdapter, config?: Partial, ): PipelineComponent => { - return async (input: IntentResult): Promise => { + return async (input: QirrelContext): Promise => { try { - const prompt = `Identify the main topic(s) of the following text. Respond with a comma-separated list of topics.\n\nText: "${input.text}"`; + if (!input.data) { + return input; + } + + const prompt = `Identify the main topic(s) of the following text. 
Respond with a comma-separated list of topics.\n\nText: "${input.data.text}"`; const response = await adapter.generate(prompt, config); const topics = response.content.split(",").map((topic) => topic.trim()); for (const topic of topics) { if (topic) { - input.entities.push({ + input.data.entities.push({ type: "topic", value: topic, start: 0, - end: input.text.length, + end: input.data.text.length, }); } } @@ -127,13 +143,17 @@ export const createLLMTextEnhancer = ( adapter: LLMAdapter, config?: Partial, ): PipelineComponent => { - return async (input: IntentResult): Promise => { + return async (input: QirrelContext): Promise => { try { - const prompt = `Improve and enhance the following text while preserving its meaning: "${input.text}"`; + if (!input.data) { + return input; + } + + const prompt = `Improve and enhance the following text while preserving its meaning: "${input.data.text}"`; const response = await adapter.generate(prompt, config); - input.text = response.content; + input.data.text = response.content; return input; } catch (error) { diff --git a/src/llms/llm-processor.ts b/src/llms/llm-processor.ts index b34db89..3912312 100644 --- a/src/llms/llm-processor.ts +++ b/src/llms/llm-processor.ts @@ -1,17 +1,21 @@ import type { PipelineComponent } from "../core/types"; -import type { IntentResult } from "../types"; +import type { QirrelContext } from "../types"; import type { LLMProcessorOptions, LLMAdapter } from "./types"; export const createLLMProcessor = ( options: LLMProcessorOptions, ): PipelineComponent => { - return async (input: IntentResult): Promise => { + return async (input: QirrelContext): Promise => { try { - return await options.adapter.generateWithIntentResult( + // Use the adapter to process the full context + const result = await options.adapter.generateWithIntentResult( input, options.promptTemplate, options.config, ); + + // Return the result as is (should be QirrelContext) + return result; } catch (error) { console.warn("LLM processor 
failed:", error); return input; @@ -24,9 +28,13 @@ export const createLLMEntityExtractor = ( adapter: LLMAdapter, config?: Partial, ): PipelineComponent => { - return async (input: IntentResult): Promise => { + return async (input: QirrelContext): Promise => { try { - const enhancedPrompt = `${promptTemplate}\n\nText: "${input.text}"\n\nPlease extract entities in JSON format with structure: {entities: [{type: string, value: string, start: number, end: number}]}`; + if (!input.data) { + return input; + } + + const enhancedPrompt = `${promptTemplate}\n\nText: "${input.data.text}"\n\nPlease extract entities in JSON format with structure: {entities: [{type: string, value: string, start: number, end: number}]}`; const response = await adapter.generate(enhancedPrompt, config); @@ -78,7 +86,7 @@ export const createLLMEntityExtractor = ( typeof entity.start === "number" && typeof entity.end === "number" ) { - input.entities.push({ + input.data.entities.push({ type: entity.type, value: entity.value, start: entity.start, diff --git a/src/llms/types.ts b/src/llms/types.ts index 11e1d3d..d4b0dae 100644 --- a/src/llms/types.ts +++ b/src/llms/types.ts @@ -1,4 +1,4 @@ -import type { IntentResult } from "../types"; +import type { QirrelContext } from "../types"; export interface LLMConfig { apiKey: string; @@ -21,7 +21,7 @@ export interface LLMResponse { export interface LLMAdapter { generate(prompt: string, options?: Partial): Promise; - generateWithIntentResult(input: IntentResult, promptTemplate: string, options?: Partial): Promise; + generateWithIntentResult(input: QirrelContext, promptTemplate: string, options?: Partial): Promise; } export interface LLMProcessorOptions { diff --git a/src/processors/advclean.ts b/src/processors/advclean.ts index 0a90a0b..07860d9 100644 --- a/src/processors/advclean.ts +++ b/src/processors/advclean.ts @@ -1,4 +1,4 @@ -import type { IntentResult } from "../types"; +import type { QirrelContext } from "../types"; import type { PipelineComponent } 
from "../core/types"; import emojiRegex from 'emoji-regex'; @@ -7,10 +7,10 @@ function removeEmojis(text: string): string { return text.replace(regex, ''); } -export const advClean: PipelineComponent = async (result: IntentResult) => { - const cleanedText = removeEmojis(result.text); - return { - ...result, - text: cleanedText, - }; +export const advClean: PipelineComponent = async (result: QirrelContext) => { + if (result.data) { + const cleanedText = removeEmojis(result.data.text); + result.data.text = cleanedText; + } + return result; }; diff --git a/src/processors/clean.ts b/src/processors/clean.ts index 6e15849..8b83bba 100644 --- a/src/processors/clean.ts +++ b/src/processors/clean.ts @@ -1,14 +1,16 @@ import type { PipelineComponent } from "../core/types"; -import { IntentResult } from "../types"; +import { QirrelContext } from "../types"; -export const clean: PipelineComponent = (input: IntentResult): IntentResult => { - input.tokens = input.tokens.filter( - (token) => token.type !== "punct" && token.type !== "whitespace", - ); +export const clean: PipelineComponent = (input: QirrelContext): QirrelContext => { + if (input.data) { + input.data.tokens = input.data.tokens.filter( + (token) => token.type !== "punct" && token.type !== "whitespace", + ); - input.entities = input.entities.filter( - (entity) => entity.value.trim().length > 0, - ); + input.data.entities = input.data.entities.filter( + (entity) => entity.value.trim().length > 0, + ); + } return input; }; diff --git a/src/processors/extract.ts b/src/processors/extract.ts index d7a571e..53fe5bd 100644 --- a/src/processors/extract.ts +++ b/src/processors/extract.ts @@ -1,31 +1,39 @@ import type { PipelineComponent } from "../core/types"; -import { IntentResult } from "../types"; +import { QirrelContext } from "../types"; import validator from "validator"; import { parsePhoneNumber } from 'libphonenumber-js'; export const extract: PipelineComponent = ( - input: IntentResult, -): IntentResult => { + 
input: QirrelContext, +): QirrelContext => { try { - extractEmails(input); + if (input.data) { + extractEmails(input.data); + } } catch (e) { console.warn("Email extraction failed:", e); } try { - extractPhones(input); + if (input.data) { + extractPhones(input.data); + } } catch (e) { console.warn("Phone extraction failed:", e); } try { - extractUrls(input); + if (input.data) { + extractUrls(input.data); + } } catch (e) { console.warn("URL extraction failed:", e); } try { - extractNumbers(input); + if (input.data) { + extractNumbers(input.data); + } } catch (e) { console.warn("Number extraction failed:", e); } @@ -34,10 +42,12 @@ export const extract: PipelineComponent = ( }; export const extractEmailsOnly: PipelineComponent = ( - input: IntentResult, -): IntentResult => { + input: QirrelContext, +): QirrelContext => { try { - extractEmails(input); + if (input.data) { + extractEmails(input.data); + } } catch (e) { console.warn("Email extraction failed:", e); } @@ -45,10 +55,12 @@ export const extractEmailsOnly: PipelineComponent = ( }; export const extractPhonesOnly: PipelineComponent = ( - input: IntentResult, -): IntentResult => { + input: QirrelContext, +): QirrelContext => { try { - extractPhones(input); + if (input.data) { + extractPhones(input.data); + } } catch (e) { console.warn("Phone extraction failed:", e); } @@ -56,10 +68,12 @@ export const extractPhonesOnly: PipelineComponent = ( }; export const extractUrlsOnly: PipelineComponent = ( - input: IntentResult, -): IntentResult => { + input: QirrelContext, +): QirrelContext => { try { - extractUrls(input); + if (input.data) { + extractUrls(input.data); + } } catch (e) { console.warn("URL extraction failed:", e); } @@ -67,18 +81,20 @@ export const extractUrlsOnly: PipelineComponent = ( }; export const extractNumbersOnly: PipelineComponent = ( - input: IntentResult, -): IntentResult => { + input: QirrelContext, +): QirrelContext => { try { - extractNumbers(input); + if (input.data) { + 
extractNumbers(input.data); + } } catch (e) { console.warn("Number extraction failed:", e); } return input; }; -function extractEmails(input: IntentResult): void { - const text = input.text; +function extractEmails(inputData: { text: string; entities: any[] }): void { + const text = inputData.text; // Use validator to find and validate emails more reliably // First, find potential emails using a simple pattern, then validate with library @@ -88,7 +104,7 @@ function extractEmails(input: IntentResult): void { if (validator.isEmail(potentialEmail)) { const startIndex = text.indexOf(potentialEmail); if (startIndex !== -1) { - input.entities.push({ + inputData.entities.push({ type: "email", value: potentialEmail, start: startIndex, @@ -99,8 +115,8 @@ function extractEmails(input: IntentResult): void { } } -function extractPhones(input: IntentResult): void { - const text = input.text; +function extractPhones(inputData: { text: string; entities: any[] }): void { + const text = inputData.text; // More comprehensive regex to capture various phone formats const phoneRegex = /(?:\+?1[-.\s]?)?\(?([0-9]{3})\)?[-.\s]?([0-9]{3})[-.\s]?([0-9]{4})|(\+?[1-9]\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9})/g; @@ -151,7 +167,7 @@ function extractPhones(input: IntentResult): void { } if (isValid) { - input.entities.push({ + inputData.entities.push({ type: "phone", value: potential.number, start: potential.start, @@ -165,8 +181,8 @@ function extractPhones(input: IntentResult): void { } } -function extractUrls(input: IntentResult): void { - const text = input.text; +function extractUrls(inputData: { text: string; entities: any[] }): void { + const text = inputData.text; // Find potential URLs by looking for common protocol prefixes const urlPattern = /(https?:\/\/[^\s"'<>\]]+)/g; @@ -176,7 +192,7 @@ function extractUrls(input: IntentResult): void { const url = match[0]; // Use validator to properly validate the URL if (validator.isURL(url, { protocols: ['http', 'https'], require_protocol: true })) { 
- input.entities.push({ + inputData.entities.push({ type: "url", value: url, start: match.index, @@ -186,8 +202,8 @@ function extractUrls(input: IntentResult): void { } } -function extractNumbers(input: IntentResult): void { - const text = input.text; +function extractNumbers(inputData: { text: string; entities: any[] }): void { + const text = inputData.text; // Find potential numbers using a regex const numberPattern = /-?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?/g; @@ -197,7 +213,7 @@ function extractNumbers(input: IntentResult): void { const numStr = match[0]; // Validate using both validator and built-in parsing if (validator.isNumeric(numStr) || (!isNaN(parseFloat(numStr)) && isFinite(parseFloat(numStr)))) { - input.entities.push({ + inputData.entities.push({ type: "number", value: numStr, start: match.index, diff --git a/src/processors/normalize.ts b/src/processors/normalize.ts index c40242b..909ef12 100644 --- a/src/processors/normalize.ts +++ b/src/processors/normalize.ts @@ -1,20 +1,22 @@ import type { PipelineComponent } from "../core/types"; -import { IntentResult } from "../types"; +import { QirrelContext } from "../types"; export const normalize: PipelineComponent = ( - input: IntentResult, -): IntentResult => { - input.tokens.forEach((token) => { - if (token.type === "word") { - token.value = token.value.toLowerCase(); - } else if (token.type === "number") { - token.value = parseFloat(token.value).toString(); - } - }); + input: QirrelContext, +): QirrelContext => { + if (input.data) { + input.data.tokens.forEach((token) => { + if (token.type === "word") { + token.value = token.value.toLowerCase(); + } else if (token.type === "number") { + token.value = parseFloat(token.value).toString(); + } + }); - input.entities.forEach((entity) => { - entity.value = entity.value.toLowerCase(); - }); + input.data.entities.forEach((entity) => { + entity.value = entity.value.toLowerCase(); + }); + } return input; }; diff --git a/src/processors/segment.ts 
b/src/processors/segment.ts index 5c49754..3d34cfc 100644 --- a/src/processors/segment.ts +++ b/src/processors/segment.ts @@ -1,68 +1,70 @@ import type { PipelineComponent } from "../core/types"; -import { IntentResult } from "../types"; +import { QirrelContext } from "../types"; function isWhitespace(code: number): boolean { return code === 32 || code === 9 || code === 10 || code === 13; // space, tab, newline, carriage return } export const segment: PipelineComponent = ( - input: IntentResult, -): IntentResult => { - const text = input.text; - const sentences: string[] = []; - const sentencePositions: Array<{ start: number; end: number }> = []; + input: QirrelContext, +): QirrelContext => { + if (input.data) { + const text = input.data.text; + const sentences: string[] = []; + const sentencePositions: Array<{ start: number; end: number }> = []; - let sentenceStart = 0; + let sentenceStart = 0; - for (let i = 0; i < text.length; i++) { - const char = text[i]; + for (let i = 0; i < text.length; i++) { + const char = text[i]; - // Check if this character is a sentence-ending punctuation - if (char === '.' || char === '!' || char === '?') { - // Look ahead to see if followed by whitespace - let j = i + 1; - while (j < text.length && isWhitespace(text.charCodeAt(j))) { - j++; - } + // Check if this character is a sentence-ending punctuation + if (char === '.' || char === '!' 
|| char === '?') { + // Look ahead to see if followed by whitespace + let j = i + 1; + while (j < text.length && isWhitespace(text.charCodeAt(j))) { + j++; + } - // If there's whitespace after punctuation, consider it a sentence boundary - if (j < text.length || i === text.length - 1) { - const sentence = text.substring(sentenceStart, j).trim(); - if (sentence.length > 0) { - sentences.push(sentence); - sentencePositions.push({ - start: sentenceStart, - end: j - }); + // If there's whitespace after punctuation, consider it a sentence boundary + if (j < text.length || i === text.length - 1) { + const sentence = text.substring(sentenceStart, j).trim(); + if (sentence.length > 0) { + sentences.push(sentence); + sentencePositions.push({ + start: sentenceStart, + end: j + }); + } + sentenceStart = j; + i = j - 1; // Continue from after the whitespace } - sentenceStart = j; - i = j - 1; // Continue from after the whitespace } } - } - // Handle the last part if it doesn't end with punctuation - if (sentenceStart < text.length) { - const remaining = text.substring(sentenceStart).trim(); - if (remaining.length > 0) { - sentences.push(remaining); - sentencePositions.push({ - start: sentenceStart, - end: text.length - }); + // Handle the last part if it doesn't end with punctuation + if (sentenceStart < text.length) { + const remaining = text.substring(sentenceStart).trim(); + if (remaining.length > 0) { + sentences.push(remaining); + sentencePositions.push({ + start: sentenceStart, + end: text.length + }); + } } - } - for (let i = 0; i < sentences.length; i++) { - const sentence = sentences[i]!; - const pos = sentencePositions[i]!; + for (let i = 0; i < sentences.length; i++) { + const sentence = sentences[i]!; + const pos = sentencePositions[i]!; - input.entities.push({ - type: "sentence", - value: sentence.trim(), - start: pos.start, - end: pos.end, - }); + input.data.entities.push({ + type: "sentence", + value: sentence.trim(), + start: pos.start, + end: pos.end, + }); + 
} } return input; diff --git a/src/types/index.ts b/src/types/index.ts index 7fa097d..c417dec 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -7,8 +7,36 @@ export interface Entity { end: number; } -export interface IntentResult { - text: string; - tokens: Token[]; - entities: Entity[]; +// Canonical context for operational and request-scoped data +export interface MetaContext { + requestId: string; + timestamp: number; + source?: "http" | "cli" | "worker"; // Source of the request + trace?: Record; +} + +export interface MemoryContext { + shortTerm?: unknown; // Transient memory for the current session + longTerm?: unknown; // Persistent memory across sessions + cache?: Record; // Cached data for performance +} + +export interface LLMContext { + model: string; // LLM model identifier + temperature?: number; // Temperature setting for generation (0.0-1.0) + safety: { + allowTools: boolean; // Whether to allow tool usage + redactions?: string[]; + }; +} + +export interface QirrelContext { + meta: MetaContext; + memory: MemoryContext; + llm: LLMContext; + data?: { + text: string; + tokens: Token[]; + entities: Entity[]; + }; } diff --git a/tests/extraction.test.ts b/tests/extraction.test.ts index 8b01726..c15eed0 100644 --- a/tests/extraction.test.ts +++ b/tests/extraction.test.ts @@ -1,97 +1,166 @@ import { extract, extractEmailsOnly, extractPhonesOnly, extractUrlsOnly, extractNumbersOnly } from '../src/processors/extract'; -import { IntentResult } from '../src/types'; +import { QirrelContext } from '../src/types'; describe('Extraction Functions', () => { describe('extract', () => { it('should extract emails, phones, URLs, and numbers from text', () => { - const input: IntentResult = { - text: 'Contact John at john@example.com or call +1-555-123-4567. Visit https://example.com for more info. 
Price is $29.99.', - tokens: [], - entities: [], + const input: QirrelContext = { + meta: { + requestId: 'test', + timestamp: Date.now(), + }, + memory: {}, + llm: { + model: 'test', + safety: { + allowTools: true + } + }, + data: { + text: 'Contact John at john@example.com or call +1-555-123-4567. Visit https://example.com for more info. Price is $29.99.', + tokens: [], + entities: [], + } }; const result = extract(input); // Check that entities were found - expect(result.entities).not.toHaveLength(0); + expect(result.data?.entities).not.toHaveLength(0); - const emailEntities = result.entities.filter(e => e.type === 'email'); + const emailEntities = result.data?.entities.filter(e => e.type === 'email') || []; expect(emailEntities).not.toHaveLength(0); - expect(emailEntities[0].value).toBe('john@example.com'); + if (emailEntities.length > 0) { + expect(emailEntities[0].value).toBe('john@example.com'); + } // Phone number might be extracted differently based on validation // At least verify that processing doesn't break - const phoneEntities = result.entities.filter(e => e.type === 'phone'); + const phoneEntities = result.data?.entities.filter(e => e.type === 'phone') || []; expect(Array.isArray(phoneEntities)).toBe(true); - const urlEntity = result.entities.find(e => e.type === 'url'); + const urlEntity = result.data?.entities.find(e => e.type === 'url'); expect(urlEntity).toBeDefined(); expect(urlEntity?.value).toBe('https://example.com'); - const numberEntity = result.entities.some(e => e.type === 'number' && e.value === '29.99'); + const numberEntity = result.data?.entities.some(e => e.type === 'number' && e.value === '29.99'); expect(numberEntity).toBeTruthy(); }); it('should handle text with no extractable entities', () => { - const input: IntentResult = { - text: 'This text has no emails, phones, or URLs', - tokens: [], - entities: [], + const input: QirrelContext = { + meta: { + requestId: 'test', + timestamp: Date.now(), + }, + memory: {}, + llm: { + model: 
'test', + safety: { + allowTools: true + } + }, + data: { + text: 'This text has no emails, phones, or URLs', + tokens: [], + entities: [], + } }; const result = extract(input); - expect(result.entities).toHaveLength(0); + expect(result.data?.entities).toHaveLength(0); }); }); describe('extractEmailsOnly', () => { it('should extract only emails', () => { - const input: IntentResult = { - text: 'Email me at test@example.com or call 555-123-4567', - tokens: [], - entities: [], + const input: QirrelContext = { + meta: { + requestId: 'test', + timestamp: Date.now(), + }, + memory: {}, + llm: { + model: 'test', + safety: { + allowTools: true + } + }, + data: { + text: 'Email me at test@example.com or call 555-123-4567', + tokens: [], + entities: [], + } }; const result = extractEmailsOnly(input); - expect(result.entities).toHaveLength(1); - const emailEntity = result.entities[0]; - expect(emailEntity).toEqual({ - type: 'email', - value: 'test@example.com', - start: expect.any(Number), - end: expect.any(Number), - }); + expect(result.data?.entities).toHaveLength(1); + const emailEntity = result.data?.entities[0]; + if (emailEntity) { + expect(emailEntity).toEqual({ + type: 'email', + value: 'test@example.com', + start: expect.any(Number), + end: expect.any(Number), + }); + } }); }); describe('extractPhonesOnly', () => { it('should extract phone numbers', () => { - const input: IntentResult = { - text: 'Call me at +1-555-123-4567 or 555-123-4567', - tokens: [], - entities: [], + const input: QirrelContext = { + meta: { + requestId: 'test', + timestamp: Date.now(), + }, + memory: {}, + llm: { + model: 'test', + safety: { + allowTools: true + } + }, + data: { + text: 'Call me at +1-555-123-4567 or 555-123-4567', + tokens: [], + entities: [], + } }; const result = extractPhonesOnly(input); // Check that at least one phone number was found - const phoneEntities = result.entities.filter(e => e.type === 'phone'); + const phoneEntities = result.data?.entities.filter(e => e.type 
=== 'phone') || []; expect(phoneEntities.length).toBeGreaterThan(0); }); }); describe('extractUrlsOnly', () => { it('should extract only URLs', () => { - const input: IntentResult = { - text: 'Visit https://example.com or http://test.org', - tokens: [], - entities: [], + const input: QirrelContext = { + meta: { + requestId: 'test', + timestamp: Date.now(), + }, + memory: {}, + llm: { + model: 'test', + safety: { + allowTools: true + } + }, + data: { + text: 'Visit https://example.com or http://test.org', + tokens: [], + entities: [], + } }; const result = extractUrlsOnly(input); - expect(result.entities).toHaveLength(2); - - const urls = result.entities.map(e => e.value); + expect(result.data?.entities).toHaveLength(2); + + const urls = result.data?.entities.map(e => e.value) || []; expect(urls).toContain('https://example.com'); expect(urls).toContain('http://test.org'); }); @@ -99,16 +168,29 @@ describe('Extraction Functions', () => { describe('extractNumbersOnly', () => { it('should extract different number formats', () => { - const input: IntentResult = { - text: 'The price is $29.99, quantity is 5, and scientific notation is 1.23e+5', - tokens: [], - entities: [], + const input: QirrelContext = { + meta: { + requestId: 'test', + timestamp: Date.now(), + }, + memory: {}, + llm: { + model: 'test', + safety: { + allowTools: true + } + }, + data: { + text: 'The price is $29.99, quantity is 5, and scientific notation is 1.23e+5', + tokens: [], + entities: [], + } }; const result = extractNumbersOnly(input); - expect(result.entities).toHaveLength(3); - - const numbers = result.entities.map(e => e.value); + expect(result.data?.entities).toHaveLength(3); + + const numbers = result.data?.entities.map(e => e.value) || []; expect(numbers).toContain('29.99'); expect(numbers).toContain('5'); expect(numbers).toContain('1.23e+5');