Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: improve the matching algorithm when there are competing fixing choices
- Loading branch information
Showing
12 changed files
with
353 additions
and
68 deletions.
There are no files selected for viewing
2 changes: 1 addition & 1 deletion
2
packages/string-fix-broken-named-entities/coverage/coverage-summary.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
{"total":{"lines":{"total":230,"covered":230,"skipped":0,"pct":100},"statements":{"total":241,"covered":241,"skipped":0,"pct":100},"functions":{"total":26,"covered":26,"skipped":0,"pct":100},"branches":{"total":323,"covered":323,"skipped":0,"pct":100}}} | ||
{"total":{"lines":{"total":246,"covered":246,"skipped":0,"pct":100},"statements":{"total":260,"covered":260,"skipped":0,"pct":100},"functions":{"total":29,"covered":29,"skipped":0,"pct":100},"branches":{"total":335,"covered":334,"skipped":0,"pct":99.7}}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2 changes: 1 addition & 1 deletion
2
packages/string-fix-broken-named-entities/dist/string-fix-broken-named-entities.mjs
Large diffs are not rendered by default.
Oops, something went wrong.
2 changes: 1 addition & 1 deletion
2
packages/string-fix-broken-named-entities/dist/string-fix-broken-named-entities.umd.js
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
{"_quickTake.js":{"title":"Quick Take","content":"import { strict as assert } from \"assert\";\nimport { fixEnt } from \"string-fix-broken-named-entities\";\nimport { rApply } from \"ranges-apply\";\n\nconst source = \"&nsp;x&nsp;y&nsp;\";\n\n// returns Ranges notation, see codsen.com/ranges/\nassert.deepEqual(fixEnt(source), [\n [0, 5, \" \"],\n [6, 11, \" \"],\n [12, 17, \" \"],\n]);\n\n// render result from ranges using \"ranges-apply\":\nassert.equal(rApply(source, fixEnt(source)), \" x y \");"},"sift-raw-ampersands-from-entities.js":{"title":"Sift raw ampersands in a string from broken character references","content":"// encode those raw ampersands and fix broken character references\n\nimport { strict as assert } from \"assert\";\nimport { fixEnt } from \"string-fix-broken-named-entities\";\nimport { rApply } from \"ranges-apply\";\n\nconst source = \"&&nsp;&&nsp;&\";\n\nconst finalRanges = [];\nconst indexesOfRawAmpersands = [];\n\n// fixEnt() returns Ranges (see codsen.com/ranges/)\nconst resultRanges = fixEnt(source, {\n textAmpersandCatcherCb: (idx) => indexesOfRawAmpersands.push(idx),\n});\n\n// check the ranges - all broken NBSP's were fixed:\nassert.deepEqual(resultRanges, [\n [1, 6, \" \"],\n [7, 12, \" \"],\n]);\n\n// don't apply the ranges yet, dump them into the \"finalRanges\" array\n// it's because applying them onto a string,\n// rApply(source, resultRanges);\n// will mess up the index positions, we'll need to calculate again.\n// The whole point of Ranges is they're COMPOSABLE.\n\nresultRanges.forEach((range) => {\n finalRanges.push(range);\n});\n\n// check what's been gathered so far:\nassert.deepEqual(resultRanges, [\n [1, 6, \" \"],\n [7, 12, \" \"],\n]);\n\n// check the positions of reported raw ampersands:\nassert.deepEqual(indexesOfRawAmpersands, [0, 6, 12]);\n\n// replace each character at these positions: 0, 7 and 14\n// with string \"&\" - in terms of Ranges, it's a matter\n// of building a Ranges array:\nconst replacementRanges = 
indexesOfRawAmpersands.map((idx) => [\n idx,\n idx + 1,\n \"&\",\n]);\n// this is Ranges notation, array of arrays: [from index, to index, what-to-replace]\nassert.deepEqual(replacementRanges, [\n [0, 1, \"&\"], // we're saying, replace indexes from 0 to 1 with &\n [6, 7, \"&\"],\n [12, 13, \"&\"],\n]);\n\n// push them into resultRanges as well:\nreplacementRanges.forEach((range) => {\n resultRanges.push(range);\n});\n\n// check what's been gathered so far:\nassert.deepEqual(resultRanges, [\n [1, 6, \" \"],\n [7, 12, \" \"],\n [0, 1, \"&\"],\n [6, 7, \"&\"],\n [12, 13, \"&\"],\n]);\n\n// apply Ranges onto a string - all amendments at once!\nconst finalResultStr = rApply(source, resultRanges);\n\n// check result\nassert.equal(finalResultStr, \"& & &\");\n\n// Voilà! We fixed broken entities and encoded raw ampersands"}} | ||
{"_quickTake.js":{"title":"Quick Take","content":"import { strict as assert } from \"assert\";\nimport { fixEnt } from \"string-fix-broken-named-entities\";\nimport { rApply } from \"ranges-apply\";\n\nconst source = \"&nsp;x&nsp;y&nsp;\";\n\n// returns Ranges notation, see codsen.com/ranges/\nassert.deepEqual(fixEnt(source), [\n [0, 5, \" \"],\n [6, 11, \" \"],\n [12, 17, \" \"],\n]);\n\n// render result from ranges using \"ranges-apply\":\nassert.equal(rApply(source, fixEnt(source)), \" x y \");"},"sift-raw-ampersands-from-entities.js":{"title":"Sift raw ampersands in a string from broken character references","content":"// encode those raw ampersands and fix broken character references\n\nimport { strict as assert } from \"assert\";\nimport { fixEnt } from \"string-fix-broken-named-entities\";\nimport { rApply } from \"ranges-apply\";\n\nconst source = \"&&nsp;&&nsp;&\";\n\nconst finalRanges = [];\nconst indexesOfRawAmpersands = [];\n\n// fixEnt() returns Ranges (see codsen.com/ranges/)\nconst resultRanges = fixEnt(source, {\n textAmpersandCatcherCb: (idx) => indexesOfRawAmpersands.push(idx),\n});\n\n// check the ranges - all broken NBSP's were fixed:\nassert.deepEqual(resultRanges, [\n [1, 6, \" \"],\n [7, 12, \" \"],\n]);\n\n// don't apply the ranges yet, dump them into the \"finalRanges\" array\n// it's because applying them onto a string,\n// rApply(source, resultRanges);\n// will mess up the index positions, we'll need to calculate again.\n// The whole point of Ranges is they're COMPOSABLE.\n\nresultRanges.forEach((range) => {\n finalRanges.push(range);\n});\n\n// check the positions of reported raw ampersands:\nassert.deepEqual(indexesOfRawAmpersands, [0, 6, 12]);\n\n// replace each character at these positions: 0, 6 and 12\n// with string \"&\" - in terms of Ranges, it's a matter\n// of building a Ranges array:\nconst replacementRanges = indexesOfRawAmpersands.map((idx) => [\n idx,\n idx + 1,\n \"&\",\n]);\n// this is Ranges notation, array of arrays: 
[from index, to index, what-to-replace]\nassert.deepEqual(replacementRanges, [\n [0, 1, \"&\"], // we're saying, replace indexes from 0 to 1 with &\n [6, 7, \"&\"],\n [12, 13, \"&\"],\n]);\n\n// push them into resultRanges as well:\nreplacementRanges.forEach((range) => {\n resultRanges.push(range);\n});\n\n// check what's been gathered so far:\nassert.deepEqual(resultRanges, [\n [1, 6, \" \"],\n [7, 12, \" \"],\n [0, 1, \"&\"],\n [6, 7, \"&\"],\n [12, 13, \"&\"],\n]);\n\n// apply Ranges onto a string - all amendments at once!\nconst finalResultStr = rApply(source, resultRanges);\n\n// check result\nassert.equal(finalResultStr, \"& & &\");\n\n// Voilà! We fixed broken entities and encoded raw ampersands"}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -124,4 +124,4 @@ | |
"tslib": "^2.1.0", | ||
"typescript": "^4.2.3" | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.