diff --git a/.changeset/petite-parts-look.md b/.changeset/petite-parts-look.md new file mode 100644 index 0000000..7fadcda --- /dev/null +++ b/.changeset/petite-parts-look.md @@ -0,0 +1,5 @@ +--- +"@bnidev/js-utils": minor +--- + +feat(string): `stripHtmlTags` now supports an optional `maxLength` parameter to mitigate regex denial-of-service (ReDoS) risks diff --git a/src/string/__tests__/stripHtmlTags.test.ts b/src/string/__tests__/stripHtmlTags.test.ts index ae57fa3..5459e4e 100644 --- a/src/string/__tests__/stripHtmlTags.test.ts +++ b/src/string/__tests__/stripHtmlTags.test.ts @@ -35,4 +35,20 @@ describe('stripHtmlTags', () => { 'This is bold and italic text.' ) }) + + it('throws if input exceeds default maxLength of 1000', () => { + const longInput = `
${'x'.repeat(1001)}
` + expect(() => stripHtmlTags(longInput)).toThrow('Input too long') + }) + + it('does not throw if input is exactly at default maxLength', () => { + const validInput = `${'x'.repeat(993)}
` // Total 1000 chars + expect(() => stripHtmlTags(validInput)).not.toThrow() + }) + + it('respects custom maxLength argument', () => { + const input = `${'a'.repeat(5000)}` + expect(() => stripHtmlTags(input, 6000)).not.toThrow() + expect(() => stripHtmlTags(input, 1000)).toThrow('Input too long') + }) }) diff --git a/src/string/stripHtmlTags.ts b/src/string/stripHtmlTags.ts index d959a1a..d69a321 100644 --- a/src/string/stripHtmlTags.ts +++ b/src/string/stripHtmlTags.ts @@ -1,8 +1,15 @@ /** * Removes all HTML tags from a string, returning plain text. * - * @param html - The input string containing HTML. - * @returns The string without HTML tags. + * Applies the tag-stripping regular expression in a loop to handle nested or malformed tags safely. To mitigate potential performance risks from ambiguous regular expressions (e.g. catastrophic backtracking), the function enforces a maximum input length. + * + * @param html - The input string that may contain HTML. + * @param maxLength - Maximum allowed input length. Defaults to 1000 characters. + * Throws an error if the input exceeds this limit. + * + * @returns The plain text string with all HTML tags removed. + * + * @throws If the input exceeds the maximum allowed length. * * @category String * @@ -17,11 +24,23 @@ * * @example Usage * ```ts - * stripHtml('Hello World
') // → 'Hello World' + * stripHtmlTags('Hello World
/**
 * Removes all HTML tags from a string, returning plain text.
 *
 * The tag pattern is applied repeatedly until the string stops changing, so a
 * tag that only appears after an inner tag has been removed
 * (e.g. `<<b>script>`) is stripped as well. A `<` that is never closed by a
 * `>` is not a match and is left in the output.
 *
 * Because one pass runs per nesting level, the input length is capped to bound
 * the total work.
 *
 * @param html - The input string that may contain HTML.
 * @param maxLength - Maximum allowed input length (`html.length`, i.e. UTF-16
 * code units). Defaults to 1000. Pass `Infinity` to disable the limit.
 *
 * @returns The input with every `<...>` sequence removed, or `''` when the
 * input is empty.
 *
 * @throws TypeError if `maxLength` is `NaN` and the input is non-empty.
 * @throws Error if the input is longer than `maxLength`.
 *
 * @category String
 *
 * @example Usage
 * ```ts
 * stripHtmlTags('<p>Hello <strong>World</strong></p>')
 * // → 'Hello World'
 * ```
 */
export function stripHtmlTags(html: string, maxLength = 1000): string {
  if (!html) return ''

  // Every comparison against NaN is false, so a NaN limit would silently
  // switch the length guard off. Reject it instead of failing open.
  if (Number.isNaN(maxLength)) {
    throw new TypeError('maxLength must be a number')
  }

  if (html.length > maxLength) {
    throw new Error(`Input too long (max ${maxLength} characters)`)
  }

  // Built once and reused: String.prototype.replace resets lastIndex for
  // global patterns, so sharing the instance across passes is safe.
  const tagPattern = /<[^<>]*>/g

  let previous: string
  let current = html

  // Repeat until a pass removes nothing (fixed point).
  do {
    previous = current
    current = current.replace(tagPattern, '')
  } while (current !== previous)

  return current
}