Skip to content

Commit 3b15f92

Browse files
authored
feat(sanitize): add sanitizeHtml, sanitizeJson, and sanitizeUrl utilities (#22)
1 parent 6c47629 commit 3b15f92

File tree

11 files changed

+477
-0
lines changed

11 files changed

+477
-0
lines changed

.changeset/little-brooms-sing.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@bnidev/js-utils": minor
3+
---
4+
5+
feat(sanitize): add `sanitizeJson` utility for safe JSON parsing

.changeset/twenty-signs-cheat.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@bnidev/js-utils": minor
3+
---
4+
5+
feat(sanitize): add `sanitizeHtml` utility for rich-text HTML cleaning

.changeset/two-apes-warn.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@bnidev/js-utils": minor
3+
---
4+
5+
feat(sanitize): add `sanitizeUrl` utility with protocol allowlist

src/index.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@
1313
* @categoryDescription Object
1414
* Utility functions for working with objects, including deep cloning, merging, and property manipulation.
1515
*
16+
* @categoryDescription Sanitize
17+
* Utility functions for sanitizing and validating data, such as JSON, HTML, and URLs, to prevent security issues like XSS and injection attacks.
18+
*
1619
* @categoryDescription String
1720
* Utility functions for string manipulation, including formatting, parsing, and validation.
1821
*
@@ -36,6 +39,8 @@ export * from './math'
3639

3740
export * from './object'
3841

42+
export * from './sanitize'
43+
3944
export * from './string'
4045

4146
export * from './timing'
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import { describe, expect, it } from 'vitest'
2+
import { sanitizeHtml } from '../sanitizeHtml'
3+
4+
describe('sanitizeHtml', () => {
  /**
   * One sanitisation scenario: the markup handed to `sanitizeHtml`, the exact
   * string it must produce, and (optionally) a custom tag/attribute allowlist.
   * Leaving `tags` / `attributes` undefined exercises the function's defaults.
   */
  type Scenario = {
    title: string
    html: string
    expected: string
    tags?: string[]
    attributes?: Record<string, string[]>
  }

  const scenarios: Scenario[] = [
    {
      title: 'preserves allowed tags and content',
      html: '<p>Hello <strong>World</strong></p>',
      expected: '<p>Hello <strong>World</strong></p>'
    },
    {
      title: 'removes disallowed tags but preserves inner content',
      html: '<script>alert("XSS")</script><p>Safe</p>',
      expected: 'alert("XSS")<p>Safe</p>'
    },
    {
      title: 'removes disallowed attributes',
      html: '<p style="color: red;" onclick="alert(1)">Hello</p>',
      expected: '<p>Hello</p>'
    },
    {
      title: 'preserves allowed attributes on allowed tags',
      html: '<a href="https://example.com" target="_blank">Link</a>',
      expected: '<a href="https://example.com" target="_blank">Link</a>'
    },
    {
      title: 'removes dangerous javascript: hrefs',
      html: '<a href="javascript:alert(1)">Click me</a>',
      expected: '<a>Click me</a>'
    },
    {
      title: 'removes dangerous data: URIs',
      html: '<a href="data:text/html;base64,...">Click me</a>',
      expected: '<a>Click me</a>'
    },
    {
      title: 'allows custom tags and attributes if provided',
      html: '<custom-el data-id="123">Test</custom-el>',
      expected: '<custom-el data-id="123">Test</custom-el>',
      tags: ['custom-el'],
      attributes: { 'custom-el': ['data-id'] }
    },
    {
      title: 'unwraps unknown custom elements if not allowed',
      html: '<my-tag><b>Bold</b></my-tag>',
      expected: '<b>Bold</b>'
    },
    {
      title: 'is case-insensitive for tag and attribute matching',
      html: '<A HREF="https://example.com" TARGET="_blank">Link</A>',
      expected: '<a href="https://example.com" target="_blank">Link</a>'
    },
    {
      title: 'unwraps nested disallowed tags correctly',
      html: '<div><span><script>alert(1)</script></span></div>',
      expected: 'alert(1)'
    }
  ]

  // Register one test per scenario, in declaration order.
  for (const { title, html, expected, tags, attributes } of scenarios) {
    it(title, () => {
      const cleaned = sanitizeHtml(html, tags, attributes)
      expect(cleaned).toBe(expected)
    })
  }
})
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import { describe, expect, it } from 'vitest'
2+
import { sanitizeJson } from '../sanitizeJson'
3+
4+
type Person = { name: string }

/**
 * Type guard used by the validation tests: accepts any non-null object that
 * exposes a `name` property whose value is a string.
 */
function isPerson(obj: unknown): obj is Person {
  // Primitives and null can never be a Person.
  if (obj === null || typeof obj !== 'object') {
    return false
  }

  if (!('name' in obj)) {
    return false
  }

  const candidate = obj as Record<string, unknown>
  return typeof candidate.name === 'string'
}
14+
15+
describe('sanitizeJson', () => {
  it('parses valid JSON without validation', () => {
    const input = '{"foo": "bar"}'
    const result = sanitizeJson(input)

    expect(result.success).toBe(true)
    expect(result.value).toEqual({ foo: 'bar' })
    expect(result.error).toBeUndefined()
  })

  it('returns error on invalid JSON', () => {
    // Missing closing brace.
    const input = '{"foo": "bar"'
    const result = sanitizeJson(input)

    expect(result.success).toBe(false)
    expect(result.value).toBeNull()
    expect(result.error).toBeInstanceOf(Error)
    expect(typeof result.error?.message).toBe('string')
  })

  it('validates object when validator is provided and passes', () => {
    const input = '{"name": "Alice"}'
    const result = sanitizeJson<Person>(input, isPerson)

    expect(result.success).toBe(true)
    expect(result.value).toEqual({ name: 'Alice' })
  })

  it('returns error when validation fails', () => {
    // `name` is a number, so the isPerson guard rejects the parsed value.
    const input = '{"name": 42}'
    const result = sanitizeJson<Person>(input, isPerson)

    expect(result.success).toBe(false)
    expect(result.value).toBeNull()
    expect(result.error?.message).toBe('Validation failed')
  })

  it('returns unknown error for non-Error throw', () => {
    // Replace the global JSON.parse with a stub that throws a non-Error value.
    const originalParse = JSON.parse
    JSON.parse = () => {
      throw 'non-error string'
    }

    try {
      const result = sanitizeJson('{"test": true}')
      expect(result.success).toBe(false)
      expect(result.error).toBeInstanceOf(Error)
      expect(result.error?.message).toBe('Unknown JSON parsing error')
    } finally {
      // Always restore the real parser: a failed expectation above would
      // otherwise leave JSON.parse broken for every later test.
      JSON.parse = originalParse
    }
  })
})
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import { describe, expect, it } from 'vitest'
2+
import { sanitizeUrl } from '../sanitizeUrl'
3+
4+
describe('sanitizeUrl', () => {
  it('returns success and normalized URL for valid http URL', () => {
    const result = sanitizeUrl('http://example.com')
    expect(result.success).toBe(true)
    // The normalized form carries a trailing slash.
    expect(result.value).toBe('http://example.com/')
    expect(result.error).toBeUndefined()
  })

  it('rejects disallowed protocol by default', () => {
    const result = sanitizeUrl('mailto:user@example.com')
    expect(result.success).toBe(false)
    expect(result.value).toBeNull()
    expect(result.error).toBeInstanceOf(Error)
    expect(result.error?.message).toMatch(/Disallowed protocol/)
  })

  it('accepts custom allowed protocols', () => {
    const result = sanitizeUrl('mailto:user@example.com', {
      allowedProtocols: ['mailto:', 'http:']
    })
    expect(result.success).toBe(true)
    expect(result.value).toBe('mailto:user@example.com')
    expect(result.error).toBeUndefined()
  })

  it('returns original input when normalize option is false', () => {
    const result = sanitizeUrl('http://example.com', { normalize: false })
    expect(result.success).toBe(true)
    // No trailing slash: the input string is returned untouched.
    expect(result.value).toBe('http://example.com')
  })

  it('returns error for invalid URL string', () => {
    const result = sanitizeUrl('not-a-url')
    expect(result.success).toBe(false)
    expect(result.value).toBeNull()
    expect(result.error).toBeInstanceOf(Error)
    expect(result.error?.message).toMatch(/Invalid URL/)
  })

  it('handles non-Error exceptions gracefully', () => {
    // Swap the global URL constructor for one that throws a non-Error value.
    const OriginalURL = globalThis.URL

    globalThis.URL = class {
      constructor() {
        throw 'some string error'
      }
    } as unknown as typeof URL

    try {
      const result = sanitizeUrl('http://example.com')

      expect(result.success).toBe(false)
      expect(result.value).toBeNull()
      expect(result.error).toBeInstanceOf(Error)
      expect(result.error?.message).toBe('Invalid URL')
    } finally {
      // Always restore the real constructor: a failed expectation above would
      // otherwise leave globalThis.URL replaced for every later test.
      globalThis.URL = OriginalURL
    }
  })
})

src/sanitize/index.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// Export all modules from the 'sanitize' directory
2+
3+
export * from './sanitizeHtml'
4+
export * from './sanitizeJson'
5+
export * from './sanitizeUrl'

src/sanitize/sanitizeHtml.ts

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/**
 * Sanitizes a string of HTML, preserving only safe tags and attributes for rich text rendering.
 *
 * This function removes any tags and attributes that are not explicitly allowed, helping to prevent
 * XSS (cross-site scripting) attacks and unwanted formatting. It is designed primarily for use with
 * rich text editors, comments, or other user-generated content where a limited set of semantic HTML
 * is acceptable.
 *
 * By default, only common formatting tags (`<p>`, `<strong>`, `<ul>`, etc.) are preserved. Layout
 * and styling tags like `<div>` and `<span>` are excluded by design to keep the output clean and focused.
 *
 * Disallowed elements are unwrapped rather than deleted: the element itself is removed but its
 * child nodes (including the text of a `<script>`) stay in place and are sanitized in turn.
 *
 * @param dirtyHtml - The input HTML string to sanitize.
 * @param allowedTags - An array of tag names (lowercase) to allow. Defaults to safe formatting tags.
 * @param allowedAttributes - A map of tag names to allowed attributes. Keys and attribute names should be lowercase.
 *
 * @returns The sanitized HTML string.
 *
 * @remarks
 * This function relies on the global `DOMParser`, so it must run in a browser or another
 * environment that provides a DOM implementation.
 *
 * URL-bearing attributes (`href`, `src`, `xlink:href`, `action`, `formaction`) are dropped when
 * their value resolves to a `javascript:`, `data:` or `vbscript:` scheme. Whitespace and control
 * characters are ignored during that check, because URL parsers discard them, so values such as
 * `java\tscript:alert(1)` are rejected as well.
 *
 * @category Sanitize
 *
 * @example Imports
 * ```ts
 * // ES Module
 * import { sanitizeHtml } from '@bnidev/js-utils'
 *
 * // CommonJS
 * const { sanitizeHtml } = require('@bnidev/js-utils')
 * ```
 *
 * @example Usage
 * ```ts
 * sanitizeHtml('<p onclick="alert()">Hi <strong>there</strong></p>')
 * // → '<p>Hi <strong>there</strong></p>'
 * ```
 */
export function sanitizeHtml(
  dirtyHtml: string,
  allowedTags: string[] = [
    'b',
    'i',
    'em',
    'strong',
    'a',
    'ul',
    'ol',
    'li',
    'p',
    'br'
  ],
  allowedAttributes: Record<string, string[]> = { a: ['href', 'target', 'rel'] }
): string {
  const doc = new DOMParser().parseFromString(dirtyHtml, 'text/html')

  const permittedTags = new Set(allowedTags)
  const urlAttributes = new Set([
    'href',
    'src',
    'xlink:href',
    'action',
    'formaction'
  ])
  const dangerousSchemes = ['javascript:', 'data:', 'vbscript:']

  // True when `value` would resolve to a script-capable scheme.
  const hasDangerousScheme = (value: string): boolean => {
    // Drop every character at or below U+0020 before comparing: URL parsers
    // strip tabs/newlines anywhere in the value and leading control characters,
    // so "java\tscript:" and "\u0001javascript:" still execute in a browser.
    let compact = ''
    for (const ch of value) {
      if (ch.charCodeAt(0) > 0x20) {
        compact += ch
      }
    }
    compact = compact.toLowerCase()
    return dangerousSchemes.some((scheme) => compact.startsWith(scheme))
  }

  const sanitizeNode = (node: Element): void => {
    const tag = node.tagName.toLowerCase()

    if (!permittedTags.has(tag)) {
      // Unwrap the disallowed tag: its children take its place in the tree.
      node.replaceWith(...Array.from(node.childNodes))
      return
    }

    // Own-property lookup only, so a tag named like an Object.prototype member
    // (e.g. "constructor") cannot pick up an inherited, non-array value.
    const ownEntry = Object.prototype.hasOwnProperty.call(
      allowedAttributes,
      tag
    )
      ? allowedAttributes[tag]
      : undefined
    const permittedAttrs = ownEntry ?? []

    // Iterate over a copy: removing attributes mutates the live collection.
    for (const attr of Array.from(node.attributes)) {
      const attrName = attr.name.toLowerCase()
      const isSafe =
        permittedAttrs.includes(attrName) &&
        !(urlAttributes.has(attrName) && hasDangerousScheme(attr.value))

      if (!isSafe) {
        node.removeAttribute(attr.name)
      }
    }
  }

  // Snapshot every element up front; nodes moved by an unwrap are still
  // part of this list and therefore still get sanitized.
  for (const el of Array.from(doc.body.querySelectorAll('*'))) {
    sanitizeNode(el)
  }

  return doc.body.innerHTML
}

0 commit comments

Comments
 (0)