diff --git a/src/helpers/serializer.test.ts b/src/helpers/serializer.test.ts index 94f38f38..847f90e5 100644 --- a/src/helpers/serializer.test.ts +++ b/src/helpers/serializer.test.ts @@ -1,10 +1,29 @@ import { getSchema } from '@tiptap/core' import { RichTextKit } from '../extensions/rich-text/rich-text-kit' +import { createSuggestionExtension } from '../factories/create-suggestion-extension' -import { extractTagsFromParseRules } from './serializer' +import { buildSuggestionSchemaPartialRegex, extractTagsFromParseRules } from './serializer' describe('Helper: Serializer', () => { + describe('#buildSuggestionSchemaPartialRegex', () => { + test('returns `null` when there are no suggestion nodes in the schema', () => { + expect(buildSuggestionSchemaPartialRegex(getSchema([RichTextKit]))).toBeNull() + }) + + test('returns a partial regular expression including valid URL schemas', () => { + expect( + buildSuggestionSchemaPartialRegex( + getSchema([ + RichTextKit, + createSuggestionExtension('mention'), + createSuggestionExtension('channel'), + ]), + ), + ).toBe('(?:mention|channel)://') + }) + }) + describe('#extractTagsFromParseRules', () => { test('returns an array of all tags from the given parse rules', () => { expect( diff --git a/src/helpers/serializer.ts b/src/helpers/serializer.ts index 42eb085c..dad66eb2 100644 --- a/src/helpers/serializer.ts +++ b/src/helpers/serializer.ts @@ -1,4 +1,29 @@ -import type { ParseRule } from 'prosemirror-model' +import { kebabCase } from 'lodash-es' + +import type { ParseRule, Schema } from 'prosemirror-model' + +/** + * Builds a partial regular expression that includes valid URL schemas used by all the available + * suggestion nodes from the given editor schema. + * + * @param schema The editor schema to be used for suggestion nodes detection. + * + * @returns A partial regular expression with valid URL schemas for the available suggestion nodes, + * `null` if there are no suggestion nodes in the editor schema. + */ +function buildSuggestionSchemaPartialRegex(schema: Schema) { + const suggestionNodes = Object.values(schema.nodes).filter((node) => + node.name.endsWith('Suggestion'), + ) + + if (suggestionNodes.length === 0) { + return null + } + + return `(?:${suggestionNodes + .map((suggestionNode) => kebabCase(suggestionNode.name.replace(/Suggestion$/, ''))) + .join('|')})://` +} /** * Extract all tags from the given parse rules argument, and returns an array of said tags. @@ -19,4 +44,4 @@ function extractTagsFromParseRules( .map((rule) => rule.tag as keyof HTMLElementTagNameMap) } -export { extractTagsFromParseRules } +export { buildSuggestionSchemaPartialRegex, extractTagsFromParseRules } diff --git a/src/serializers/html/extensions/disabled.ts b/src/serializers/html/extensions/disabled.ts new file mode 100644 index 00000000..1f93aded --- /dev/null +++ b/src/serializers/html/extensions/disabled.ts @@ -0,0 +1,160 @@ +import { marked } from 'marked' + +import { buildSuggestionSchemaPartialRegex } from '../../../helpers/serializer' +import { INITIAL_MARKED_OPTIONS } from '../html' + +import type { Schema } from 'prosemirror-model' + +/** + * A version of `marked.TokenizerObject` that allows to return an `undefined` tokenizer. + */ +type MarkedTokenizerObjectAsUndefined = Partial< + Omit, 'constructor' | 'options'> +> + +/** + * A Marked extension which disables multiple parsing rules by disabling the rules respective + * tokenizers based on the availability of marks and/or nodes in the editor schema. + * + * @param schema The editor schema to be used for nodes and marks detection. + */ +function disabled(schema: Schema) { + const markedTokenizer = new marked.Tokenizer(INITIAL_MARKED_OPTIONS) + + const tokenizer: marked.TokenizerObject = {} + + if (!schema.nodes.blockquote) { + Object.assign(tokenizer, { + blockquote() { + return undefined + }, + }) + } + + if (!schema.marks.bold || !schema.marks.italic) { + Object.assign(tokenizer, { + emStrong() { + return undefined + }, + }) + } + + // Given that there isn't a one to one mapping between the bullet/ordered list nodes and Marked + // tokenizers, we need to conditionally disable the `list` tokenizer based on the input + if (!schema.nodes.bulletList || !schema.nodes.orderedList) { + Object.assign(tokenizer, { + list(src) { + const isOrdered = /^\d+/.test(src) + + if ( + (isOrdered && schema.nodes.orderedList) || + (!isOrdered && schema.nodes.bulletList) + ) { + return markedTokenizer.list.apply(this, [src]) + } + + return undefined + }, + }) + } + + if (!schema.marks.code) { + Object.assign(tokenizer, { + codespan() { + return undefined + }, + }) + } + + if (!schema.nodes.codeBlock) { + Object.assign(tokenizer, { + code() { + return undefined + }, + fences() { + return undefined + }, + }) + } + + if (!schema.nodes.hardBreak) { + Object.assign(tokenizer, { + br() { + return undefined + }, + }) + } + + if (!schema.nodes.heading) { + Object.assign(tokenizer, { + heading() { + return undefined + }, + }) + } + + if (!schema.nodes.horizontalRule) { + Object.assign(tokenizer, { + hr() { + return undefined + }, + }) + } + + if (!schema.marks.link) { + Object.assign(tokenizer, { + url() { + return undefined + }, + }) + } + + // Given that there isn't a one to one mapping between the link/image mark/node and Marked + // tokenizers, nor Marked supports our custom Markdown syntax for suggestions, we need to + // conditionally disable the `link` tokenizer based on the input + if (!schema.marks.link || !schema.nodes.image) { + const suggestionSchemaPartialRegex = buildSuggestionSchemaPartialRegex(schema) + const suggestionSchemaRegex = suggestionSchemaPartialRegex + ? new RegExp(`^\\[[^\\]]*\\]\\(${suggestionSchemaPartialRegex}`) + : null + + Object.assign(tokenizer, { + link(src) { + const isImage = /^!\[[^\]]*\]\([^)]+\)/.test(src) + const isSuggestion = suggestionSchemaRegex?.test(src) + + if ( + (isImage && schema.nodes.image) || + (!isImage && schema.marks.link) || + isSuggestion + ) { + return markedTokenizer.link.apply(this, [src]) + } + + return undefined + }, + }) + } + + if (!schema.marks.strike) { + Object.assign(tokenizer, { + del() { + return undefined + }, + }) + } + + if (!schema.nodes.table) { + Object.assign(tokenizer, { + table() { + return undefined + }, + }) + } + + return { + tokenizer, + } +} + +export { disabled } diff --git a/src/serializers/html/extensions/link.ts b/src/serializers/html/extensions/link.ts index 9aa15007..44e54d00 100644 --- a/src/serializers/html/extensions/link.ts +++ b/src/serializers/html/extensions/link.ts @@ -1,27 +1,19 @@ -import { kebabCase } from 'lodash-es' import { marked } from 'marked' -import type { NodeType } from 'prosemirror-model' - const markedRenderer = new marked.Renderer() /** * A Marked extension which tweaks the `link` renderer to add support for suggestion nodes, while * preserving the original renderer for standard links. * - * @param suggestionNodes An array of the suggestion nodes to serialize. + * @param suggestionSchemaRegex A regular expression with valid URL schemas for the available + * suggestion nodes. */ -function link(suggestionNodes: NodeType[]): marked.MarkedExtension { - const linkSchemaRegex = new RegExp( - `^(?:${suggestionNodes - .map((suggestionNode) => kebabCase(suggestionNode.name.replace(/Suggestion$/, ''))) - .join('|')})://`, - ) - +function link(suggestionSchemaRegex: RegExp): marked.MarkedExtension { return { renderer: { link(href, title, text) { - if (href && linkSchemaRegex.test(href)) { + if (href && suggestionSchemaRegex?.test(href)) { const [, schema, id] = /^([a-z-]+):\/\/(\S+)$/i.exec(href) || [] if (schema && id && text) { diff --git a/src/serializers/html/html.test.ts b/src/serializers/html/html.test.ts index 0238a92d..49f76cbe 100644 --- a/src/serializers/html/html.test.ts +++ b/src/serializers/html/html.test.ts @@ -60,7 +60,7 @@ const MARKDOWN_INPUT_BLOCKQUOTES = `> Dorothy followed her through many of the b > - Revenue was off the chart. > - Profits were higher than ever. > -> *Everything* is going according to **plan**.` +> _Everything_ is going according to **plan**.` const MARKDOWN_INPUT_ORDERED_LISTS = `1. First item 2. Second item @@ -222,6 +222,25 @@ const MARKDOWN_INPUT_STYLED_LINKS = `I love supporting the **[EFF](https://eff.o This is the *[Markdown Guide](https://www.markdownguide.org)*. See the section on [\`code\`](#code).` +const MARKDOWN_INPUT_TABLES = `| Syntax | Description | +| ----------- | ----------- | +| Header | Title | +| Paragraph | Text | + +--- + +| Syntax | Description | +| --- | ----------- | +| Header | Title | +| Paragraph | Text | + +--- + +| Syntax | Description | Test Text | +| :--- | :----: | ---: | +| Header | Title | Here's this | +| Paragraph | Text | And more |` + describe('HTML Serializer', () => { describe('Plain-text Document', () => { describe('with default extensions', () => { @@ -263,7 +282,7 @@ describe('HTML Serializer', () => { test('blockquotes syntax is preserved', () => { expect(htmlSerializer.serialize(MARKDOWN_INPUT_BLOCKQUOTES)).toBe( - '

> Dorothy followed her through many of the beautiful rooms in her castle.

> Dorothy followed her through many of the beautiful rooms in her castle.

>

> The Witch bade her clean the pots and kettles and sweep the floor and keep the fire fed with wood.

> Dorothy followed her through many of the beautiful rooms in her castle.

>

> > The Witch bade her clean the pots and kettles and sweep the floor and keep the fire fed with wood.

> #### The quarterly results look great!

>

> - Revenue was off the chart.

> - Profits were higher than ever.

>

> *Everything* is going according to **plan**.

', + '

> Dorothy followed her through many of the beautiful rooms in her castle.

> Dorothy followed her through many of the beautiful rooms in her castle.

>

> The Witch bade her clean the pots and kettles and sweep the floor and keep the fire fed with wood.

> Dorothy followed her through many of the beautiful rooms in her castle.

>

> > The Witch bade her clean the pots and kettles and sweep the floor and keep the fire fed with wood.

> #### The quarterly results look great!

>

> - Revenue was off the chart.

> - Profits were higher than ever.

>

> _Everything_ is going according to **plan**.

', ) }) @@ -309,7 +328,7 @@ describe('HTML Serializer', () => { ) }) - test('line rules syntax is preserved', () => { + test('horizontal rules syntax is preserved', () => { expect(htmlSerializer.serialize(MARKDOWN_INPUT_LINE_RULES)).toBe( '

***

_________________

---

', ) @@ -413,12 +432,6 @@ Answer: [Doist Frontend](channel://190200)`), ) }) - test('task lists syntax is preserved (unsupported by default)', () => { - expect(htmlSerializer.serialize(MARKDOWN_INPUT_TASK_LISTS)).toBe( - '
  • [ ] First item
  • [x] Second item
  • [x] Third item
  • [ ] Fourth item

  • [x] First item
  • [ ] Second item
  • [ ] Third item
  • [x] Fourth item

  • [x] First item
  • [ ] Second item
  • [x] Third item
  • [ ] Fourth item

  • First item
  • Second item
  • Third item
    • [ ] Indented item
    • [ ] Indented item
  • Fourth item

  • [ ] 1968. A great year!
  • [x] I think 1969 was second best.

  • [ ] This is the first list item.
  • [ ] Here's the second list item.
    I need to add another paragraph below the second list item.
  • [ ] And here's the third list item.
', - ) - }) - test('images HTML output is correct', () => { expect(htmlSerializer.serialize(MARKDOWN_INPUT_IMAGES)).toBe( 'Octobi Wan CatnobiOctobi Wan Catnobi

Octobi Wan Catnobi

Octobi Wan Catnobi:

Octobi Wan Catnobi: - These are not the droids you\'re looking for!

- These are not the droids you\'re looking for!

', @@ -449,7 +462,7 @@ Answer: [Doist Frontend](channel://190200)`), </html>`) }) - test('line rules HTML output is correct', () => { + test('horizontal rules HTML output is correct', () => { expect(htmlSerializer.serialize(MARKDOWN_INPUT_LINE_RULES)).toBe('


') }) @@ -466,77 +479,176 @@ Answer: [Doist Frontend](channel://190200)`), }) }) - describe('without `heading` extension', () => { - test("HTML output doesn't have heading elements", () => { - const customSerializer = createHTMLSerializer( + describe('without non-essential extensions', () => { + let htmlSerializer: HTMLSerializerReturnType + + beforeEach(() => { + htmlSerializer = createHTMLSerializer( getSchema([ + // Although we can disable pretty much any extension in the `RichTextKit`, + // only non-essential nodes/marks extensions are disabled (these are the + // only ones that have an effect on the serializer output) RichTextKit.configure({ + blockquote: false, + bold: false, + bulletList: false, + code: false, + codeBlock: false, + hardBreak: false, heading: false, + horizontalRule: false, + image: false, + italic: false, + link: false, + orderedList: false, + strike: false, }), ]), ) + }) - expect(customSerializer.serialize(MARKDOWN_INPUT_HEADINGS)).toBe( + test('headings HTML output is preserved', () => { + expect(htmlSerializer.serialize(MARKDOWN_INPUT_HEADINGS)).toBe( '

# Heading level 1

## Heading level 2

### Heading level 3

#### Heading level 4

##### Heading level 5

###### Heading level 6

', ) }) - }) - describe('without `strike` extension', () => { - test("HTML output doesn't have `del` elements", () => { - const customSerializer = createHTMLSerializer( - getSchema([ - RichTextKit.configure({ - strike: false, - }), - ]), + test('paragraphs HTML output is preserved', () => { + expect(htmlSerializer.serialize(MARKDOWN_INPUT_PARAGRAPHS)).toBe( + "

I really like using Markdown.

I think I'll use it to format all of my documents from now on.

", ) + }) - expect(customSerializer.serialize(MARKDOWN_INPUT_STYLED_TEXT)).toBe( - "

I just love bold text.
I just love bold text.

Italicized text is the cat's meow.
Italicized text is the cat's meow.

This text is really important.
This text is really important.
This text is really important.
This text is really important.
This is really very important text.

Strikethrough uses two tildes: ~~scratch this~~

", + test('line breaks HTML output is preserved', () => { + expect(htmlSerializer.serialize(MARKDOWN_INPUT_LINE_BREAKS)).toBe( + `

This is the first line. +And this is the second line.

`, ) }) - }) - describe('without `codeblock` extension', () => { - test('code block HTML output is correct', () => { - const customSerializer = createHTMLSerializer( - getSchema([ - RichTextKit.configure({ - codeBlock: false, - }), - ]), + test('styled text HTML output is preserved', () => { + expect(htmlSerializer.serialize(MARKDOWN_INPUT_STYLED_TEXT)) + .toBe(`

I just love **bold text**. +I just love __bold text__.

Italicized text is the *cat's meow*. +Italicized text is the _cat's meow_.

This text is ***really important***. +This text is ___really important___. +This text is __*really important*__. +This text is **_really important_**. +This is really ***very*** important text.

Strikethrough uses two tildes: ~~scratch this~~

`) + }) + + test('blockquotes HTML output is preserved', () => { + expect(htmlSerializer.serialize(MARKDOWN_INPUT_BLOCKQUOTES)).toBe( + '

> Dorothy followed her through many of the beautiful rooms in her castle.

> Dorothy followed her through many of the beautiful rooms in her castle.

>

> The Witch bade her clean the pots and kettles and sweep the floor and keep the fire fed with wood.

> Dorothy followed her through many of the beautiful rooms in her castle.

>

> > The Witch bade her clean the pots and kettles and sweep the floor and keep the fire fed with wood.

> #### The quarterly results look great!

>

> - Revenue was off the chart.

> - Profits were higher than ever.

>

> _Everything_ is going according to **plan**.

', ) + }) - expect(customSerializer.serialize(MARKDOWN_INPUT_CODE_BLOCK)) - .toBe(`
<html>
+            test('ordered lists HTML output is preserved', () => {
+                expect(htmlSerializer.serialize(MARKDOWN_INPUT_ORDERED_LISTS))
+                    .toBe(`

1. First item +2. Second item +3. Third item +4. Fourth item

---

1. First item

1. Second item

1. Third item

1. Fourth item

---

1. First item +8. Second item +3. Third item +5. Fourth item

---

1. First item +2. Second item +3. Third item + 1. Indented item + 2. Indented item +4. Fourth item

`) + }) + + test('unordered lists HTML output is preserved', () => { + expect(htmlSerializer.serialize(MARKDOWN_INPUT_UNORDERED_LISTS)) + .toBe(`

- First item

- Second item

- Third item

- Fourth item

---

* First item

* Second item

* Third item

* Fourth item

---

+ First item

+ Second item

+ Third item

+ Fourth item

---

- First item

- Second item

- Third item + - Indented item + - Indented item

- Fourth item

---

- 1968. A great year!

- I think 1969 was second best.

---

* This is the first list item.

* Here's the second list item. + I need to add another paragraph below the second list item.

* And here's the third list item.

`) + }) + + test('images HTML output is preserved', () => { + expect(htmlSerializer.serialize(MARKDOWN_INPUT_IMAGES)).toBe( + '

![Octobi Wan Catnobi](https://octodex.github.com/images/octobiwan.jpg)

![](https://octodex.github.com/images/octobiwan.jpg)![](https://octodex.github.com/images/octobiwan.jpg)

![Octobi Wan Catnobi](https://octodex.github.com/images/octobiwan.jpg "Octobi Wan Catnobi")

[![Octobi Wan Catnobi](https://octodex.github.com/images/octobiwan.jpg "Octobi Wan Catnobi")](https://octodex.github.com/octobiwan/)

Octobi Wan Catnobi: ![](https://octodex.github.com/images/octobiwan.jpg)

Octobi Wan Catnobi: ![](https://octodex.github.com/images/octobiwan.jpg) - These are not the droids you\'re looking for!

![](https://octodex.github.com/images/octobiwan.jpg) - These are not the droids you\'re looking for!

', + ) + }) + + test('code HTML output is preserved', () => { + expect(htmlSerializer.serialize(MARKDOWN_INPUT_CODE)).toBe( + '

At the command prompt, type `nano`.

``Use `code` in your Markdown file.``

', + ) + }) + + test('code block HTML output is preserved', () => { + expect(htmlSerializer.serialize(MARKDOWN_INPUT_CODE_BLOCK)).toBe( + `

\`\`\`

<html> <head> <title>Test</title> </head> </html> -
`) +\`\`\``, + ) + }) + + test('block elements HTML output is preserved', () => { + expect(htmlSerializer.serialize(MARKDOWN_INPUT_INDENTED_BLOCK_ELEMENTS)) + .toBe(`

1. Blockquote: + > Dorothy followed her through many of the beautiful rooms in her castle. +2. Image: + ![Octobi Wan Catnobi](https://octodex.github.com/images/octobiwan.jpg) +3. Codeblock: + \`\`\` + <html> + <head> + <title>Test</title> + </head> + </html> + \`\`\`

`) + }) + + test('horizontal rules HTML output is preserved', () => { + expect(htmlSerializer.serialize(MARKDOWN_INPUT_LINE_RULES)).toBe( + '

***

_________________

---

', + ) + }) + + test('links HTML output is preserved', () => { + expect(htmlSerializer.serialize(MARKDOWN_INPUT_LINKS)) + .toBe(`

My favorite search engine is [Duck Duck Go](https://duckduckgo.com). +My favorite search engine is [Duck Duck Go](https://duckduckgo.com "The best search engine for privacy").

`) + }) + + test('styled links HTML output is preserved', () => { + expect(htmlSerializer.serialize(MARKDOWN_INPUT_STYLED_LINKS)) + .toBe(`

I love supporting the **[EFF](https://eff.org)**. +This is the *[Markdown Guide](https://www.markdownguide.org)*. +See the section on [\`code\`](#code).

`) }) }) - describe('without `image` extension', () => { - test('images HTML output is correct', () => { - const customSerializer = createHTMLSerializer( - getSchema([ - RichTextKit.configure({ - image: false, - }), - ]), + describe('without support for certain extensions', () => { + let htmlSerializer: HTMLSerializerReturnType + + beforeEach(() => { + htmlSerializer = createHTMLSerializer(getSchema([RichTextKit])) + }) + + test('task lists syntax is preserved', () => { + expect(htmlSerializer.serialize(MARKDOWN_INPUT_TASK_LISTS)).toBe( + '
  • [ ] First item
  • [x] Second item
  • [x] Third item
  • [ ] Fourth item

  • [x] First item
  • [ ] Second item
  • [ ] Third item
  • [x] Fourth item

  • [x] First item
  • [ ] Second item
  • [x] Third item
  • [ ] Fourth item

  • First item
  • Second item
  • Third item
    • [ ] Indented item
    • [ ] Indented item
  • Fourth item

  • [ ] 1968. A great year!
  • [x] I think 1969 was second best.

  • [ ] This is the first list item.
  • [ ] Here's the second list item.
    I need to add another paragraph below the second list item.
  • [ ] And here's the third list item.
', ) + }) - expect(customSerializer.serialize(MARKDOWN_INPUT_IMAGES)).toBe( - 'Octobi Wan CatnobiOctobi Wan Catnobi

Octobi Wan Catnobi

Octobi Wan Catnobi:

Octobi Wan Catnobi: - These are not the droids you\'re looking for!

- These are not the droids you\'re looking for!

', + test('tables syntax is preserved', () => { + expect(htmlSerializer.serialize(MARKDOWN_INPUT_TABLES)).toBe( + "

| Syntax | Description |
| ----------- | ----------- |
| Header | Title |
| Paragraph | Text |


| Syntax | Description |
| --- | ----------- |
| Header | Title |
| Paragraph | Text |


| Syntax | Description | Test Text |
| :--- | :----: | ---: |
| Header | Title | Here's this |
| Paragraph | Text | And more |

", ) }) }) - describe('with `image` extension (inline)', () => { + describe('with `image` extension (inline node rendering)', () => { test('images HTML output is correct', () => { - const customSerializer = createHTMLSerializer( + const htmlSerializer = createHTMLSerializer( getSchema([ RichTextKit.configure({ image: { @@ -546,19 +658,19 @@ Answer: [Doist Frontend](channel://190200)`), ]), ) - expect(customSerializer.serialize(MARKDOWN_INPUT_IMAGES)).toBe( + expect(htmlSerializer.serialize(MARKDOWN_INPUT_IMAGES)).toBe( '

Octobi Wan Catnobi

Octobi Wan Catnobi

Octobi Wan Catnobi

Octobi Wan Catnobi:

Octobi Wan Catnobi: - These are not the droids you\'re looking for!

- These are not the droids you\'re looking for!

', ) }) }) - describe('with custom `taskList` extension', () => { + describe('with official `taskList`/`taskItem` extensions', () => { test('task lists HTML output is correct', () => { - const customSerializer = createHTMLSerializer( + const htmlSerializer = createHTMLSerializer( getSchema([RichTextKit, TaskList, TaskItem]), ) - expect(customSerializer.serialize(MARKDOWN_INPUT_TASK_LISTS)).toBe( + expect(htmlSerializer.serialize(MARKDOWN_INPUT_TASK_LISTS)).toBe( '
  • First item
  • Second item
  • Third item
  • Fourth item

  • First item
  • Second item
  • Third item
  • Fourth item

  • First item
  • Second item
  • Third item
  • Fourth item

  • First item
  • Second item
  • Third item
    • Indented item
    • Indented item
  • Fourth item

  • 1968. A great year!
  • I think 1969 was second best.

  • This is the first list item.
  • Here's the second list item.
    I need to add another paragraph below the second list item.
  • And here's the third list item.
', ) }) @@ -566,42 +678,82 @@ Answer: [Doist Frontend](channel://190200)`), describe('with custom `*Suggestion` extensions', () => { test('suggestion extensions support alphanumeric IDs', () => { - const customSerializer = createHTMLSerializer( + const htmlSerializer = createHTMLSerializer( getSchema([RichTextKit, createSuggestionExtension('mention')]), ) expect( - customSerializer.serialize(`Question: Who's the head of the Frontend team? + htmlSerializer.serialize(`Question: Who's the head of the Frontend team? Answer: [Henning M](mention://user:190200@doist.dev)`), ).toBe( '

Question: Who\'s the head of the Frontend team?
Answer:

', ) }) - test('mention suggestions HTML output is correct', () => { - const customSerializer = createHTMLSerializer( - getSchema([RichTextKit, createSuggestionExtension('mention')]), - ) + describe('with the `Link` extension enabled', () => { + let htmlSerializer: HTMLSerializerReturnType - expect( - customSerializer.serialize(`Question: Who's the head of the Frontend team? -Answer: [Henning M](mention://963827)`), - ).toBe( - '

Question: Who\'s the head of the Frontend team?
Answer:

', - ) - }) + beforeEach(() => { + htmlSerializer = createHTMLSerializer( + getSchema([ + RichTextKit, + createSuggestionExtension('mention'), + createSuggestionExtension('channel'), + ]), + ) + }) - test('channel suggestions HTML output is correct', () => { - const customSerializer = createHTMLSerializer( - getSchema([RichTextKit, createSuggestionExtension('channel')]), - ) - - expect( - customSerializer.serialize(`Question: What's the best channel on Twist? + test('mention suggestions HTML output is correct', () => { + expect( + htmlSerializer.serialize(`Question: Who's the head of the Frontend team? +Answer: [Henning M](mention://963827)`), + ).toBe( + '

Question: Who\'s the head of the Frontend team?
Answer:

', + ) + }) + + test('channel suggestions HTML output is correct', () => { + expect( + htmlSerializer.serialize(`Question: What's the best channel on Twist? Answer: [Doist Frontend](channel://190200)`), - ).toBe( - '

Question: What\'s the best channel on Twist?
Answer:

', - ) + ).toBe( + '

Question: What\'s the best channel on Twist?
Answer:

', + ) + }) + }) + + describe('with the `Link` extension disabled', () => { + let htmlSerializer: HTMLSerializerReturnType + + beforeEach(() => { + htmlSerializer = createHTMLSerializer( + getSchema([ + RichTextKit.configure({ + link: false, + }), + createSuggestionExtension('mention'), + createSuggestionExtension('channel'), + ]), + ) + }) + + test('mention suggestions HTML output is correct', () => { + expect( + htmlSerializer.serialize(`Question: Who's the head of the Frontend team? +Answer: [Henning M](mention://963827)`), + ).toBe( + '

Question: Who\'s the head of the Frontend team?
Answer:

', + ) + }) + + test('channel suggestions HTML output is correct', () => { + expect( + htmlSerializer.serialize(`Question: What's the best channel on Twist? +Answer: [Doist Frontend](channel://190200)`), + ).toBe( + '

Question: What\'s the best channel on Twist?
Answer:

', + ) + }) }) }) }) diff --git a/src/serializers/html/html.ts b/src/serializers/html/html.ts index d1e03741..17f48c32 100644 --- a/src/serializers/html/html.ts +++ b/src/serializers/html/html.ts @@ -3,9 +3,11 @@ import { marked } from 'marked' import { REGEX_LINE_BREAKS } from '../../constants/regular-expressions' import { isPlainTextDocument } from '../../helpers/schema' +import { buildSuggestionSchemaPartialRegex } from '../../helpers/serializer' import { checkbox } from './extensions/checkbox' import { code } from './extensions/code' +import { disabled } from './extensions/disabled' import { html } from './extensions/html' import { link } from './extensions/link' import { paragraph } from './extensions/paragraph' @@ -33,6 +35,7 @@ type HTMLSerializerReturnType = { * @see https://marked.js.org/using_advanced#options */ const INITIAL_MARKED_OPTIONS: marked.MarkedOptions = { + ...marked.getDefaults(), breaks: true, gfm: true, headerIds: false, @@ -40,78 +43,58 @@ const INITIAL_MARKED_OPTIONS: marked.MarkedOptions = { } /** - * Serialize the Markdown input to HTML with a custom serializer, ready for a plain-text editor. + * Create a custom Markdown to HTML serializer for plain-text editors only. * * @param schema The editor schema to be used for nodes and marks detection. * - * @returns The serialized HTML output. + * @returns A normalized object for the HTML serializer. */ -function serializeForPlainTextEditor(markdown: string, schema: Schema): string { - // Converts special characters (i.e. `&`, `<`, `>`, `"`, and `'`) to their corresponding HTML - // entities. Unlike the `sanitize` option that is used by the rich-text serializer, it's safe - // for the plain-text serializer to escape the full input considering we need to output the full - // content as valid HTML (i.e. the editor should not drop invalid HTML). - let htmlResult = escape(markdown) - - // Serialize all suggestion links if any suggestion node exists in the schema - Object.values(schema.nodes) - .filter((node) => node.name.endsWith('Suggestion')) - .forEach((suggestionNode) => { - const linkSchema = kebabCase(suggestionNode.name.replace(/Suggestion$/, '')) - - htmlResult = htmlResult.replace( - new RegExp(`\\[([^\\[]+)\\]\\((?:${linkSchema}):\\/\\/(\\d+)\\)`, 'gm'), - ``, - ) - }) - - // Return the serialized HTML with every line wrapped in a paragraph element - return htmlResult.replace(/^([^\n]+)\n?|\n+/gm, `

$1

`) +function createHTMLSerializerForPlainTextEditor(schema: Schema) { + return { + serialize(markdown: string) { + // Converts special characters (i.e. `&`, `<`, `>`, `"`, and `'`) to their corresponding + // HTML entities because we need to output the full content as valid HTML (i.e. the + // editor should not drop invalid HTML). + let htmlResult = escape(markdown) + + // Serialize all suggestion links if any suggestion node exists in the schema + Object.values(schema.nodes) + .filter((node) => node.name.endsWith('Suggestion')) + .forEach((suggestionNode) => { + const linkSchema = kebabCase(suggestionNode.name.replace(/Suggestion$/, '')) + + htmlResult = htmlResult.replace( + new RegExp(`\\[([^\\[]+)\\]\\((?:${linkSchema}):\\/\\/(\\d+)\\)`, 'gm'), + ``, + ) + }) + + // Return the serialized HTML with every line wrapped in a paragraph element + return htmlResult.replace(/^([^\n]+)\n?|\n+/gm, `

$1

`) + }, + } } /** - * Serialize the Markdown input to HTML with Marked, ready for a rich-text editor. + * Create a Markdown to HTML serializer with the Marked library for a rich-text editor, or use a + * custom serializer for a plain-text editor. The editor schema is used to detect which nodes and + * marks are available in the editor, and only parses the input with the minimal required rules. * - * @param markdown The input Markdown to be serialized to HTML. * @param schema The editor schema to be used for nodes and marks detection. * - * @returns The serialized HTML output. + * @returns A normalized object for the HTML serializer. */ -function serializeForRichTextEditor(markdown: string, schema: Schema): string { - // Reset Marked to the defaults and set custom options - marked.setOptions({ - ...marked.getDefaults(), - ...INITIAL_MARKED_OPTIONS, - }) - - // Disable built-in rules that the editor does not yet support - marked.use({ - // eslint-disable-next-line @typescript-eslint/ban-ts-comment - // @ts-ignore: Returning `undefined` is acceptable to disable tokens - tokenizer: { - ...(!schema.marks.strike - ? { - del() { - /* noop: disables tokenizer */ - }, - } - : {}), - ...(!schema.nodes.heading - ? { - heading() { - /* noop: disables tokenizer */ - }, - } - : {}), - ...(!schema.nodes.table - ? { - table() { - /* noop: disables tokenizer */ - }, - } - : {}), - }, - }) +function createHTMLSerializer(schema: Schema): HTMLSerializerReturnType { + // Returns a custom HTML serializer for plain-text editors + if (isPlainTextDocument(schema)) { + return createHTMLSerializerForPlainTextEditor(schema) + } + + // Reset Marked instance to the initial options + marked.setOptions(INITIAL_MARKED_OPTIONS) + + // Disable built-in rules that are not supported by the schema + marked.use(disabled(schema)) // Overwrite some built-in rules for handling of special behaviours // (see documentation for each extension for more details) @@ -127,45 +110,31 @@ function serializeForRichTextEditor(markdown: string, schema: Schema): string { marked.use(taskList) } - // Get all the available suggestion nodes from the schema - const suggestionNodes = Object.values(schema.nodes).filter((node) => - node.name.endsWith('Suggestion'), - ) + // Build a regular expression with all the available suggestion nodes from the schema + const suggestionSchemaPartialRegex = buildSuggestionSchemaPartialRegex(schema) // Overwrite the built-in link rule if any suggestion node exists in the schema - if (suggestionNodes.length > 0) { - marked.use(link(suggestionNodes)) + if (suggestionSchemaPartialRegex) { + marked.use(link(new RegExp(`^${suggestionSchemaPartialRegex}`))) } - // Return the serialized HTML parsed with Marked - return ( - marked - .parse(markdown) - // Removes newlines after tags from the HTML output with a specially crafted RegExp - // (needed to prevent the editor from converting newlines to blank paragraphs) - .replace(new RegExp(`>${REGEX_LINE_BREAKS.source}`, REGEX_LINE_BREAKS.flags), '>') - ) -} - -/** - * Create a Markdown to HTML serializer with the Marked library for a rich-text editor, or use a - * custom serializer for a plain-text editor. The editor schema is used to detect which nodes and - * marks are available in the editor, and only parses the input with the minimal required rules. - * - * @param schema The editor schema to be used for nodes and marks detection. - * - * @returns A normalized `serialize` function. - */ -function createHTMLSerializer(schema: Schema): HTMLSerializerReturnType { return { serialize(markdown: string) { - return isPlainTextDocument(schema) - ? serializeForPlainTextEditor(markdown, schema) - : serializeForRichTextEditor(markdown, schema) + return ( + marked + .parse(markdown) + // Removes line breaks after HTML tags from the HTML output with a specially + // crafted RegExp (this is needed to prevent the editor from converting newline + // control characters to blank paragraphs). + .replace( + new RegExp(`>${REGEX_LINE_BREAKS.source}`, REGEX_LINE_BREAKS.flags), + '>', + ) + ) }, } } -export { createHTMLSerializer } +export { createHTMLSerializer, INITIAL_MARKED_OPTIONS } export type { HTMLSerializerReturnType } diff --git a/src/serializers/markdown/markdown.test.ts b/src/serializers/markdown/markdown.test.ts index 605eaa53..c18de1d3 100644 --- a/src/serializers/markdown/markdown.test.ts +++ b/src/serializers/markdown/markdown.test.ts @@ -1,6 +1,7 @@ import { getSchema } from '@tiptap/core' import { TaskItem } from '@tiptap/extension-task-item' import { TaskList } from '@tiptap/extension-task-list' +import Turndown from 'turndown' import { PlainTextKit } from '../../extensions/plain-text/plain-text-kit' import { RichTextKit } from '../../extensions/rich-text/rich-text-kit' @@ -206,25 +207,29 @@ before _ after
99. after
` describe('Plain-text Document', () => { - let markdownSerializer: MarkdownSerializerReturnType + describe('with default extensions', () => { + let markdownSerializer: MarkdownSerializerReturnType - beforeEach(() => { - markdownSerializer = createMarkdownSerializer(getSchema([PlainTextKit])) - }) + const useMock = jest.spyOn(Turndown.prototype, 'use') + const addRuleMock = jest.spyOn(Turndown.prototype, 'addRule') + + beforeEach(() => { + markdownSerializer = createMarkdownSerializer(getSchema([PlainTextKit])) + }) - test('special HTML entities are converted to ASCII characters', () => { - expect(markdownSerializer.serialize(HTML_INPUT_SPECIAL_HTML_CHARS)) - .toBe(`Ambition & Balance + test('special HTML entities are converted to ASCII characters', () => { + expect(markdownSerializer.serialize(HTML_INPUT_SPECIAL_HTML_CHARS)) + .toBe(`Ambition & Balance "Doist" 'Doist'`) - }) + }) - test('special Markdown characters are NOT escaped', () => { - expect(markdownSerializer.serialize(HTML_INPUT_SPECIAL_MARKDOWN_CHARS)) - .toBe(`before \\ after + test('special Markdown characters are NOT escaped', () => { + expect(markdownSerializer.serialize(HTML_INPUT_SPECIAL_MARKDOWN_CHARS)) + .toBe(`before \\ after before * after - after + after @@ -238,144 +243,33 @@ before ] after before _ after 1. after 99. after`) - }) - - test('paragraphs Markdown output is correct', () => { - expect(markdownSerializer.serialize(HTML_INPUT_PARAGRAPHS)).toBe( - `I really like using Markdown. -I think I'll use it to format all of my documents from now on.`, - ) - }) - - describe('without `listItem` extension', () => { - test('ordered lists Markdown output is correct', () => { - expect(markdownSerializer.serialize(HTML_INPUT_ORDERED_LISTS)).toBe( - `1. First item -2. Second item -3. Third item -4. Fourth item - ---- - -5. First item -6. Second item -7. Third item -8. Fourth item - ---- - -1. First item -2. Second item -3. Third item - 1. Indented item - 2. Indented item -4. Fourth item`, - ) - }) - - test('unordered lists Markdown output is correct', () => { - expect(markdownSerializer.serialize(HTML_INPUT_UNORDERED_LISTS)).toBe( - `- First item -- Second item -- Third item -- Fourth item - ---- - -- First item -- Second item -- Third item - - Indented item - - Indented item -- Fourth item - ---- - -- 1968. A great year! -- I think 1969 was second best. - ---- - -- This is the first list item. -- Here's the second list item. - I need to add another paragraph below the second list item. -- And here's the third list item.`, - ) }) - }) - describe('without `strike` extension', () => { - test('strikethrough Markdown output is correct', () => { - const customSerializer = createMarkdownSerializer( - getSchema([ - RichTextKit.configure({ - strike: false, - }), - ]), + test('paragraphs Markdown output is correct', () => { + expect(markdownSerializer.serialize(HTML_INPUT_PARAGRAPHS)).toBe( + `I really like using Markdown. +I think I'll use it to format all of my documents from now on.`, ) - - expect( - customSerializer.serialize( - '

Strikethrough uses two tildes: scratch this

', - ), - ).toBe('Strikethrough uses two tildes: scratch this') }) - }) - - describe('without custom `taskList` extension', () => { - test('task lists syntax is ignored', () => { - expect(markdownSerializer.serialize(HTML_INPUT_TASK_LISTS)).toBe( - `- First item -- Second item -- Third item -- Fourth item - ---- - -- First item -- Second item -- Third item -- Fourth item - ---- - -- First item -- Second item -- Third item -- Fourth item - ---- -- First item -- Second item -- Third item - - Indented item - - Indented item -- Fourth item - ---- - -- 1968. A great year! -- I think 1969 was second best. - ---- - -- This is the first list item. -- Here's the second list item. - I need to add another paragraph below the second list item. -- And here's the third list item.`, - ) + test('only the paragraph rule is overwritten', () => { + expect(useMock).toHaveBeenCalledTimes(1) + expect(addRuleMock).toHaveBeenLastCalledWith('paragraph', { + filter: 'p', + // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment + replacement: expect.any(Function), + }) }) }) describe('with custom `*Suggestion` extensions', () => { test('mention suggestion Markdown output is correct', () => { - const customSerializer = createMarkdownSerializer( + const markdownSerializer = createMarkdownSerializer( getSchema([RichTextKit, createSuggestionExtension('mention')]), ) expect( - customSerializer.serialize( + markdownSerializer.serialize( `

Question: Who's the head of the Frontend team?
Answer: @Henning M

`, ), ).toBe(`Question: Who's the head of the Frontend team? @@ -383,12 +277,12 @@ Answer: [Henning M](mention://963827)`) }) test('channel suggestions Markdown output is correct', () => { - const customSerializer = createMarkdownSerializer( + const markdownSerializer = createMarkdownSerializer( getSchema([RichTextKit, createSuggestionExtension('channel')]), ) expect( - customSerializer.serialize( + markdownSerializer.serialize( `

Question: What's the best channel on Twist?
Answer: #Doist Frontend

`, ), ).toBe(`Question: What's the best channel on Twist? @@ -398,25 +292,26 @@ Answer: [Doist Frontend](channel://190200)`) }) describe('Rich-text Document', () => { - let markdownSerializer: MarkdownSerializerReturnType + describe('without default extensions', () => { + let markdownSerializer: MarkdownSerializerReturnType - beforeEach(() => { - markdownSerializer = createMarkdownSerializer(getSchema([RichTextKit])) - }) + beforeEach(() => { + markdownSerializer = createMarkdownSerializer(getSchema([RichTextKit])) + }) - test('special HTML entities are converted to ASCII characters', () => { - expect(markdownSerializer.serialize(HTML_INPUT_SPECIAL_HTML_CHARS)) - .toBe(`Ambition & Balance + test('special HTML entities are converted to ASCII characters', () => { + expect(markdownSerializer.serialize(HTML_INPUT_SPECIAL_HTML_CHARS)) + .toBe(`Ambition & Balance "Doist" 'Doist'`) - }) + }) - test('special Markdown characters are escaped', () => { - expect(markdownSerializer.serialize(HTML_INPUT_SPECIAL_MARKDOWN_CHARS)) - .toBe(`before \\\\ after + test('special Markdown characters are escaped', () => { + expect(markdownSerializer.serialize(HTML_INPUT_SPECIAL_MARKDOWN_CHARS)) + .toBe(`before \\\\ after before \\* after \\- after \\+ after @@ -430,31 +325,31 @@ before \\] after before \\_ after 1\\. after 99\\. after`) - }) + }) - test('headings Markdown output is correct', () => { - expect(markdownSerializer.serialize(HTML_INPUT_HEADINGS)).toBe( - '# Heading level 1\n\n## Heading level 2\n\n### Heading level 3\n\n#### Heading level 4\n\n##### Heading level 5\n\n###### Heading level 6', - ) - }) + test('headings Markdown output is correct', () => { + expect(markdownSerializer.serialize(HTML_INPUT_HEADINGS)).toBe( + '# Heading level 1\n\n## Heading level 2\n\n### Heading level 3\n\n#### Heading level 4\n\n##### Heading level 5\n\n###### Heading level 6', + ) + }) - test('paragraphs Markdown output is correct', () => { - expect(markdownSerializer.serialize(HTML_INPUT_PARAGRAPHS)).toBe( - `I really like using Markdown. + test('paragraphs Markdown output is correct', () => { + expect(markdownSerializer.serialize(HTML_INPUT_PARAGRAPHS)).toBe( + `I really like using Markdown. I think I'll use it to format all of my documents from now on.`, - ) - }) + ) + }) - test('line breaks Markdown output is correct', () => { - expect(markdownSerializer.serialize(HTML_INPUT_LINE_BREAKS)) - .toBe(`This is the first line. + test('line breaks Markdown output is correct', () => { + expect(markdownSerializer.serialize(HTML_INPUT_LINE_BREAKS)) + .toBe(`This is the first line. And this is the second line.`) - }) + }) - test('styled text Markdown output is correct', () => { - expect(markdownSerializer.serialize(HTML_INPUT_STYLED_TEXT)).toBe( - `I just love **bold text**. + test('styled text Markdown output is correct', () => { + expect(markdownSerializer.serialize(HTML_INPUT_STYLED_TEXT)).toBe( + `I just love **bold text**. I just love **bold text**. Italicized text is the _cat's meow_. @@ -467,12 +362,12 @@ This text is **_really important_**. This is really _**very**_ important text. Strikethrough uses two tildes: ~~scratch this~~`, - ) - }) + ) + }) - test('blockquotes Markdown output is correct', () => { - expect(markdownSerializer.serialize(HTML_INPUT_BLOCKQUOTES)).toBe( - `> Dorothy followed her through many of the beautiful rooms in her castle. + test('blockquotes Markdown output is correct', () => { + expect(markdownSerializer.serialize(HTML_INPUT_BLOCKQUOTES)).toBe( + `> Dorothy followed her through many of the beautiful rooms in her castle. > Dorothy followed her through many of the beautiful rooms in her castle. > @@ -488,11 +383,11 @@ Strikethrough uses two tildes: ~~scratch this~~`, > - Profits were higher than ever. > > _Everything_ is going according to **plan**.`, - ) - }) + ) + }) - test('ordered lists Markdown output is correct', () => { - expect(markdownSerializer.serialize(HTML_INPUT_ORDERED_LISTS)).toBe(`1. First item + test('ordered lists Markdown output is correct', () => { + expect(markdownSerializer.serialize(HTML_INPUT_ORDERED_LISTS)).toBe(`1. First item 2. Second item 3. Third item 4. Fourth item @@ -512,11 +407,11 @@ Strikethrough uses two tildes: ~~scratch this~~`, 1. Indented item 2. Indented item 4. Fourth item`) - }) + }) - test('unordered lists Markdown output is correct', () => { - expect(markdownSerializer.serialize(HTML_INPUT_UNORDERED_LISTS)).toBe( - `- First item + test('unordered lists Markdown output is correct', () => { + expect(markdownSerializer.serialize(HTML_INPUT_UNORDERED_LISTS)).toBe( + `- First item - Second item - Third item - Fourth item @@ -541,12 +436,12 @@ Strikethrough uses two tildes: ~~scratch this~~`, - Here's the second list item. I need to add another paragraph below the second list item. - And here's the third list item.`, - ) - }) + ) + }) - test('task lists syntax is ignored (unsupported by default)', () => { - expect(markdownSerializer.serialize(HTML_INPUT_TASK_LISTS)).toBe( - `- First item + test('task lists syntax is ignored (unsupported by default)', () => { + expect(markdownSerializer.serialize(HTML_INPUT_TASK_LISTS)).toBe( + `- First item - Second item - Third item - Fourth item @@ -585,40 +480,40 @@ Strikethrough uses two tildes: ~~scratch this~~`, - Here's the second list item. I need to add another paragraph below the second list item. - And here's the third list item.`, - ) - }) + ) + }) - test('images Markdown output is correct', () => { - expect(markdownSerializer.serialize(HTML_INPUT_IMAGES)).toBe( - `![Tux, the Linux mascot](/assets/images/tux.png) ![Tux, the Linux mascot](/assets/images/tux.png "The Linux mascot") ![](_SUPPORTED) + test('images Markdown output is correct', () => { + expect(markdownSerializer.serialize(HTML_INPUT_IMAGES)).toBe( + `![Tux, the Linux mascot](/assets/images/tux.png) ![Tux, the Linux mascot](/assets/images/tux.png "The Linux mascot") ![](_SUPPORTED) [![Tux, the Linux mascot](/assets/images/tux.png "The Linux mascot")](https://d33wubrfki0l68.cloudfront.net/e7ed9fe4bafe46e275c807d63591f85f9ab246ba/e2d28/assets/images/tux.png)`, - ) - }) + ) + }) - test('code Markdown output is correct', () => { - expect(markdownSerializer.serialize(HTML_INPUT_CODE)).toBe( - `At the command prompt, type \`nano\`. + test('code Markdown output is correct', () => { + expect(markdownSerializer.serialize(HTML_INPUT_CODE)).toBe( + `At the command prompt, type \`nano\`. \`\`Use \`code\` in your Markdown file.\`\``, - ) - }) + ) + }) - test('code block Markdown output is correct', () => { - expect(markdownSerializer.serialize(HTML_INPUT_CODE_BLOCK)).toBe( - `\`\`\` + test('code block Markdown output is correct', () => { + expect(markdownSerializer.serialize(HTML_INPUT_CODE_BLOCK)).toBe( + `\`\`\` Test \`\`\``, - ) - }) + ) + }) - test('block elements Markdown output is correct', () => { - expect(markdownSerializer.serialize(HTML_INPUT_INDENTED_BLOCK_ELEMENTS)) - .toBe(`1. Blockquote: + test('block elements Markdown output is correct', () => { + expect(markdownSerializer.serialize(HTML_INPUT_INDENTED_BLOCK_ELEMENTS)) + .toBe(`1. Blockquote: > Dorothy followed her through many of the beautiful rooms in her castle. 2. Image: @@ -632,69 +527,135 @@ Strikethrough uses two tildes: ~~scratch this~~`, \`\`\``) - }) + }) - test('line rules Markdown output is correct', () => { - expect(markdownSerializer.serialize(HTML_INPUT_LINE_RULES)).toBe( - `--- + test('horizontal rules Markdown output is correct', () => { + expect(markdownSerializer.serialize(HTML_INPUT_LINE_RULES)).toBe( + `--- --- ---`, - ) - }) + ) + }) - test('links Markdown output is correct', () => { - expect(markdownSerializer.serialize(HTML_INPUT_LINKS)).toBe( - `My favorite search engine is [Duck Duck Go](https://duckduckgo.com). + test('links Markdown output is correct', () => { + expect(markdownSerializer.serialize(HTML_INPUT_LINKS)).toBe( + `My favorite search engine is [Duck Duck Go](https://duckduckgo.com). My favorite search engine is [Duck Duck Go](https://duckduckgo.com "The best search engine for privacy").`, - ) - }) + ) + }) - test('styled links Markdown output is correct', () => { - expect(markdownSerializer.serialize(HTML_INPUT_STYLED_LINKS)).toBe( - `I love supporting the **[EFF](https://eff.org)**. + test('styled links Markdown output is correct', () => { + expect(markdownSerializer.serialize(HTML_INPUT_STYLED_LINKS)).toBe( + `I love supporting the **[EFF](https://eff.org)**. This is the _[Markdown Guide](https://www.markdownguide.org)_. See the section on [\`code\`](#code).`, - ) - }) + ) + }) - test('special Markdown characters are NOT escaped if `escape` is disabled', () => { - const customSerializer = createMarkdownSerializer(getSchema([RichTextKit]), { - escape: false, + test('special Markdown characters are NOT escaped if `escape` is disabled', () => { + const customSerializer = createMarkdownSerializer(getSchema([RichTextKit]), { + escape: false, + }) + expect( + customSerializer.serialize( + `

Wrapped markdown **still markdown**

`, + ), + ).toBe(`**Wrapped markdown** **still markdown**`) }) - expect( - customSerializer.serialize( - `

Wrapped markdown **still markdown**

`, - ), - ).toBe(`**Wrapped markdown** **still markdown**`) }) - describe('without `strike` extension', () => { - test('strikethrough Markdown output is correct', () => { - const customSerializer = createMarkdownSerializer( - getSchema([ - RichTextKit.configure({ - strike: false, - }), - ]), + describe('without custom extensions', () => { + const markdownSerializer = createMarkdownSerializer( + getSchema([ + RichTextKit.configure({ + bulletList: false, + image: false, + listItem: false, + orderedList: false, + strike: false, + }), + ]), + ) + + test('ordered lists Markdown output is correct', () => { + expect(markdownSerializer.serialize(HTML_INPUT_ORDERED_LISTS)).toBe(`1. First item +2. Second item +3. Third item +4. Fourth item + +--- + +5. First item +6. Second item +7. Third item +8. Fourth item + +--- + +1. First item +2. Second item +3. Third item + 1. Indented item + 2. Indented item +4. Fourth item`) + }) + + test('unordered lists Markdown output is correct', () => { + expect(markdownSerializer.serialize(HTML_INPUT_UNORDERED_LISTS)).toBe( + `- First item +- Second item +- Third item +- Fourth item + +--- + +- First item +- Second item +- Third item + - Indented item + - Indented item +- Fourth item + +--- + +- 1968\\. A great year! +- I think 1969 was second best. + +--- + +- This is the first list item. +- Here's the second list item. + I need to add another paragraph below the second list item. +- And here's the third list item.`, + ) + }) + + test('images Markdown output is correct', () => { + expect(markdownSerializer.serialize(HTML_INPUT_IMAGES)).toBe( + `![Tux, the Linux mascot](/assets/images/tux.png) ![Tux, the Linux mascot](/assets/images/tux.png "The Linux mascot") ![]() + +[![Tux, the Linux mascot](/assets/images/tux.png "The Linux mascot")](https://d33wubrfki0l68.cloudfront.net/e7ed9fe4bafe46e275c807d63591f85f9ab246ba/e2d28/assets/images/tux.png)`, ) + }) + test('strikethrough Markdown output is ignored', () => { expect( - customSerializer.serialize( + markdownSerializer.serialize( '

Strikethrough uses two tildes: scratch this

', ), ).toBe('Strikethrough uses two tildes: scratch this') }) }) - describe('with custom `taskList` extension', () => { - test('task lists HTML output is correct', () => { - const customSerializer = createMarkdownSerializer( + describe('with official `taskList`/`taskItem` extensions', () => { + test('task lists Markdown output is correct', () => { + const markdownSerializer = createMarkdownSerializer( getSchema([RichTextKit, TaskList, TaskItem]), ) - expect(customSerializer.serialize(HTML_INPUT_TASK_LISTS)).toBe( + expect(markdownSerializer.serialize(HTML_INPUT_TASK_LISTS)).toBe( `- [ ] First item - [x] Second item - [x] Third item @@ -740,12 +701,12 @@ See the section on [\`code\`](#code).`, describe('with custom `*Suggestion` extensions', () => { test('mention suggestion Markdown output is correct', () => { - const customSerializer = createMarkdownSerializer( + const markdownSerializer = createMarkdownSerializer( getSchema([RichTextKit, createSuggestionExtension('mention')]), ) expect( - customSerializer.serialize( + markdownSerializer.serialize( `

Question: Who's the head of the Frontend team?
Answer: @Henning M

`, ), ).toBe(`Question: Who's the head of the Frontend team? @@ -753,12 +714,12 @@ Answer: [Henning M](mention://963827)`) }) test('channel suggestions Markdown output is correct', () => { - const customSerializer = createMarkdownSerializer( + const markdownSerializer = createMarkdownSerializer( getSchema([RichTextKit, createSuggestionExtension('channel')]), ) expect( - customSerializer.serialize( + markdownSerializer.serialize( `

Question: What's the best channel on Twist?
Answer: #Doist Frontend

`, ), ).toBe(`Question: What's the best channel on Twist? diff --git a/src/serializers/markdown/markdown.ts b/src/serializers/markdown/markdown.ts index 90f03f79..94c06f59 100644 --- a/src/serializers/markdown/markdown.ts +++ b/src/serializers/markdown/markdown.ts @@ -89,9 +89,13 @@ const INITIAL_TURNDOWN_OPTIONS: Turndown.Options = { * a plain-text editor. The editor schema is used to detect which nodes and marks are available in * the editor, and only parses the input with the minimal required rules. * + * **Note:** Unlike the HTML serializer, built-in rules that are not supported by the schema are not + * disabled because if the schema does not support certain nodes/marks, the parsing rules don't have + * valid HTML elements to match in the editor HTML output. + * * @param schema The editor schema to be used for nodes and marks detection. * - * @returns A normalized `serialize` function. + * @returns A normalized object for the Markdown serializer. */ function createMarkdownSerializer( schema: Schema, @@ -100,32 +104,31 @@ function createMarkdownSerializer( // Initialize Turndown with custom options const turndown = new Turndown(INITIAL_TURNDOWN_OPTIONS) - // Turndown ensures Markdown characters are escaped (i.e. `\`) by default, so they are not - // interpreted as Markdown when the output is compiled back to HTML. For example, the contents - // of `

1. Hello world

` need to be escaped to `1\. Hello world`, otherwise it will be - // interpreted as a list item rather than a heading. However, if the schema represents a - // plain-text document, we need to override the escape function to return the input as-is, so - // that Markdown characters are interpreted as Markdown. - // ref: https://github.com/mixmark-io/turndown#escaping-markdown-characters + // Turndown was built to convert HTML into Markdown, expecting the input to be standards + // compliant HTML. As such, it collapses all whitespace by default, and there's + // currently no way to opt-out of this behavior. However, for plain-text editors, we + // need to preserve Markdown whitespace (otherwise we lose syntax like nested lists) by + // replacing all instances of the space character (but only if it's preceded by another + // space character) by the non-breaking space character, and after processing the input + // with Turndown, we restore the original space character. if (isPlainTextDocument(schema) || options?.escape === false) { turndown.escape = (str) => str } + // Overwrite some built-in rules for handling of special behaviours + // (see documentation for each extension for more details) + turndown.use(paragraph(schema.nodes.paragraph, isPlainTextDocument(schema))) + // Overwrite the built-in `image` rule if the corresponding node exists in the schema if (schema.nodes.image) { turndown.use(image(schema.nodes.image)) } // Overwrite the built-in `listItem` rule if the corresponding node exists in the schema - if (schema.nodes.listItem) { + if ((schema.nodes.bulletList || schema.nodes.orderedList) && schema.nodes.listItem) { turndown.use(listItem(schema.nodes.listItem)) } - // Overwrite the built-in `paragraph` rule if the corresponding node exists in the schema - if (schema.nodes.paragraph) { - turndown.use(paragraph(schema.nodes.paragraph, isPlainTextDocument(schema))) - } - // Add a rule for `strikethrough` if the corresponding node exists in the schema if (schema.marks.strike) { turndown.use(strikethrough(schema.marks.strike)) diff --git a/src/serializers/markdown/plugins/image.ts b/src/serializers/markdown/plugins/image.ts index 10eb0448..ca6ef492 100644 --- a/src/serializers/markdown/plugins/image.ts +++ b/src/serializers/markdown/plugins/image.ts @@ -21,7 +21,7 @@ function image(nodeType: NodeType): Turndown.Plugin { return (turndown: Turndown) => { turndown.addRule(nodeType.name, { filter: 'img', - replacement: function (_, node) { + replacement(_, node) { const src = String((node as Element).getAttribute('src')) // Preserve Data URL image prefix with message about base64 being unsupported diff --git a/src/serializers/markdown/plugins/list-item.ts b/src/serializers/markdown/plugins/list-item.ts index 668ef1ea..9367164c 100644 --- a/src/serializers/markdown/plugins/list-item.ts +++ b/src/serializers/markdown/plugins/list-item.ts @@ -18,13 +18,13 @@ function listItem(nodeType: NodeType): Turndown.Plugin { return (turndown: Turndown) => { turndown.addRule(nodeType.name, { - filter: (node) => { + filter(node) { return ( tags.some((tag) => tag.toUpperCase() === node.nodeName) && node.getAttribute('data-type') !== 'taskItem' ) }, - replacement: function (content, node) { + replacement(content, node) { const parentNode = node.parentNode as HTMLElement let listItemMarker = `${BULLET_LIST_MARKER} ` diff --git a/src/serializers/markdown/plugins/paragraph.ts b/src/serializers/markdown/plugins/paragraph.ts index ac55f56f..aae8d1fc 100644 --- a/src/serializers/markdown/plugins/paragraph.ts +++ b/src/serializers/markdown/plugins/paragraph.ts @@ -12,7 +12,7 @@ function paragraph(nodeType: NodeType, isPlainText: boolean): Turndown.Plugin { return (turndown: Turndown) => { turndown.addRule(nodeType.name, { filter: 'p', - replacement: function (content) { + replacement(content) { return isPlainText ? `\n${content}\n` : `\n\n${content}\n\n` }, }) diff --git a/src/serializers/markdown/plugins/strikethrough.ts b/src/serializers/markdown/plugins/strikethrough.ts index ae9764d7..08a425aa 100644 --- a/src/serializers/markdown/plugins/strikethrough.ts +++ b/src/serializers/markdown/plugins/strikethrough.ts @@ -16,10 +16,10 @@ function strikethrough(markType: MarkType): Turndown.Plugin { return (turndown: Turndown) => { turndown.addRule(markType.name, { - filter: (node) => { + filter(node) { return tags.some((tag) => tag.toUpperCase() === node.nodeName) }, - replacement: (content) => { + replacement(content) { return `~~${content}~~` }, }) diff --git a/src/serializers/markdown/plugins/suggestion.ts b/src/serializers/markdown/plugins/suggestion.ts index 6b6d10c3..e9d3d50b 100644 --- a/src/serializers/markdown/plugins/suggestion.ts +++ b/src/serializers/markdown/plugins/suggestion.ts @@ -14,8 +14,10 @@ function suggestion(nodeType: NodeType): Turndown.Plugin { return (turndown: Turndown) => { turndown.addRule(nodeType.name, { - filter: (node: Element) => node.hasAttribute(`data-${attributeType}`), - replacement: (_, node) => { + filter(node: Element) { + return node.hasAttribute(`data-${attributeType}`) + }, + replacement(_, node) { const label = String((node as Element).getAttribute('data-label')) const id = String((node as Element).getAttribute('data-id')) diff --git a/src/serializers/markdown/plugins/task-item.ts b/src/serializers/markdown/plugins/task-item.ts index bd579a30..a6225488 100644 --- a/src/serializers/markdown/plugins/task-item.ts +++ b/src/serializers/markdown/plugins/task-item.ts @@ -18,13 +18,13 @@ function taskItem(nodeType: NodeType): Turndown.Plugin { return (turndown: Turndown) => { turndown.addRule(nodeType.name, { - filter: (node) => { + filter(node) { return ( tags.some((tag) => tag.toUpperCase().startsWith(node.nodeName)) && node.getAttribute('data-type') === 'taskItem' ) }, - replacement: function (content, node) { + replacement(content, node) { const parentNode = node.parentNode as HTMLElement let listItemMarker = `${BULLET_LIST_MARKER} `