Skip to content

Commit

Permalink
feat(parsers): Add slim export, move to factory fns (#1960)
Browse files Browse the repository at this point in the history
  • Loading branch information
fb55 committed Aug 14, 2021
1 parent e4b9369 commit efcba05
Show file tree
Hide file tree
Showing 11 changed files with 327 additions and 230 deletions.
29 changes: 13 additions & 16 deletions Readme.md
Expand Up @@ -134,6 +134,11 @@ The options in the `xml` object are taken directly from [htmlparser2](https://gi
For a full list of options and their effects, see [domhandler](https://github.com/fb55/DomHandler) and
[htmlparser2's options](https://github.com/fb55/htmlparser2/wiki/Parser-options).

#### Using `htmlparser2`

Cheerio ships with two parsers, `parse5` and `htmlparser2`. The
former is the default for HTML, the latter the default for XML.

Some users may wish to parse markup with the `htmlparser2` library, and
traverse/manipulate the resulting structure with Cheerio. This may be the case
for those upgrading from pre-1.0 releases of Cheerio (which relied on
Expand All @@ -156,6 +161,13 @@ const dom = htmlparser2.parseDocument(document, options);
const $ = cheerio.load(dom);
```

If you want to save some bytes, you can use Cheerio's _slim_ export, which
always uses `htmlparser2`:

```js
const cheerio = require('cheerio/lib/slim');
```

### Selectors

Cheerio's selector implementation is nearly identical to jQuery's, so the API is very similar.
Expand Down Expand Up @@ -210,21 +222,6 @@ cheerio.html($('.pear'));
//=> <li class="pear">Pear</li>
```

By default, `html` will leave some tags open. Sometimes you may instead want to render a valid XML document. For example, you might parse the following XML snippet:

```js
const $ = cheerio.load(
'<media:thumbnail url="http://www.foo.com/keyframe.jpg" width="75" height="50" time="12:05:01.123"/>'
);
```

... and later want to render to XML. To do this, you can use the `xml` utility function:

```js
$.xml();
//=> <media:thumbnail url="http://www.foo.com/keyframe.jpg" width="75" height="50" time="12:05:01.123"/>
```

You may also render the text content of a Cheerio object using the `text` static method:

```js
Expand All @@ -246,7 +243,7 @@ $.prototype.logHtml = function () {
$('body').logHtml(); // logs "Hello, <b>world</b>!" to the console
```

If you're using TypeScript, you should also add a type definition for your new method:
If you're using TypeScript, you should add a type definition for your new method:

```ts
declare module 'cheerio' {
Expand Down
24 changes: 12 additions & 12 deletions src/api/manipulation.ts
Expand Up @@ -5,8 +5,8 @@
*/

import { Node, NodeWithChildren, Element, Text, hasChildren } from 'domhandler';
import { default as parse, update as updateDOM } from '../parse';
import { html as staticHtml, text as staticText } from '../static';
import { update as updateDOM } from '../parse';
import { text as staticText } from '../static';
import { domEach, cloneDom, isTag, isHtml, isCheerio } from '../utils';
import { removeElement } from 'domutils';
import type { Cheerio } from '../cheerio';
Expand Down Expand Up @@ -39,7 +39,7 @@ export function _makeDomArray<T extends Node>(
);
}
if (typeof elem === 'string') {
return parse(elem, this.options, false).children;
return this._parse(elem, this.options, false).children;
}
return clone ? cloneDom([elem]) : [elem];
}
Expand All @@ -63,7 +63,7 @@ function _insert(
if (!hasChildren(el)) return;
const domSrc =
typeof elems[0] === 'function'
? elems[0].call(el, i, staticHtml(el.children))
? elems[0].call(el, i, this._render(el.children))
: (elems as Node[]);

const dom = this._makeDomArray(domSrc, i < lastIdx);
Expand Down Expand Up @@ -599,7 +599,7 @@ export function after<T extends Node>(

const domSrc =
typeof elems[0] === 'function'
? elems[0].call(el, i, staticHtml(el.children))
? elems[0].call(el, i, this._render(el.children))
: (elems as Node[]);

const dom = this._makeDomArray(domSrc, i < lastIdx);
Expand Down Expand Up @@ -713,7 +713,7 @@ export function before<T extends Node>(

const domSrc =
typeof elems[0] === 'function'
? elems[0].call(el, i, staticHtml(el.children))
? elems[0].call(el, i, this._render(el.children))
: (elems as Node[]);

const dom = this._makeDomArray(domSrc, i < lastIdx);
Expand Down Expand Up @@ -923,7 +923,7 @@ export function html<T extends Node>(
if (str === undefined) {
const el = this[0];
if (!el || !hasChildren(el)) return null;
return staticHtml(el.children, this.options);
return this._render(el.children);
}

// Keep main options unchanged
Expand All @@ -939,7 +939,7 @@ export function html<T extends Node>(

const content = isCheerio(str)
? str.toArray()
: parse(`${str}`, opts, false).children;
: this._parse(`${str}`, opts, false).children;

updateDOM(content, el);
});
Expand All @@ -952,7 +952,7 @@ export function html<T extends Node>(
* @returns The rendered document.
*/
export function toString<T extends Node>(this: Cheerio<T>): string {
return staticHtml(this, this.options);
return this._render(this);
}

/**
Expand Down Expand Up @@ -992,9 +992,9 @@ export function text<T extends Node>(
}
if (typeof str === 'function') {
// Function support
return domEach(this, (el, i) => {
text.call(this._make(el), str.call(el, i, staticText([el])));
});
return domEach(this, (el, i) =>
this._make(el).text(str.call(el, i, staticText([el])))
);
}

// Append a text node to each selected element
Expand Down
24 changes: 24 additions & 0 deletions src/cheerio.ts
Expand Up @@ -64,6 +64,30 @@ export abstract class Cheerio<T> implements ArrayLike<T> {
dom: ArrayLike<T> | T | string,
context?: BasicAcceptedElems<Node>
): Cheerio<T>;

/**
* Parses some content into a document.
*
* Declared `abstract`, so concrete subclasses must provide the implementation.
* The manipulation methods call it to turn markup strings into nodes.
*
* @private
* @param content - Content to parse.
* @param options - Options for parsing.
* @param isDocument - Allows parser to be switched to fragment mode
*   (pass `false` to parse `content` as a fragment).
* @returns A document containing the `content`.
*/
abstract _parse(
content: string | Document | Node | Node[] | Buffer,
options: InternalOptions,
isDocument: boolean
): Document;

/**
* Renders an element or a set of elements to a string.
*
* Declared `abstract`, so concrete subclasses must provide the implementation.
*
* @private
* @param dom - DOM to render.
* @returns The rendered DOM.
*/
abstract _render(dom: Node | ArrayLike<Node>): string;
}

export interface Cheerio<T>
Expand Down
35 changes: 33 additions & 2 deletions src/index.ts
Expand Up @@ -4,6 +4,7 @@
* @category Cheerio
*/
export type { Cheerio } from './cheerio';

/**
* Types used in signatures of Cheerio methods.
*
Expand All @@ -22,8 +23,38 @@ export type {
*/
export type { Node, NodeWithChildren, Element, Document } from 'domhandler';

export * from './load';
import { load } from './load';
export type { CheerioAPI } from './load';
import { getLoad } from './load';
import { getParse } from './parse';
import { renderWithParse5, parseWithParse5 } from './parsers/parse5-adapter';
import renderWithHtmlparser2 from 'dom-serializer';
import { parseDocument as parseWithHtmlparser2 } from 'htmlparser2';

// Choose the parser per call: htmlparser2 when XML mode is on or when it is
// explicitly requested via `_useHtmlParser2`, parse5 for everything else.
const parse = getParse((content, options, isDocument) => {
  const wantsHtmlparser2 = options.xmlMode || options._useHtmlParser2;
  if (wantsHtmlparser2) {
    return parseWithHtmlparser2(content, options);
  }
  return parseWithParse5(content, options, isDocument);
});

// Docs are duplicated here because of https://github.com/TypeStrong/typedoc/issues/1616
/**
 * Builds a querying function that is bound to a document parsed from the
 * given markup.
 *
 * As in a web browser, loading may add `<html>`, `<head>`, and `<body>`
 * elements around the content. Set `isDocument` to `false` to parse in
 * fragment mode and turn this off.
 *
 * @param content - Markup to be loaded.
 * @param options - Options for the created instance.
 * @param isDocument - Allows parser to be switched to fragment mode.
 * @returns The loaded document.
 * @see {@link https://cheerio.js.org#loading} for additional usage information.
 */
export const load = getLoad(parse, (dom, options) => {
  // Serialize with the same library family that would have parsed the input.
  if (options.xmlMode || options._useHtmlParser2) {
    return renderWithHtmlparser2(dom, options);
  }
  return renderWithParse5(dom);
});

/**
* The default cheerio instance.
Expand Down

0 comments on commit efcba05

Please sign in to comment.