A `htmlparser2` handler for parsing rich metadata from HTML. Includes HTML metadata, JSON-LD, RDFa, microdata, OEmbed, Twitter cards and AppLinks.
Clone or download
Fetching latest commit…
Cannot retrieve the latest commit at this time.
Permalink
Failed to load latest commit information.
scripts
src
test
vendor
.editorconfig
.gitignore
.travis.yml
LICENSE
README.md
package-lock.json
package.json
tsconfig.json
tslint.json

README.md

htmlmetaparser

NPM version NPM downloads Build status Test coverage

A htmlparser2 handler for parsing rich metadata from HTML. Includes HTML metadata, JSON-LD, RDFa, microdata, OEmbed, Twitter cards and AppLinks.

Installation

npm install htmlmetaparser --save

Usage

import * as metaparser from 'htmlmetaparser'
import * as htmlparser from 'htmlparser2'

const url = 'https://medium.com/slack-developer-blog/everything-you-ever-wanted-to-know-about-unfurling-but-were-afraid-to-ask-or-how-to-make-your-e64b4bb9254#.a0wjf4ltt'

const handler = new metaparser.Handler(
  function (err, result) {
    console.log(result)
  },
  {
    url // The HTML page's URL is used to resolve relative URLs.
  }
)

// Create an HTML parser with the handler.
const parser = new htmlparser.Parser(handler, { decodeEntities: true })
parser.write(html)
parser.done()

/*
Object {
  "alternate": Array [],
  "applinks": Object {
    "android:app_name": "Medium",
    "android:package": "com.medium.reader",
    "android:url": "medium://p/e64b4bb9254",
    "ios:app_name": "Medium",
    "ios:app_store_id": "828256236",
    "ios:url": "medium://p/e64b4bb9254",
    "web:url": "https://medium.com/slack-developer-blog/everything-you-ever-wanted-to-know-about-unfurling-but-were-afraid-to-ask-or-how-to-make-your-e64b4bb9254",
  },
  "html": Object {
    "author": "Matt Haughey",
    "canonical": "https://medium.com/slack-developer-blog/everything-you-ever-wanted-to-know-about-unfurling-but-were-afraid-to-ask-or-how-to-make-your-e64b4bb9254",
    "description": "Let’s start with the most obvious question first. This is what an “unfurl” is:",
    "robots": "index, follow",
    "title": "Everything you ever wanted to know about unfurling but were afraid to ask /or/ How to make your… – Slack Platform Blog – Medium",
    "viewport": "width=device-width, initial-scale=1",
  },
  "jsonld": Object {
    "@context": "http://schema.org",
    "@type": "NewsArticle",
    "author": Object {
      "@type": "Person",
      "name": "Matt Haughey",
      "url": "https://medium.com/@mathowie",
    },
    "creator": Array [
      "Matt Haughey",
    ],
    "dateModified": "2016-04-08T05:59:26.776Z",
    "datePublished": "2015-11-24T21:33:25.196Z",
    "headline": "Everything you ever wanted to know about unfurling but were afraid to ask /or/ How to make your…",
    "image": Object {
      "@type": "ImageObject",
      "height": 2000,
      "url": "https://cdn-images-1.medium.com/max/1360/1*QOMaDLcO8rExD0ctBV3BWg.png",
      "width": 1360,
    },
    "keywords": Array [
      "Web Development",
      "Slack",
      "How To",
    ],
    "mainEntityOfPage": "https://medium.com/slack-developer-blog/everything-you-ever-wanted-to-know-about-unfurling-but-were-afraid-to-ask-or-how-to-make-your-e64b4bb9254",
    "name": "Everything you ever wanted to know about unfurling but were afraid to ask /or/ How to make your…",
    "publisher": Object {
      "@type": "Organization",
      "logo": Object {
        "@type": "ImageObject",
        "height": 60,
        "url": "https://cdn-images-1.medium.com/max/215/1*5ztbgEt4NqpVaxTc64C-XA.png",
        "width": 215,
      },
      "name": "Slack Platform Blog",
      "url": "https://medium.com/slack-developer-blog",
    },
  },
  "rdfa": Object {
    "@context": Object {
      "article": "http://ogp.me/ns/article#",
      "cc": "https://creativecommons.org/ns#",
      "fb": "http://ogp.me/ns/fb#",
      "medium-com": "http://ogp.me/ns/fb/medium-com#",
      "og": "http://ogp.me/ns#",
    },
    "@graph": Array [
      Object {
        "al:android:app_name": "Medium",
        "al:android:package": "com.medium.reader",
        "al:android:url": "medium://p/e64b4bb9254",
        "al:ios:app_name": "Medium",
        "al:ios:app_store_id": "828256236",
        "al:ios:url": "medium://p/e64b4bb9254",
        "al:web:url": "https://medium.com/slack-developer-blog/everything-you-ever-wanted-to-know-about-unfurling-but-were-afraid-to-ask-or-how-to-make-your-e64b4bb9254",
        "article:author": "https://medium.com/@mathowie",
        "article:published_time": "2015-11-24T21:33:25.196Z",
        "article:publisher": "https://www.facebook.com/medium",
        "cc:attributionName": Object {
          "@language": "en",
          "@type": undefined,
          "@value": "https://medium.com/@mathowie",
        },
        "cc:attributionUrl": Object {
          "@id": "https://medium.com/@mathowie",
        },
        "fb:app_id": "542599432471018",
        "fb:smart_publish:robots": "noauto",
        "og:description": "Let’s start with the most obvious question first. This is what an “unfurl” is:",
        "og:image": "https://cdn-images-1.medium.com/max/1200/1*QOMaDLcO8rExD0ctBV3BWg.png",
        "og:site_name": "Medium",
        "og:title": "Everything you ever wanted to know about unfurling but were afraid to ask /or/ How to make your… – Slack Platform Blog",
        "og:type": "article",
        "og:url": "https://medium.com/slack-developer-blog/everything-you-ever-wanted-to-know-about-unfurling-but-were-afraid-to-ask-or-how-to-make-your-e64b4bb9254",
      },
    ],
  },
  "twitter": Object {
    "app:id:iphone": "828256236",
    "app:name:iphone": "Medium",
    "app:url:iphone": "medium://p/e64b4bb9254",
    "card": "summary_large_image",
    "creator": "@mathowie",
    "description": "Let’s start with the most obvious question first. This is what an “unfurl” is:",
    "image:src": "https://cdn-images-1.medium.com/max/1200/1*QOMaDLcO8rExD0ctBV3BWg.png",
    "site": "@Medium",
  },
}
*/

Please note: No HTTP requests are made by htmlmetaparser. You must decide what you want to retrieve by traversing the `alternate` array and making the requests manually (e.g. external JSON-LD documents, RDF documents, OEmbed, RSS).

Development

# Build the fixtures directory with raw content.
node scripts/fixtures.js

# Load the latest `provider.json` document.
node scripts/providers.js

# Run the test suite.
npm test

# Run the test suite and update snapshots.
npm test -- -u

License

Apache 2.0