Skip to content

Commit

Permalink
fix(parser): abort html parsing only after some metadata has been found
Browse files Browse the repository at this point in the history
fixes #67
  • Loading branch information
trieloff committed Jan 5, 2021
1 parent f89e93e commit 74e44ea
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 1 deletion.
4 changes: 3 additions & 1 deletion src/index.ts
Expand Up @@ -268,7 +268,9 @@ function getMetadata (ctx, opts: Opts) {
}

// We want to parse as little as possible so finish once we see </head>
if (tag === 'head') {
// If no <title> has been seen by the time </head> closes, keep scanning the
// rest of the document instead of stopping here.
if (tag === 'head' && this._title) {
parser.reset()
}
}
Expand Down
14 changes: 14 additions & 0 deletions test/basic/basic-body.html
@@ -0,0 +1,14 @@

<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
</head>
<body>
<!--
Test fixture: the <title> and the description/keywords <meta> tags are
deliberately placed inside <body> instead of <head>, so the head contains
no title at all.
-->
<title>ccc</title>
<meta name="description" content="aaa" />
<meta name="keywords" content="a, b, c" />
</body>
</html>


19 changes: 19 additions & 0 deletions test/basic/test.ts
Expand Up @@ -32,6 +32,25 @@ test('should detect title, description and keywords', async () => {
expect(result).toEqual(expected)
})

// Metadata tags that live in <body> rather than <head> must still be reported.
test('should detect title, description and keywords even when they are in the body', async () => {
  // Serve the body-only fixture for the URL that is unfurled below.
  const fixturePath = __dirname + '/basic-body.html'
  const interceptor = nock('http://localhost').get('/html/basic-body')
  interceptor.replyWithFile(200, fixturePath, { 'Content-Type': 'text/html' })

  const metadata = await unfurl('http://localhost/html/basic-body')

  // The values below come from the fixture's <title> and <meta> tags; the
  // favicon is the default location on the requested host.
  expect(metadata).toEqual({
    title: 'ccc',
    description: 'aaa',
    keywords: ['a', 'b', 'c'],
    favicon: 'http://localhost/favicon.ico'
  })
})

test('should detect title, description and keywords', async () => {
nock('http://localhost')
.get('/html/basic')
Expand Down

0 comments on commit 74e44ea

Please sign in to comment.