diff --git a/README.md b/README.md
index e670cda..73fe625 100644
--- a/README.md
+++ b/README.md
@@ -121,6 +121,10 @@ You can also check out this nice [working implementation](https://github.com/scr
   If you only need the metadata available when each article streams, you can
   use `item.meta` as usual.
 
+- `guidlink` - Set to `false` to override Feedparser's default behavior, which
+  is to use an RSS item's `guid` as the item `link` when the item has no `link`
+  and the `guid` starts with `http:` or `https:`.
+
 - `feedurl` - The url (string) of the feed. FeedParser is very good at
   resolving relative urls in feeds, including those embedded in HTML content
   fields. But some feeds use relative urls without declaring the `xml:base`
diff --git a/index.d.ts b/index.d.ts
index abad0ae..c376e71 100644
--- a/index.d.ts
+++ b/index.d.ts
@@ -36,6 +36,7 @@ declare namespace FeedParser {
         strict?: boolean;
         normalize?: boolean;
         addmeta?: boolean;
+        guidlink?: boolean;
         feedurl?: string;
         resume_saxerror?: boolean;
         MAX_BUFFER_LENGTH?: number;
diff --git a/lib/feedparser.js b/lib/feedparser.js
index edffce2..d2ddd76 100644
--- a/lib/feedparser.js
+++ b/lib/feedparser.js
@@ -77,6 +77,7 @@ function FeedParser (options) {
   if (!('strict' in this.options)) this.options.strict = false;
   if (!('normalize' in this.options)) this.options.normalize = true;
   if (!('addmeta' in this.options)) this.options.addmeta = true;
+  if (!('guidlink' in this.options)) this.options.guidlink = true;
   if (!('resume_saxerror' in this.options)) this.options.resume_saxerror = true;
   // MAX_BUFFER_LENGTH is not part of the public API of sax, but we need to be
   // able to handle nodes that are larger than the 64K default
@@ -1168,7 +1169,7 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options) {
     if (item.categories.length) {
       item.categories = _.uniq(item.categories);
     }
-    if (!item.link) {
+    if (!item.link && (!options || options.guidlink)) {
       if (item.guid && /^https?:/.test(item.guid)) {
         item.link = item.guid;
       }
diff --git a/test/link.js b/test/link.js
index 8996b3d..24cad04 100644
--- a/test/link.js
+++ b/test/link.js
@@ -47,4 +47,64 @@ describe('links', function () {
     });
   });
 
+  it('should infer item link from http guid by default (issue #293)', function (done) {
+    var feedparser = new FeedParser();
+    var feed = '<?xml version="1.0" encoding="UTF-8"?>' +
+      '<rss version="2.0">' +
+      '<channel>' +
+      '<title>Linkless feed</title>' +
+      '<link>http://example.com/</link>' +
+      '<description>Feed with linkless items</description>' +
+      '<item>' +
+      '<title>One</title>' +
+      '<guid>http://example.com/posts/one</guid>' +
+      '</item>' +
+      '</channel>' +
+      '</rss>';
+
+    feedparser
+      .once('readable', function () {
+        var item = this.read();
+        assert.equal(item.guid, 'http://example.com/posts/one');
+        assert.equal(item.link, 'http://example.com/posts/one');
+        done();
+      })
+      .on('error', function (err) {
+        assert.ifError(err);
+        done(err);
+      });
+
+    feedparser.end(feed);
+  });
+
+  it('should not infer item link from guid when guidlink is false (issue #293)', function (done) {
+    var feedparser = new FeedParser({ guidlink: false });
+    var feed = '<?xml version="1.0" encoding="UTF-8"?>' +
+      '<rss version="2.0">' +
+      '<channel>' +
+      '<title>Linkless feed</title>' +
+      '<link>http://example.com/</link>' +
+      '<description>Feed with linkless items</description>' +
+      '<item>' +
+      '<title>One</title>' +
+      '<guid>http://example.com/posts/one</guid>' +
+      '</item>' +
+      '</channel>' +
+      '</rss>';
+
+    feedparser
+      .once('readable', function () {
+        var item = this.read();
+        assert.equal(item.guid, 'http://example.com/posts/one');
+        assert.equal(item.link, null);
+        done();
+      })
+      .on('error', function (err) {
+        assert.ifError(err);
+        done(err);
+      });
+
+    feedparser.end(feed);
+  });
+
 });
diff --git a/test/types.ts b/test/types.ts
index 883ba4d..e7aaf13 100644
--- a/test/types.ts
+++ b/test/types.ts
@@ -8,6 +8,7 @@ const fp2 = new FeedParser({
   strict: false,
   normalize: true,
   addmeta: true,
+  guidlink: true,
   feedurl: 'https://example.com/feed',
   resume_saxerror: true,
   MAX_BUFFER_LENGTH: 1024 * 1024,