fix(jsdoc): do not parse for tags within HTML blocks

In markdown you can provide inline HTML blocks, which are not parsed for further markdown syntax. In the same way that the parseTagsProcessor ignores potential tags inside backtick code blocks, it now also ignores potential tags inside inline HTML blocks. These blocks are identified using the same rules as inline HTML in markdown. This fix is implemented by making the parser more generic and then modifying its behaviour by specifying "parser adapters". Currently parser adapters must expose the following interface: ``` interface ParserAdapter { init(lines, tags); nextLine(line, lineNumber); parseForTags(); } ``` BREAKING CHANGE: Tags inside HTML blocks are no longer parsed by default. If you wish to restore the previous behaviour then you can modify the `parseTagsProcessor.parserAdapters` array from a config block: ``` somePackage.config(function(parseTagsProcessor, backTickParserAdapter) { parseTagsProcessor.parserAdapters = [backTickParserAdapter]; }); ```
angular · Mar 17, 2017 · 451d84a · 451d84a
1 parent 5297e93
commit 451d84a
Show file tree

Hide file tree

Showing 7 changed files with 305 additions and 44 deletions.
diff --git a/jsdoc/index.js b/jsdoc/index.js
@@ -28,6 +28,8 @@ module.exports = new Package('jsdoc', [require('../base')])
 .factory(require('./services/transforms/unknown-tag'))
 .factory(require('./services/transforms/whole-tag'))
 .factory(require('./services/transforms/trim-whitespace'))
+.factory(require('./services/parser-adapters/backtick-parser-adapter'))
+.factory(require('./services/parser-adapters/html-block-parser-adapter'))
 
 .factory(require('./services/jsParser'))
 .factory(require('./file-readers/jsdoc'))

diff --git a/jsdoc/processors/parse-tags.js b/jsdoc/processors/parse-tags.js
@@ -7,17 +7,18 @@ var StringMap = require('stringmap');
  * @dgProcessor parseTagsProcessor
  * @description Parse the doc for jsdoc style tags
  */
-module.exports = function parseTagsProcessor(log, createDocMessage) {
+module.exports = function parseTagsProcessor(log, createDocMessage, backTickParserAdapter, htmlBlockParserAdapter) {
   return {
     tagDefinitions: [],
+    parserAdapters: [backTickParserAdapter, htmlBlockParserAdapter],
     $validate: {
-      tagDefinitions: { presence: true }
+      tagDefinitions: { presence: true },
     },
     $runAfter: ['parsing-tags'],
     $runBefore: ['tags-parsed'],
     $process: function(docs) {
 
-      var tagParser = createTagParser(this.tagDefinitions);
+      var tagParser = createTagParser(this.tagDefinitions, this.parserAdapters);
 
       docs.forEach(function(doc) {
         try {
@@ -54,12 +55,12 @@ function createTagDefMap(tagDefinitions) {
 /**
  * Create a new tagParser that can parse a set of jsdoc-style tags from a document
  * @param  {Array} tagDefMap A map of tag definitions keyed on tagName/aliasName.
+ * @param {ParserAdapter[]} parserAdapters A collection of adapters that modify the parsing behaviour
  */
-function createTagParser(tagDefinitions) {
+function createTagParser(tagDefinitions, parserAdapters) {
 
   var END_OF_LINE = /\r?\n/;
   var TAG_MARKER = /^\s*@(\S+)\s*(.*)$/;
-  var CODE_FENCE = /^\s*```(?!.*```)/;
   var tagDefMap = createTagDefMap(tagDefinitions);
 
   /**
@@ -74,25 +75,24 @@ function createTagParser(tagDefinitions) {
     var line, match, tagDef;
     var descriptionLines = [];
     var current;          // The current that that is being extracted
-    var inCode = false;   // Are we inside a fenced, back-ticked, code block
     var tags = new TagCollection();        // Contains all the tags that have been found
 
+    init(lines, tags);
 
     // Extract the description block
     do {
       line = lines[lineNumber];
 
-      if ( CODE_FENCE.test(line) ) {
-        inCode = !inCode;
-      }
+      nextLine(line, lineNumber);
 
-      // We ignore tags if we are in a code block
-      match = TAG_MARKER.exec(line);
-      tagDef = match && tagDefMap.get(match[1]);
-      if ( !inCode && match && ( !tagDef || !tagDef.ignore ) ) {
-        // Only store tags that are unknown or not ignored
-        current = new Tag(tagDef, match[1], match[2], startingLine + lineNumber);
-        break;
+      if (parseForTags()) {
+        match = TAG_MARKER.exec(line);
+        tagDef = match && tagDefMap.get(match[1]);
+        if ( match && ( !tagDef || !tagDef.ignore ) ) {
+          // Only store tags that are unknown or not ignored
+          current = new Tag(tagDef, match[1], match[2], startingLine + lineNumber);
+          break;
+        }
       }
 
       lineNumber += 1;
@@ -107,14 +107,11 @@ function createTagParser(tagDefinitions) {
     while(lineNumber < lines.length) {
       line = lines[lineNumber];
 
-      if ( CODE_FENCE.test(line) ) {
-        inCode = !inCode;
-      }
+      nextLine(line, lineNumber);
 
-      // We ignore tags if we are in a code block
       match = TAG_MARKER.exec(line);
       tagDef = match && tagDefMap.get(match[1]);
-      if ( !inCode && match && (!tagDef || !tagDef.ignore) ) {
+      if (parseForTags() && match && (!tagDef || !tagDef.ignore) ) {
         tags.addTag(current);
         current = new Tag(tagDef, match[1], match[2], startingLine + lineNumber);
       } else {
@@ -129,4 +126,17 @@ function createTagParser(tagDefinitions) {
 
     return tags;
   };
-}
+
+
+  /**
+   * Tell every parser adapter that a new set of lines is about to be parsed
+   * @param {Array} lines The lines that the tag parser is about to walk through
+   * @param {TagCollection} tags The collection that the parsed tags will be added to
+   */
+  function init(lines, tags) {
+    for (var i = 0, count = parserAdapters.length; i < count; i++) {
+      parserAdapters[i].init(lines, tags);
+    }
+  }
+
+  /**
+   * Show the line that is about to be examined to every parser adapter, in order
+   * @param {string} line The text of the line
+   * @param {number} lineNumber The index of the line within the lines being parsed
+   */
+  function nextLine(line, lineNumber) {
+    for (var i = 0, count = parserAdapters.length; i < count; i++) {
+      parserAdapters[i].nextLine(line, lineNumber);
+    }
+  }
+
+  /**
+   * Ask the parser adapters whether the current line should be checked for tags
+   * @return {boolean} false as soon as one adapter vetoes parsing; true if none of them do
+   */
+  function parseForTags() {
+    for (var i = 0, count = parserAdapters.length; i < count; i++) {
+      if (!parserAdapters[i].parseForTags()) {
+        return false;
+      }
+    }
+    return true;
+  }
+}
diff --git a/jsdoc/processors/parse-tags.spec.js b/jsdoc/processors/parse-tags.spec.js
@@ -1,6 +1,19 @@
 var mockPackage = require('../mocks/mockPackage');
 var Dgeni = require('dgeni');
 
+// A minimal parser adapter for these specs: it vetoes tag parsing from a line containing
+// `<<IGNORE_START>>` until a line containing `<<IGNORE_END>>` has been seen.
+function MockParserAdapter() {
+}
+MockParserAdapter.prototype = {
+  init: function() {},
+  nextLine: function(text) {
+    var startsIgnoring = /<<IGNORE_START>>/.test(text);
+    var stopsIgnoring = /<<IGNORE_END>>/.test(text);
+    if (startsIgnoring) {
+      this.ignore = true;
+    }
+    // The end marker is checked last, so it wins if both markers appear on one line
+    if (stopsIgnoring) {
+      this.ignore = false;
+    }
+  },
+  parseForTags: function() {
+    return this.ignore ? false : true;
+  }
+};
+
 describe("parse-tags processor", function() {
   var processor;
   var tagDefinitions = [
@@ -48,25 +61,58 @@ describe("parse-tags processor", function() {
     );
   });
 
-    it("should cope with tags that have no 'description'", function() {
-      var content = '@id\n@description some description';
-      var doc = { content: content, startingLine: 123 };
-      processor.$process([doc]);
-      expect(doc.tags.tags[0]).toEqual(jasmine.objectContaining({ tagName: 'id', description: '' }));
-      expect(doc.tags.tags[1]).toEqual(jasmine.objectContaining({ tagName: 'description', description: 'some description' }));
-    });
+  it("should cope with tags that have no 'description'", function() {
+    var content = '@id\n@description some description';
+    var doc = { content: content, startingLine: 123 };
+    processor.$process([doc]);
+    expect(doc.tags.tags[0]).toEqual(jasmine.objectContaining({ tagName: 'id', description: '' }));
+    expect(doc.tags.tags[1]).toEqual(jasmine.objectContaining({ tagName: 'description', description: 'some description' }));
+  });
 
-    it("should cope with empty content or no known tags", function() {
-      expect(function() {
-        processor.$process([{ content: '', startingLine: 123 }]);
-      }).not.toThrow();
+  it("should cope with empty content or no known tags", function() {
+    expect(function() {
+      processor.$process([{ content: '', startingLine: 123 }]);
+    }).not.toThrow();
 
-      expect(function() {
-        processor.$process([{ content: '@unknownTag some text', startingLine: 123 }]);
-      }).not.toThrow();
-    });
+    expect(function() {
+      processor.$process([{ content: '@unknownTag some text', startingLine: 123 }]);
+    }).not.toThrow();
+  });
 
 
+  it('should ignore tags if a parser adapter has indicated that the line should not be parsed', function() {
+    processor.tagDefinitions = [{ name: 'a' }, { name: 'b' }];
+    processor.parserAdapters = [new MockParserAdapter()];
+    var content =
+    '@a some text\n\n' +
+      '<<IGNORE_START>>\n' +
+      '  some code\n' +
+      '  @b not a tag\n' +
+      '<<IGNORE_END>>\n\n' +
+      'more text\n' +
+      '@b is a tag';
+    var doc = { content: content };
+    processor.$process([doc]);
+    expect(doc.tags.getTag('a').description).toEqual('some text\n\n' +
+      '<<IGNORE_START>>\n' +
+      '  some code\n' +
+      '  @b not a tag\n' +
+      '<<IGNORE_END>>\n\n' +
+      'more text'
+    );
+    expect(doc.tags.getTags('b').length).toEqual(1);
+    expect(doc.tags.getTag('b').description).toEqual('is a tag');
+  });
+
+
+  it("should ignore doc if it has no content", function() {
+    expect(function() {
+      processor.$process([{}]);
+    }).not.toThrow();
+  });
+
+
+  describe('legacy standard adapter', function() {
     it("should ignore @tags inside back-ticked code blocks", function() {
       processor.tagDefinitions = [{ name: 'a' }, { name: 'b' }];
       var content =
@@ -116,11 +162,5 @@ describe("parse-tags processor", function() {
 
       expect(doc.tags.getTags('b').length).toEqual(0);
     });
-
-
-    it("should ignore doc if it has no content", function() {
-      expect(function() {
-        processor.$process([{}]);
-      }).not.toThrow();
-    });
+  });
 });
diff --git a/jsdoc/services/parser-adapters/backtick-parser-adapter.js b/jsdoc/services/parser-adapters/backtick-parser-adapter.js
@@ -0,0 +1,17 @@
+/**
+ * A ParserAdapter that ignores tags inside triple backtick (fenced) code blocks
+ *
+ * The adapter tracks whether the most recently seen line is inside a fenced block:
+ * - `init()` resets the tracking, so an unclosed fence in one set of lines cannot
+ *   suppress tag parsing in the next set of lines that is parsed with the same adapter
+ * - `nextLine(line, lineNumber)` toggles the state each time a fence line is seen
+ * - `parseForTags()` returns `false` while inside a fenced block
+ */
+module.exports = function backTickParserAdapter() {
+  // A fence is a line that starts (after optional whitespace) with three backticks and has
+  // no further triple backticks later on the same line. This means that a complete
+  // single-line block, such as "```code```", does not toggle the state.
+  const CODE_FENCE = /^\s*```(?!.*```)/;
+  return {
+    init: function() {
+      // Every parse starts outside of a code block
+      this.inCode = false;
+    },
+    nextLine: function(line, lineNumber) {
+      if ( CODE_FENCE.test(line) ) {
+        this.inCode = !this.inCode;
+      }
+    },
+    parseForTags: function() {
+      return !this.inCode;
+    }
+  };
+};
diff --git a/jsdoc/services/parser-adapters/backtick-parser-adapter.spec.js b/jsdoc/services/parser-adapters/backtick-parser-adapter.spec.js
@@ -0,0 +1,79 @@
+const backTickParserAdapterFactory = require('./backtick-parser-adapter');
+const TagCollection = require('../../lib/TagCollection');
+
+describe('backTickParserAdapter', function() {
+
+  // Feed each line to the adapter, in order, and check what `parseForTags()` reports
+  // straight after that line. `shouldParse[i]` is true if tags may be parsed at line `i`.
+  function expectParseStates(adapter, lines, shouldParse) {
+    lines.forEach(function(line, index) {
+      adapter.nextLine(line, index);
+      if (shouldParse[index]) {
+        expect(adapter.parseForTags()).toBeTruthy();
+      } else {
+        expect(adapter.parseForTags()).toBeFalsy();
+      }
+    });
+  }
+
+  it("should ignore @tags inside back-ticked code blocks", function() {
+    const adapter = backTickParserAdapterFactory();
+    const lines = [
+      '@a some text',
+      '',
+      '',
+      '```',
+      '  some code',
+      '  @b not a tag',
+      '```',
+      '',
+      'more text',
+      '@b is a tag'
+    ];
+    adapter.init && adapter.init(lines, new TagCollection());
+
+    // Parsing is switched off from the opening fence up to, but not including, the closing fence
+    expectParseStates(adapter, lines, [
+      true, true, true,
+      false, false, false,
+      true, true, true, true
+    ]);
+  });
+
+
+  it("should cope with single line back-ticked code blocks", function() {
+    const adapter = backTickParserAdapterFactory();
+    const lines = [
+      '@a some text',
+      '',
+      '```some single line of code @b not a tag```',
+      '',
+      'some text outside a code block',
+      '```',
+      '  some code',
+      '  @b not a tag',
+      '```'
+    ];
+
+    adapter.init(lines, new TagCollection());
+
+    // The single line block opens and closes on the same line so it must not toggle the state
+    expectParseStates(adapter, lines, [
+      true, true, true, true, true,
+      false, false, false,
+      true
+    ]);
+  });
+});
diff --git a/jsdoc/services/parser-adapters/html-block-parser-adapter.js b/jsdoc/services/parser-adapters/html-block-parser-adapter.js
@@ -0,0 +1,42 @@
+// Matches an HTML opening tag at the very start of a line and captures its tag name
+const TAG_REGEXP = /^<([a-zA-Z]+)\b[\s\S]*?>/;
+/**
+ * A ParserAdapter that ignores tags inside HTML blocks that markdown would leave unparsed
+ * See https://daringfireball.net/projects/markdown/syntax#html
+ *
+ * A block starts on a line that follows an empty line and begins with an opening HTML tag.
+ * It ends once the closing tags for that tag name balance its opening tags.
+ */
+module.exports = function htmlBlockParserAdapter() {
+  return {
+    init: function(lines) {
+      this.lines = lines;
+      this.tagDepth = 0;
+      this.currentTag = null;
+    },
+    nextLine: function(line, lineNumber) {
+      // Only look for the start of a new block when we are not already inside one
+      const canStartBlock = this.tagDepth === 0 && this.lines[lineNumber - 1] === '';
+      if (canStartBlock) {
+        const startMatch = TAG_REGEXP.exec(line);
+        if (startMatch) {
+          this.currentTag = startMatch[1];
+        }
+      }
+      if (this.currentTag) {
+        // Track the nesting of the tag that started the block; other tag names are not counted
+        const opened = countTags(line, '<' + this.currentTag);
+        const closed = countTags(line, '</' + this.currentTag);
+        this.tagDepth = this.tagDepth + opened - closed;
+      }
+      if (this.tagDepth === 0) {
+        this.currentTag = null;
+      }
+    },
+    parseForTags: function() {
+      // While a block is open `currentTag` holds its tag name, which vetoes parsing
+      return !this.currentTag;
+    }
+  };
+};
+
+
+/**
+ * Count the tags on a line that begin with the given marker
+ *
+ * Self-closing tags (those that end with `/>`, such as `<hr />`) are not counted, since they
+ * never have a matching closing tag and so must not change the nesting depth.
+ *
+ * @param {string} line The line of text to search
+ * @param {string} marker The start of the tag to look for, e.g. `<div` or `</div`
+ * @return {number} The number of matching tags on the line that are not self-closing
+ */
+function countTags(line, marker) {
+  // The optional capture group is only filled when a `/` sits directly before the closing `>`
+  const regexp = new RegExp(marker + '\\b[\\s\\S]*?(/)?>', 'g');
+  let count = 0;
+  let match = regexp.exec(line);
+  while (match !== null) {
+    if (match[1] === undefined) {
+      count += 1;
+    }
+    match = regexp.exec(line);
+  }
+  return count;
+}