From c6fcd24866851ee2962ffacec52fe8897a7f391b Mon Sep 17 00:00:00 2001 From: Joe Hewitt Date: Sat, 9 Jul 2011 00:40:34 -0700 Subject: [PATCH] * First commit --- .gitignore | 4 + LICENSE | 13 + Makefile | 6 + README.md | 26 + lib/NodeHandler.js | 124 +++ lib/NodeTransformer.js | 133 +++ lib/markdom.js | 34 + lib/nodes.js | 520 ++++++++++ package.json | 21 + src/array.c | 300 ++++++ src/array.h | 148 +++ src/bridge.cc | 344 +++++++ src/bridge.h | 27 + src/buffer.c | 291 ++++++ src/buffer.h | 167 ++++ src/dom.c | 574 +++++++++++ src/dom.h | 24 + src/markdom.cc | 122 +++ src/markdown.c | 2068 ++++++++++++++++++++++++++++++++++++++++ src/markdown.h | 112 +++ src/xhtml.c | 797 ++++++++++++++++ src/xhtml.h | 43 + test/markdom-test.js | 88 ++ wscript | 29 + 24 files changed, 6015 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 README.md create mode 100644 lib/NodeHandler.js create mode 100644 lib/NodeTransformer.js create mode 100644 lib/markdom.js create mode 100644 lib/nodes.js create mode 100644 package.json create mode 100755 src/array.c create mode 100755 src/array.h create mode 100644 src/bridge.cc create mode 100644 src/bridge.h create mode 100755 src/buffer.c create mode 100755 src/buffer.h create mode 100755 src/dom.c create mode 100644 src/dom.h create mode 100644 src/markdom.cc create mode 100755 src/markdown.c create mode 100755 src/markdown.h create mode 100755 src/xhtml.c create mode 100644 src/xhtml.h create mode 100644 test/markdom-test.js create mode 100644 wscript diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..acf196a --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +node_modules +build* +*.o +.dropbox diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..aa96201 --- /dev/null +++ b/LICENSE @@ -0,0 +1,13 @@ +Copyright 2011 Joe Hewitt + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance 
with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..ccae344 --- /dev/null +++ b/Makefile @@ -0,0 +1,6 @@ +default: test + +test: + vows test/*-test.js + +.PHONY: test diff --git a/README.md b/README.md new file mode 100644 index 0000000..03ed94a --- /dev/null +++ b/README.md @@ -0,0 +1,26 @@ +markdom +======== + +A Markdown parser that can create and transform a DOM before generating HTML. + +Installation +------------ + + $ npm install markdom + +License +------- + +Copyright 2011 Joe Hewitt + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/lib/NodeHandler.js b/lib/NodeHandler.js new file mode 100644 index 0000000..e8e57d9 --- /dev/null +++ b/lib/NodeHandler.js @@ -0,0 +1,124 @@ + +var NodeTypes = require('./nodes'); + +// ************************************************************************************************* + +function NodeHandler(context) { + this.nodes = {}; + this.nodeCount = 0; + this.context = context; + this.embeds = []; +} +exports.NodeHandler = NodeHandler; + +NodeHandler.prototype = { + header: function(level, content) { + return this.createNode(new NodeTypes.Header(level, this.getNodes(content))); + }, + + paragraph: function(content) { + return this.createNode(new NodeTypes.Paragraph(this.getNodes(content))); + }, + + blockquote: function(content) { + return this.createNode(new NodeTypes.Blockquote(this.getNodes(content))); + }, + + blockCode: function(lang, text) { + return this.createNode(new NodeTypes.BlockCode(lang, text)); + }, + + blockHTML: function(text) { + return this.createNode(new NodeTypes.BlockHTML(text)); + }, + + list: function(level, content) { + return this.createNode(new NodeTypes.List(level, this.getNodes(content))); + }, + + listItem: function(content) { + return this.createNode(new NodeTypes.ListItem(this.getNodes(content))); + }, + + table: function(header, body) { + return this.createNode(new NodeTypes.Table(this.getNodes(header), this.getNodes(body))); + }, + + tableRow: function(cells) { + return this.createNode(new NodeTypes.TableRow(this.getNodes(cells))); + }, + + tableCell: function(content, align) { + return this.createNode(new NodeTypes.TableCell(this.getNodes(content), align)); + }, + + hrule: function() { + return this.createNode(new NodeTypes.HRule()); + }, + + lineBreak: function() { + return this.createNode(new NodeTypes.LineBreak()); + }, + + emphasis: function(depth, content) { + return this.createNode(new NodeTypes.Emphasis(depth, this.getNodes(content))); + }, + + strikethrough: function(content) { + return 
this.createNode(new NodeTypes.Strikethrough(this.getNodes(content))); + }, + + codeSpan: function(text) { + return this.createNode(new NodeTypes.CodeSpan(text)); + }, + + link: function(url, title, content) { + return this.createNode(new NodeTypes.Link(url, title, this.getNodes(content))); + }, + + autolink: function(url, type) { + return this.createNode(new NodeTypes.Link(url, '', + new NodeTypes.NodeSet([new NodeTypes.Text(url)]))); + }, + + image: function(url, title, alt) { + var id = this.createNode(new NodeTypes.Image(url, title, alt)); + if (id > 0) { + // Don't blame me for this hack, I am just replicating what Upskirt already does + // in markdown.c. Yes, it actually goes back in the buffer and erases the ! before + // the image syntax. + var prev = this.nodes[id-1]; + if (prev instanceof NodeTypes.Text) { + prev.text = prev.text.slice(0, prev.text.length-1); + } + } + return id; + }, + + text: function(text) { + return this.createNode(new NodeTypes.Text(text)); + }, + + // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + + createNode: function(node) { + var nodeId = this.nodeCount++; + this.nodes[nodeId] = node; + return nodeId; + }, + + getNodes: function(ids) { + var nodes = []; + if (ids) { + var idns = ids.split(','); + for (var i = 0; i < idns.length; ++i) { + var id = idns[i]; + var node = this.nodes[id]; + if (node) { + nodes.push(node); + } + } + } + return new NodeTypes.NodeSet(nodes); + } +}; diff --git a/lib/NodeTransformer.js b/lib/NodeTransformer.js new file mode 100644 index 0000000..507cae2 --- /dev/null +++ b/lib/NodeTransformer.js @@ -0,0 +1,133 @@ + +var NodeTypes = require('./nodes'); + +// ************************************************************************************************* + +function NodeTransformer() { + +} + +NodeTransformer.prototype = { + visit: function(node) { + node.visit(this); + if (node instanceof NodeTypes.NodeSet) { + return this.nodeSet(node); + } else if (node 
instanceof NodeTypes.Header) { + return this.header(node); + } else if (node instanceof NodeTypes.Paragraph) { + return this.paragraph(node); + } else if (node instanceof NodeTypes.List) { + return this.list(node); + } else if (node instanceof NodeTypes.ListItem) { + return this.listItem(node); + } else if (node instanceof NodeTypes.Table) { + return this.table(node); + } else if (node instanceof NodeTypes.TableRow) { + return this.tableRow(node); + } else if (node instanceof NodeTypes.TableCell) { + return this.tableCell(node); + } else if (node instanceof NodeTypes.Blockquote) { + return this.blockquote(node); + } else if (node instanceof NodeTypes.BlockCode) { + return this.blockcode(node); + } else if (node instanceof NodeTypes.BlockHTML) { + return this.blockHTML(node); + } else if (node instanceof NodeTypes.HRule) { + return this.hrule(node); + } else if (node instanceof NodeTypes.LineBreak) { + return this.lineBreak(node); + } else if (node instanceof NodeTypes.Emphasis) { + return this.emphasis(node); + } else if (node instanceof NodeTypes.Strikethrough) { + return this.strikethrough(node); + } else if (node instanceof NodeTypes.CodeSpan) { + return this.codeSpan(node); + } else if (node instanceof NodeTypes.Link) { + return this.link(node); + } else if (node instanceof NodeTypes.Image) { + return this.image(node); + } else if (node instanceof NodeTypes.Text) { + return this.text(node); + } else { + return node; + } + }, + + nodeSet: function(node) { + return node; + }, + + header: function(node) { + return node; + }, + + paragraph: function(node) { + return node; + }, + + list: function(node) { + return node; + }, + + listItem: function(node) { + return node; + }, + + table: function(node) { + return node; + }, + + tableRow: function(node) { + return node; + }, + + tableCell: function(node) { + return node; + }, + + blockquote: function(node) { + return node; + }, + + blockCode: function(node) { + return node; + }, + + blockHTML: function(node) { + return 
node; + }, + + hrule: function(node) { + return node; + }, + + lineBreak: function(node) { + return node; + }, + + emphasis: function(node) { + return node; + }, + + strikethrough: function(node) { + return node; + }, + + codeSpan: function(node) { + return node; + }, + + link: function(node) { + return node; + }, + + image: function(node) { + return node; + }, + + text: function(node) { + return node; + }, +}; + +exports.NodeTransformer = NodeTransformer; diff --git a/lib/markdom.js b/lib/markdom.js new file mode 100644 index 0000000..8dc453b --- /dev/null +++ b/lib/markdom.js @@ -0,0 +1,34 @@ + +var _markdom = require('../build/default/_markdom'); +var NodeTypes = require('./nodes'); +var NodeHandler = require('./NodeHandler').NodeHandler; +var NodeTransformer = require('./NodeTransformer').NodeTransformer; + +// ************************************************************************************************* + +exports.toDOM = function(source, options, transformer) { + var handler = new NodeHandler(); + + var ids = _markdom.toDOM(source, handler, options || {}); + var rootNodes = handler.getNodes(ids); + if (transformer) { + transformer.visit(rootNodes); + } + return rootNodes; +} + +exports.toHTML = function(source, options, app) { + if (source instanceof NodeTypes.Node) { + return source.toHTML(); + } else { + if (app) { + var nodes = exports.toDOM(source, options, app); + return nodes.toHTML(); + } else { + return _markdom.toHTML(source, options || {}); + } + } +} + +exports.nodeTypes = NodeTypes; +exports.NodeTransformer = NodeTransformer; diff --git a/lib/nodes.js b/lib/nodes.js new file mode 100644 index 0000000..f0cbfa6 --- /dev/null +++ b/lib/nodes.js @@ -0,0 +1,520 @@ + +var _ = require('underscore'); + +// ************************************************************************************************* + +function openTag(tagName, className) { + return '<' + tagName + (className ? 
' class="' + className + '"' : ''); +} + +// ************************************************************************************************* + +function Node() { + +} + +Node.prototype = { + visit: function(visitor) { + }, +}; + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function NodeSet(nodes) { + this.nodes = nodes || []; +} + +NodeSet.prototype = subclass(Node, { + visit: function(visitor) { + this.nodes = _.map(this.nodes, function(node) { return visitor.visit(node); }); + }, + + toHTML: function() { + return _.map(this.nodes, function(node) { return node.toHTML(); }).join(''); + }, + + toMarkdown: function(indent) { + return _.map(this.nodes, function(node) { return node.toMarkdown(indent); }).join(''); + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function Header(level, content) { + this.level = level; + this.content = content; +} + +Header.prototype = subclass(Node, { + visit: function(visitor) { + this.content = visitor.visit(this.content); + }, + + toHTML: function() { + var tag = 'h' + this.level; + return '<' + tag + '>' + this.content.toHTML() + ''; + }, + + toMarkdown: function() { + return '#' + this.level + ' ' + this.content.toMarkdown() + '\n\n'; + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function HRule() { +} + +HRule.prototype = subclass(Node, { + toHTML: function() { + return '
'; + }, + + toMarkdown: function() { + return '------\n'; + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function LineBreak() { +} + +LineBreak.prototype = subclass(Node, { + toHTML: function() { + return '
'; + }, + + toMarkdown: function() { + return '\n'; + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function Paragraph(content) { + this.content = content; +} + +Paragraph.prototype = subclass(Node, { + visit: function(visitor) { + this.content = visitor.visit(this.content); + }, + + toHTML: function() { + return '

' + this.content.toHTML() + '

'; + }, + + toMarkdown: function() { + return this.content.toMarkdown() + '\n\n'; + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function Blockquote(content) { + this.content = content; +} + +Blockquote.prototype = subclass(Node, { + visit: function(visitor) { + this.content = visitor.visit(this.content); + }, + + toHTML: function() { + return '
' + this.content.toHTML() + '
'; + }, + + toMarkdown: function() { + return '> ' + this.content.toMarkdown() + '\n'; + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function BlockCode(lang, text) { + this.lang = lang; + this.text = text; +} + +BlockCode.prototype = subclass(Node, { + toHTML: function() { + return '
' + this.text + '
'; + }, + + toMarkdown: function(indent) { + return _.map(this.text.split('\n'), function(line) { + return ' ' + line; + }).join('\n'); + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function BlockHTML(text) { + this.text = text; +} + +BlockHTML.prototype = subclass(Node, { + toHTML: function() { + return this.text; + }, + + toMarkdown: function(indent) { + return this.text; + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function List(ordered, items) { + this.ordered = ordered; + this.items = items; +} + +List.prototype = subclass(Node, { + visit: function(visitor) { + this.items = visitor.visit(this.items); + }, + + toHTML: function() { + var tag = this.ordered ? 'ol' : 'ul'; + return '<' + tag + '>' + this.items.toHTML() + ''; + }, + + toMarkdown: function(indent) { + if (!indent) indent = ''; + var items = []; + _.each(this.items.nodes, function(node) { + items.push(node.toMarkdown(indent)); + }); + return items.join('\n') + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function ListItem(content) { + this.content = content; +} + +ListItem.prototype = subclass(Node, { + visit: function(visitor) { + this.content = visitor.visit(this.content); + }, + + toHTML: function() { + return '
  • ' + this.content.toHTML() + '
  • '; + }, + + toMarkdown: function(indent) { + if (!indent) indent = ''; + return indent + '* ' + this.content.toMarkdown(indent+' ') + '\n'; + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function Table(header, body) { + this.header = header; + this.body = body; +} + +Table.prototype = subclass(Node, { + visit: function(visitor) { + this.header = visitor.visit(this.header); + this.body = visitor.visit(this.body); + }, + + toHTML: function() { + return '' + + '' + this.body.toHTML() + '
    ' + this.header.toHTML() + '
    '; + }, + + toMarkdown: function(indent) { + var header = this.header.nodes.length + ? _.map(this.header.nodes, function(node) { return node.toMarkdown(indent); }).join('\n') + : ''; + var body = this.body.nodes.length + ? _.map(this.body.nodes, function(node) { return node.toMarkdown(indent); }).join('\n') + : ''; + return header && body ? header + '\n-------\n' + body : (header || body); + + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function TableRow(cells) { + this.cells = cells; +} + +TableRow.prototype = subclass(Node, { + visit: function(visitor) { + this.cells = visitor.visit(this.cells); + }, + + toHTML: function() { + return '' + this.cells.toHTML() + ''; + }, + + toMarkdown: function(indent) { + return _.map(this.cells.nodes, function(node) { + return node.toMarkdown(indent); + }).join(' | '); + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function TableCell(content, align) { + this.content = content; + this.align = ['', 'left', 'right', 'center'][align]; +} + +TableCell.prototype = subclass(Node, { + visit: function(visitor) { + this.content = visitor.visit(this.content); + }, + + toHTML: function() { + return '' + this.content.toHTML() + ''; + }, + + toMarkdown: function(indent) { + return this.content.toMarkdown(indent); + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function Emphasis(depth, content) { + this.depth = depth; + this.content = content; +} + +Emphasis.prototype = subclass(Node, { + visit: function(visitor) { + this.content = visitor.visit(this.content); + }, + + toHTML: function() { + if (this.depth == 1) { + return '' + this.content.toHTML() + ''; + } else if (this.depth == 2) { + return '' + this.content.toHTML() + ''; + } else if (this.depth == 3) { + return '' + this.content.toHTML() + ''; + } + }, + + toMarkdown: function() { + 
return '*' + this.depth + ':' + this.content.toMarkdown() + '*'; + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function Strikethrough(content) { + this.content = content; +} + +Strikethrough.prototype = subclass(Node, { + visit: function(visitor) { + this.content = visitor.visit(this.content); + }, + + toHTML: function() { + return '' + this.content.toHTML() + ''; + }, + + toMarkdown: function() { + return '~~' + this.content.toMarkdown() + '~~'; + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function CodeSpan(text) { + this.text = text; +} + +CodeSpan.prototype = subclass(Node, { + toHTML: function() { + return '' + this.text + ''; + }, + + toMarkdown: function(indent) { + return '`' + this.text + '`'; + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function Link(url, title, content) { + this.url = url; + this.title = title; + this.content = content; +} + +Link.prototype = subclass(Node, { + visit: function(visitor) { + this.content = visitor.visit(this.content); + }, + + toHTML: function() { + return openTag('a', this.className) + + ' href="' + this.url + '">' + (this.content ? this.content.toHTML() : '') + ''; + }, + + toMarkdown: function() { + return '[' + (this.content ? this.content.toMarkdown() : '') + '](' + this.url + + (this.title ? ' "' + this.title + '"' : '') + ')'; + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function Image(url, title, alt) { + this.url = url; + this.title = title; + this.alt = alt; +} + +Image.prototype = subclass(Node, { + toHTML: function() { + var w = this.width == undefined ? '' : (' width="' + this.width + '"'); + var h = this.height == undefined ? 
'' : (' height="' + this.height + '"'); + return openTag('img', this.className) + ' src="' + this.url + '"' + w + h + '>'; + }, + + toMarkdown: function() { + return '![' + (this.alt || '') + '](' + this.url + + (this.title ? ' "' + this.title + '"' : '') + ')'; + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function Text(text) { + this.text = text; +} + +Text.prototype = subclass(Node, { + toHTML: function() { + return this.text; + }, + + toMarkdown: function() { + return this.text; + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function Script(scriptType, scriptText) { + this.scriptType = scriptType; + this.scriptText = scriptText; +} + +Script.prototype = subclass(Node, { + toHTML: function() { + return ''; + }, + + toMarkdown: function() { + return ''; + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function Embed(url, title, alt, groups, transformer) { + this.url = url; + this.title = title; + this.alt = alt; + this.groups = groups; + this.transformer = transformer; +} + +Embed.prototype = subclass(Node, { + key: function() { + return this.url + '|' + this.alt + '|' + this.title; + }, + + transform: function(object, cb) { + if (typeof(object) == "function") { cb = object; object = undefined; } + + var args = this.groups.slice(); + if (object) { + args.unshift(object); + } + args.push.apply(args, [this.url, this.title, this.alt, _.bind(function(err, content) { + if (err) return cb ? cb(err) : 0; + + this.content = content; + cb(0, this); + }, this)]); + this.transformer.transform.apply(this.transformer, args); + }, + + toHTML: function() { + return this.content ? this.content.toHTML() : ''; + }, + + toMarkdown: function() { + return this.content ? 
this.content.toMarkdown() : ''; + } +}); + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +function Raw(html) { + this.html = html; +} + +Raw.prototype = subclass(Node, { + toHTML: function() { + return this.html; + }, + + toMarkdown: function() { + return ''; + } +}); + +// ************************************************************************************************* + +function subclass(a, b) { + function f() {} + if (a) { + f.prototype = a.prototype; + } + return _.extend(new f(), b); +} + +// ************************************************************************************************* + +exports.Node = Node; +exports.NodeSet = NodeSet; +exports.Paragraph = Paragraph; +exports.Header = Header; +exports.HRule = HRule; +exports.LineBreak = LineBreak; +exports.Blockquote = Blockquote; +exports.BlockCode = BlockCode; +exports.BlockHTML = BlockHTML; +exports.List = List; +exports.ListItem = ListItem; +exports.Table = Table; +exports.TableRow = TableRow; +exports.TableCell = TableCell; +exports.Emphasis = Emphasis; +exports.Strikethrough = Strikethrough; +exports.CodeSpan = CodeSpan; +exports.Link = Link; +exports.Image = Image; +exports.Text = Text; +exports.Script = Script; +exports.Embed = Embed; +exports.Raw = Raw; diff --git a/package.json b/package.json new file mode 100644 index 0000000..c9ae274 --- /dev/null +++ b/package.json @@ -0,0 +1,21 @@ +{ + "name": "markdom", + "description": "Markdown parser based on upskirt", + "url": "http://github.com/joehewitt/markdom", + "repository": { + "type": "git", + "url" : "http://github.com/joehewitt/markdom" + }, + "author": "Joe Hewitt ", + "keywords": ["markdown", "upskirt"], + "contributors": [], + "dependencies": { + "underscore": "", + "vows": ">=0.5.4" + }, + "version": "0.0.1", + "engines": { "node": ">=0.4.0" }, + "main": "./lib/markdom", + "directories": { "test": "./test" }, + "scripts": { "install": "node-waf configure build" } +} diff --git 
a/src/array.c b/src/array.c new file mode 100755 index 0000000..be7a816 --- /dev/null +++ b/src/array.c @@ -0,0 +1,300 @@ +/* array.c - automatic dynamic array for pointers */ + +/* + * Copyright (c) 2008, Natacha Porté + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "array.h" + +#include + + +/*************************** + * STATIC HELPER FUNCTIONS * + ***************************/ + +/* arr_realloc • realloc memory of a struct array */ +static int +arr_realloc(struct array* arr, int neosz) { + void* neo; + neo = realloc(arr->base, neosz * arr->unit); + if (neo == 0) return 0; + arr->base = neo; + arr->asize = neosz; + if (arr->size > neosz) arr->size = neosz; + return 1; } + + +/* parr_realloc • realloc memory of a struct parray */ +static int +parr_realloc(struct parray* arr, int neosz) { + void* neo; + neo = realloc(arr->item, neosz * sizeof (void*)); + if (neo == 0) return 0; + arr->item = neo; + arr->asize = neosz; + if (arr->size > neosz) arr->size = neosz; + return 1; } + + + +/*************************** + * GENERIC ARRAY FUNCTIONS * + ***************************/ + +/* arr_adjust • shrink the allocated memory to fit exactly the needs */ +int +arr_adjust(struct array *arr) { + return arr_realloc(arr, arr->size); } + + +/* arr_free • frees the structure contents 
(buf NOT the struct itself) */ +void +arr_free(struct array *arr) { + if (!arr) return; + free(arr->base); + arr->base = 0; + arr->size = arr->asize = 0; } + + +/* arr_grow • increases the array size to fit the given number of elements */ +int +arr_grow(struct array *arr, int need) { + if (arr->asize >= need) return 1; + else return arr_realloc(arr, need); } + + +/* arr_init • initialization of the contents of the struct */ +void +arr_init(struct array *arr, size_t unit) { + arr->base = 0; + arr->size = arr->asize = 0; + arr->unit = unit; } + + +/* arr_insert • inserting nb elements before the nth one */ +int +arr_insert(struct array *arr, int nb, int n) { + char *src, *dst; + size_t len; + if (!arr || nb <= 0 || n < 0 + || !arr_grow(arr, arr->size + nb)) + return 0; + if (n < arr->size) { + src = arr->base; + src += n * arr->unit; + dst = src + nb * arr->unit; + len = (arr->size - n) * arr->unit; + memmove(dst, src, len); } + arr->size += nb; + return 1; } + + +/* arr_item • returns a pointer to the n-th element */ +void * +arr_item(struct array *arr, int no) { + char *ptr; + if (!arr || no < 0 || no >= arr->size) return 0; + ptr = arr->base; + ptr += no * arr->unit; + return ptr; } + + +/* arr_newitem • returns the index of a new element appended to the array */ +int +arr_newitem(struct array *arr) { + if (!arr_grow(arr, arr->size + 1)) return -1; + arr->size += 1; + return arr->size - 1; } + + +/* arr_remove • removes the n-th elements of the array */ +void +arr_remove(struct array *arr, int idx) { + if (!arr || idx < 0 || idx >= arr->size) return; + arr->size -= 1; + if (idx < arr->size) { + char *dst = arr->base; + char *src; + dst += idx * arr->unit; + src = dst + arr->unit; + memmove(dst, src, (arr->size - idx) * arr->unit); } } + + +/* arr_sorted_find • O(log n) search in a sorted array, returning entry */ +void * +arr_sorted_find(struct array *arr, void *key, array_cmp_fn cmp) { + int mi, ma, cu, ret; + char *ptr = arr->base; + mi = -1; + ma = arr->size; + 
while (mi < ma - 1) { + cu = mi + (ma - mi) / 2; + ret = cmp(key, ptr + cu * arr->unit); + if (ret == 0) return ptr + cu * arr->unit; + else if (ret < 0) ma = cu; + else /* if (ret > 0) */ mi = cu; } + return 0; } + + +/* arr_sorted_find_i • O(log n) search in a sorted array, + * returning index of the smallest element larger than the key */ +int +arr_sorted_find_i(struct array *arr, void *key, array_cmp_fn cmp) { + int mi, ma, cu, ret; + char *ptr = arr->base; + mi = -1; + ma = arr->size; + while (mi < ma - 1) { + cu = mi + (ma - mi) / 2; + ret = cmp(key, ptr + cu * arr->unit); + if (ret == 0) { + do { cu += 1; /* skip equal entries; check bounds before comparing */ + } while (cu < arr->size + && cmp(key, ptr + cu * arr->unit) == 0); + return cu; } + else if (ret < 0) ma = cu; + else /* if (ret > 0) */ mi = cu; } + return ma; } + + + +/*************************** + * POINTER ARRAY FUNCTIONS * + ***************************/ + +/* parr_adjust • shrinks the allocated memory to fit exactly the needs */ +int +parr_adjust(struct parray* arr) { + return parr_realloc (arr, arr->size); } + + +/* parr_free • frees the structure contents (buf NOT the struct itself) */ +void +parr_free(struct parray *arr) { + if (!arr) return; + free (arr->item); + arr->item = 0; + arr->size = 0; + arr->asize = 0; } + + +/* parr_grow • increases the array size to fit the given number of elements */ +int +parr_grow(struct parray *arr, int need) { + if (arr->asize >= need) return 1; + else return parr_realloc (arr, need); } + + +/* parr_init • initialization of the struct (which is equivalent to zero) */ +void +parr_init(struct parray *arr) { + arr->item = 0; + arr->size = 0; + arr->asize = 0; } + + +/* parr_insert • inserting nb elements before the nth one */ +int +parr_insert(struct parray *parr, int nb, int n) { + char *src, *dst; + size_t len, i; + if (!parr || nb <= 0 || n < 0 + || !parr_grow(parr, parr->size + nb)) + return 0; + if (n < parr->size) { + src = (void *)parr->item; + src += n * sizeof (void *); + dst = src + nb * sizeof 
(void *); + len = (parr->size - n) * sizeof (void *); + memmove(dst, src, len); + for (i = 0; i < (size_t)nb; ++i) + parr->item[n + i] = 0; } + parr->size += nb; + return 1; } + + +/* parr_pop • pops the last item of the array and returns it */ +void * +parr_pop(struct parray *arr) { + if (arr->size <= 0) return 0; + arr->size -= 1; + return arr->item[arr->size]; } + + +/* parr_push • pushes a pointer at the end of the array (= append) */ +int +parr_push(struct parray *arr, void *i) { + if (!parr_grow(arr, arr->size + 1)) return 0; + arr->item[arr->size] = i; + arr->size += 1; + return 1; } + + +/* parr_remove • removes the n-th element of the array and returns it */ +void * +parr_remove(struct parray *arr, int idx) { + void* ret; + int i; + if (!arr || idx < 0 || idx >= arr->size) return 0; + ret = arr->item[idx]; + for (i = idx+1; i < arr->size; ++i) + arr->item[i - 1] = arr->item[i]; + arr->size -= 1; + return ret; } + + +/* parr_sorted_find • O(log n) search in a sorted array, returning entry */ +void * +parr_sorted_find(struct parray *arr, void *key, array_cmp_fn cmp) { + int mi, ma, cu, ret; + mi = -1; + ma = arr->size; + while (mi < ma - 1) { + cu = mi + (ma - mi) / 2; + ret = cmp(key, arr->item[cu]); + if (ret == 0) return arr->item[cu]; + else if (ret < 0) ma = cu; + else /* if (ret > 0) */ mi = cu; } + return 0; } + + +/* parr_sorted_find_i • O(log n) search in a sorted array, + * returning index of the smallest element larger than the key */ +int +parr_sorted_find_i(struct parray *arr, void *key, array_cmp_fn cmp) { + int mi, ma, cu, ret; + mi = -1; + ma = arr->size; + while (mi < ma - 1) { + cu = mi + (ma - mi) / 2; + ret = cmp(key, arr->item[cu]); + if (ret == 0) { + do { cu += 1; /* skip equal entries; check bounds before comparing */ + } while (cu < arr->size + && cmp(key, arr->item[cu]) == 0); + return cu; } + else if (ret < 0) ma = cu; + else /* if (ret > 0) */ mi = cu; } + return ma; } + + +/* parr_top • returns the top the stack (i.e. 
the last element of the array) */ +void * +parr_top(struct parray *arr) { + if (arr == 0 || arr->size <= 0) return 0; + else return arr->item[arr->size - 1]; } + +/* vim: set filetype=c: */ diff --git a/src/array.h b/src/array.h new file mode 100755 index 0000000..fd23533 --- /dev/null +++ b/src/array.h @@ -0,0 +1,148 @@ +/* array.h - automatic dynamic array for pointers */ + +/* + * Copyright (c) 2008, Natacha Porté + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
/* struct array • generic linear array
 *	elements are stored back to back, `unit` bytes each, starting at `base` */
struct array {
	void*	base;	/* start of the element storage */
	int	size;	/* number of elements currently in the array */
	int	asize;	/* presumably the allocated capacity, as in struct parray
			 * — TODO confirm against arr_grow */
	size_t	unit; };	/* size in bytes of one element */


/* struct parray • array of pointers */
struct parray {
	void **	item;	/* the pointer slots */
	int	size;	/* number of slots in use */
	int	asize; };	/* number of slots allocated */


/* array_cmp_fn • comparison functions for sorted arrays
 *	called as cmp(key, entry); the searches treat a negative result as
 *	"key sorts before entry", zero as a match, positive as "key sorts after" */
typedef int (*array_cmp_fn)(void *key, void *array_entry);
/* parr_adjust • shrinks the allocated memory to fit exactly the needs
 *	returns the result of the underlying reallocation */
int
parr_adjust(struct parray *);

/* parr_free • frees the structure contents (but NOT the struct itself)
 *	safe to call with a null pointer; leaves the struct zeroed */
void
parr_free(struct parray *);

/* parr_grow • increases the array size to fit the given number of elements
 *	returns 1 when the capacity is already sufficient, otherwise the
 *	result of the reallocation */
int
parr_grow(struct parray *, int);

/* parr_init • initialization of the struct (which is equivalent to zero) */
void
parr_init(struct parray *);

/* parr_insert • inserting nb elements before the nth one
 *	returns 0 on a null array, nb <= 0, n < 0 or failed growth, 1 otherwise;
 *	slots opened in front of existing elements are set to 0 */
int
parr_insert(struct parray *, int nb, int n);

/* parr_pop • pops the last item of the array and returns it
 *	returns 0 when the array is empty */
void *
parr_pop(struct parray *);

/* parr_push • pushes a pointer at the end of the array (= append)
 *	returns 1 on success, 0 when the array could not grow */
int
parr_push(struct parray *, void *);

/* parr_remove • removes the n-th element of the array and returns it
 *	returns 0 on a null array or an out-of-range index */
void *
parr_remove(struct parray *, int);

/* parr_sorted_find • O(log n) search in a sorted array, returning entry
 *	returns 0 when no entry compares equal to the key */
void *
parr_sorted_find(struct parray *, void *key, array_cmp_fn cmp);

/* parr_sorted_find_i • O(log n) search in a sorted array,
 *	returning index of the smallest element larger than the key */
int
parr_sorted_find_i(struct parray *, void *key, array_cmp_fn cmp);
the last element of the array) */ +void * +parr_top(struct parray *); + + +#endif /* ndef LITHIUM_ARRAY_H */ + +/* vim: set filetype=c: */ diff --git a/src/bridge.cc b/src/bridge.cc new file mode 100644 index 0000000..606bd92 --- /dev/null +++ b/src/bridge.cc @@ -0,0 +1,344 @@ + +#include +#include + +extern "C" { + #include + #include "markdown.h" + #include "bridge.h" +} + +using namespace v8; + +// ************************************************************************************************* + +int callNodeConstructor(Handle& fn, Handle& handler, int argCount, + Handle* fnArgs, struct buf* ob) { + TryCatch trycatch; + + Handle ret = fn->Call(handler, argCount, fnArgs); + + if (ret.IsEmpty()) { + Handle exception = trycatch.Exception(); + String::AsciiValue exception_str(exception); + printf("Exception: %s\n", *exception_str); fflush(stdout); + return 0; + } + + + Handle ids = ret->ToString(); + + char* buf = new char[ids->Utf8Length()]; + ids->WriteUtf8(buf); + bufput(ob, buf, ids->Length()); + bufputc(ob, ','); + // printf("returned %d %s\n", ob->size, ob->data); fflush(stdout); + delete buf; + + return 1; +} + +// ************************************************************************************************* + +extern "C" int +markdom_handle_header(struct buf* ob, int level, struct buf* content, void *user) { + Handle handler = Handle::Cast(*(Handle*)user); + Handle fnName = String::New("header"); + Handle fnV = handler->Get(fnName); + if (fnV->IsFunction()) { + Handle _level = Number::New(level); + Handle _content = String::New( + content ? content->data: "", content ? 
content->size : 0); + + Handle fn = Handle::Cast(fnV); + Handle fnArgs[] = {_level, _content}; + return callNodeConstructor(fn, handler, 2, fnArgs, ob); + } + return 1; +} + +extern "C" int +markdom_handle_paragraph(struct buf* ob, struct buf* content, void *user) { + Handle handler = Handle::Cast(*(Handle*)user); + Handle fnName = String::New("paragraph"); + Handle fnV = handler->Get(fnName); + if (fnV->IsFunction()) { + Handle _content = String::New( + content ? content->data: "", content ? content->size : 0); + + Handle fn = Handle::Cast(fnV); + Handle fnArgs[] = {_content}; + return callNodeConstructor(fn, handler, 1, fnArgs, ob); + } + return 1; +} + +extern "C" int +markdom_handle_blockquote(struct buf* ob, struct buf* content, void *user) { + Handle handler = Handle::Cast(*(Handle*)user); + Handle fnName = String::New("blockquote"); + Handle fnV = handler->Get(fnName); + if (fnV->IsFunction()) { + Handle _content = String::New( + content ? content->data: "", content ? content->size : 0); + + Handle fn = Handle::Cast(fnV); + Handle fnArgs[] = {_content}; + return callNodeConstructor(fn, handler, 1, fnArgs, ob); + } + return 1; +} + +extern "C" int +markdom_handle_blockHTML(struct buf* ob, struct buf* content, void *user) { + Handle handler = Handle::Cast(*(Handle*)user); + Handle fnName = String::New("blockHTML"); + Handle fnV = handler->Get(fnName); + if (fnV->IsFunction()) { + Handle _content = String::New( + content ? content->data: "", content ? content->size : 0); + + Handle fn = Handle::Cast(fnV); + Handle fnArgs[] = {_content}; + return callNodeConstructor(fn, handler, 1, fnArgs, ob); + } + return 1; +} + +extern "C" int +markdom_handle_blockCode(struct buf* ob, struct buf* lang, struct buf* text, void *user) { + Handle handler = Handle::Cast(*(Handle*)user); + Handle fnName = String::New("blockCode"); + Handle fnV = handler->Get(fnName); + if (fnV->IsFunction()) { + Handle _lang = String::New(lang ? lang->data: "", lang ? 
lang->size : 0); + Handle _text = String::New(text ? text->data: "", text ? text->size : 0); + + Handle fn = Handle::Cast(fnV); + Handle fnArgs[] = {_lang, _text}; + return callNodeConstructor(fn, handler, 2, fnArgs, ob); + } + return 1; +} + +extern "C" int +markdom_handle_list(struct buf* ob, int ordered, struct buf* content, void *user) { + Handle handler = Handle::Cast(*(Handle*)user); + Handle fnName = String::New("list"); + Handle fnV = handler->Get(fnName); + if (fnV->IsFunction()) { + Handle _ordered = Number::New(ordered); + Handle _content = String::New( + content ? content->data : "", content ? content->size : 0); + + Handle fn = Handle::Cast(fnV); + Handle fnArgs[] = {_ordered, _content}; + return callNodeConstructor(fn, handler, 2, fnArgs, ob); + } + return 1; +} + +extern "C" int +markdom_handle_listItem(struct buf* ob, struct buf* content, void *user) { + Handle handler = Handle::Cast(*(Handle*)user); + Handle fnName = String::New("listItem"); + Handle fnV = handler->Get(fnName); + if (fnV->IsFunction()) { + Handle _content = String::New( + content ? content->data: "", content ? content->size : 0); + + Handle fn = Handle::Cast(fnV); + Handle fnArgs[] = {_content}; + return callNodeConstructor(fn, handler, 1, fnArgs, ob); + } + return 1; +} + +extern "C" int +markdom_handle_table(struct buf* ob, struct buf* header, struct buf* body, void *user) { + Handle handler = Handle::Cast(*(Handle*)user); + Handle fnName = String::New("table"); + Handle fnV = handler->Get(fnName); + if (fnV->IsFunction()) { + Handle _header = String::New( + header ? header->data: "", header ? header->size : 0); + Handle _body = String::New( + body ? body->data: "", body ? 
body->size : 0); + + Handle fn = Handle::Cast(fnV); + Handle fnArgs[] = {_header, _body}; + return callNodeConstructor(fn, handler, 2, fnArgs, ob); + } + return 1; +} + +extern "C" int +markdom_handle_tableRow(struct buf* ob, struct buf* cells, void *user) { + Handle handler = Handle::Cast(*(Handle*)user); + Handle fnName = String::New("tableRow"); + Handle fnV = handler->Get(fnName); + if (fnV->IsFunction()) { + Handle _cells = String::New( + cells ? cells->data: "", cells ? cells->size : 0); + + Handle fn = Handle::Cast(fnV); + Handle fnArgs[] = {_cells}; + return callNodeConstructor(fn, handler, 1, fnArgs, ob); + } + return 1; +} + +extern "C" int +markdom_handle_tableCell(struct buf* ob, struct buf* content, int align, void *user) { + Handle handler = Handle::Cast(*(Handle*)user); + Handle fnName = String::New("tableCell"); + Handle fnV = handler->Get(fnName); + if (fnV->IsFunction()) { + Handle _content = String::New( + content ? content->data: "", content ? content->size : 0); + Handle _align = Number::New(align); + + Handle fn = Handle::Cast(fnV); + Handle fnArgs[] = {_content, _align}; + return callNodeConstructor(fn, handler, 2, fnArgs, ob); + } + return 1; +} + +extern "C" int +markdom_handle_hrule(struct buf* ob, void *user) { + Handle handler = Handle::Cast(*(Handle*)user); + Handle fnName = String::New("hrule"); + Handle fnV = handler->Get(fnName); + if (fnV->IsFunction()) { + Handle fn = Handle::Cast(fnV); + return callNodeConstructor(fn, handler, 0, 0, ob); + } + return 1; +} + +extern "C" int +markdom_handle_lineBreak(struct buf* ob, void *user) { + Handle handler = Handle::Cast(*(Handle*)user); + Handle fnName = String::New("lineBreak"); + Handle fnV = handler->Get(fnName); + if (fnV->IsFunction()) { + Handle fn = Handle::Cast(fnV); + return callNodeConstructor(fn, handler, 0, 0, ob); + } + return 1; +} + +extern "C" int +markdom_handle_emphasis(struct buf* ob, int depth, struct buf* content, void *user) { + Handle handler = 
Handle::Cast(*(Handle*)user); + Handle fnName = String::New("emphasis"); + Handle fnV = handler->Get(fnName); + if (fnV->IsFunction()) { + Handle _depth = Number::New(depth); + Handle _content = String::New( + content ? content->data: "", content ? content->size : 0); + + Handle fn = Handle::Cast(fnV); + Handle fnArgs[] = {_depth, _content}; + return callNodeConstructor(fn, handler, 2, fnArgs, ob); + } + return 1; +} + +extern "C" int +markdom_handle_strikethrough(struct buf* ob, struct buf* content, void *user) { + Handle handler = Handle::Cast(*(Handle*)user); + Handle fnName = String::New("strikethrough"); + Handle fnV = handler->Get(fnName); + if (fnV->IsFunction()) { + Handle _content = String::New( + content ? content->data: "", content ? content->size : 0); + + Handle fn = Handle::Cast(fnV); + Handle fnArgs[] = {_content}; + return callNodeConstructor(fn, handler, 1, fnArgs, ob); + } + return 1; +} + +extern "C" int +markdom_handle_codespan(struct buf* ob, struct buf* text, void *user) { + Handle handler = Handle::Cast(*(Handle*)user); + Handle fnName = String::New("codeSpan"); + Handle fnV = handler->Get(fnName); + if (fnV->IsFunction()) { + Handle _text = String::New(text ? text->data: "", text ? text->size : 0); + + Handle fn = Handle::Cast(fnV); + Handle fnArgs[] = {_text}; + return callNodeConstructor(fn, handler, 1, fnArgs, ob); + } + return 1; +} + +extern "C" int +markdom_handle_link(struct buf* ob, struct buf *url, struct buf* title, struct buf* content, void *user) { + Handle handler = Handle::Cast(*(Handle*)user); + Handle fnName = String::New("link"); + Handle fnV = handler->Get(fnName); + if (fnV->IsFunction()) { + Handle _url = String::New(url ? url->data: "", url ? url->size : 0); + Handle _title = String::New(title ? title->data: "", title ? title->size : 0); + Handle _content = String::New( + content ? content->data: "", content ? 
content->size : 0); + + Handle fn = Handle::Cast(fnV); + Handle fnArgs[] = {_url, _title, _content}; + return callNodeConstructor(fn, handler, 3, fnArgs, ob); + } + return 1; +} + +extern "C" int +markdom_handle_autolink(struct buf* ob, struct buf *url, int type, void *user) { + Handle handler = Handle::Cast(*(Handle*)user); + Handle fnName = String::New("autolink"); + Handle fnV = handler->Get(fnName); + if (fnV->IsFunction()) { + Handle _url = String::New(url ? url->data: "", url ? url->size : 0); + Handle _type = Number::New(type); + + Handle fn = Handle::Cast(fnV); + Handle fnArgs[] = {_url, _type}; + return callNodeConstructor(fn, handler, 2, fnArgs, ob); + } + return 1; +} + +extern "C" int +markdom_handle_image(struct buf* ob, struct buf *url, struct buf *title, struct buf *alt, void *user) { + Handle handler = Handle::Cast(*(Handle*)user); + Handle fnName = String::New("image"); + Handle fnV = handler->Get(fnName); + if (fnV->IsFunction()) { + Handle _url = String::New(url ? url->data: "", url ? url->size : 0); + Handle _title = String::New(title ? title->data : "", title ? title->size : 0); + Handle _alt = String::New(alt ? alt->data : "", alt ? alt->size : 0); + + Handle fn = Handle::Cast(fnV); + Handle fnArgs[] = {_url, _title, _alt}; + return callNodeConstructor(fn, handler, 3, fnArgs, ob); + } + return 1; +} + +extern "C" int +markdom_handle_text(struct buf* ob, struct buf *text, void *user) { + Handle handler = Handle::Cast(*(Handle*)user); + Handle fnName = String::New("text"); + Handle fnV = handler->Get(fnName); + if (fnV->IsFunction()) { + Handle _text = String::New(text ? text->data: "", text ? 
#ifndef BRIDGE_H
#define BRIDGE_H

/*
 * Bridge between the C renderer callbacks and the script-side handler object.
 *
 * Every function receives the output buffer `ob`, the data of the node being
 * rendered, and the opaque `user` pointer supplied by the caller of the
 * renderer. Each returns an int status; the implementations return 1 when the
 * handler does not provide the corresponding method.
 */

/* Declared here so the prototypes below refer to the file-scope type even
 * when this header is included before buffer.h. */
struct buf;

#ifdef __cplusplus
extern "C" {
#endif

int markdom_handle_header(struct buf* ob, int level, struct buf* content, void *user);
int markdom_handle_paragraph(struct buf* ob, struct buf* content, void *user);
int markdom_handle_blockquote(struct buf* ob, struct buf* content, void *user);
int markdom_handle_blockCode(struct buf* ob, struct buf* lang, struct buf* text, void *user);
int markdom_handle_blockHTML(struct buf* ob, struct buf* content, void *user);
int markdom_handle_list(struct buf* ob, int ordered, struct buf* content, void *user);
int markdom_handle_listItem(struct buf* ob, struct buf* content, void *user);
int markdom_handle_table(struct buf* ob, struct buf* header, struct buf* body, void *user);
int markdom_handle_tableRow(struct buf* ob, struct buf* cells, void *user);
int markdom_handle_tableCell(struct buf* ob, struct buf* content, int align, void *user);
int markdom_handle_hrule(struct buf* ob, void *user);
int markdom_handle_lineBreak(struct buf* ob, void *user);
int markdom_handle_emphasis(struct buf* ob, int depth, struct buf *content, void *user);
int markdom_handle_strikethrough(struct buf* ob, struct buf *content, void *user);
int markdom_handle_codespan(struct buf* ob, struct buf *text, void *user);
int markdom_handle_link(struct buf* ob, struct buf *url, struct buf *title, struct buf* content,
                        void *user);
int markdom_handle_autolink(struct buf* ob, struct buf *url, int type, void *user);
int markdom_handle_image(struct buf* ob, struct buf *url, struct buf *title, struct buf *alt,
                         void *user);
int markdom_handle_text(struct buf* ob, struct buf *text, void *user);

#ifdef __cplusplus
}
#endif

#endif // BRIDGE_H
/* lower • returns the lower-case variant of an ASCII upper-case letter;
 *	any other character is returned unchanged */
static char
lower(char c) {
	if (c < 'A' || c > 'Z')
		return c;
	return c + ('a' - 'A'); }
a->size : b->size; + while (i < cmplen && lower(a->data[i]) == lower(b->data[i])) ++i; + if (i < a->size) { + if (i < b->size) return lower(a->data[i]) - lower(b->data[i]); + else return 1; } + else { if (i < b->size) return -1; + else return 0; } } + + +/* bufcmp • case-sensitive buffer comparison */ +int +bufcmp(const struct buf *a, const struct buf *b) { + size_t i = 0; + size_t cmplen; + if (a == b) return 0; + if (!a) return -1; else if (!b) return 1; + cmplen = (a->size < b->size) ? a->size : b->size; + while (i < cmplen && a->data[i] == b->data[i]) ++i; + if (i < a->size) { + if (i < b->size) return a->data[i] - b->data[i]; + else return 1; } + else { if (i < b->size) return -1; + else return 0; } } + + +/* bufcmps • case-sensitive comparison of a string to a buffer */ +int +bufcmps(const struct buf *a, const char *b) { + const size_t len = strlen(b); + size_t cmplen = len; + int r; + if (!a || !a->size) return b ? 0 : -1; + if (len < a->size) cmplen = a->size; + r = strncmp(a->data, b, cmplen); + if (r) return r; + else if (a->size == len) return 0; + else if (a->size < len) return -1; + else return 1; } + +int +bufprefix(const struct buf *buf, const char *prefix) +{ + size_t i; + + for (i = 0; i < buf->size; ++i) { + if (prefix[i] == 0) + return 0; + + if (buf->data[i] != prefix[i]) + return buf->data[i] - prefix[i]; + } + + return 0; +} + + +/* bufdup • buffer duplication */ +struct buf * +bufdup(const struct buf *src, size_t dupunit) { + size_t blocks; + struct buf *ret; + if (src == 0) return 0; + ret = malloc(sizeof (struct buf)); + if (ret == 0) return 0; + ret->unit = dupunit; + ret->size = src->size; + ret->ref = 1; + if (!src->size) { + ret->asize = 0; + ret->data = 0; + return ret; } + blocks = (src->size + dupunit - 1) / dupunit; + ret->asize = blocks * dupunit; + ret->data = malloc(ret->asize); + if (ret->data == 0) { + free(ret); + return 0; } + memcpy(ret->data, src->data, src->size); +#ifdef BUFFER_STATS + buffer_stat_nb += 1; + 
buffer_stat_alloc_bytes += ret->asize; +#endif + return ret; } + +/* bufnew • allocation of a new buffer */ +struct buf * +bufnew(size_t unit) { + struct buf *ret; + ret = malloc(sizeof (struct buf)); + if (ret) { +#ifdef BUFFER_STATS + buffer_stat_nb += 1; +#endif + ret->data = 0; + ret->size = ret->asize = 0; + ret->ref = 1; + ret->unit = unit; } + return ret; } + + +/* bufnullterm • NUL-termination of the string array (making a C-string) */ +void +bufnullterm(struct buf *buf) { + if (!buf || !buf->unit) return; + if (buf->size < buf->asize && buf->data[buf->size] == 0) return; + if (bufgrow(buf, buf->size + 1)) + buf->data[buf->size] = 0; } + + +/* bufprintf • formatted printing to a buffer */ +void +bufprintf(struct buf *buf, const char *fmt, ...) { + va_list ap; + if (!buf || !buf->unit) return; + va_start(ap, fmt); + vbufprintf(buf, fmt, ap); + va_end(ap); } + + +/* bufput • appends raw data to a buffer */ +void +bufput(struct buf *buf, const void *data, size_t len) { + if (!buf || !bufgrow(buf, buf->size + len)) return; + memcpy(buf->data + buf->size, data, len); + buf->size += len; } + + +/* bufputs • appends a NUL-terminated string to a buffer */ +void +bufputs(struct buf *buf, const char *str) { + bufput(buf, str, strlen (str)); } + + +/* bufrelease • decrease the reference count and free the buffer if needed */ +void +bufrelease(struct buf *buf) { + if (!buf) return; + buf->ref -= 1; + if (buf->ref == 0) { +#ifdef BUFFER_STATS + buffer_stat_nb -= 1; + buffer_stat_alloc_bytes -= buf->asize; +#endif + free(buf->data); + free(buf); } } + + +/* bufreset • frees internal data of the buffer */ +void +bufreset(struct buf *buf) { + if (!buf) return; +#ifdef BUFFER_STATS + buffer_stat_alloc_bytes -= buf->asize; +#endif + free(buf->data); + buf->data = 0; + buf->size = buf->asize = 0; } + + +/* bufset • safely assigns a buffer to another */ +void +bufset(struct buf **dest, struct buf *src) { + if (src) { + if (!src->asize) src = bufdup(src, 1); + else src->ref += 
1; } + bufrelease(*dest); + *dest = src; } + + +/* bufslurp • removes a given number of bytes from the head of the array */ +void +bufslurp(struct buf *buf, size_t len) { + if (!buf || !buf->unit || len <= 0) return; + if (len >= buf->size) { + buf->size = 0; + return; } + buf->size -= len; + memmove(buf->data, buf->data + len, buf->size); } + + +/* buftoi • converts the numbers at the beginning of the buf into an int */ +int +buftoi(struct buf *buf, size_t offset_i, size_t *offset_o) { + int r = 0, neg = 0; + size_t i = offset_i; + if (!buf || !buf->size) return 0; + if (buf->data[i] == '+') i += 1; + else if (buf->data[i] == '-') { + neg = 1; + i += 1; } + while (i < buf->size && buf->data[i] >= '0' && buf->data[i] <= '9') { + r = (r * 10) + buf->data[i] - '0'; + i += 1; } + if (offset_o) *offset_o = i; + return neg ? -r : r; } + + + +/* vbufprintf • stdarg variant of formatted printing into a buffer */ +void +vbufprintf(struct buf *buf, const char *fmt, va_list ap) { + int n; + va_list ap_save; + if (buf == 0 + || (buf->size >= buf->asize && !bufgrow (buf, buf->size + 1))) + return; + + va_copy(ap_save, ap); + n = vsnprintf(buf->data + buf->size, buf->asize - buf->size, fmt, ap); + + if (n < 0 || (size_t)n >= buf->asize - buf->size) { + size_t new_size = (n > 0) ? n : buf->size; + if (!bufgrow (buf, buf->size + new_size + 1)) + return; + + n = vsnprintf(buf->data + buf->size, buf->asize - buf->size, fmt, ap_save); + } + va_end(ap_save); + + if (n < 0) + return; + + buf->size += n; +} + +/* vim: set filetype=c: */ diff --git a/src/buffer.h b/src/buffer.h new file mode 100755 index 0000000..e9d7768 --- /dev/null +++ b/src/buffer.h @@ -0,0 +1,167 @@ +/* buffer.h - automatic buffer structure */ + +/* + * Copyright (c) 2008, Natacha Porté + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
/* struct buf • character array buffer
 *	`data` holds `size` bytes and is not kept NUL-terminated; use
 *	bufnullterm() when a C string is needed */
struct buf {
	char *	data;	/* actual character data (null while nothing is allocated) */
	size_t	size;	/* size of the string, in bytes */
	size_t	asize;	/* allocated size (0 = volatile buffer) */
	size_t	unit;	/* reallocation unit size (0 = read-only buffer) */
	int	ref; };	/* reference count: set to 1 by bufnew/bufdup,
			 * the buffer is freed when bufrelease brings it to 0 */
+/* bufprefix * compare the beggining of a buffer with a string */ +int +bufprefix(const struct buf *buf, const char *prefix); + +/* bufdup • buffer duplication */ +struct buf * +bufdup(const struct buf *, size_t) + __attribute__ ((malloc)); + +/* bufnew • allocation of a new buffer */ +struct buf * +bufnew(size_t) + __attribute__ ((malloc)); + +/* bufnullterm • NUL-termination of the string array (making a C-string) */ +void +bufnullterm(struct buf *); + +/* bufprintf • formatted printing to a buffer */ +void +bufprintf(struct buf *, const char *, ...) + __attribute__ ((format (printf, 2, 3))); + +/* bufput • appends raw data to a buffer */ +void +bufput(struct buf *, const void*, size_t); + +/* bufputs • appends a NUL-terminated string to a buffer */ +void +bufputs(struct buf *, const char*); + +/* bufrelease • decrease the reference count and free the buffer if needed */ +void +bufrelease(struct buf *); + +/* bufreset • frees internal data of the buffer */ +void +bufreset(struct buf *); + +/* bufset • safely assigns a buffer to another */ +void +bufset(struct buf **, struct buf *); + +/* bufslurp • removes a given number of bytes from the head of the array */ +void +bufslurp(struct buf *, size_t); + +/* buftoi • converts the numbers at the beginning of the buf into an int */ +int +buftoi(struct buf *, size_t, size_t *); + + + +#ifdef BUFFER_STDARG +#include + +/* vbufprintf • stdarg variant of formatted printing into a buffer */ +void +vbufprintf(struct buf *, const char*, va_list); + +#endif /* def BUFFER_STDARG */ + +#include + +/* bufgrow • increasing the allocated size to the given value */ +static inline int +bufgrow(struct buf *buf, size_t neosz) { + size_t neoasz; + void *neodata; + if (!buf || !buf->unit || neosz > BUFFER_MAX_ALLOC_SIZE) return 0; + if (buf->asize >= neosz) return 1; + neoasz = buf->asize + buf->unit; + while (neoasz < neosz) neoasz += buf->unit; + neodata = realloc(buf->data, neoasz); + if (!neodata) return 0; +#ifdef BUFFER_STATS + 
buffer_stat_alloc_bytes += (neoasz - buf->asize); +#endif + buf->data = (char*)neodata; + buf->asize = neoasz; + return 1; } + +/* bufputc • appends a single char to a buffer */ +static inline void +bufputc(struct buf *buf, char c) { + if (!buf || !bufgrow(buf, buf->size + 1)) return; + buf->data[buf->size] = c; + buf->size += 1; } + +#endif /* ndef LITHIUM_BUFFER_H */ + +/* vim: set filetype=c: */ diff --git a/src/dom.c b/src/dom.c new file mode 100755 index 0000000..2eec9ca --- /dev/null +++ b/src/dom.c @@ -0,0 +1,574 @@ +/* + * Copyright (c) 2009, Natacha Porté + * Copyright (c) 2011, Vicent Marti + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include "markdown.h" +#include "xhtml.h" +#include "bridge.h" + +#include +#include +#include +#include + +struct xhtml_renderopt { + struct { + int header_count; + int current_level; + } toc_data; + + struct { + int in_squote; + int in_dquote; + } quotes; + + unsigned int flags; + void* user; +}; + +static inline void +put_scaped_char(struct buf *ob, char c) +{ + switch (c) { + case '<': BUFPUTSL(ob, "<"); break; + case '>': BUFPUTSL(ob, ">"); break; + case '&': BUFPUTSL(ob, "&"); break; + case '"': BUFPUTSL(ob, """); break; + default: bufputc(ob, c); break; + } +} + +/* lus_attr_escape • copy the buffer entity-escaping '<', '>', '&' and '"' */ +static void +lus_attr_escape(struct buf *ob, const char *src, size_t size) +{ + size_t i = 0, org; + while (i < size) { + /* copying directly unescaped characters */ + org = i; + while (i < size && src[i] != '<' && src[i] != '>' + && src[i] != '&' && src[i] != '"') + i += 1; + // if (i > org) bufput(ob, src + org, i - org); + + /* escaping */ + if (i >= size) break; + + // put_scaped_char(ob, src[i]); + i++; + } +} + +static int +is_html_tag(struct buf *tag, const char *tagname) +{ + size_t i = 0; + + if (i < tag->size && tag->data[0] != '<') + return 0; + + i++; + + while (i < tag->size && isspace(tag->data[i])) + i++; + + if (i < tag->size && tag->data[i] == '/') + i++; + + while (i < tag->size && isspace(tag->data[i])) + i++; + + for (; i < tag->size; ++i, ++tagname) { + if (*tagname == 0) + break; + + if (tag->data[i] != *tagname) + return 0; + } + + if (i == tag->size) + return 0; + + return (isspace(tag->data[i]) || tag->data[i] == '>'); +} + +/******************** + * GENERIC RENDERER * + ********************/ +static int +rndr_autolink(struct buf *ob, struct buf *link, enum mkd_autolink type, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + + if (!link || !link->size) + return 0; + + if ((options->flags & XHTML_SAFELINK) != 0 && + !is_safe_link(link->data, link->size) && + type != MKDA_EMAIL) 
+ return 0; + + return markdom_handle_autolink(ob, link, type, options->user); +} + +static void +rndr_blockcode(struct buf *ob, struct buf *text, struct buf *lang, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + markdom_handle_blockCode(ob, lang, text, options->user); +} + +/* + * GitHub style code block: + * + *
    
    + *		...
    + *		
    + * + * Unlike other parsers, we store the language identifier in the
    ,
    + * and don't let the user generate custom classes.
    + *
    + * The language identifier in the 
     block gets postprocessed and all
    + * the code inside gets syntax highlighted with Pygments. This is much safer
    + * than letting the user specify a CSS class for highlighting.
    + *
    + * Note that we only generate HTML for the first specifier.
    + * E.g.
    + *		~~~~ {.python .numbered}	=>	
    
    + */
    +static void
    +rndr_blockcode_github(struct buf *ob, struct buf *text, struct buf *lang, void *opaque)
    +{
    +	if (ob->size) bufputc(ob, '\n');
    +
    +	if (lang && lang->size) {
    +		size_t i = 0;
    +		BUFPUTSL(ob, "
    size; ++i)
    +			if (isspace(lang->data[i]))
    +				break;
    +
    +		if (lang->data[0] == '.')
    +			bufput(ob, lang->data + 1, i - 1);
    +		else
    +			bufput(ob, lang->data, i);
    +
    +		BUFPUTSL(ob, "\">");
    +	} else
    +		BUFPUTSL(ob, "
    ");
    +
    +	if (text)
    +		lus_attr_escape(ob, text->data, text->size);
    +
    +	BUFPUTSL(ob, "
    \n"); +} + +static void +rndr_blockquote(struct buf *ob, struct buf *text, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + markdom_handle_blockquote(ob, text, options->user); +} + +static int +rndr_codespan(struct buf *ob, struct buf *text, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + return markdom_handle_codespan(ob, text, options->user); +} + +static int +rndr_strikethrough(struct buf *ob, struct buf *text, void *opaque) +{ + if (!text || !text->size) + return 0; + + struct xhtml_renderopt *options = opaque; + return markdom_handle_strikethrough(ob, text, options->user); +} + +static int +rndr_double_emphasis(struct buf *ob, struct buf *text, void *opaque) +{ + if (!text || !text->size) + return 0; + + struct xhtml_renderopt *options = opaque; + return markdom_handle_emphasis(ob, 2, text, options->user); +} + +static int +rndr_emphasis(struct buf *ob, struct buf *text, void *opaque) +{ + if (!text || !text->size) return 0; + + struct xhtml_renderopt *options = opaque; + return markdom_handle_emphasis(ob, 1, text, options->user); +} + +static void +rndr_header(struct buf *ob, struct buf *text, int level, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + markdom_handle_header(ob, level, text, options->user); +} + +static int +rndr_link(struct buf *ob, struct buf *link, struct buf *title, struct buf *content, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + + if ((options->flags & XHTML_SAFELINK) != 0 && !is_safe_link(link->data, link->size)) + return 0; + + return markdom_handle_link(ob, link, title, content, options->user); +} + +static void +rndr_list(struct buf *ob, struct buf *text, int flags, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + markdom_handle_list(ob, flags & MKD_LIST_ORDERED, text, options->user); +} + +static void +rndr_listitem(struct buf *ob, struct buf *text, int flags, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + + if (text) { + while (text->size 
&& text->data[text->size - 1] == '\n') + text->size -= 1; + } + markdom_handle_listItem(ob, text, options->user); +} + +static void +rndr_paragraph(struct buf *ob, struct buf *text, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + + size_t i = 0; + + if (!text || !text->size) + return; + + while (i < text->size && isspace(text->data[i])) i++; + + if (i == text->size) + return; + + if (options->flags & XHTML_HARD_WRAP) { + size_t org; + while (i < text->size) { + org = i; + while (i < text->size && text->data[i] != '\n') + i++; + + if (i >= text->size) + break; + + i++; + } + } + markdom_handle_paragraph(ob, text, options->user); + + /* Close any open quotes at the end of the paragraph */ + options->quotes.in_squote = 0; + options->quotes.in_dquote = 0; +} + +static void +rndr_raw_block(struct buf *ob, struct buf *text, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + markdom_handle_blockHTML(ob, text, options->user); +} + +static int +rndr_triple_emphasis(struct buf *ob, struct buf *text, void *opaque) +{ + if (!text || !text->size) return 0; + struct xhtml_renderopt *options = opaque; + return markdom_handle_emphasis(ob, 3, text, options->user); + return 1; +} + + +/********************** + * XHTML 1.0 RENDERER * + **********************/ + +static void +rndr_hrule(struct buf *ob, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + markdom_handle_hrule(ob, options->user); +} + +static int +rndr_image(struct buf *ob, struct buf *link, struct buf *title, struct buf *alt, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + + if (!link || !link->size) return 0; + + return markdom_handle_image(ob, link, title, alt, options->user); +} + +static int +rndr_linebreak(struct buf *ob, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + return markdom_handle_lineBreak(ob, options->user); +} + +static int +rndr_raw_html(struct buf *ob, struct buf *text, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + 
return markdom_handle_blockHTML(ob, text, options->user); +} + +static void +rndr_table(struct buf *ob, struct buf *header, struct buf *body, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + markdom_handle_table(ob, header, body, options->user); +} + +static void +rndr_tablerow(struct buf *ob, struct buf *text, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + markdom_handle_tableRow(ob, text, options->user); +} + +static void +rndr_tablecell(struct buf *ob, struct buf *text, int align, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + markdom_handle_tableCell(ob, text, align, options->user); +} + +static struct { + char c0; + const char *pattern; + const char *entity; + int skip; +} smartypants_subs[] = { + { '\'', "'s>", "’", 0 }, + { '\'', "'t>", "’", 0 }, + { '\'', "'re>", "’", 0 }, + { '\'', "'ll>", "’", 0 }, + { '\'', "'ve>", "’", 0 }, + { '\'', "'m>", "’", 0 }, + { '\'', "'d>", "’", 0 }, + { '-', "--", "—", 1 }, + { '-', "<->", "–", 0 }, + { '.', "...", "…", 2 }, + { '.', ". . 
.", "…", 4 }, + { '(', "(c)", "©", 2 }, + { '(', "(r)", "®", 2 }, + { '(', "(tm)", "™", 3 }, + { '3', "<3/4>", "¾", 2 }, + { '3', "<3/4ths>", "¾", 2 }, + { '1', "<1/2>", "½", 2 }, + { '1', "<1/4>", "¼", 2 }, + { '1', "<1/4th>", "¼", 2 }, + { '&', "�", 0, 3 }, +}; + +#define SUBS_COUNT (sizeof(smartypants_subs) / sizeof(smartypants_subs[0])) + +static inline int +word_boundary(char c) +{ + return isspace(c) || ispunct(c); +} + +static int +smartypants_cmpsub(const struct buf *buf, size_t start, const char *prefix) +{ + size_t i; + + if (prefix[0] == '<') { + if (start == 0 || !word_boundary(buf->data[start - 1])) + return 0; + + prefix++; + } + + for (i = start; i < buf->size; ++i) { + char c, p; + + c = tolower(buf->data[i]); + p = *prefix++; + + if (p == 0) + return 1; + + if (p == '>') + return word_boundary(c); + + if (c != p) + return 0; + } + + return (*prefix == '>'); +} + +static int +smartypants_quotes(struct buf *ob, struct buf *text, size_t i, int is_open) +{ + char ent[8]; + + if (is_open && i + 1 < text->size && !word_boundary(text->data[i + 1])) + return 0; + + if (!is_open && i > 0 && !word_boundary(text->data[i - 1])) + return 0; + + snprintf(ent, sizeof(ent), "&%c%cquo;", + is_open ? 'r' : 'l', + text->data[i] == '\'' ? 
's' : 'd'); + + bufputs(ob, ent); + return 1; +} + +static void +rndr_normal_text(struct buf *ob, struct buf *text, void *opaque) +{ + if (text) { + struct xhtml_renderopt *options = opaque; + markdom_handle_text(ob, text, options->user); + } +} + +static void +rndr_smartypants(struct buf *ob, struct buf *text, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + size_t i; + + if (!text) + return; + + for (i = 0; i < text->size; ++i) { + size_t sub; + char c = text->data[i]; + + for (sub = 0; sub < SUBS_COUNT; ++sub) { + if (c == smartypants_subs[sub].c0 && + smartypants_cmpsub(text, i, smartypants_subs[sub].pattern)) { + + if (smartypants_subs[sub].entity) + bufputs(ob, smartypants_subs[sub].entity); + + i += smartypants_subs[sub].skip; + break; + } + } + + if (sub < SUBS_COUNT) + continue; + + switch (c) { + case '\"': + if (smartypants_quotes(ob, text, i, options->quotes.in_dquote)) { + options->quotes.in_dquote = !options->quotes.in_dquote; + continue; + } + break; + + case '\'': + if (smartypants_quotes(ob, text, i, options->quotes.in_squote)) { + options->quotes.in_squote = !options->quotes.in_squote; + continue; + } + break; + } + + /* + * Copy raw character + */ + put_scaped_char(ob, c); + } +} + +void +ups_dom_renderer(struct mkd_renderer *renderer, unsigned int render_flags, void* user) +{ + static const struct mkd_renderer renderer_default = { + rndr_blockcode, + rndr_blockquote, + rndr_raw_block, + rndr_header, + rndr_hrule, + rndr_list, + rndr_listitem, + rndr_paragraph, + rndr_table, + rndr_tablerow, + rndr_tablecell, + + rndr_autolink, + rndr_codespan, + rndr_double_emphasis, + rndr_emphasis, + rndr_image, + rndr_linebreak, + rndr_link, + rndr_raw_html, + rndr_triple_emphasis, + rndr_strikethrough, + + NULL, + rndr_normal_text, + + NULL, + NULL, + + NULL + }; + + struct xhtml_renderopt *options; + options = calloc(1, sizeof(struct xhtml_renderopt)); + options->flags = render_flags; + options->user = user; + + memcpy(renderer, 
&renderer_default, sizeof(struct mkd_renderer)); + renderer->opaque = options; + + // XXXjoe This stuff is all ignored + // if (render_flags & XHTML_SKIP_IMAGES) + // renderer->image = NULL; + // + // if (render_flags & XHTML_SKIP_LINKS) { + // renderer->link = NULL; + // renderer->autolink = NULL; + // } + // + // if (render_flags & XHTML_SMARTYPANTS) + // renderer->normal_text = rndr_smartypants; + // + // if (render_flags & XHTML_GITHUB_BLOCKCODE) + // renderer->blockcode = rndr_blockcode_github; + + +} diff --git a/src/dom.h b/src/dom.h new file mode 100644 index 0000000..4aa9cc2 --- /dev/null +++ b/src/dom.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2011, Vicent Marti + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef UPSKIRT_DOM_H +#define UPSKIRT_DOM_H + +extern void +ups_dom_renderer(struct mkd_renderer *renderer, unsigned int render_flags, void* user); + +#endif + diff --git a/src/markdom.cc b/src/markdom.cc new file mode 100644 index 0000000..5d95837 --- /dev/null +++ b/src/markdom.cc @@ -0,0 +1,122 @@ +#include +#include + +extern "C" { + #include "markdown.h" + #include "xhtml.h" + #include "dom.h" + #include "bridge.h" +} + +#define READ_UNIT 1024 +#define OUTPUT_UNIT 64 + +using namespace v8; + +static Handle ToHTML(const Arguments& args) { + HandleScope scope; + + if (args.Length() < 1) { + return ThrowException(Exception::TypeError(String::New("Source argument missing"))); + } + + String::Utf8Value in(args[0]); + + struct mkd_renderer renderer; + + struct buf* ob = bufnew(OUTPUT_UNIT); + struct buf* ib = bufnew(READ_UNIT); + bufputs(ib, (char*)*in); + + size_t iterations = 1; + for (size_t i = 0; i < iterations; ++i) { + ob->size = 0; + + ups_xhtml_renderer(&renderer, 0); + ups_markdown(ob, ib, &renderer, 0xFF); + ups_free_renderer(&renderer); + } + + Handle ret = String::New(ob->data, ob->size); + + bufrelease(ib); + bufrelease(ob); + + return scope.Close(ret); +} + +static Handle ToDOM(const Arguments& args) { + HandleScope scope; + + if (args.Length() < 2) { + return ThrowException(Exception::TypeError(String::New("Handler argument missing"))); + } + if (args.Length() < 1) { + return ThrowException(Exception::TypeError(String::New("Source argument missing"))); + } + + String::Utf8Value in(args[0]); + Handle handler = args[1]; + int options = 0xFF; + + if (args.Length() > 2) { + Handle disableEmphasis = String::New("disableEmphasis"); + Handle disableTable = String::New("disableTables"); + Handle disableFencedCode = String::New("disableFencedCode"); + Handle disableAutolink = String::New("disableAutolink"); + Handle disableStrikethrough = String::New("disableStrikethrough"); + Handle disableHTML = String::New("disableHTML"); + + Handle optionMap = 
Handle::Cast(args[2]); + if (optionMap->Get(disableEmphasis)->IsTrue()) { + options &= ~MKDEXT_LAX_EMPHASIS; + } + if (optionMap->Get(disableTable)->IsTrue()) { + options &= ~MKDEXT_TABLES; + } + if (optionMap->Get(disableFencedCode)->IsTrue()) { + options &= ~MKDEXT_FENCED_CODE; + } + if (optionMap->Get(disableAutolink)->IsTrue()) { + options &= ~MKDEXT_AUTOLINK; + } + if (optionMap->Get(disableStrikethrough)->IsTrue()) { + options &= ~MKDEXT_STRIKETHROUGH; + } + if (optionMap->Get(disableHTML)->IsTrue()) { + options &= ~MKDEXT_LAX_HTML_BLOCKS; + } + } + + struct buf *ib, *ob; + struct mkd_renderer renderer; + size_t i, iterations = 1; + + /* performing markdown parsing */ + ob = bufnew(OUTPUT_UNIT); + ib = bufnew(READ_UNIT); + bufputs(ib, (char*)*in); + + for (i = 0; i < iterations; ++i) { + ob->size = 0; + + ups_dom_renderer(&renderer, 0, &handler); + ups_markdown(ob, ib, &renderer, options); + ups_free_renderer(&renderer); + } + + Handle ret = String::New(ob->data, ob->size); + + bufrelease(ib); + bufrelease(ob); + + return scope.Close(ret); +} + +extern "C" void +init (Handle target) { + HandleScope scope; + target->Set(String::New("version"), String::New("0.1")); + NODE_SET_METHOD(target, "toHTML", ToHTML); + NODE_SET_METHOD(target, "toDOM", ToDOM); +} diff --git a/src/markdown.c b/src/markdown.c new file mode 100755 index 0000000..3d88b4b --- /dev/null +++ b/src/markdown.c @@ -0,0 +1,2068 @@ +/* markdown.c - generic markdown parser */ + +/* + * Copyright (c) 2009, Natacha Porté + * Copyright (c) 2011, Vicent Marti + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "markdown.h" +#include "array.h" + +#include +#include +#include /* for strncasecmp */ +#include +#include + +#define TEXT_UNIT 64 /* unit for the copy of the input buffer */ +#define WORK_UNIT 64 /* block-level working buffer */ + +#define MKD_LI_END 8 /* internal list flag */ + +/*************** + * LOCAL TYPES * + ***************/ + +/* link_ref • reference to a link */ +struct link_ref { + struct buf *id; + struct buf *link; + struct buf *title; +}; + +/* char_trigger • function pointer to render active chars */ +/* returns the number of chars taken care of */ +/* data is the pointer of the beginning of the span */ +/* offset is the number of valid chars before data */ +struct render; +typedef size_t +(*char_trigger)(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size); + + +/* render • structure containing one particular render */ +struct render { + struct mkd_renderer make; + struct array refs; + char_trigger active_char[256]; + struct parray work; + unsigned int ext_flags; + size_t max_nesting; +}; + +/* html_tag • structure for quick HTML tag search (inspired from discount) */ +struct html_tag { + const char *text; + size_t size; +}; + +static inline struct buf * +rndr_newbuf(struct render *rndr) +{ + struct buf *work = NULL; + + if (rndr->work.size < rndr->work.asize) { + work = rndr->work.item[rndr->work.size++]; + work->size = 0; + } else { + work = bufnew(WORK_UNIT); + parr_push(&rndr->work, work); + } + + return work; +} + +static inline void +rndr_popbuf(struct render *rndr) +{ + rndr->work.size--; +} + +/******************** + * GLOBAL VARIABLES * + ********************/ + +/* 
block_tags • recognised block tags, sorted by cmp_html_tag */ +static struct html_tag block_tags[] = { +/*0*/ { "p", 1 }, + { "dl", 2 }, + { "h1", 2 }, + { "h2", 2 }, + { "h3", 2 }, + { "h4", 2 }, + { "h5", 2 }, + { "h6", 2 }, + { "ol", 2 }, + { "ul", 2 }, +/*10*/ { "del", 3 }, + { "div", 3 }, +/*12*/ { "ins", 3 }, + { "pre", 3 }, + { "form", 4 }, + { "math", 4 }, + { "table", 5 }, + { "iframe", 6 }, + { "script", 6 }, + { "fieldset", 8 }, + { "noscript", 8 }, + { "blockquote", 10 } +}; + +#define INS_TAG (block_tags + 12) +#define DEL_TAG (block_tags + 10) + +/*************************** + * HELPER FUNCTIONS * + ***************************/ +int +is_safe_link(const char *link, size_t link_len) +{ + static const size_t valid_uris_count = 4; + static const char *valid_uris[] = { + "http://", "https://", "ftp://", "mailto://" + }; + + size_t i; + + for (i = 0; i < valid_uris_count; ++i) { + size_t len = strlen(valid_uris[i]); + + if (link_len > len && strncasecmp(link, valid_uris[i], len) == 0) + return 1; + } + + return 0; +} + +/* cmp_link_ref • comparison function for link_ref sorted arrays */ +static int +cmp_link_ref(void *key, void *array_entry) +{ + struct link_ref *lr = array_entry; + return bufcasecmp(key, lr->id); +} + +/* cmp_link_ref_sort • comparison function for link_ref qsort */ +static int +cmp_link_ref_sort(const void *a, const void *b) +{ + const struct link_ref *lra = a; + const struct link_ref *lrb = b; + return bufcasecmp(lra->id, lrb->id); +} + +/* cmp_html_tag • comparison function for bsearch() (stolen from discount) */ +static int +cmp_html_tag(const void *a, const void *b) +{ + const struct html_tag *hta = a; + const struct html_tag *htb = b; + if (hta->size != htb->size) return (int)((ssize_t)hta->size - (ssize_t)htb->size); + return strncasecmp(hta->text, htb->text, hta->size); +} + + +/* find_block_tag • returns the current block tag */ +static struct html_tag * +find_block_tag(char *data, size_t size) +{ + size_t i = 0; + struct html_tag 
key; + + /* looking for the word end */ + while (i < size && ((data[i] >= '0' && data[i] <= '9') + || (data[i] >= 'A' && data[i] <= 'Z') + || (data[i] >= 'a' && data[i] <= 'z'))) + i += 1; + if (i >= size) return 0; + + /* binary search of the tag */ + key.text = data; + key.size = i; + return bsearch(&key, block_tags, + sizeof block_tags / sizeof block_tags[0], + sizeof block_tags[0], cmp_html_tag); +} + +/**************************** + * INLINE PARSING FUNCTIONS * + ****************************/ + +/* is_mail_autolink • looks for the address part of a mail autolink and '>' */ +/* this is less strict than the original markdown e-mail address matching */ +static size_t +is_mail_autolink(char *data, size_t size) +{ + size_t i = 0, nb = 0; + + /* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */ + while (i < size && (data[i] == '-' || data[i] == '.' + || data[i] == '_' || data[i] == '@' + || (data[i] >= 'a' && data[i] <= 'z') + || (data[i] >= 'A' && data[i] <= 'Z') + || (data[i] >= '0' && data[i] <= '9'))) { + if (data[i] == '@') nb += 1; + i += 1; } + if (i >= size || data[i] != '>' || nb != 1) return 0; + return i + 1; +} + +/* tag_length • returns the length of the given tag, or 0 is it's not valid */ +static size_t +tag_length(char *data, size_t size, enum mkd_autolink *autolink) +{ + size_t i, j; + + /* a valid tag can't be shorter than 3 chars */ + if (size < 3) return 0; + + /* begins with a '<' optionally followed by '/', followed by letter */ + if (data[0] != '<') return 0; + i = (data[1] == '/') ? 2 : 1; + if ((data[i] < 'a' || data[i] > 'z') + && (data[i] < 'A' || data[i] > 'Z')) return 0; + + /* scheme test */ + *autolink = MKDA_NOT_AUTOLINK; + + /* try to find the beggining of an URI */ + while (i < size && (isalpha(data[i]) || data[i] == '.' 
|| data[i] == '+' || data[i] == '-')) + i++; + + if (i > 1 && data[i] == '@') { + if ((j = is_mail_autolink(data + i, size - i)) != 0) { + *autolink = MKDA_EMAIL; + return i + j; + } + } + + if (i > 2 && data[i] == ':') { + *autolink = MKDA_NORMAL; + i++; + } + + /* completing autolink test: no whitespace or ' or " */ + if (i >= size || i == '>') + *autolink = MKDA_NOT_AUTOLINK; + else if (*autolink) { + j = i; + while (i < size && data[i] != '>' && data[i] != '\'' + && data[i] != '"' && data[i] != ' ' && data[i] != '\t' + && data[i] != '\t') + i += 1; + if (i >= size) return 0; + if (i > j && data[i] == '>') return i + 1; + /* one of the forbidden chars has been found */ + *autolink = MKDA_NOT_AUTOLINK; + } + + /* looking for sometinhg looking like a tag end */ + while (i < size && data[i] != '>') i += 1; + if (i >= size) return 0; + return i + 1; +} + +/* parse_inline • parses inline markdown elements */ +static void +parse_inline(struct buf *ob, struct render *rndr, char *data, size_t size) +{ + size_t i = 0, end = 0; + char_trigger action = 0; + struct buf work = { 0, 0, 0, 0, 0 }; + + if (rndr->work.size > rndr->max_nesting) + return; + + while (i < size) { + /* copying inactive chars into the output */ + while (end < size && (action = rndr->active_char[(unsigned char)data[end]]) == 0) { + end++; + } + + if (rndr->make.normal_text) { + work.data = data + i; + work.size = end - i; + rndr->make.normal_text(ob, &work, rndr->make.opaque); + } + else + bufput(ob, data + i, end - i); + + if (end >= size) break; + i = end; + + /* calling the trigger */ + end = action(ob, rndr, data + i, i, size - i); + if (!end) /* no action from the callback */ + end = i + 1; + else { + i += end; + end = i; + } + } +} + +/* find_emph_char • looks for the next emph char, skipping other constructs */ +static size_t +find_emph_char(char *data, size_t size, char c) +{ + size_t i = 1; + + while (i < size) { + while (i < size && data[i] != c + && data[i] != '`' && data[i] != '[') + i += 
1; + if (data[i] == c) return i; + + /* not counting escaped chars */ + if (i && data[i - 1] == '\\') { i += 1; continue; } + + /* skipping a code span */ + if (data[i] == '`') { + size_t tmp_i = 0; + i += 1; + while (i < size && data[i] != '`') { + if (!tmp_i && data[i] == c) tmp_i = i; + i += 1; } + if (i >= size) return tmp_i; + i += 1; } + + /* skipping a link */ + else if (data[i] == '[') { + size_t tmp_i = 0; + char cc; + i += 1; + while (i < size && data[i] != ']') { + if (!tmp_i && data[i] == c) tmp_i = i; + i += 1; } + i += 1; + while (i < size && (data[i] == ' ' + || data[i] == '\t' || data[i] == '\n')) + i += 1; + if (i >= size) return tmp_i; + if (data[i] != '[' && data[i] != '(') { /* not a link*/ + if (tmp_i) return tmp_i; + else continue; } + cc = data[i]; + i += 1; + while (i < size && data[i] != cc) { + if (!tmp_i && data[i] == c) tmp_i = i; + i += 1; } + if (i >= size) return tmp_i; + i += 1; } } + return 0; +} + +/* parse_emph1 • parsing single emphase */ +/* closed by a symbol not preceded by whitespace and not followed by symbol */ +static size_t +parse_emph1(struct buf *ob, struct render *rndr, char *data, size_t size, char c) +{ + size_t i = 0, len; + struct buf *work = 0; + int r; + + if (!rndr->make.emphasis) return 0; + + /* skipping one symbol if coming from emph3 */ + if (size > 1 && data[0] == c && data[1] == c) i = 1; + + while (i < size) { + len = find_emph_char(data + i, size - i, c); + if (!len) return 0; + i += len; + if (i >= size) return 0; + + if (i + 1 < size && data[i + 1] == c) { + i += 1; + continue; + } + + if (data[i] == c && !isspace(data[i - 1])) { + + if ((rndr->ext_flags & MKDEXT_LAX_EMPHASIS) == 0) { + if (!(i + 1 == size || isspace(data[i + 1]) || ispunct(data[i + 1]))) + continue; + } + + work = rndr_newbuf(rndr); + parse_inline(work, rndr, data, i); + r = rndr->make.emphasis(ob, work, rndr->make.opaque); + rndr_popbuf(rndr); + return r ? 
i + 1 : 0; + } + } + + return 0; +} + +/* parse_emph2 • parsing single emphase */ +static size_t +parse_emph2(struct buf *ob, struct render *rndr, char *data, size_t size, char c) +{ + int (*render_method)(struct buf *ob, struct buf *text, void *opaque); + size_t i = 0, len; + struct buf *work = 0; + int r; + + render_method = (c == '~') ? rndr->make.strikethrough : rndr->make.double_emphasis; + + if (!render_method) + return 0; + + while (i < size) { + len = find_emph_char(data + i, size - i, c); + if (!len) return 0; + i += len; + + if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !isspace(data[i - 1])) { + work = rndr_newbuf(rndr); + parse_inline(work, rndr, data, i); + r = render_method(ob, work, rndr->make.opaque); + rndr_popbuf(rndr); + return r ? i + 2 : 0; + } + i++; + } + return 0; +} + +/* parse_emph3 • parsing single emphase */ +/* finds the first closing tag, and delegates to the other emph */ +static size_t +parse_emph3(struct buf *ob, struct render *rndr, char *data, size_t size, char c) +{ + size_t i = 0, len; + int r; + + while (i < size) { + len = find_emph_char(data + i, size - i, c); + if (!len) return 0; + i += len; + + /* skip whitespace preceded symbols */ + if (data[i] != c || isspace(data[i - 1])) + continue; + + if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->make.triple_emphasis) { + /* triple symbol found */ + struct buf *work = rndr_newbuf(rndr); + + parse_inline(work, rndr, data, i); + r = rndr->make.triple_emphasis(ob, work, rndr->make.opaque); + rndr_popbuf(rndr); + return r ? 
i + 3 : 0; + + } else if (i + 1 < size && data[i + 1] == c) { + /* double symbol found, handing over to emph1 */ + len = parse_emph1(ob, rndr, data - 2, size + 2, c); + if (!len) return 0; + else return len - 2; + + } else { + /* single symbol found, handing over to emph2 */ + len = parse_emph2(ob, rndr, data - 1, size + 1, c); + if (!len) return 0; + else return len - 1; + } + } + return 0; +} + +/* char_emphasis • single and double emphasis parsing */ +static size_t +char_emphasis(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size) +{ + char c = data[0]; + size_t ret; + + if (size > 2 && data[1] != c) { + /* whitespace cannot follow an opening emphasis; + * strikethrough only takes two characters '~~' */ + if (c == '~' || isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0) + return 0; + + return ret + 1; + } + + if (size > 3 && data[1] == c && data[2] != c) { + if (isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0) + return 0; + + return ret + 2; + } + + if (size > 4 && data[1] == c && data[2] == c && data[3] != c) { + if (c == '~' || isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0) + return 0; + + return ret + 3; + } + + return 0; +} + + +/* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */ +static size_t +char_linebreak(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size) +{ + if (offset < 2 || data[-1] != ' ' || data[-2] != ' ') + return 0; + + /* removing the last space from ob and rendering */ + while (ob->size && ob->data[ob->size - 1] == ' ') + ob->size--; + + return rndr->make.linebreak(ob, rndr->make.opaque) ? 
1 : 0; +} + + +/* char_codespan • '`' parsing a code span (assuming codespan != 0) */ +static size_t +char_codespan(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size) +{ + size_t end, nb = 0, i, f_begin, f_end; + + /* counting the number of backticks in the delimiter */ + while (nb < size && data[nb] == '`') + nb++; + + /* finding the next delimiter */ + i = 0; + for (end = nb; end < size && i < nb; end++) { + if (data[end] == '`') i++; + else i = 0; + } + + if (i < nb && end >= size) + return 0; /* no matching delimiter */ + + /* trimming outside whitespaces */ + f_begin = nb; + while (f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\t')) + f_begin++; + + f_end = end - nb; + while (f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\t')) + f_end--; + + /* real code span */ + if (f_begin < f_end) { + struct buf work = { data + f_begin, f_end - f_begin, 0, 0, 0 }; + if (!rndr->make.codespan(ob, &work, rndr->make.opaque)) + end = 0; + } else { + if (!rndr->make.codespan(ob, 0, rndr->make.opaque)) + end = 0; + } + + return end; +} + + +/* char_escape • '\\' backslash escape */ +static size_t +char_escape(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size) +{ + struct buf work = { 0, 0, 0, 0, 0 }; + + if (size > 1) { + if (rndr->make.normal_text) { + work.data = data + 1; + work.size = 1; + rndr->make.normal_text(ob, &work, rndr->make.opaque); + } + else bufputc(ob, data[1]); + } + + return 2; +} + +/* char_entity • '&' escaped when it doesn't belong to an entity */ +/* valid entities are assumed to be anything mathing &#?[A-Za-z0-9]+; */ +static size_t +char_entity(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size) +{ + size_t end = 1; + struct buf work; + + if (end < size && data[end] == '#') + end++; + + while (end < size && isalnum(data[end])) + end++; + + if (end < size && data[end] == ';') + end += 1; /* real entity */ + else + return 0; /* lone '&' */ + + if 
(rndr->make.entity) { + work.data = data; + work.size = end; + rndr->make.entity(ob, &work, rndr->make.opaque); + } + else bufput(ob, data, end); + + return end; +} + +/* char_langle_tag • '<' when tags or autolinks are allowed */ +static size_t +char_langle_tag(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size) +{ + enum mkd_autolink altype = MKDA_NOT_AUTOLINK; + size_t end = tag_length(data, size, &altype); + struct buf work = { data, end, 0, 0, 0 }; + int ret = 0; + + if (end > 2) { + if (rndr->make.autolink && altype != MKDA_NOT_AUTOLINK) { + work.data = data + 1; + work.size = end - 2; + ret = rndr->make.autolink(ob, &work, altype, rndr->make.opaque); + } + else if (rndr->make.raw_html_tag) + ret = rndr->make.raw_html_tag(ob, &work, rndr->make.opaque); + } + + if (!ret) return 0; + else return end; +} + +static size_t +char_autolink(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size) +{ + struct buf work = { data, 0, 0, 0, 0 }; + + if (offset > 0 && !isspace(data[-1])) + return 0; + + if (!is_safe_link(data, size)) + return 0; + + while (work.size < size && !isspace(data[work.size])) + work.size++; + + if (rndr->make.autolink) + rndr->make.autolink(ob, &work, MKDA_NORMAL, rndr->make.opaque); + + return work.size; +} + +/* char_link • '[': parsing a link or an image */ +static size_t +char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size) +{ + int is_img = (offset && data[-1] == '!'), level; + size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0; + struct buf *content = 0; + struct buf *link = 0; + struct buf *title = 0; + size_t org_work_size = rndr->work.size; + int text_has_nl = 0, ret = 0; + + /* checking whether the correct renderer exists */ + if ((is_img && !rndr->make.image) || (!is_img && !rndr->make.link)) + goto cleanup; + + /* looking for the matching closing bracket */ + for (level = 1; i < size; i += 1) { + if (data[i] == '\n') + text_has_nl = 1; + 
+ else if (data[i - 1] == '\\') + continue; + + else if (data[i] == '[') + level++; + + else if (data[i] == ']') { + level--; + if (level <= 0) + break; + } + } + + if (i >= size) + goto cleanup; + + txt_e = i; + i += 1; + + /* skip any amount of whitespace or newline */ + /* (this is much more laxist than original markdown syntax) */ + while (i < size && isspace(data[i])) + i++; + + /* inline style link */ + if (i < size && data[i] == '(') { + /* skipping initial whitespace */ + i += 1; + + while (i < size && isspace(data[i])) + i++; + + link_b = i; + + /* looking for link end: ' " ) */ + while (i < size && data[i] != '\'' && data[i] != '"' && + (data[i] != ')' || data[i - 1] == '\\')) + i++; + + if (i >= size) goto cleanup; + link_e = i; + + /* looking for title end if present */ + if (data[i] == '\'' || data[i] == '"') { + i++; + title_b = i; + + while (i < size && (data[i] != ')' || data[i - 1] == '\\')) i++; + if (i >= size) goto cleanup; + + /* skipping whitespaces after title */ + title_e = i - 1; + while (title_e > title_b && isspace(data[title_e])) + title_e--; + + /* checking for closing quote presence */ + if (data[title_e] != '\'' && data[title_e] != '"') { + title_b = title_e = 0; + link_e = i; + } + } + + /* remove whitespace at the end of the link */ + while (link_e > link_b && isspace(data[link_e - 1])) + link_e--; + + /* remove optional angle brackets around the link */ + if (data[link_b] == '<') link_b++; + if (data[link_e - 1] == '>') link_e--; + + /* building escaped link and title */ + if (link_e > link_b) { + link = rndr_newbuf(rndr); + bufput(link, data + link_b, link_e - link_b); + } + + if (title_e > title_b) { + title = rndr_newbuf(rndr); + bufput(title, data + title_b, title_e - title_b); + } + + i++; + } + + /* reference style link */ + else if (i < size && data[i] == '[') { + struct buf id = { 0, 0, 0, 0, 0 }; + struct link_ref *lr; + + /* looking for the id */ + i += 1; + link_b = i; + while (i < size && data[i] != ']') i++; + if (i >= 
size) goto cleanup; + link_e = i; + + /* finding the link_ref */ + if (link_b == link_e) { + if (text_has_nl) { + struct buf *b = rndr_newbuf(rndr); + size_t j; + + for (j = 1; j < txt_e; j++) { + if (data[j] != '\n') + bufputc(b, data[j]); + else if (data[j - 1] != ' ') + bufputc(b, ' '); + } + + id.data = b->data; + id.size = b->size; + } else { + id.data = data + 1; + id.size = txt_e - 1; + } + } else { + id.data = data + link_b; + id.size = link_e - link_b; + } + + lr = arr_sorted_find(&rndr->refs, &id, cmp_link_ref); + if (!lr) goto cleanup; + + /* keeping link and title from link_ref */ + link = lr->link; + title = lr->title; + i += 1; + } + + /* shortcut reference style link */ + else { + struct buf id = { 0, 0, 0, 0, 0 }; + struct link_ref *lr; + + /* crafting the id */ + if (text_has_nl) { + struct buf *b = rndr_newbuf(rndr); + size_t j; + + for (j = 1; j < txt_e; j++) { + if (data[j] != '\n') + bufputc(b, data[j]); + else if (data[j - 1] != ' ') + bufputc(b, ' '); + } + + id.data = b->data; + id.size = b->size; + } else { + id.data = data + 1; + id.size = txt_e - 1; + } + + /* finding the link_ref */ + lr = arr_sorted_find(&rndr->refs, &id, cmp_link_ref); + if (!lr) goto cleanup; + + /* keeping link and title from link_ref */ + link = lr->link; + title = lr->title; + + /* rewinding the whitespace */ + i = txt_e + 1; + } + + /* building content: img alt is escaped, link content is parsed */ + if (txt_e > 1) { + content = rndr_newbuf(rndr); + if (is_img) bufput(content, data + 1, txt_e - 1); + else parse_inline(content, rndr, data + 1, txt_e - 1); + } + + /* calling the relevant rendering function */ + if (is_img) { + if (ob->size && ob->data[ob->size - 1] == '!') + ob->size -= 1; + + ret = rndr->make.image(ob, link, title, content, rndr->make.opaque); + } else + ret = rndr->make.link(ob, link, title, content, rndr->make.opaque); + + /* cleanup */ +cleanup: + rndr->work.size = (int)org_work_size; + return ret ? 
i : 0; +} + + + +/********************************* + * BLOCK-LEVEL PARSING FUNCTIONS * + *********************************/ + +/* is_empty • returns the line length (newline included) when it is empty, 0 otherwise */ +static size_t +is_empty(char *data, size_t size) +{ + size_t i; + for (i = 0; i < size && data[i] != '\n'; i += 1) + if (data[i] != ' ' && data[i] != '\t') return 0; + return i + 1; /* size + 1 when the buffer ends without a newline */ +} + +/* is_hrule • returns whether a line is a horizontal rule */ +static int +is_hrule(char *data, size_t size) +{ + size_t i = 0, n = 0; + char c; + + /* skipping initial spaces */ + if (size < 3) return 0; + if (data[0] == ' ') { i += 1; + if (data[1] == ' ') { i += 1; + if (data[2] == ' ') { i += 1; } } } + + /* looking at the hrule char */ + if (i + 2 >= size + || (data[i] != '*' && data[i] != '-' && data[i] != '_')) + return 0; + c = data[i]; + + /* the whole line must be the char or whitespace */ + while (i < size && data[i] != '\n') { + if (data[i] == c) n += 1; + else if (data[i] != ' ' && data[i] != '\t') + return 0; + i += 1; } + + return n >= 3; +} + +/* is_codefence • returns the line length (newline included) when the line is a code fence, 0 otherwise; fills syntax, when given, with the text that follows the fence */ +static size_t +is_codefence(char *data, size_t size, struct buf *syntax) +{ + size_t i = 0, n = 0; + char c; + + /* skipping initial spaces */ + if (size < 3) return 0; + if (data[0] == ' ') { i += 1; + if (data[1] == ' ') { i += 1; + if (data[2] == ' ') { i += 1; } } } + + /* looking at the fence char */ + if (i + 2 >= size || !(data[i] == '~' || data[i] == '`')) + return 0; + + c = data[i]; + + /* counting the run of fence chars: at least three are required */ + while (i < size && data[i] == c) { + n++; i++; + } + + if (n < 3) + return 0; + + if (syntax != NULL) { + size_t syn = 0; + + while (i < size && (data[i] == ' ' || data[i] == '\t')) + i++; + + syntax->data = data + i; + + if (i < size && data[i] == '{') { + i++; syntax->data++; + + while (i < size && data[i] != '}' && data[i] != '\n') { + syn++; i++; + } + + if (i == size || data[i] != '}') + return 0; + + /* strip all 
whitespace at the beginning and the end + * of the {} block */ + while (syn > 0 && isspace((unsigned char)syntax->data[0])) { + syntax->data++; syn--; + } + + while (syn > 0 && isspace((unsigned char)syntax->data[syn - 1])) + syn--; + + i++; + } else { + while (i < size && !isspace((unsigned char)data[i])) { + syn++; i++; + } + } + + syntax->size = syn; + } + + while (i < size && data[i] != '\n') { + if (!isspace((unsigned char)data[i])) + return 0; + + i++; + } + + return i + 1; +} + +/* is_headerline • returns whether the line is a setext-style hdr underline */ +static int +is_headerline(char *data, size_t size) +{ + size_t i = 0; + + /* test of level 1 header */ + if (data[i] == '=') { + for (i = 1; i < size && data[i] == '='; i += 1); + while (i < size && (data[i] == ' ' || data[i] == '\t')) i += 1; + return (i >= size || data[i] == '\n') ? 1 : 0; } + + /* test of level 2 header */ + if (data[i] == '-') { + for (i = 1; i < size && data[i] == '-'; i += 1); + while (i < size && (data[i] == ' ' || data[i] == '\t')) i += 1; + return (i >= size || data[i] == '\n') ? 
2 : 0; } + + return 0; +} + +/* prefix_quote • returns blockquote prefix length */ +static size_t +prefix_quote(char *data, size_t size) +{ + size_t i = 0; + if (i < size && data[i] == ' ') i += 1; + if (i < size && data[i] == ' ') i += 1; + if (i < size && data[i] == ' ') i += 1; + if (i < size && data[i] == '>') { + if (i + 1 < size && (data[i + 1] == ' ' || data[i+1] == '\t')) + return i + 2; + else return i + 1; } + else return 0; +} + +/* prefix_code • returns prefix length for block code*/ +static size_t +prefix_code(char *data, size_t size) +{ + if (size > 0 && data[0] == '\t') return 1; + if (size > 3 && data[0] == ' ' && data[1] == ' ' + && data[2] == ' ' && data[3] == ' ') return 4; + return 0; +} + +/* prefix_oli • returns ordered list item prefix */ +static size_t +prefix_oli(char *data, size_t size) +{ + size_t i = 0; + if (i < size && data[i] == ' ') i += 1; + if (i < size && data[i] == ' ') i += 1; + if (i < size && data[i] == ' ') i += 1; + if (i >= size || data[i] < '0' || data[i] > '9') return 0; + while (i < size && data[i] >= '0' && data[i] <= '9') i += 1; + if (i + 1 >= size || data[i] != '.' 
+ || (data[i + 1] != ' ' && data[i + 1] != '\t')) return 0; + return i + 2; +} + +/* prefix_uli • returns unordered list item prefix */ +static size_t +prefix_uli(char *data, size_t size) +{ + size_t i = 0; + if (i < size && data[i] == ' ') i += 1; + if (i < size && data[i] == ' ') i += 1; + if (i < size && data[i] == ' ') i += 1; + if (i + 1 >= size + || (data[i] != '*' && data[i] != '+' && data[i] != '-') + || (data[i + 1] != ' ' && data[i + 1] != '\t')) + return 0; + return i + 2; +} + + +/* parse_block • parsing of block-level content into ob (forward declaration, returns nothing) */ +static void parse_block(struct buf *ob, struct render *rndr, + char *data, size_t size); + + +/* parse_blockquote • handles parsing of a blockquote fragment */ +static size_t +parse_blockquote(struct buf *ob, struct render *rndr, char *data, size_t size) +{ + size_t beg, end = 0, pre, work_size = 0; + char *work_data = 0; + struct buf *out = 0; + + out = rndr_newbuf(rndr); + beg = 0; + while (beg < size) { + for (end = beg + 1; end < size && data[end - 1] != '\n'; end++); + + pre = prefix_quote(data + beg, end - beg); + + if (pre) + beg += pre; /* skipping prefix */ + + /* empty line followed by non-quote line */ + else if (is_empty(data + beg, end - beg) && + (end >= size || (prefix_quote(data + end, size - end) == 0 && + !is_empty(data + end, size - end)))) + break; + + if (beg < end) { /* copy into the in-place working buffer */ + /* bufput(work, data + beg, end - beg); */ + if (!work_data) + work_data = data + beg; + else if (data + beg != work_data + work_size) + memmove(work_data + work_size, data + beg, end - beg); + work_size += end - beg; + } + beg = end; + } + + parse_block(out, rndr, work_data, work_size); + if (rndr->make.blockquote) + rndr->make.blockquote(ob, out, rndr->make.opaque); + rndr_popbuf(rndr); + return end; +} + +static size_t +parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, int do_render); + +/* parse_paragraph • handles parsing of a regular paragraph 
*/ +static size_t +parse_paragraph(struct buf *ob, struct render *rndr, char *data, size_t size) +{ + size_t i = 0, end = 0; + int level = 0; + struct buf work = { data, 0, 0, 0, 0 }; /* volatile working buffer */ + + while (i < size) { + for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */; + + if (is_empty(data + i, size - i) || (level = is_headerline(data + i, size - i)) != 0) + break; + + if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) { + if (data[i] == '<' && rndr->make.blockhtml && parse_htmlblock(ob, rndr, data + i, size - i, 0)) { + end = i; + break; + } + } + + if (data[i] == '#' || is_hrule(data + i, size - i)) { + end = i; + break; + } + + i = end; + } + + work.size = i; + while (work.size && data[work.size - 1] == '\n') + work.size--; + + if (!level) { + struct buf *tmp = rndr_newbuf(rndr); + parse_inline(tmp, rndr, work.data, work.size); + if (rndr->make.paragraph) + rndr->make.paragraph(ob, tmp, rndr->make.opaque); + rndr_popbuf(rndr); + } else { /* setext underline found: the last text line is the header, earlier lines form a paragraph */ + struct buf *header_work; + + if (work.size) { + size_t beg; + i = work.size; + work.size -= 1; + + while (work.size && data[work.size] != '\n') + work.size -= 1; + + beg = work.size + 1; + while (work.size && data[work.size - 1] == '\n') + work.size -= 1; + + if (work.size > 0) { + struct buf *tmp = rndr_newbuf(rndr); + parse_inline(tmp, rndr, work.data, work.size); + + if (rndr->make.paragraph) + rndr->make.paragraph(ob, tmp, rndr->make.opaque); + + rndr_popbuf(rndr); + work.data += beg; + work.size = i - beg; + } + else work.size = i; + } + + header_work = rndr_newbuf(rndr); + parse_inline(header_work, rndr, work.data, work.size); + + if (rndr->make.header) + rndr->make.header(ob, header_work, (int)level, rndr->make.opaque); + + rndr_popbuf(rndr); + } + + return end; +} + +/* parse_fencedcode • handles parsing of a block-level code fragment */ +static size_t +parse_fencedcode(struct buf *ob, struct render *rndr, char *data, size_t size) +{ + size_t beg, end; + struct buf *work = 0; + 
struct buf lang = { 0, 0, 0, 0, 0 }; + + beg = is_codefence(data, size, &lang); + if (beg == 0) return 0; + + work = rndr_newbuf(rndr); + + while (beg < size) { + size_t fence_end; + + fence_end = is_codefence(data + beg, size - beg, NULL); + if (fence_end != 0) { + beg += fence_end; + break; + } + + for (end = beg + 1; end < size && data[end - 1] != '\n'; end += 1); + + if (beg < end) { + /* verbatim copy to the working buffer, + escaping entities */ + if (is_empty(data + beg, end - beg)) + bufputc(work, '\n'); + else bufput(work, data + beg, end - beg); + } + beg = end; + } + + if (work->size && work->data[work->size - 1] != '\n') + bufputc(work, '\n'); + + if (rndr->make.blockcode) + rndr->make.blockcode(ob, work, lang.size ? &lang : NULL, rndr->make.opaque); + + rndr_popbuf(rndr); + return beg; +} + +static size_t +parse_blockcode(struct buf *ob, struct render *rndr, char *data, size_t size) +{ + size_t beg, end, pre; + struct buf *work = 0; + + work = rndr_newbuf(rndr); + + beg = 0; + while (beg < size) { + for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {}; + pre = prefix_code(data + beg, end - beg); + + if (pre) + beg += pre; /* skipping prefix */ + else if (!is_empty(data + beg, end - beg)) + /* non-empty non-prefixed line breaks the pre */ + break; + + if (beg < end) { + /* verbatim copy to the working buffer, + escaping entities */ + if (is_empty(data + beg, end - beg)) + bufputc(work, '\n'); + else bufput(work, data + beg, end - beg); + } + beg = end; + } + + while (work->size && work->data[work->size - 1] == '\n') + work->size -= 1; + + bufputc(work, '\n'); + + if (rndr->make.blockcode) + rndr->make.blockcode(ob, work, NULL, rndr->make.opaque); + + rndr_popbuf(rndr); + return beg; +} + +/* parse_listitem • parsing of a single list item */ +/* assuming initial prefix is already removed */ +static size_t +parse_listitem(struct buf *ob, struct render *rndr, char *data, size_t size, int *flags) +{ + struct buf *work = 0, *inter = 0; + size_t 
beg = 0, end, pre, sublist = 0, orgpre = 0, i; + int in_empty = 0, has_inside_empty = 0; + + /* keeping book of the first indentation prefix */ + while (orgpre < 3 && orgpre < size && data[orgpre] == ' ') + orgpre++; + + beg = prefix_uli(data, size); + if (!beg) + beg = prefix_oli(data, size); + + if (!beg) + return 0; + + /* skipping to the beginning of the following line */ + end = beg; + while (end < size && data[end - 1] != '\n') + end++; + + /* getting working buffers */ + work = rndr_newbuf(rndr); + inter = rndr_newbuf(rndr); + + /* putting the first line into the working buffer */ + bufput(work, data + beg, end - beg); + beg = end; + + /* process the following lines */ + while (beg < size) { + end++; + + while (end < size && data[end - 1] != '\n') + end++; + + /* process an empty line */ + if (is_empty(data + beg, end - beg)) { + in_empty = 1; + beg = end; + continue; + } + + /* calculating the indentation */ + i = 0; + while (i < 4 && beg + i < end && data[beg + i] == ' ') + i++; + + pre = i; + if (data[beg] == '\t') { i = 1; pre = 8; } + + /* checking for a new item */ + if ((prefix_uli(data + beg + i, end - beg - i) && + !is_hrule(data + beg + i, end - beg - i)) || + prefix_oli(data + beg + i, end - beg - i)) { + if (in_empty) + has_inside_empty = 1; + + if (pre == orgpre) /* the following item must have */ + break; /* the same indentation */ + + if (!sublist) + sublist = work->size; + } + /* joining only indented stuff after empty lines */ + else if (in_empty && i < 4 && data[beg] != '\t') { + *flags |= MKD_LI_END; + break; + } + else if (in_empty) { + bufputc(work, '\n'); + has_inside_empty = 1; + } + + in_empty = 0; + + /* adding the line without prefix into the working buffer */ + bufput(work, data + beg + i, end - beg - i); + beg = end; + } + + /* render of li contents */ + if (has_inside_empty) + *flags |= MKD_LI_BLOCK; + + if (*flags & MKD_LI_BLOCK) { + /* intermediate render of block li */ + if (sublist && sublist < work->size) { + 
parse_block(inter, rndr, work->data, sublist); + parse_block(inter, rndr, work->data + sublist, work->size - sublist); + } + else + parse_block(inter, rndr, work->data, work->size); + } else { + /* intermediate render of inline li */ + if (sublist && sublist < work->size) { + parse_inline(inter, rndr, work->data, sublist); + parse_block(inter, rndr, work->data + sublist, work->size - sublist); + } + else + parse_inline(inter, rndr, work->data, work->size); + } + + /* render of li itself */ + if (rndr->make.listitem) + rndr->make.listitem(ob, inter, *flags, rndr->make.opaque); + + rndr_popbuf(rndr); + rndr_popbuf(rndr); + return beg; +} + + +/* parse_list • parsing ordered or unordered list block */ +static size_t +parse_list(struct buf *ob, struct render *rndr, char *data, size_t size, int flags) +{ + struct buf *work = 0; + size_t i = 0, j; + + work = rndr_newbuf(rndr); + + while (i < size) { + j = parse_listitem(work, rndr, data + i, size - i, &flags); + i += j; + + if (!j || (flags & MKD_LI_END)) + break; + } + + if (rndr->make.list) + rndr->make.list(ob, work, flags, rndr->make.opaque); + rndr_popbuf(rndr); + return i; +} + + +/* parse_atxheader • parsing of atx-style headers */ +static size_t +parse_atxheader(struct buf *ob, struct render *rndr, char *data, size_t size) +{ + size_t level = 0; + size_t i, end, skip; + + if (!size || data[0] != '#') + return 0; + + while (level < size && level < 6 && data[level] == '#') + level++; + + for (i = level; i < size && (data[i] == ' ' || data[i] == '\t'); i++); + + for (end = i; end < size && data[end] != '\n'; end++); + skip = end; + + while (end && data[end - 1] == '#') + end--; + + while (end && (data[end - 1] == ' ' || data[end - 1] == '\t')) + end--; + + if (end > i) { + struct buf *work = rndr_newbuf(rndr); + + parse_inline(work, rndr, data + i, end - i); + + if (rndr->make.header) + rndr->make.header(ob, work, (int)level, rndr->make.opaque); + + rndr_popbuf(rndr); + } + + return skip; +} + + +/* htmlblock_end • 
checking end of HTML block : [ \t]*\n[ \t*]\n */ +/* returns the length on match, 0 otherwise */ +static size_t +htmlblock_end(struct html_tag *tag, struct render *rndr, char *data, size_t size) +{ + size_t i, w; + + /* assuming data[0] == '<' && data[1] == '/' already tested */ + + /* checking tag is a match */ + if (tag->size + 3 >= size + || strncasecmp(data + 2, tag->text, tag->size) + || data[tag->size + 2] != '>') + return 0; + + /* checking white lines */ + i = tag->size + 3; + w = 0; + if (i < size && (w = is_empty(data + i, size - i)) == 0) + return 0; /* non-blank after tag */ + i += w; + w = 0; + + if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) { + if (i < size) + w = is_empty(data + i, size - i); + } else { + if (i < size && (w = is_empty(data + i, size - i)) == 0) + return 0; /* non-blank line after tag line */ + } + + return i + w; +} + + +/* parse_htmlblock • parsing of inline HTML block */ +static size_t +parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, int do_render) +{ + size_t i, j = 0; + struct html_tag *curtag; + int found; + struct buf work = { data, 0, 0, 0, 0 }; + + /* identification of the opening tag */ + if (size < 2 || data[0] != '<') return 0; + curtag = find_block_tag(data + 1, size - 1); + + /* handling of special cases */ + if (!curtag) { + + /* HTML comment, laxist form */ + if (size > 5 && data[1] == '!' 
&& data[2] == '-' && data[3] == '-') { + i = 5; + + while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>')) + i++; + + i++; + + if (i < size) + j = is_empty(data + i, size - i); + + if (j) { + work.size = i + j; + if (do_render && rndr->make.blockhtml) + rndr->make.blockhtml(ob, &work, rndr->make.opaque); + return work.size; + } + } + + /* HR, which is the only self-closing block tag considered */ + if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) { + i = 3; + while (i < size && data[i] != '>') + i += 1; + + if (i + 1 < size) { + i += 1; + j = is_empty(data + i, size - i); + if (j) { + work.size = i + j; + if (do_render && rndr->make.blockhtml) + rndr->make.blockhtml(ob, &work, rndr->make.opaque); + return work.size; + } + } + } + + /* no special case recognised */ + return 0; + } + + /* looking for an unindented matching closing tag */ + /* followed by a blank line */ + i = 1; + found = 0; + + /* if not found, trying a second pass looking for indented match */ + /* but not if tag is "ins" or "del" (following original Markdown.pl) */ + if (curtag != INS_TAG && curtag != DEL_TAG) { + i = 1; + while (i < size) { + i++; + while (i < size && !(data[i - 1] == '<' && data[i] == '/')) + i++; + + if (i + 2 + curtag->size >= size) + break; + + j = htmlblock_end(curtag, rndr, data + i - 1, size - i + 1); + + if (j) { + i += j - 1; + found = 1; + break; + } + } + } + + if (!found) return 0; + + /* the end of the block has been found */ + work.size = i; + if (do_render && rndr->make.blockhtml) + rndr->make.blockhtml(ob, &work, rndr->make.opaque); + + return i; +} + +static void +parse_table_row(struct buf *ob, struct render *rndr, char *data, size_t size, size_t columns, int *col_data) +{ + size_t i = 0, col; + struct buf *row_work = 0; + + row_work = rndr_newbuf(rndr); + + if (i < size && data[i] == '|') + i++; + + for (col = 0; col < columns && i < size; ++col) { + size_t cell_start, cell_end; + struct buf 
*cell_work; + + cell_work = rndr_newbuf(rndr); + + while (i < size && isspace((unsigned char)data[i])) + i++; + + cell_start = i; + + while (i < size && data[i] != '|') + i++; + + cell_end = i - 1; + + while (cell_end > cell_start && isspace((unsigned char)data[cell_end])) + cell_end--; + + parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start); + if (rndr->make.table_cell) + rndr->make.table_cell(row_work, cell_work, col_data ? col_data[col] : 0, rndr->make.opaque); + + rndr_popbuf(rndr); + i++; + } + + for (; col < columns; ++col) { + struct buf empty_cell = {0, 0, 0, 0, 0}; + if (rndr->make.table_cell) + rndr->make.table_cell(row_work, &empty_cell, col_data ? col_data[col] : 0, rndr->make.opaque); + } + + if (rndr->make.table_row) + rndr->make.table_row(ob, row_work, rndr->make.opaque); + + rndr_popbuf(rndr); +} +/* parse_table_header • parses the header row and its underline; returns the offset past the underline, or 0 when this is not a table; *column_data is allocated here and left for the caller to free */ +static size_t +parse_table_header(struct buf *ob, struct render *rndr, char *data, size_t size, size_t *columns, int **column_data) +{ + int pipes; + size_t i = 0, col, header_end, under_end; + + pipes = 0; + while (i < size && data[i] != '\n') + if (data[i++] == '|') + pipes++; + + if (i == size || pipes == 0) + return 0; + + header_end = i; + + if (data[0] == '|') + pipes--; + + if (i > 2 && data[i - 1] == '|') + pipes--; + + *columns = pipes + 1; + *column_data = calloc(*columns, sizeof(int)); + if (!*column_data) return 0; /* out of memory: do not treat the input as a table */ + /* Parse the header underline */ + i++; + if (i < size && data[i] == '|') + i++; + + under_end = i; + while (under_end < size && data[under_end] != '\n') + under_end++; + + for (col = 0; col < *columns && i < under_end; ++col) { + if (data[i] == ':') { + i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L; + } + + while (i < under_end && data[i] == '-') + i++; + + if (i < under_end && data[i] == ':') { + i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R; + } + + if (i < under_end && data[i] != '|') + break; + + i++; + } + + if (col < *columns) /* underline describes fewer columns than the header row */ + return 0; + + parse_table_row(ob, rndr, data, header_end, *columns, *column_data); + return under_end + 1; +} + +static size_t 
+parse_table(struct buf *ob, struct render *rndr, char *data, size_t size) +{ + size_t i; + + struct buf *header_work = 0; + struct buf *body_work = 0; + + size_t columns; + int *col_data = NULL; + + header_work = rndr_newbuf(rndr); + body_work = rndr_newbuf(rndr); + + i = parse_table_header(header_work, rndr, data, size, &columns, &col_data); + if (i > 0) { + + while (i < size) { + size_t row_start; + int pipes = 0; + + row_start = i; + + while (i < size && data[i] != '\n') + if (data[i++] == '|') + pipes++; + + if (pipes == 0 || i == size) { + i = row_start; + break; + } + + parse_table_row(body_work, rndr, data + row_start, i - row_start, columns, col_data); + i++; + } + + if (rndr->make.table) + rndr->make.table(ob, header_work, body_work, rndr->make.opaque); + } + + free(col_data); + rndr_popbuf(rndr); + rndr_popbuf(rndr); + return i; +} + +/* parse_block • parsing of one block, returning next char to parse */ +static void +parse_block(struct buf *ob, struct render *rndr, char *data, size_t size) +{ + size_t beg, end, i; + char *txt_data; + beg = 0; + + if (rndr->work.size > rndr->max_nesting) + return; + + while (beg < size) { + txt_data = data + beg; + end = size - beg; + + if (data[beg] == '#') + beg += parse_atxheader(ob, rndr, txt_data, end); + + else if (data[beg] == '<' && rndr->make.blockhtml && + (i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0) + beg += i; + + else if ((i = is_empty(txt_data, end)) != 0) + beg += i; + + else if (is_hrule(txt_data, end)) { + if (rndr->make.hrule) + rndr->make.hrule(ob, rndr->make.opaque); + + while (beg < size && data[beg] != '\n') + beg++; + + beg++; + } + + else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 && + (i = parse_fencedcode(ob, rndr, txt_data, end)) != 0) + beg += i; + + else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 && + (i = parse_table(ob, rndr, txt_data, end)) != 0) + beg += i; + + else if (prefix_quote(txt_data, end)) + beg += parse_blockquote(ob, rndr, txt_data, end); + + else if 
(prefix_code(txt_data, end)) + beg += parse_blockcode(ob, rndr, txt_data, end); + + else if (prefix_uli(txt_data, end)) + beg += parse_list(ob, rndr, txt_data, end, 0); + + else if (prefix_oli(txt_data, end)) + beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED); + + else + beg += parse_paragraph(ob, rndr, txt_data, end); + } +} + + + +/********************* + * REFERENCE PARSING * + *********************/ + +/* is_ref • returns whether a line is a reference or not */ +static int +is_ref(char *data, size_t beg, size_t end, size_t *last, struct array *refs) +{ +/* int n; */ + size_t i = 0; + size_t id_offset, id_end; + size_t link_offset, link_end; + size_t title_offset, title_end; + size_t line_end; + struct link_ref *lr; +/* struct buf id = { 0, 0, 0, 0, 0 }; / * volatile buf for id search */ + + /* up to 3 optional leading spaces */ + if (beg + 3 >= end) return 0; + if (data[beg] == ' ') { i = 1; + if (data[beg + 1] == ' ') { i = 2; + if (data[beg + 2] == ' ') { i = 3; + if (data[beg + 3] == ' ') return 0; } } } + i += beg; + + /* id part: anything but a newline between brackets */ + if (data[i] != '[') return 0; + i += 1; + id_offset = i; + while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']') + i += 1; + if (i >= end || data[i] != ']') return 0; + id_end = i; + + /* spacer: colon (space | tab)* newline? 
(space | tab)* */ + i += 1; + if (i >= end || data[i] != ':') return 0; + i += 1; + while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1; + if (i < end && (data[i] == '\n' || data[i] == '\r')) { + i += 1; + if (i < end && data[i] == '\r' && data[i - 1] == '\n') i += 1; } + while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1; + if (i >= end) return 0; + + /* link: whitespace-free sequence, optionally between angle brackets */ + if (data[i] == '<') i += 1; + link_offset = i; + while (i < end && data[i] != ' ' && data[i] != '\t' + && data[i] != '\n' && data[i] != '\r') i += 1; + if (data[i - 1] == '>') link_end = i - 1; + else link_end = i; + + /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */ + while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1; + if (i < end && data[i] != '\n' && data[i] != '\r' + && data[i] != '\'' && data[i] != '"' && data[i] != '(') + return 0; + line_end = 0; + /* computing end-of-line */ + if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i; + if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') + line_end = i + 1; + + /* optional (space|tab)* spacer after a newline */ + if (line_end) { + i = line_end + 1; + while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1; } + + /* optional title: any non-newline sequence enclosed in '"() + alone on its line */ + title_offset = title_end = 0; + if (i + 1 < end + && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) { + i += 1; + title_offset = i; + /* looking for EOL */ + while (i < end && data[i] != '\n' && data[i] != '\r') i += 1; + if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') + title_end = i + 1; + else title_end = i; + /* stepping back */ + i -= 1; + while (i > title_offset && (data[i] == ' ' || data[i] == '\t')) + i -= 1; + if (i > title_offset + && (data[i] == '\'' || data[i] == '"' || data[i] == ')')) { + line_end = title_end; + title_end = i; } } + if (!line_end) return 0; /* garbage after the link */ + + 
/* a valid ref has been found, filling-in return structures */ + if (last) *last = line_end; + if (!refs) return 1; + lr = arr_item(refs, arr_newitem(refs)); + lr->id = bufnew(id_end - id_offset); + bufput(lr->id, data + id_offset, id_end - id_offset); + lr->link = bufnew(link_end - link_offset); + bufput(lr->link, data + link_offset, link_end - link_offset); + if (title_end > title_offset) { + lr->title = bufnew(title_end - title_offset); + bufput(lr->title, data + title_offset, + title_end - title_offset); } + else lr->title = 0; + return 1; +} + +static void expand_tabs(struct buf *ob, const char *line, size_t size) +{ + size_t i = 0, tab = 0; + + while (i < size) { + size_t org = i; + + while (i < size && line[i] != '\t') { + i++; tab++; + } + + if (i > org) + bufput(ob, line + org, i - org); + + if (i >= size) + break; + + do { + bufputc(ob, ' '); tab++; + } while (tab % 4); + + i++; + } +} + +/********************** + * EXPORTED FUNCTIONS * + **********************/ + +/* markdown • parses the input buffer and renders it into the output buffer */ +void +ups_markdown(struct buf *ob, struct buf *ib, const struct mkd_renderer *rndrer, unsigned int extensions) { + struct link_ref *lr; + struct buf *text; + size_t i, beg, end; + struct render rndr; + + /* filling the render structure */ + if (!rndrer) + return; + + text = bufnew(TEXT_UNIT); + if (!text) + return; + + rndr.make = *rndrer; + arr_init(&rndr.refs, sizeof (struct link_ref)); + parr_init(&rndr.work); + + for (i = 0; i < 256; i += 1) + rndr.active_char[i] = 0; + + if (rndr.make.emphasis || rndr.make.double_emphasis || rndr.make.triple_emphasis) { + rndr.active_char['*'] = char_emphasis; + rndr.active_char['_'] = char_emphasis; + if (extensions & MKDEXT_STRIKETHROUGH) + rndr.active_char['~'] = char_emphasis; + } + + if (rndr.make.codespan) + rndr.active_char['`'] = char_codespan; + + if (rndr.make.linebreak) + rndr.active_char['\n'] = char_linebreak; + + if (rndr.make.image || rndr.make.link) + 
rndr.active_char['['] = char_link; + + rndr.active_char['<'] = char_langle_tag; + rndr.active_char['\\'] = char_escape; + rndr.active_char['&'] = char_entity; + + if (extensions & MKDEXT_AUTOLINK) { + rndr.active_char['h'] = char_autolink; // http, https + rndr.active_char['H'] = char_autolink; + + rndr.active_char['f'] = char_autolink; // ftp + rndr.active_char['F'] = char_autolink; + + rndr.active_char['m'] = char_autolink; // mailto + rndr.active_char['M'] = char_autolink; + } + + /* Extension data */ + rndr.ext_flags = extensions; + rndr.max_nesting = 16; + + /* first pass: looking for references, copying everything else */ + beg = 0; + while (beg < ib->size) /* iterating over lines */ + if (is_ref(ib->data, beg, ib->size, &end, &rndr.refs)) + beg = end; + else { /* skipping to the next line */ + end = beg; + while (end < ib->size && ib->data[end] != '\n' && ib->data[end] != '\r') + end += 1; + + /* adding the line body if present */ + if (end > beg) + expand_tabs(text, ib->data + beg, end - beg); + + while (end < ib->size && (ib->data[end] == '\n' || ib->data[end] == '\r')) { + /* add one \n per newline */ + if (ib->data[end] == '\n' || (end + 1 < ib->size && ib->data[end + 1] != '\n')) + bufputc(text, '\n'); + end += 1; + } + + beg = end; + } + + /* sorting the reference array */ + if (rndr.refs.size) + qsort(rndr.refs.base, rndr.refs.size, rndr.refs.unit, cmp_link_ref_sort); + + /* adding a final newline if not already present */ + if (!text->size) + goto cleanup; + + if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r') + bufputc(text, '\n'); + + /* second pass: actual rendering */ + if (rndr.make.doc_header) + rndr.make.doc_header(ob, rndr.make.opaque); + + parse_block(ob, &rndr, text->data, text->size); + + if (rndr.make.doc_footer) + rndr.make.doc_footer(ob, rndr.make.opaque); + + /* clean-up */ +cleanup: + bufrelease(text); + lr = rndr.refs.base; + for (i = 0; i < (size_t)rndr.refs.size; i += 1) { + bufrelease(lr[i].id); + 
bufrelease(lr[i].link); + bufrelease(lr[i].title); + } + + arr_free(&rndr.refs); + + assert(rndr.work.size == 0); + + for (i = 0; i < (size_t)rndr.work.asize; i += 1) + bufrelease(rndr.work.item[i]); + + parr_free(&rndr.work); +} + +/* vim: set filetype=c: */ diff --git a/src/markdown.h b/src/markdown.h new file mode 100755 index 0000000..cae5347 --- /dev/null +++ b/src/markdown.h @@ -0,0 +1,112 @@ +/* markdown.h - generic markdown parser */ + +/* + * Copyright (c) 2009, Natacha Porté + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef UPSKIRT_MARKDOWN_H +#define UPSKIRT_MARKDOWN_H + +#include "buffer.h" + +/******************** + * TYPE DEFINITIONS * + ********************/ + +/* mkd_autolink • type of autolink */ +enum mkd_autolink { + MKDA_NOT_AUTOLINK, /* used internally when it is not an autolink*/ + MKDA_NORMAL, /* normal http/http/ftp/mailto/etc link */ + MKDA_EMAIL, /* e-mail link without explit mailto: */ +}; + +enum mkd_extensions { + MKDEXT_LAX_EMPHASIS = (1 << 0), + MKDEXT_TABLES = (1 << 1), + MKDEXT_FENCED_CODE = (1 << 2), + MKDEXT_AUTOLINK = (1 << 3), + MKDEXT_STRIKETHROUGH = (1 << 4), + MKDEXT_LAX_HTML_BLOCKS = (1 << 5), +}; + +/* mkd_renderer • functions for rendering parsed data */ +struct mkd_renderer { + /* block level callbacks - NULL skips the block */ + void (*blockcode)(struct buf *ob, struct buf *text, struct buf *lang, void *opaque); + void (*blockquote)(struct buf *ob, struct buf *text, void *opaque); + void (*blockhtml)(struct buf *ob, struct buf *text, void *opaque); + void (*header)(struct buf *ob, struct buf *text, int level, void *opaque); + void (*hrule)(struct buf *ob, void *opaque); + void (*list)(struct buf *ob, struct buf *text, int flags, void *opaque); + void (*listitem)(struct buf *ob, struct buf *text, int flags, void *opaque); + void (*paragraph)(struct buf *ob, struct buf *text, void *opaque); + void (*table)(struct buf *ob, struct buf *header, struct buf *body, void *opaque); + void (*table_row)(struct buf *ob, struct buf *text, void *opaque); + void (*table_cell)(struct buf *ob, struct buf *text, int flags, void *opaque); + + + /* span level callbacks - NULL or return 0 prints the span verbatim */ + int (*autolink)(struct buf *ob, struct buf *link, enum mkd_autolink type, void *opaque); + int (*codespan)(struct buf *ob, struct buf *text, void *opaque); + int (*double_emphasis)(struct buf *ob, struct buf *text, void *opaque); + int (*emphasis)(struct buf *ob, struct buf *text, void *opaque); + int (*image)(struct buf *ob, struct buf 
*link, struct buf *title, struct buf *alt, void *opaque); + int (*linebreak)(struct buf *ob, void *opaque); + int (*link)(struct buf *ob, struct buf *link, struct buf *title, struct buf *content, void *opaque); + int (*raw_html_tag)(struct buf *ob, struct buf *tag, void *opaque); + int (*triple_emphasis)(struct buf *ob, struct buf *text, void *opaque); + int (*strikethrough)(struct buf *ob, struct buf *text, void *opaque); + + /* low level callbacks - NULL copies input directly into the output */ + void (*entity)(struct buf *ob, struct buf *entity, void *opaque); + void (*normal_text)(struct buf *ob, struct buf *text, void *opaque); + + /* header and footer */ + void (*doc_header)(struct buf *ob, void *opaque); + void (*doc_footer)(struct buf *ob, void *opaque); + + /* user data */ + void *opaque; +}; + +/********* + * FLAGS * + *********/ + +/* list/listitem flags */ +#define MKD_LIST_ORDERED 1 +#define MKD_LI_BLOCK 2 /*
  • containing block data */ + +#define MKD_TABLE_ALIGN_L (1 << 0) +#define MKD_TABLE_ALIGN_R (1 << 1) +#define MKD_TABLE_ALIGN_CENTER (MKD_TABLE_ALIGN_L | MKD_TABLE_ALIGN_R) + +/******************* + * Auxiliar methods + *******************/ +int +is_safe_link(const char *link, size_t link_len); + +/********************** + * EXPORTED FUNCTIONS * + **********************/ + +/* markdown • parses the input buffer and renders it into the output buffer */ +extern void +ups_markdown(struct buf *ob, struct buf *ib, const struct mkd_renderer *rndr, unsigned int extensions); + +#endif + +/* vim: set filetype=c: */ diff --git a/src/xhtml.c b/src/xhtml.c new file mode 100755 index 0000000..cd34ec2 --- /dev/null +++ b/src/xhtml.c @@ -0,0 +1,797 @@ +/* + * Copyright (c) 2009, Natacha Porté + * Copyright (c) 2011, Vicent Marti + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include "markdown.h" +#include "xhtml.h" + +#include +#include +#include +#include + +struct xhtml_renderopt { + struct { + int header_count; + int current_level; + } toc_data; + + struct { + int in_squote; + int in_dquote; + } quotes; + + unsigned int flags; +}; + +static inline void +put_scaped_char(struct buf *ob, char c) +{ + switch (c) { + case '<': BUFPUTSL(ob, "<"); break; + case '>': BUFPUTSL(ob, ">"); break; + case '&': BUFPUTSL(ob, "&"); break; + case '"': BUFPUTSL(ob, """); break; + default: bufputc(ob, c); break; + } +} + +/* lus_attr_escape • copy the buffer entity-escaping '<', '>', '&' and '"' */ +static void +lus_attr_escape(struct buf *ob, const char *src, size_t size) +{ + size_t i = 0, org; + while (i < size) { + /* copying directly unescaped characters */ + org = i; + while (i < size && src[i] != '<' && src[i] != '>' + && src[i] != '&' && src[i] != '"') + i += 1; + if (i > org) bufput(ob, src + org, i - org); + + /* escaping */ + if (i >= size) break; + + put_scaped_char(ob, src[i]); + i++; + } +} + +static int +is_html_tag(struct buf *tag, const char *tagname) +{ + size_t i = 0; + + if (i < tag->size && tag->data[0] != '<') + return 0; + + i++; + + while (i < tag->size && isspace(tag->data[i])) + i++; + + if (i < tag->size && tag->data[i] == '/') + i++; + + while (i < tag->size && isspace(tag->data[i])) + i++; + + for (; i < tag->size; ++i, ++tagname) { + if (*tagname == 0) + break; + + if (tag->data[i] != *tagname) + return 0; + } + + if (i == tag->size) + return 0; + + return (isspace(tag->data[i]) || tag->data[i] == '>'); +} + +/******************** + * GENERIC RENDERER * + ********************/ +static int +rndr_autolink(struct buf *ob, struct buf *link, enum mkd_autolink type, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + + if (!link || !link->size) + return 0; + + if ((options->flags & XHTML_SAFELINK) != 0 && + !is_safe_link(link->data, link->size) && + type != MKDA_EMAIL) + return 0; + + BUFPUTSL(ob, "data, 
link->size); + BUFPUTSL(ob, "\">"); + + /* + * Pretty printing: if we get an email address as + * an actual URI, e.g. `mailto:foo@bar.com`, we don't + * want to print the `mailto:` prefix + */ + if (bufprefix(link, "mailto:") == 0) { + lus_attr_escape(ob, link->data + 7, link->size - 7); + } else { + lus_attr_escape(ob, link->data, link->size); + } + + BUFPUTSL(ob, ""); + + return 1; +} + +static void +rndr_blockcode(struct buf *ob, struct buf *text, struct buf *lang, void *opaque) +{ + if (ob->size) bufputc(ob, '\n'); + + if (lang && lang->size) { + size_t i = 0; + BUFPUTSL(ob, "
    size; ++i) {
    +			if (lang->data[i] == '.' && (i == 0 || isspace(lang->data[i - 1])))
    +				continue;
    +
    +			bufputc(ob, lang->data[i]);
    +		}
    +
    +		BUFPUTSL(ob, "\">");
    +	} else
    +		BUFPUTSL(ob, "
    ");
    +
    +	if (text)
    +		lus_attr_escape(ob, text->data, text->size);
    +
    +	BUFPUTSL(ob, "
    \n"); +} + +/* + * GitHub style code block: + * + *
    
    + *		...
    + *		
    + * + * Unlike other parsers, we store the language identifier in the
    ,
    + * and don't let the user generate custom classes.
    + *
    + * The language identifier in the 
     block gets postprocessed and all
    + * the code inside gets syntax highlighted with Pygments. This is much safer
    + * than letting the user specify a CSS class for highlighting.
    + *
    + * Note that we only generate HTML for the first specifier.
    + * E.g.
    + *		~~~~ {.python .numbered}	=>	
    
    + */
    +static void
    +rndr_blockcode_github(struct buf *ob, struct buf *text, struct buf *lang, void *opaque)
    +{
    +	if (ob->size) bufputc(ob, '\n');
    +
    +	if (lang && lang->size) {
    +		size_t i = 0;
    +		BUFPUTSL(ob, "
    size; ++i)
    +			if (isspace(lang->data[i]))
    +				break;
    +
    +		if (lang->data[0] == '.')
    +			bufput(ob, lang->data + 1, i - 1);
    +		else
    +			bufput(ob, lang->data, i);
    +
    +		BUFPUTSL(ob, "\">");
    +	} else
    +		BUFPUTSL(ob, "
    ");
    +
    +	if (text)
    +		lus_attr_escape(ob, text->data, text->size);
    +
    +	BUFPUTSL(ob, "
    \n"); +} + +static void +rndr_blockquote(struct buf *ob, struct buf *text, void *opaque) +{ + BUFPUTSL(ob, "
    \n"); + if (text) bufput(ob, text->data, text->size); + BUFPUTSL(ob, "
    "); +} + +static int +rndr_codespan(struct buf *ob, struct buf *text, void *opaque) +{ + BUFPUTSL(ob, ""); + if (text) lus_attr_escape(ob, text->data, text->size); + BUFPUTSL(ob, ""); + return 1; +} + +static int +rndr_strikethrough(struct buf *ob, struct buf *text, void *opaque) +{ + if (!text || !text->size) + return 0; + + BUFPUTSL(ob, ""); + bufput(ob, text->data, text->size); + BUFPUTSL(ob, ""); + return 1; +} + +static int +rndr_double_emphasis(struct buf *ob, struct buf *text, void *opaque) +{ + if (!text || !text->size) + return 0; + + BUFPUTSL(ob, ""); + bufput(ob, text->data, text->size); + BUFPUTSL(ob, ""); + + return 1; +} + +static int +rndr_emphasis(struct buf *ob, struct buf *text, void *opaque) +{ + if (!text || !text->size) return 0; + BUFPUTSL(ob, ""); + if (text) bufput(ob, text->data, text->size); + BUFPUTSL(ob, ""); + return 1; +} + +static void +rndr_header(struct buf *ob, struct buf *text, int level, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + + if (ob->size) + bufputc(ob, '\n'); + + if (options->flags & XHTML_TOC) { + bufprintf(ob, "", options->toc_data.header_count++); + } + + bufprintf(ob, "", level); + if (text) bufput(ob, text->data, text->size); + bufprintf(ob, "\n", level); +} + +static int +rndr_link(struct buf *ob, struct buf *link, struct buf *title, struct buf *content, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + + if ((options->flags & XHTML_SAFELINK) != 0 && !is_safe_link(link->data, link->size)) + return 0; + + BUFPUTSL(ob, "size) lus_attr_escape(ob, link->data, link->size); + if (title && title->size) { + BUFPUTSL(ob, "\" title=\""); + lus_attr_escape(ob, title->data, title->size); } + BUFPUTSL(ob, "\">"); + if (content && content->size) bufput(ob, content->data, content->size); + BUFPUTSL(ob, ""); + return 1; +} + +static void +rndr_list(struct buf *ob, struct buf *text, int flags, void *opaque) +{ + if (ob->size) bufputc(ob, '\n'); + bufput(ob, flags & MKD_LIST_ORDERED ? "
      \n" : "
        \n", 5); + if (text) bufput(ob, text->data, text->size); + bufput(ob, flags & MKD_LIST_ORDERED ? "
    \n" : "\n", 6); +} + +static void +rndr_listitem(struct buf *ob, struct buf *text, int flags, void *opaque) +{ + BUFPUTSL(ob, "
  • "); + if (text) { + while (text->size && text->data[text->size - 1] == '\n') + text->size -= 1; + bufput(ob, text->data, text->size); } + BUFPUTSL(ob, "
  • \n"); +} + +static void +rndr_paragraph(struct buf *ob, struct buf *text, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + size_t i = 0; + + if (ob->size) bufputc(ob, '\n'); + + if (!text || !text->size) + return; + + while (i < text->size && isspace(text->data[i])) i++; + + if (i == text->size) + return; + + BUFPUTSL(ob, "

    "); + if (options->flags & XHTML_HARD_WRAP) { + size_t org; + while (i < text->size) { + org = i; + while (i < text->size && text->data[i] != '\n') + i++; + + if (i > org) + bufput(ob, text->data + org, i - org); + + if (i >= text->size) + break; + + BUFPUTSL(ob, "
    \n"); + i++; + } + } else { + bufput(ob, &text->data[i], text->size - i); + } + BUFPUTSL(ob, "

    \n"); + + /* Close any open quotes at the end of the paragraph */ + options->quotes.in_squote = 0; + options->quotes.in_dquote = 0; +} + +static void +rndr_raw_block(struct buf *ob, struct buf *text, void *opaque) +{ + size_t org, sz; + if (!text) return; + sz = text->size; + while (sz > 0 && text->data[sz - 1] == '\n') sz -= 1; + org = 0; + while (org < sz && text->data[org] == '\n') org += 1; + if (org >= sz) return; + if (ob->size) bufputc(ob, '\n'); + bufput(ob, text->data + org, sz - org); + bufputc(ob, '\n'); +} + +static int +rndr_triple_emphasis(struct buf *ob, struct buf *text, void *opaque) +{ + if (!text || !text->size) return 0; + BUFPUTSL(ob, ""); + bufput(ob, text->data, text->size); + BUFPUTSL(ob, ""); + return 1; +} + + +/********************** + * XHTML 1.0 RENDERER * + **********************/ + +static void +rndr_hrule(struct buf *ob, void *opaque) +{ + if (ob->size) bufputc(ob, '\n'); + BUFPUTSL(ob, "
    \n"); +} + +static int +rndr_image(struct buf *ob, struct buf *link, struct buf *title, struct buf *alt, void *opaque) +{ + if (!link || !link->size) return 0; + BUFPUTSL(ob, "data, link->size); + BUFPUTSL(ob, "\" alt=\""); + if (alt && alt->size) + lus_attr_escape(ob, alt->data, alt->size); + if (title && title->size) { + BUFPUTSL(ob, "\" title=\""); + lus_attr_escape(ob, title->data, title->size); } + BUFPUTSL(ob, "\" />"); + + return 1; +} + +static int +rndr_linebreak(struct buf *ob, void *opaque) +{ + BUFPUTSL(ob, "
    \n"); + return 1; +} + +static int +rndr_raw_html(struct buf *ob, struct buf *text, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + int escape_html = 0; + + if (options->flags & XHTML_SKIP_HTML) + escape_html = 1; + + else if ((options->flags & XHTML_SKIP_STYLE) != 0 && is_html_tag(text, "style")) + escape_html = 1; + + else if ((options->flags & XHTML_SKIP_LINKS) != 0 && is_html_tag(text, "a")) + escape_html = 1; + + else if ((options->flags & XHTML_SKIP_IMAGES) != 0 && is_html_tag(text, "img")) + escape_html = 1; + + + if (escape_html) + lus_attr_escape(ob, text->data, text->size); + else + bufput(ob, text->data, text->size); + + return 1; +} + +static void +rndr_table(struct buf *ob, struct buf *header, struct buf *body, void *opaque) +{ + if (ob->size) bufputc(ob, '\n'); + BUFPUTSL(ob, "\n"); + if (header) + bufput(ob, header->data, header->size); + BUFPUTSL(ob, "\n\n"); + if (body) + bufput(ob, body->data, body->size); + BUFPUTSL(ob, "\n
    "); +} + +static void +rndr_tablerow(struct buf *ob, struct buf *text, void *opaque) +{ + if (ob->size) bufputc(ob, '\n'); + BUFPUTSL(ob, "\n"); + if (text) + bufput(ob, text->data, text->size); + BUFPUTSL(ob, "\n"); +} + +static void +rndr_tablecell(struct buf *ob, struct buf *text, int align, void *opaque) +{ + if (ob->size) bufputc(ob, '\n'); + switch (align) { + case MKD_TABLE_ALIGN_L: + BUFPUTSL(ob, ""); + break; + + case MKD_TABLE_ALIGN_R: + BUFPUTSL(ob, ""); + break; + + case MKD_TABLE_ALIGN_CENTER: + BUFPUTSL(ob, ""); + break; + + default: + BUFPUTSL(ob, ""); + break; + } + + if (text) + bufput(ob, text->data, text->size); + BUFPUTSL(ob, ""); +} + +static struct { + char c0; + const char *pattern; + const char *entity; + int skip; +} smartypants_subs[] = { + { '\'', "'s>", "’", 0 }, + { '\'', "'t>", "’", 0 }, + { '\'', "'re>", "’", 0 }, + { '\'', "'ll>", "’", 0 }, + { '\'', "'ve>", "’", 0 }, + { '\'', "'m>", "’", 0 }, + { '\'', "'d>", "’", 0 }, + { '-', "--", "—", 1 }, + { '-', "<->", "–", 0 }, + { '.', "...", "…", 2 }, + { '.', ". . 
.", "…", 4 }, + { '(', "(c)", "©", 2 }, + { '(', "(r)", "®", 2 }, + { '(', "(tm)", "™", 3 }, + { '3', "<3/4>", "¾", 2 }, + { '3', "<3/4ths>", "¾", 2 }, + { '1', "<1/2>", "½", 2 }, + { '1', "<1/4>", "¼", 2 }, + { '1', "<1/4th>", "¼", 2 }, + { '&', "�", 0, 3 }, +}; + +#define SUBS_COUNT (sizeof(smartypants_subs) / sizeof(smartypants_subs[0])) + +static inline int +word_boundary(char c) +{ + return isspace(c) || ispunct(c); +} + +static int +smartypants_cmpsub(const struct buf *buf, size_t start, const char *prefix) +{ + size_t i; + + if (prefix[0] == '<') { + if (start == 0 || !word_boundary(buf->data[start - 1])) + return 0; + + prefix++; + } + + for (i = start; i < buf->size; ++i) { + char c, p; + + c = tolower(buf->data[i]); + p = *prefix++; + + if (p == 0) + return 1; + + if (p == '>') + return word_boundary(c); + + if (c != p) + return 0; + } + + return (*prefix == '>'); +} + +static int +smartypants_quotes(struct buf *ob, struct buf *text, size_t i, int is_open) +{ + char ent[8]; + + if (is_open && i + 1 < text->size && !word_boundary(text->data[i + 1])) + return 0; + + if (!is_open && i > 0 && !word_boundary(text->data[i - 1])) + return 0; + + snprintf(ent, sizeof(ent), "&%c%cquo;", + is_open ? 'r' : 'l', + text->data[i] == '\'' ? 
's' : 'd'); + + bufputs(ob, ent); + return 1; +} + +static void +rndr_normal_text(struct buf *ob, struct buf *text, void *opaque) +{ + if (text) + lus_attr_escape(ob, text->data, text->size); +} + +static void +rndr_smartypants(struct buf *ob, struct buf *text, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + size_t i; + + if (!text) + return; + + for (i = 0; i < text->size; ++i) { + size_t sub; + char c = text->data[i]; + + for (sub = 0; sub < SUBS_COUNT; ++sub) { + if (c == smartypants_subs[sub].c0 && + smartypants_cmpsub(text, i, smartypants_subs[sub].pattern)) { + + if (smartypants_subs[sub].entity) + bufputs(ob, smartypants_subs[sub].entity); + + i += smartypants_subs[sub].skip; + break; + } + } + + if (sub < SUBS_COUNT) + continue; + + switch (c) { + case '\"': + if (smartypants_quotes(ob, text, i, options->quotes.in_dquote)) { + options->quotes.in_dquote = !options->quotes.in_dquote; + continue; + } + break; + + case '\'': + if (smartypants_quotes(ob, text, i, options->quotes.in_squote)) { + options->quotes.in_squote = !options->quotes.in_squote; + continue; + } + break; + } + + /* + * Copy raw character + */ + put_scaped_char(ob, c); + } +} + +static void +toc_header(struct buf *ob, struct buf *text, int level, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + + if (level > options->toc_data.current_level) { + if (level > 1) + BUFPUTSL(ob, "
  • "); + BUFPUTSL(ob, "
      \n"); + } + + if (level < options->toc_data.current_level) { + BUFPUTSL(ob, "
    "); + if (options->toc_data.current_level > 1) + BUFPUTSL(ob, "
  • \n"); + } + + options->toc_data.current_level = level; + + bufprintf(ob, "
  • ", options->toc_data.header_count++); + if (text) + bufput(ob, text->data, text->size); + BUFPUTSL(ob, "
  • \n"); +} + +static void +toc_finalize(struct buf *ob, void *opaque) +{ + struct xhtml_renderopt *options = opaque; + + while (options->toc_data.current_level > 1) { + BUFPUTSL(ob, "
  • \n"); + options->toc_data.current_level--; + } + + if (options->toc_data.current_level) + BUFPUTSL(ob, "\n"); +} + +void +ups_toc_renderer(struct mkd_renderer *renderer) +{ + static const struct mkd_renderer toc_render = { + NULL, + NULL, + NULL, + toc_header, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + + NULL, + rndr_codespan, + rndr_double_emphasis, + rndr_emphasis, + NULL, + NULL, + NULL, + NULL, + rndr_triple_emphasis, + rndr_strikethrough, + + NULL, + NULL, + + NULL, + toc_finalize, + + NULL + }; + + struct xhtml_renderopt *options; + options = calloc(1, sizeof(struct xhtml_renderopt)); + options->flags = XHTML_TOC; + + memcpy(renderer, &toc_render, sizeof(struct mkd_renderer)); + renderer->opaque = options; +} + +void +ups_xhtml_renderer(struct mkd_renderer *renderer, unsigned int render_flags) +{ + static const struct mkd_renderer renderer_default = { + rndr_blockcode, + rndr_blockquote, + rndr_raw_block, + rndr_header, + rndr_hrule, + rndr_list, + rndr_listitem, + rndr_paragraph, + rndr_table, + rndr_tablerow, + rndr_tablecell, + + rndr_autolink, + rndr_codespan, + rndr_double_emphasis, + rndr_emphasis, + rndr_image, + rndr_linebreak, + rndr_link, + rndr_raw_html, + rndr_triple_emphasis, + rndr_strikethrough, + + NULL, + rndr_normal_text, + + NULL, + NULL, + + NULL + }; + + struct xhtml_renderopt *options; + options = calloc(1, sizeof(struct xhtml_renderopt)); + options->flags = render_flags; + + memcpy(renderer, &renderer_default, sizeof(struct mkd_renderer)); + renderer->opaque = options; + + if (render_flags & XHTML_SKIP_IMAGES) + renderer->image = NULL; + + if (render_flags & XHTML_SKIP_LINKS) { + renderer->link = NULL; + renderer->autolink = NULL; + } + + if (render_flags & XHTML_SMARTYPANTS) + renderer->normal_text = rndr_smartypants; + + if (render_flags & XHTML_GITHUB_BLOCKCODE) + renderer->blockcode = rndr_blockcode_github; + + +} + +void +ups_free_renderer(struct mkd_renderer *renderer) +{ + free(renderer->opaque); +} + diff 
--git a/src/xhtml.h b/src/xhtml.h new file mode 100644 index 0000000..dc2ae6c --- /dev/null +++ b/src/xhtml.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2011, Vicent Marti + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef UPSKIRT_XHTML_H +#define UPSKIRT_XHTML_H + +typedef enum { + XHTML_SKIP_HTML = (1 << 0), + XHTML_SKIP_STYLE = (1 << 1), + XHTML_SKIP_IMAGES = (1 << 2), + XHTML_SKIP_LINKS = (1 << 3), + XHTML_SMARTYPANTS = (1 << 4), + XHTML_EXPAND_TABS = (1 << 5), + XHTML_SAFELINK = (1 << 7), + XHTML_TOC = (1 << 8), + XHTML_HARD_WRAP = (1 << 9), + XHTML_GITHUB_BLOCKCODE = (1 << 10), +} render_mode; + +extern void +ups_xhtml_renderer(struct mkd_renderer *renderer, unsigned int render_flags); + +extern void +ups_toc_renderer(struct mkd_renderer *renderer); + +extern void +ups_free_renderer(struct mkd_renderer *renderer); + +#endif + diff --git a/test/markdom-test.js b/test/markdom-test.js new file mode 100644 index 0000000..5357a48 --- /dev/null +++ b/test/markdom-test.js @@ -0,0 +1,88 @@ +var path = require('path'), + assert = require('assert'), + vows = require('vows'); + +require.paths.unshift(path.join(__dirname, '..', 'lib')); + +var markdom = require('markdom'); +var flickr = require('flickr-reflection'); +var fs = require('fs'); +var _ = require('underscore'); + +var 
flickrOptions = { + key: '379e503c91f1a375a1e8d1cc4b319caf', + secret: '6f933c11df597e28', + apis: ['photos'] +}; + +// ************************************************************************************************* + +vows.describe('markdom basics').addBatch({ + 'blah': { + topic: 'blah', + + testNothing: function() { + fs.readFile('/Users/joehewitt/Code/tests/markdom.md', 'utf8', function(err, body) { + var transformer = new FlickrTransformer(); + var html = markdom.toDOM(body, {}, transformer); + transformer.loadImages(function() { + console.log(html.toHTML()); + }); + // console.log(markdom.toHTML(body)); + }); + } + }, +}).export(module); + +// ************************************************************************************************* + +function FlickrTransformer() { + this.flickrImages = []; +} + +FlickrTransformer.prototype = _.extend(new markdom.NodeTransformer(), { + rePhotoURL: /http:\/\/(.*?)\.flickr\.com\/photos\/(.*?)\/(.*?)\//, + + header: function(node) { + if (node.level == 1) { + return new markdom.nodeTypes.Text(''); + } else { + return node; + } + }, + + image: function(node) { + var flickrURL = node.url; + var m = this.rePhotoURL.exec(flickrURL); + if (m) { + var photoId = m[3]; + + var newImage = new markdom.nodeTypes.Image(''); + this.flickrImages[photoId] = newImage; + var link = new markdom.nodeTypes.Link(flickrURL, null, newImage); + return link; + } + }, + + loadImages: function(cb) { + flickr.connect(flickrOptions, _.bind(function(err, api) { + if (err) throw err; + + for (var photoId in this.flickrImages) { + api.photos.getSizes({photo_id: photoId}, _.bind(function(err, data) { + if (err) throw err; + + for (var i = 0; i < data.sizes.size.length; ++i) { + var sizeInfo = data.sizes.size[i]; + if (sizeInfo.width > 600) { + this.flickrImages[photoId].url = sizeInfo.source; + cb(); + break; + } + } + }, this)); + } + }, this)); + + } +}); diff --git a/wscript b/wscript new file mode 100644 index 0000000..f89153f --- /dev/null +++ 
b/wscript @@ -0,0 +1,29 @@ + +srcdir = '.' +# import sys +# blddir = 'build/%s' % sys.platform +blddir = 'build' +VERSION = '0.0.1' + +def set_options(opt): + opt.tool_options("compiler_cc") + opt.tool_options('compiler_cxx') + +def configure(conf): + conf.check_tool("compiler_cc") + conf.check_tool("compiler_cxx") + conf.check_tool("node_addon") + #conf.env.append_value('CCFLAGS', ['-fstack-protector', '-O', '-g', '-march=native']) + conf.env.append_value('CCFLAGS', ['-O3']) + +def build(bld): + obj = bld.new_task_gen('cxx', 'shlib', 'node_addon', 'cc') + obj.target = '_markdom' + obj.source = """ + src/markdown.c + src/array.c + src/buffer.c + src/xhtml.c + src/dom.c + src/bridge.cc + src/markdom.cc"""