diff --git a/.gitignore b/.gitignore
index d18d94b..5657c42 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
-
-*.DS_Store
-xmlparser.sublime-project
-
-xmlparser.sublime-workspace
+
+*.DS_Store
+xmlparser.sublime-project
+
+xmlparser.sublime-workspace
diff --git a/README.md b/README.md
index 057a3c2..06f4ab3 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,3 @@
-用纯js实现xml的解析, 纯的!
-
+用纯js实现xml的解析, 纯的!
+
 不推荐用于浏览器js, 只在不能使用dom的方法的情况下使用, 例如nodejs环境中
\ No newline at end of file
diff --git a/elements.js b/elements.js
new file mode 100644
index 0000000..65b01fc
--- /dev/null
+++ b/elements.js
@@ -0,0 +1,313 @@
+
+;Z.$package('imatlas', function(z, undefined){
+
+    /**
+     * Base class for every node of the parsed tree.
+     * @class
+     * @name Node
+     */
+    var Node = this.Node = z.$class({
+        /**
+         * Resets the node to an empty, detached state.
+         */
+        init: function(){
+            this.nodeType = null;
+            this.nodeName = null;
+            this.nodeValue = null;
+            this.parentNode = null;
+            this.attributes = {};
+            this.childNodes = [];
+        },
+        /**
+         * @return {Boolean} true when the attributes map is not empty
+         */
+        hasAttributes: function(){
+            return !z.isEmpty(this.attributes);
+        },
+        /**
+         * @return {Boolean} true when the node has at least one child
+         */
+        hasChildNodes: function(){
+            return !!this.childNodes.length;
+        },
+        /**
+         * Appends node as the last child unless it already is a child.
+         * @param {Node} node
+         */
+        appendChild: function(node){
+            if(!this.contains(node)){
+                node.parentNode = this;
+                this.childNodes.push(node);
+            }
+        },
+        /**
+         * Detaches a child; a node that is not a child is left untouched.
+         * @param {Node} node
+         */
+        removeChild: function(node){
+            //never clear the parentNode of a node owned by another parent
+            if(!this.contains(node)){
+                return;
+            }
+            //NOTE(review): z.array.removeChild is not defined in this patch - confirm it exists
+            z.array.removeChild(this.childNodes, node);
+            node.parentNode = null;
+        },
+        /**
+         * @param {Node} node
+         * @return {Boolean} whether node is listed in childNodes (deeper descendants are not searched)
+         */
+        contains: function(node){
+            return z.array.contains(this.childNodes, node);
+        },
+        /**
+         * @return {Node} whatever z.duplicate produces for this node
+         */
+        cloneNode: function(){
+            return z.duplicate(this);
+        },
+        //TODO insertBefore, insertAfter, replaceChild, normalize
+        /**
+         * Builds a plain-object snapshot: own primitive fields, the attributes
+         * map (shared by reference) and the children, recursively. parentNode
+         * matches none of the branches, so the result contains no cycles.
+         * @return {Object}
+         */
+        toObject: function(){
+            var obj = {}, val;
+            for(var i in this){
+                if(!this.hasOwnProperty(i)){
+                    continue;
+                }
+                val = this[i];
+                if(z.isString(val) || z.isBoolean(val) || z.isNumber(val) || z.isUndefined(val)){
+                    obj[i] = val;
+                }else if(z.isObject(val) && i === 'attributes'){
+                    obj[i] = val;
+                }else if(z.isArray(val) && i === 'childNodes'){
+                    obj[i] = [];
+                    //index loop: for-in would also visit non-index properties of the array
+                    for(var j = 0; j < val.length; j++){
+                        obj[i].push(val[j].toObject());
+                    }
+                }
+            }
+            return obj;
+        },
+        /**
+         * @return {String} JSON text of toObject()
+         */
+        toJSONString: function(){
+            var str = JSON.stringify(this.toObject());
+            return str;
+        },
+        /**
+         * TODO not implemented yet, returns undefined
+         */
+        toXMLString: function(){
+
+        },
+        toString: function(){
+            return this.toJSONString();
+        }
+    });
+
+    /*
+     * Node type constants
+     */
+    Node.ELEMENT_NODE = 1;
+    Node.ATTRIBUTE_NODE = 2;
+    Node.TEXT_NODE = 3;
+    Node.CDATA_SECTION_NODE = 4;
+    Node.COMMENT_NODE = 8;
+    Node.DOCUMENT_NODE = 9;
+    Node.DOCUMENT_TYPE_NODE = 10;
+    Node.META_NODE = 12;
+
+    /**
+     * @class
+     * @name Element
+     */
+    var Element = this.Element = z.$class({extend: Node }, {
+        /**
+         * @param {String} tagName used as both tagName and nodeName
+         */
+        init: function(tagName){
+            this.tagName = tagName;
+            this.nodeType = Node.ELEMENT_NODE;
+            this.nodeName = tagName;
+        },
+        getAttribute: function(key){
+            return this.attributes[key];
+        },
+        setAttribute: function(key, value){
+            this.attributes[key] = value;
+        },
+        /**
+         * @param {String} key
+         * @return {Boolean} true when the attribute is set, even to '' or 0
+         */
+        hasAttribute: function(key){
+            //a truthiness test would report empty-valued attributes as missing
+            return Object.prototype.hasOwnProperty.call(this.attributes, key);
+        },
+        removeAttribute: function(key){
+            delete this.attributes[key];
+        },
+        getElementsByTagName: function(tag){
+            //TODO breadth-first or depth-first; not implemented yet, returns undefined
+        },
+        //============= extension methods ========================
+        /**
+         * Shorthand for getAttribute and setAttribute
+         * @param {String} key
+         * @param {String} value @optional
+         * @return {String} the attribute value when called with one argument, nothing when setting
+         */
+        attr: function(key, value){
+            if(arguments.length === 2){
+                this.setAttribute(key, value);
+            }else{
+                return this.getAttribute(key);
+            }
+        },
+        /**
+         * Shorthand for the getElement* lookups; only simple one-level selectors are supported
+         * @param {String} selector
+         * @return {Array}, {Node}
+         * @example
+         * find('#id');
+         * find('.class');
+         * find('tag');
+         * find('@attr=value');
+         */
+        find: function(selector){
+            var s = selector.charAt(0);
+            var value = selector.substring(1);
+            var attr;
+            if(s === '#'){
+                attr = 'id';
+            }else if(s === '.'){
+                attr = 'class';
+            }else if(s === '@'){
+                value = value.split('=');
+                attr = value[0];
+                value = value[1];
+            }
+            if(attr){//attr
+                //NOTE(review): presumably keeps children whose attr equals value - confirm z.array.filter
+                return z.array.filter(this.childNodes, attr, value);
+            }else{//tag
+                return this.getElementsByTagName(selector);
+            }
+        }
+    });
+
+    /**
+     * @class
+     * @name Text
+     */
+    var Text = this.Text = z.$class({extend: Node}, {
+        init: function(value){
+            this.nodeValue = value;
+            this.nodeType = Node.TEXT_NODE;
+        },
+        toObject: function(){
+            var obj = {
+                nodeType: this.nodeType,
+                nodeValue: this.nodeValue
+            };
+            return obj;
+        }
+    });
+
+    /**
+     * @class
+     * @name Comment
+     */
+    var Comment = this.Comment = z.$class({extend: Node}, {
+        init: function(value){
+            this.nodeValue = value;
+            this.nodeType = Node.COMMENT_NODE;
+        },
+        toObject: function(){
+            var obj = {
+                nodeType: this.nodeType,
+                nodeValue: this.nodeValue
+            };
+            return obj;
+        }
+    });
+
+    /**
+     * @class
+     * @name CDATA
+     */
+    var CDATA = this.CDATA = z.$class({extend: Node}, {
+        init: function(value){
+            this.nodeValue = value;
+            this.nodeType = Node.CDATA_SECTION_NODE;
+        },
+        toObject: function(){
+            var obj = {
+                nodeType: this.nodeType,
+                nodeValue: this.nodeValue
+            };
+            return obj;
+        }
+    });
+
+    /**
+     * Document definition
+     * @class
+     * @name Document
+     */
+    var Document = this.Document = z.$class({extend: Element}, {
+        init: function(){
+            this.nodeName = 'document';
+            this.nodeType = Node.DOCUMENT_NODE;
+            delete this.tagName;
+        }
+    });
+
+    /**
+     * @class
+     * @name DocumentType
+     */
+    var DocumentType = this.DocumentType = z.$class({extend: Node}, {
+        init: function(value){
+            this.nodeName = value;
+            this.nodeType = Node.DOCUMENT_TYPE_NODE;
+        },
+        toObject: function(){
+            var obj = {
+                nodeName: this.nodeName,
+                nodeType: this.nodeType,
+                nodeValue: this.nodeValue
+            };
+            return obj;
+        }
+    });
+
+    /**
+     * Meta node, typed as Node.META_NODE
+     * @class
+     * @name XMLMetaNode
+     */
+    var XMLMetaNode = this.XMLMetaNode = z.$class({extend: Node}, {
+        init: function(value){
+            this.nodeName = value;
+            //META_NODE, not DOCUMENT_TYPE_NODE, so meta nodes can be told apart from doctypes
+            this.nodeType = Node.META_NODE;
+        },
+        toObject: function(){
+            var obj = {
+                nodeName: this.nodeName,
+                nodeType: this.nodeType,
+                nodeValue: this.nodeValue
+            };
+            return obj;
+        }
+    });
+
+});
\ No newline at end of file
diff --git a/interpreter.js b/interpreter.js
new file mode 100644
index 0000000..f61afb6
--- /dev/null
+++ b/interpreter.js
@@ -0,0 +1,171 @@
+
+;Z.$package('imatlas', function(z, undefined){
+
+    var MATCH_NONE = 0;//no token starts with the text
+    var MATCH_EXACTLY = 1;//the text equals a token and no other token starts with it
+    var MATCH_POLYSEMY = 2;//the text is a proper prefix of a token, or several tokens start with it
+
+    function isArray (obj){
+        return Object.prototype.toString.call(obj) === '[object Array]';
+    }
+
+    /**
+     * Generic text tokenizer; can be used to parse XML, JSON, etc.
+     * @param {Array} tokenArray the token strings to recognise
+     */
+    function Interpreter(tokenArray){
+        this.tokenArray = tokenArray;
+    }
+
+    Interpreter.prototype = {
+        /**
+         * Strict test whether a character or string is a token
+         * @param {String} text
+         * @param {Array} extra @optional when given, a token only counts if it is also listed here
+         * @return {Boolean}
+         */
+        isToken: function(text, extra){
+            var match = this.tokenArray.indexOf(text) > -1;
+            if(match && extra){
+                return extra.indexOf(text) > -1;
+            }
+            return match;
+        },
+        /**
+         * Classifies a character or string against the token list
+         * 1: it is a token and no other token starts with it, returns MATCH_EXACTLY
+         * 2: it is the beginning of one or more longer tokens, returns MATCH_POLYSEMY
+         * 3: no token starts with it, returns MATCH_NONE
+         * @param {String} text
+         * @return {Number}
+         */
+        checkToken: function(text){
+            var count = 0;
+            //length-based loop: a falsy entry such as '' must not end the scan early
+            for(var i = 0; i < this.tokenArray.length; i++){
+                if(this.tokenArray[i].indexOf(text) === 0){
+                    count++;
+                }
+                if (count >= 2) {//two candidates are enough to decide, stop scanning
+                    break;
+                }
+            }
+            if(count > 1){
+                return MATCH_POLYSEMY;
+            }
+            if(!count){//0
+                return MATCH_NONE;
+            }
+            if(this.isToken(text)){
+                return MATCH_EXACTLY;
+            }
+            return MATCH_POLYSEMY;
+        },
+        /**
+         * Stores the text to be parsed and rewinds to its start
+         * @param {String} text
+         */
+        prepare: function(text){
+            this.text = text;
+            this.length = text.length;
+            this.pos = 0;
+        },
+
+        /**
+         * @return {Number} index of the next unread character
+         */
+        getProgress: function(){
+            return this.pos;
+        },
+
+        /**
+         * Consumes and returns the next piece of the text: either a token
+         * (or the longest token prefix found) or the run of characters before it
+         * @return {String} the piece, or null when nothing is left to read
+         */
+        eat: function(){
+            var ch, m,
+                text,
+                buffer = '',
+                polysemy = false
+            ;
+            while(this.pos < this.length){
+                ch = this.text.charAt(this.pos);
+                if(polysemy){
+                    text = buffer + ch;
+                }else{
+                    text = ch;
+                }
+                m = this.checkToken(text);
+                if(m === MATCH_POLYSEMY){
+                    if(!polysemy && buffer.length){
+                        return buffer;
+                    }
+                    buffer += ch;
+                    this.pos ++;
+                    polysemy = true;
+                }else if(m === MATCH_EXACTLY){
+                    if(polysemy){
+                        this.pos ++;
+                        return text;
+                    }
+                    if(buffer.length){
+                        return buffer;
+                    }
+                    this.pos ++;
+                    return ch;
+                }else{
+                    if(polysemy){
+                        return buffer;
+                    }
+                    buffer += ch;
+                    this.pos ++;
+                }
+            }
+            //input exhausted: hand back what was already consumed instead of dropping it
+            return buffer.length ? buffer : null;
+        },
+
+        /**
+         * Consumes everything up to, but not including, the next stop token
+         * @param {String|Array} until @optional stop token(s); when omitted every token stops the scan
+         * @return {String} '' when the very next piece is a stop token, null at the end of the text
+         */
+        eatUntil: function(until){
+            if(until && !isArray(until)){
+                until = [until];
+            }
+            var pos = this.pos,
+                first = this.eat(),
+                second
+            ;
+            if(first === null){//end of the text, nothing to collect
+                return null;
+            }
+            if(this.isToken(first, until)){
+                this.pos = pos;
+                return '';
+            }
+            pos = this.pos;
+            while((second = this.eat()) !== null && !this.isToken(second, until)){
+                first += second;
+                pos = this.pos;
+            }
+            this.pos = pos;
+            return first;
+        },
+
+        /**
+         * Runs eatUntil() with no stop list and restores the position afterwards
+         * @return {String} same value eatUntil() would return
+         */
+        eatNotMove: function(){
+            var pos = this.pos;
+            var token = this.eatUntil();
+            this.pos = pos;
+            return token;
+        }
+    };
+
+    this.Interpreter = Interpreter;
+
+});
\ No newline at end of file
diff --git a/test.html b/test.html
index e5f1c3e..a7efc9f 100644
--- a/test.html
+++ b/test.html
@@ -1,37 +1,38 @@
- - - - - Test - - - - -
- - - - - + + + + + Test + + + + +