Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

simplify converter

  • Loading branch information...
commit ff1282556b5fb760abad06790e92de5ef7c18604 1 parent 6001ba8
@nightwing nightwing authored
Showing with 153 additions and 274 deletions.
  1. +16 −34 tool/mode_highlight_rules.tmpl.js
  2. +137 −240 tool/tmlanguage.js
View
50 tool/mode_highlight_rules.tmpl.js
@@ -3,7 +3,7 @@
*
* Copyright (c) 2012, Ajax.org B.V.
* All rights reserved.
- *
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
@@ -14,7 +14,7 @@
* * Neither the name of Ajax.org B.V. nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -26,35 +26,19 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- *
- * Contributor(s):
- *
- *
- *
* ***** END LICENSE BLOCK ***** */
-/*
- THIS FILE WAS AUTOGENERATED BY %name% (UUID: %uuid%) */
-
-/*******
-
- THIS FILE MIGHT NOT BE PERFECT, PARTICULARLY:
-
- IN DECIDING STATES TO TRANSITION TO,
-
- IGNORING WHITESPACE,
-
- IGNORING GROUPS WITH ?:,
-
- EXTENDING EXISTING MODES,
-
- GATHERING KEYWORDS, OR
-
- RULE PREFERENCE ORDER.
-
- ...But it's a good start from an existing *.tmlanguage file.
-
-*******/
+/* THIS FILE WAS AUTOGENERATED FROM %name% (UUID: %uuid%) */
+/****************************************************************
+ * IT MIGHT NOT BE PERFECT, PARTICULARLY: *
+ * IN DECIDING STATES TO TRANSITION TO, *
+ * IGNORING WHITESPACE, *
+ * IGNORING GROUPS WITH ?:, *
+ * EXTENDING EXISTING MODES, *
+ * GATHERING KEYWORDS, OR *
+ * DECIDING WHEN TO USE PUSH. *
+ * ...But it's a good start from an existing *.tmlanguage file. *
+ ****************************************************************/
define(function(require, exports, module) {
"use strict";
@@ -63,14 +47,12 @@ var oop = require("../lib/oop");
var TextHighlightRules = require("./text_highlight_rules").TextHighlightRules;
var %language%HighlightRules = function() {
-
// regexp must not have capturing parentheses. Use (?:) instead.
// regexps are ordered -> the first match is used
- this.$rules =
- %languageTokens%
-
- %repositoryRules%
+ this.$rules = %languageTokens%
+
+ this.normalizeRules();
};
oop.inherits(%language%HighlightRules, TextHighlightRules);
View
377 tool/tmlanguage.js
@@ -1,294 +1,191 @@
var fs = require("fs");
var util = require("util");
-
-// for tracking token states
-var startState = { start: [] }, statesObj = { };
+var lib = require("./lib");
+var parseLanguage = lib.parsePlist;
-var args = process.argv.splice(2);
-var tmLanguageFile = args[0];
-var devMode = args[1];
-
-var parseString = require("plist").parseString;
-function parseLanguage(languageXml, callback) {
- parseString(languageXml, function(_, language) {
- callback(language[0])
- });
-}
-
function logDebug(string, obj) {
- console.log(string, obj);
+ console.log(string, obj);
}
-String.prototype.splice = function( idx, rem, s ) {
- return (this.slice(0,idx) + s + this.slice(idx + Math.abs(rem)));
-};
-String.prototype.replaceAt = function (index, char) {
- return this.substr(0, index) + char + this.substr(index + 1);
-}
+// tmLanguage processor
-function keyCount(obj) {
- return Object.keys(obj).length;
-}
-
-/**
-
-Scrubbing is sometimes necessary, but there appears to be no
-automated way to do it...
-
-
-function cleanSingleCapture(match) {
- // if there's a single "( )", screw that and make it "(?: )"
- return match.replace("(", "(?:");
+// for tracking token states
+var states = {start: []};
+var stateName = "start";
+
+function processRules(rules){
+ if (rules.patterns)
+ states.start = processPatterns(rules.patterns);
+ if (rules.repository)
+ processRepository(rules.repository);
+ return states;
}
+function processRepository(r) {
+ for (var key in r) {
+ var p = r[key];
+ if (p.begin)
+ var stateObj = [processPattern(r[key])];
+ else if (p.patterns && !p.repository)
+ var stateObj = processPatterns(p.patterns);
+ else
+ var stateObj = [processPattern(r[key])];
-function cleanMultiCapture(match) {
- // regexp will be a quoted string, so turn "\" into "\\"
- var spaceFinderRegExp = new RegExp("\\\\s.| .", "g");
- var m;
- /*
- essentially turns things like
-
- \\s*(mixin) ([\\w\\-]+)\\s*(\\()
-
- into
-
- (\\s*mixin)( [\\w\\-]+)(\\s*\\()
-
- so that mode parser stops complaining
-
- while ((m = spaceFinderRegExp.exec(match)) != null) {
- var idx = m.index;
- var nextParenIdx = match.indexOf("(", idx);
-
- if (nextParenIdx > idx) {
- match = match.splice(idx, 0, "(").replaceAt(nextParenIdx + 1, '');
+ if (stateObj)
+ states["#" + key] = stateObj;
}
- }
-
- //console.log("match", match);
- return match;
}
-*/
-
-// stupid yet necessary function, to transform JSON id comments into real comments
-function restoreComments(objStr) {
- return objStr.replace(/"\s+(\/\/.+)",/g, "\$1").replace(/ \/\/ ERROR/g, '", // ERROR');
+function processPatterns(pl) {
+ return pl.map(processPattern);
}
+function processPattern(p) {
-function checkForLookBehind(str) {
- var lookbehindRegExp = new RegExp("\\?<[=|!]", "g");
- return lookbehindRegExp.test(str) ? str + " // ERROR: This contains a lookbehind, which JS does not support :(" : str;
-}
-
-function removeXFlag(str) {
- if (str.slice(0,4) == "(?x)") {
- str = str.replace(/\\.|\[([^\]\\]|\\.)*?\]|\s+|(?:#[^\n]*)/g, function(s) {
- if (s[0] == "[")
- return s;
- if (s[0] == "\\")
- return /[#\s]/.test(s[1]) ? s[1] : s;
- return "";
- });
- }
- return str;
-}
-
-function transformRegExp(str) {
- str = removeXFlag(str);
- str = checkForLookBehind(str);
- return str;
-}
-
-function assembleStateObjs(strState, pattern) {
- var patterns = pattern.patterns;
- var stateObj = {};
- var tokenElem = [];
-
- if (patterns) {
- for (var p in patterns) {
- stateObj = {}; // this is apparently necessary
-
- if (patterns[p].include) {
- stateObj.include = patterns[p].include;
- }
- else {
- stateObj.token = patterns[p].name;
- stateObj.regex = transformRegExp(patterns[p].match);
- }
- statesObj[strState].push(stateObj);
+ if (p.end == "(?!\\G)" && p.patterns && p.patterns.length == 1) {
+ var rule = processPattern(p.patterns[0]);
}
+ else if (p.begin && p.end) {
+ var rule = simpleRule(p.begin, p.name, p.beginCaptures || p.captures)
+
+ var next = processPatterns(p.patterns || []);
+ var endRule = simpleRule(p.end, p.name, p.endCaptures || p.captures);
+ endRule.next = "pop";
+ next.push(endRule);
+
+ if (p.name || p.contentName)
+ next.push({defaultToken: p.name || p.contentName});
- stateObj = {};
- stateObj.token = "TODO";
- stateObj.regex = transformRegExp(pattern.end);
- stateObj.next = "start";
- }
- else {
- stateObj.token = "TODO";
- stateObj.regex = transformRegExp(pattern.end);
- stateObj.next = "start";
-
- statesObj[strState].push(stateObj);
-
- stateObj = {};
- stateObj.token = "TODO";
- stateObj.regex = ".+";
- stateObj.next = strState;
- }
-
- return stateObj;
-}
-
-function extractPatterns(patterns) {
- var state = 0;
- patterns.forEach(function(pattern) {
- state++;
- var i = 1;
- var tokenArray = [];
- var tokenObj = { token: [] };
- var stateObj = {};
-
- if (pattern.comment) {
- startState.start.push(" // " + pattern.comment.trim());
+ rule.push = next;
}
-
- // it needs a state transition
- if (pattern.begin && pattern.end) {
- var strState = "state_" + state;
- if ( pattern.beginCaptures === undefined && pattern.endCaptures === undefined) {
- tokenObj.token.push(pattern.captures);
- }
- else if (pattern.beginCaptures) {
- tokenObj.token.push(pattern.beginCaptures);
- }
- else if (pattern.endCaptures) {
- tokenObj.token.push(pattern.endCaptures);
- }
-
- if (tokenObj.token === undefined) {
- if (pattern.name)
- tokenObj.token.push(pattern.name);
- else
- logDebug("There's no token name for this state transition", pattern)
- }
-
- if (tokenObj.token === undefined) {
- tokenObj.token.push(pattern.name);
- }
-
- statesObj[strState] = [ ];
- statesObj[strState].push(assembleStateObjs(strState, pattern));
-
- tokenObj.regex = transformRegExp(pattern.begin);
- tokenObj.next = strState;
+ else if (p.match) {
+ var rule = simpleRule(p.match, p.name, p.captures)
}
- else if( ( pattern.begin || pattern.end ) && !( pattern.begin && pattern.end ) ) {
- logDebug("Somehow, there's pattern.begin or pattern.end--but not both?", pattern);
+ else if (p.include) {
+ var rule = {include: p.include};
}
+
+ if (p.comment)
+ rule.comment = (rule.comment || "") + p.comment;
- else if (pattern.captures) {
- tokenObj.token.push([]);
- tokenObj.token.push(pattern.captures);
- tokenObj.regex = transformRegExp(pattern.match);
+ if (p.repository)
+ processRepository(p.repository);
+ return rule;
+}
+function simpleRule(regex, name, captures) {
+ name = name || "text";
+ var rule = {};
+
+ regex = transformRegExp(regex, rule);
+ if (captures) {
+ var tokenArray = [];
+ Object.keys(captures).forEach(function(x){
+ tokenArray[x] = captures[x] && captures[x].name;
+ });
+ if (tokenArray.length == 1) {
+ name = tokenArray[0];
+ } else {
+ for (var i = 0; i < tokenArray.length; i++)
+ if (!tokenArray[i])
+ tokenArray[i] = name;
+ name = tokenArray;
+ rule.todo = "fix grouping";
+ }
}
-
- else if (pattern.match) {
- tokenObj.token.push(pattern.name);
- tokenObj.regex = transformRegExp(pattern.match);
+
+ try {new RegExp(regex);} catch(e) {
+ rule.TODO = "FIXME: regexp doesn't have js equivalent";
}
+ rule.token = name;
+ rule.regex = regex;
+ return rule;
+}
- else if (pattern.include) {
- tokenObj.token.push(pattern.include);
- tokenObj.regex = "";
- }
- else {
- tokenObj.token.push("");
- tokenObj.regex = "";
- logDebug("I've gone through every choice, and have no clue what this is:", pattern);
- }
+// regex transformation
- // sometimes captures have names--not sure when or why
- if (pattern.name) {
- tokenObj.token.push(pattern.name);
+function removeXFlag(str) {
+ if (str && str.slice(0,4) == "(?x)") {
+ str = str.replace(/\\.|\[([^\]\\]|\\.)*?\]|\s+|(?:#[^\n]*)/g, function(s) {
+ if (s[0] == "[")
+ return s;
+ if (s[0] == "\\")
+ return /[#\s]/.test(s[1]) ? s[1] : s;
+ return "";
+ });
}
+ return str;
+}
- startState.start.push(tokenObj);
- });
-
- var resultingObj = startState;
-
- for (var state in statesObj) {
- resultingObj[state] = statesObj[state];
- }
-
- return restoreComments(JSON.stringify(resultingObj, null, " "));
+function transformRegExp(str, rule) {
+ str = removeXFlag(str);
+ str = str.replace(/\\n(?!\?).?/g, '$'); // replace newlines with $, except when followed by ?
+ if (/\(\?[i]\:/g.test(str)) {
+ str = str.replace(/\(\?[ims\-]\:/g, "(?:"); // checkForInvariantRegex
+ rule && (rule.caseInsensitive = true);
+ }
+ return str;
}
-function fillTemplate(template, replacements) {
- return template.replace(/%(.+?)%/g, function(str, m) {
- return replacements[m] || "";
- });
+//
+function extractPatterns(tmRules) {
+ var patterns = processRules(tmRules);
+ return lib.restoreJSONComments(lib.formatJSON(patterns, " "));
+
}
+
+
+// cli stuff
var modeTemplate = fs.readFileSync(__dirname + "/mode.tmpl.js", "utf8");
var modeHighlightTemplate = fs.readFileSync(__dirname + "/mode_highlight_rules.tmpl.js", "utf8");
-function convertLanguage(name) {
- var tmLanguage = fs.readFileSync(__dirname + "/" + name, "utf8");
+function convertLanguageFile(name) {
+ var tmLanguage = fs.readFileSync(process.cwd() + "/" + name, "utf8");
parseLanguage(tmLanguage, function(language) {
- var languageHighlightFilename = language.name.replace(/[-_]/g, "").toLowerCase();
- var languageNameSanitized = language.name.replace(/-/g, "");
-
- var languageHighlightFile = __dirname + "/../lib/ace/mode/" + languageHighlightFilename + "_highlight_rules.js";
- var languageModeFile = __dirname + "/../lib/ace/mode/" + languageHighlightFilename + ".js";
-
- console.log("Converting " + name + " to " + languageHighlightFile);
+ var languageHighlightFilename = language.name.replace(/[-_]/g, "").toLowerCase();
+ var languageNameSanitized = language.name.replace(/-/g, "");
+
+ var languageHighlightFile = __dirname + "/../lib/ace/mode/" + languageHighlightFilename + "_highlight_rules.js";
+ var languageModeFile = __dirname + "/../lib/ace/mode/" + languageHighlightFilename + ".js";
+
+ console.log("Converting " + name + " to " + languageHighlightFile);
if (devMode) {
- console.log(util.inspect(language.patterns, false, 4));
- console.log(util.inspect(language.repository, false, 4));
+ console.log(util.inspect(language.patterns, false, 4));
+ console.log(util.inspect(language.repository, false, 4));
}
- var languageMode = fillTemplate(modeTemplate, {
- language: languageNameSanitized,
- languageHighlightFilename: languageHighlightFilename
+ var languageMode = lib.fillTemplate(modeTemplate, {
+ language: languageNameSanitized,
+ languageHighlightFilename: languageHighlightFilename
});
- var patterns = extractPatterns(language.patterns);
- var repository = {};
-
- if (language.repository) {
- for (var r in language.repository) {
- repository[r] = language.repository[r];
- }
- repository = restoreComments(JSON.stringify(repository, null, " "));
- }
+ var patterns = extractPatterns(language);
- var languageHighlightRules = fillTemplate(modeHighlightTemplate, {
- language: languageNameSanitized,
- languageTokens: patterns,
- repositoryRules: "/*** START REPOSITORY RULES\n" + repository + "\nEND REPOSITORY RULES ***/",
- uuid: language.uuid,
- name: name
+ var languageHighlightRules = lib.fillTemplate(modeHighlightTemplate, {
+ language: languageNameSanitized,
+ languageTokens: patterns.trim(),
+ uuid: language.uuid,
+ name: name
});
if (devMode) {
- console.log(languageMode)
- console.log(languageHighlightRules)
- console.log("Not writing, 'cause we're in dev mode, baby.");
+ console.log(languageMode);
+ console.log(languageHighlightRules);
+ console.log("Not writing, 'cause we're in dev mode, baby.");
}
else {
- fs.writeFileSync(languageHighlightFile, languageHighlightRules);
- fs.writeFileSync(languageModeFile, languageMode);
+ fs.writeFileSync(languageHighlightFile, languageHighlightRules);
+ fs.writeFileSync(languageModeFile, languageMode);
}
});
}
+var args = process.argv.splice(2);
+var tmLanguageFile = args[0];
+var devMode = args[1];
if (tmLanguageFile === undefined) {
- console.error("Please pass in a language file via the command line.");
- process.exit(1);
+ console.error("Please pass in a language file via the command line.");
+ process.exit(1);
}
-convertLanguage(tmLanguageFile);
+convertLanguageFile(tmLanguageFile);
Please sign in to comment.
Something went wrong with that request. Please try again.