Permalink
Browse files

Initial checkin

  • Loading branch information...
0 parents commit f78afd2fdce380117d44541a497d3efe75318b1e @duaneg committed Aug 26, 2011
Showing with 595 additions and 0 deletions.
  1. +3 −0 .gitignore
  2. +69 −0 README.md
  3. +10 −0 bin/ajs-xgettext
  4. +342 −0 lib/extract.js
  5. +26 −0 package.json
  6. +13 −0 test/i18n.ajs
  7. +132 −0 test/test_extractor.js
3 .gitignore
@@ -0,0 +1,3 @@
+*~
+.*.swp
+lib-cov
69 README.md
@@ -0,0 +1,69 @@
+# ajs-xgettext
+
+A utility for extracting localised text from [AJS](https://github.com/kainosnoema/ajs) templates (and probably other EJS-style templates). Extracted text is stored in the standard PO template format.
+
+PO template file(s) can then be processed by the [GNU gettext](http://www.gnu.org/software/gettext/manual/gettext.html) or other compatible tools. Binary message catalogues can be read and accessed using [node-gettext](https://github.com/andris9/node-gettext).
+
+## Installation
+
+```` bash
+$ npm install ajs-xgettext
+````
+
+## Usage
+
+```` bash
+$ ajs-xgettext template.ajs
+````
+
+By default the extractor looks for translation methods from the gettext module, as described in its [README](https://github.com/andris9/node-gettext/blob/master/README.md), and assumes the gettext module is available in the ````gt```` variable.
+
+The ````--gettext```` option allows you to specify an alternate variable for accessing the gettext module. E.g.:
+
+> ```` bash
+> $ ajs-xgettext --gettext=GetText template.ajs
+> ````
+
+The ````--method```` option allows you to specify alternate gettext object method calls and which of their parameters to extract. E.g.:
+
+> ```` bash
+> $ ajs-xgettext --method=gt --method=dngt:2,3 template.ajs
+> ````
+
+The ````--function```` option allows you to specify top-level translation functions. E.g.:
+
+> ```` bash
+> $ ajs-xgettext --function=_ --function=_s:1,2 template.ajs
+> ````
+
+Please run with ````--help```` to see all available options.
+
+## Hacking
+
+To run the unit tests, install [expresso](http://visionmedia.github.com/expresso/) and run `expresso` (or `npm test`) with no arguments from the top-level project directory; the tests locate their fixtures relative to that directory.
+
+## License
+
+(The MIT License)
+
+Copyright (c) 2011 Duane Griffin <duaneg@dghda.com>
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+'Software'), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
10 bin/ajs-xgettext
@@ -0,0 +1,10 @@
+#!/usr/bin/env node
+
+// Command-line entry point: delegates to the `cli` function exported by lib/extract.
+var extractor = require("../lib/extract");
+
+// Passing undefined for argv makes the extractor fall back to process.argv.slice(2).
+// On failure the error is printed to stderr and the process exits with status 1.
+extractor.cli(undefined, function(err) {
+ if (err) {
+ console.error(err);
+ process.exit(1);
+ }
+});
342 lib/extract.js
@@ -0,0 +1,342 @@
+var extractor = function() {
+ var fs = require("fs");
+ var path = require("path");
+
+ var _ = require("underscore");
+ var ajs = require("ajs");
+ var async = require("async");
+ var Compiler = require("ajs/lib/compiler");
+ var Parser = require("ajs/lib/parser");
+ var Node = Parser.Node;
+
+ var header = [
+ "<% if (header) {%>" +
+ "# Translations template for <%- project.name %>.",
+ "# Copyright (C) <%- year %> <%= copyright %>.",
+ "# This file is distributed under the same license as the <%- project.name %> project.",
+ "# <%- author %>, <%= year %>.",
+ "#",
+ "#, fuzzy",
+ 'msgid ""',
+ 'msgstr ""',
+ '"Project-Id-Version: <%- project.name %> <%= project.version %>\\\\n"',
+ '"Report-Msgid-Bugs-To: <%- bugs %>\\\\n"',
+ '"POT-Creation-Date: <%- now %>\\\\n"',
+ '"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\\\n"',
+ '"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\\\n"',
+ '"Language-Team: LANGUAGE <LL@li.org>\\\\n"',
+ '"MIME-Version: 1.0\\\\n"',
+ '"Content-Type: text/plain; charset=utf-8\\\\n"',
+ '"Content-Transfer-Encoding: 8bit\\\\n"',
+ '"Generated-By: <%- details.name %> <%= details.version %>\\\\n"',
+ "<% }; %>" +
+ "<% translations.forEach(function(trans) { trans.comments.forEach(function(comment) { %>",
+ "#: <%- comment %>" +
+ "<% }); %>",
+ "msgid <%- trans.msgid %>",
+ "msgstr <%- trans.msgstr %>",
+ "<% }); %>",
+ ];
+
+ var details = JSON.parse(fs.readFileSync(path.join(path.dirname(module.filename), "..", "package.json")), "utf-8");
+
+ return {
+
+ // Parse cmd-line arguments and extract i18n messages from one or more templates as specified
+ cmdline: function(argv, cmdline_cb, output) {
+ var usage = "usage: " + details.name + " [options] <template> [<template>...]";
+
+ // Get text functions and which of their arguments are message IDs
+ var gt_funcs = {
+ gettext: [1],
+ dgettext: [2],
+ ngettext: [1, 2],
+ dngettext: [2, 3],
+ pgettext: [2],
+ dpgettext: [3],
+ npgettext: [2, 3],
+ ndgettext: [3, 4],
+ };
+
+ // Additional functions to recognize
+ var gt_extras = {};
+
+ var opts = require('nomnom')
+ .scriptName(details.name)
+ .opts({
+
+ // Output-related options
+ output: {
+ default: "messages.pot",
+ metavar: "FILE",
+ help: "file to write output to [defaults to stdout]",
+ },
+ append: {
+ flag: true,
+ help: "append to an existing output file, if any",
+ },
+
+ // Get-text method related options
+ gtobj: {
+ full: "gettext",
+ default: "gt",
+ metavar: "NAME",
+ help: "name of gettext module object [gt]",
+ },
+ gtmeth: {
+ full: "method",
+ string: "--method=NAME[:<index>[,index]+]",
+ list: true,
+ help: "additional gettext object methods to recognize, with optional 1-based message ID parameter indices",
+ },
+ gtfunc: {
+ full: "function",
+ string: "--function=NAME[:<index>[,index]+]",
+ list: true,
+ help: "additional global-scope functions to recognize, with optional 1-based message ID parameter indices",
+ },
+
+ // Header-related options
+ nohdr: {
+ flag: true,
+ full: "omit-header",
+ string: "--omit-header",
+ help: "omit the header",
+ },
+ project: {
+ default: "PROJECT",
+ help: "the project's name",
+ },
+ version: {
+ default: "VERSION",
+ help: "the project's version",
+ },
+ copyright: {
+ default: "ORGANIZATION",
+ help: "the copyright owner",
+ },
+ bugs_addr: {
+ full: "bugs-addr",
+ default: "EMAIL@ADDRESS",
+ help: "the email or web address for reporting bugs",
+ },
+ }).parseArgs(_.isArray(argv) ? argv : process.argv.slice(2));
+
+ // Handle writing to stdout or a provided stream
+ if (output !== undefined) {
+ opts.output = output;
+ } else if (opts.output === "-") {
+ opts.output = process.stdout;
+ }
+
+ // Parse extra gettext functions
+ var parse_gt_func = function(dest, gtexpr) {
+ var index = gtexpr.indexOf(":");
+ var params;
+ var gtfunc;
+ if (index === -1) {
+ gtfunc = gtexpr;
+ params = [1];
+ } else {
+ gtfunc = gtexpr.substr(0, index);
+ params = _.map(gtexpr.substr(index + 1).split(","), function(index) {
+ var ii = parseInt(index);
+ if (!ii || ii < 1) {
+ throw new Error("invalid 1-based parameter index: " + index);
+ }
+ return ii;
+ });
+ }
+ dest[gtfunc] = params;
+ }
+
+ try {
+ _.each(opts.gtmeth, function(gtexpr) {
+ parse_gt_func(gt_funcs, gtexpr);
+ });
+ _.each(opts.gtfunc, function(gtexpr) {
+ parse_gt_func(gt_extras, gtexpr);
+ });
+ } catch (err) {
+ cmdline_cb(usage + "\n" + err.message);
+ return;
+ }
+
+ // Append implies no header
+ if (opts.append) {
+ opts.nohdr = true;
+ }
+
+ // Validate options & arguments
+ if (opts._.length === 0) {
+ cmdline_cb(usage + "\n" + "No template specified.");
+ return;
+ }
+
+ // Check whether a function call is a gettext call
+ var gettext_func_params = function(funcexpr) {
+ var index = funcexpr.indexOf(".");
+ if (index === -1) {
+ return funcexpr in gt_extras ? gt_extras[funcexpr] : null;
+ } else if (funcexpr.substr(0, index) !== opts.gtobj) {
+ return [];
+ }
+
+ return funcexpr.substr(index + 1) in gt_funcs ? gt_funcs[funcexpr.substr(index + 1)] : [];
+ };
+
+ // Extract text to localise from functions
+ var get_localised_text = function(functext, args) {
+ var params = gettext_func_params(functext);
+ if (params === null || params.length === 0) {
+ return [];
+ }
+
+ // HACK: this is not really safe
+ var index = 1;
+ return _.select(args, function(arg) {
+ return _.include(params, index++) && arg !== null;
+ });
+ };
+
+ // Process each specified template
+ async.forEachSeries(opts._, function(filename, callback) {
+ fs.readFile(filename, 'utf-8', function(err, source) {
+ if (err) {
+ return callback("Error reading template '" + filename + "': " + err.message);
+ }
+
+ // Translations
+ var translations = [];
+
+ // AJS compiler
+ var compiler = new Compiler(source, {
+ filename: filename,
+ tree: true,
+ });
+
+ // Process a call node: func is the callee node and args the argument nodes
+ // (walker applies this with the children of an N_CALL node).
+ // Appends one entry to translations per extracted message ID.
+ var process_func = function(func, args) {
+
+ // Dispatch on the node type to the compiler method of the same name;
+ // presumably this yields the callee's source text, e.g. "gt.gettext" - confirm against ajs
+ var functext = compiler[func.type].apply(compiler, func.children);
+
+ // Only string literal arguments are rendered; anything else becomes a null
+ // placeholder so that argument positions stay aligned
+ var argstext = [];
+ _.each(args, function(arg) {
+ if (arg.type === "N_STRING") {
+ argstext.push(compiler[arg.type].apply(compiler, arg.children));
+ } else {
+ argstext.push(null);
+ }
+ });
+
+ _.each(get_localised_text(functext, argstext), function(text) {
+ translations.push({
+
+ // Source reference, emitted as a "#:" comment line by the output template
+ comments: [filename + ":" + func.line],
+ msgid: text,
+ msgstr: '""',
+ });
+ });
+ };
+
+ // Recursively walk the AST: N_CALL nodes are handed to process_func, every
+ // other node has its flattened, non-empty children visited. The children of
+ // an N_CALL node are not walked, so calls nested inside another call's
+ // arguments are not visited. An error raised while handling a node is
+ // reported on stderr, prefixed with that node's line, and does not stop the
+ // walk of the remaining nodes.
+ var walker = function(node) {
+ try {
+ if (node.type === "N_CALL") {
+ process_func.apply(compiler, node.children);
+ } else {
+ _.each(_.flatten(node.children), function(child) {
+ if (child) {
+ walker(child);
+ }
+ });
+ }
+ } catch (err) {
+ console.error(node.line + ":" + err.message);
+ }
+ };
+
+ // Walk the AST
+ walker(compiler.compile());
+
+ var fulltime = function(when) {
+ return now.getFullYear() + "-" + now.getMonth() + "-" + now.getDate();
+ };
+
+ // Write out translations template
+ var now = new Date();
+ ajs.render(header.join("\n"), {
+ locals: {
+ header: !opts.nohdr,
+ project: {
+ name: opts.project,
+ version: opts.version,
+ },
+ copyright: opts.copyright,
+ author: "FIRST AUTHOR <EMAIL@ADDRESS>",
+ year: now.getFullYear(),
+ bugs: opts.bugs_addr,
+ now: fulltime(now),
+ details: details,
+ translations: translations,
+ },
+ }, function(rendered) {
+ if (typeof(opts.output) === "object" && "write" in opts.output) {
+ opts.output.write(rendered, "utf-8");
+ callback();
+ } else if (opts.append) {
+
+ // This is all pretty ugly
+ var buffer = new Buffer(rendered);
+ fs.open(opts.output, "a", "0666", function(err, fd) {
+ var write_remaining = function(written) {
+ if (written === buffer.length) {
+ callback();
+ return
+ }
+
+ // Write a chunk
+ fs.write(fd, buffer, written, buffer.length - written, null, function(err2, chunk_written) {
+ if (err2) {
+ callback("Error writing output: " + err2.message);
+ } else {
+ write_remaining(written + chunk_written);
+ }
+ });
+ };
+ write_remaining(0);
+ });
+ } else {
+ try {
+ fs.writeFile(opts.output, rendered, "utf-8", function(err) {
+
+ // Append subsequent extracted text and don't repeat the header
+ opts.append = true;
+ opts.nohdr = true;
+ if (err) {
+ callback("Error writing output to '" + opts.output + "': " + err);
+ } else {
+ callback();
+ }
+ });
+ } catch (err) {
+ callback("Error writing output to '" + opts.output + "': " + err.message);
+ }
+ }
+ });
+ });
+ }, function(err) {
+ cmdline_cb(err);
+ });
+ },
+ };
+}();
+
+// Public API: cli(argv, callback, output) is the command-line driver defined
+// above as extractor.cmdline.
+exports.cli = extractor.cmdline;
+
+// When this file is run directly rather than require()d, run the extractor on
+// the process arguments; print any error to stderr and exit with status 1.
+if (require.main === module) {
+ extractor.cmdline(undefined, function(err) {
+ if (err) {
+ console.error(err);
+ process.exit(1);
+ }
+ });
+}
26 package.json
@@ -0,0 +1,26 @@
+{
+ "name": "ajs-xgettext",
+ "version": "0.1.0",
+ "description": "Extract localised text from AJS templates",
+ "main": "lib/extract",
+ "bin": "bin/ajs-xgettext",
+ "dependencies": {
+ "ajs": ">=0.0.4",
+ "async": ">=0.1.6",
+ "nomnom": ">=1.0.0",
+ "underscore": ">=1.1.6"
+ },
+ "devDependencies": {
+ "bufferstream": ">=0.4.6",
+ "expresso": ">=0.8.1",
+ "temp": ">=0.2.0"
+ },
+ "scripts": { "test": "expresso" },
+ "engines": { "node": ">=0.4.8" },
+ "author": "Duane Griffin <duaneg@dghda.com>",
+ "keywords": ["ajs", "ejs", "template", "i18n", "l10n", "gettext"],
+ "repository": {
+ "type": "git",
+ "url" : "https://github.com/duaneg/ajs-xgettext.git"
+ }
+}
13 test/i18n.ajs
@@ -0,0 +1,13 @@
+This is a test of i18n extraction code.
+<%- gt.textdomain("et", "This is not to be translated") %>
+<%- _("This is localised text") %>
+<%- _("This is localised text with a hard-coded parameter:", "param") %>
+<%- _("This is localised text with a variable parameter:", somevar) %>
+<%- gt.dgettext("en", "This is text to output in English") %>
+<%- gt.dgettext("en", avariable) %>
+<%- gt.dgettext("zh_CN", "This is text to output in Chinese") %>
+<%- gt.ngettext("This is text without a plural form", "This is text with %d plural forms", count) %>
+<%- gt.dngettext("zh_CN", "This is Chinese text without a plural form", "This is Chinese text with %d plural forms", count) %>
+<%- gt.gtmeth("This is an additional gt method") %>
+<%- gtfunc("This is an additional gt global function", 0, "...with some odd params...", "<not localised>", "<this is>", "not") %>
+<%- nongt("This isn't to be translated either") %>
132 test/test_extractor.js
@@ -0,0 +1,132 @@
+var assert = require("assert");
+var fs = require("fs");
+var path = require("path");
+var temp = require('temp');
+var BufferStream = require('bufferstream')
+
+var extract = require("../lib/extract");
+
+var testdir = "test";
+
+// With no template arguments the callback receives the usage line followed by
+// "No template specified.".
+exports.testNoTemplate = function(beforeExit) {
+ var called = 0;
+
+ extract.cli([], function(err) {
+ called++;
+ assert.equal(err, "usage: ajs-xgettext [options] <template> [<template>...]\nNo template specified.");
+ });
+
+ // The callback must have fired exactly once by the time the process exits
+ beforeExit(function() {
+ assert.equal(called, 1);
+ });
+};
+
+// A template path that cannot be read is reported through the callback.
+// NOTE(review): the expected text embeds the platform's ENOENT message, which
+// presumably varies between node versions - confirm before upgrading.
+exports.testBadTemplate = function(beforeExit) {
+ var called = 0;
+
+ extract.cli(["non-existent file"], function(err) {
+ called++;
+ assert.equal(err, "Error reading template 'non-existent file': ENOENT, No such file or directory 'non-existent file'");
+ });
+
+ // The callback must have fired exactly once by the time the process exits
+ beforeExit(function() {
+ assert.equal(called, 1);
+ });
+};
+
+// Writing to an output path that is a directory is reported through the callback.
+// NOTE(review): the expected text embeds the platform's EISDIR message, which
+// presumably varies between node versions - confirm before upgrading.
+exports.testBadOutput1 = function(beforeExit) {
+ var called = 0;
+
+ extract.cli(["--output=/", path.join(testdir, "i18n.ajs")], function(err) {
+ called++;
+ assert.equal(err, "Error writing output to '/': Error: EISDIR, Is a directory '/'");
+ });
+
+ // The callback must have fired exactly once by the time the process exits
+ beforeExit(function() {
+ assert.equal(called, 1);
+ });
+};
+
+// A parameter index of 0 in a method specification is rejected, because indices
+// are 1-based; the callback receives the usage line plus the error message.
+// NOTE(review): the option is declared with the long name "method"; this test
+// passes it by its option key "gtmeth", which presumably nomnom also accepts - confirm.
+exports.testBadFuncSpec = function(beforeExit) {
+ var called = 0;
+
+ extract.cli(["--gtmeth=_:0", path.join(testdir, "i18n.ajs")], function(err) {
+ called++;
+ assert.equal(err, "usage: ajs-xgettext [options] <template> [<template>...]\ninvalid 1-based parameter index: 0");
+ });
+
+ // The callback must have fired exactly once by the time the process exits
+ beforeExit(function() {
+ assert.equal(called, 1);
+ });
+};
+
+// Extract from the fixture template with an extra method and extra functions,
+// writing header-less output to an in-memory stream, then spot-check lines.
+exports.testExtract = function(beforeExit) {
+ var called = 0;
+ var chunks = [];
+ var output = new BufferStream({encoding: "utf8", size: "flexible"});
+
+ // Collect the output one line at a time
+ output.split("\n");
+ output.on("split", function(chunk, token) {
+ chunks.push(chunk.toString());
+ });
+
+ extract.cli(["--omit-header", "--method=gtmeth", "--function=_", "--function=gtfunc:1,3,5", path.join(testdir, "i18n.ajs")], function(err) {
+ called++;
+ assert.isUndefined(err);
+ }, output);
+
+ // Check the collected lines once everything has been written
+ beforeExit(function() {
+ assert.equal(chunks.length, 52);
+ assert.equal(chunks[2], 'msgid "This is localised text"');
+ assert.equal(chunks[3], 'msgstr ""');
+ assert.equal(chunks[34], 'msgid "This is Chinese text with %d plural forms"');
+ assert.equal(chunks[50], 'msgid "<this is>"');
+ assert.equal(called, 1);
+ });
+};
+
+// Output written to a file via --output must equal the output written to a
+// supplied stream for the same template.
+exports.testWrite = function(beforeExit) {
+ var called = 0;
+ var output_file = temp.path({suffix: ".txt"});
+ var output_stream = new BufferStream({encoding: "utf8", size: "flexible"});
+
+ // Reference run: write to the in-memory stream
+ extract.cli([path.join(testdir, "i18n.ajs")], function(err) {
+ called++;
+ assert.isUndefined(err);
+ }, output_stream);
+
+ // Run under test: write to a temporary file
+ extract.cli(["--output", output_file, path.join(testdir, "i18n.ajs")], function(err) {
+ called++;
+ assert.isUndefined(err);
+ });
+
+ // Compare both outputs, then remove the temporary file
+ beforeExit(function() {
+ assert.equal(called, 2);
+ assert.equal(fs.readFileSync(output_file, "utf-8"), output_stream.toString());
+ fs.unlinkSync(output_file);
+ });
+};
+
+// With --append the extracted text is added after the existing file contents
+// and matches header-less output written to a stream.
+exports.testWriteAppend = function(beforeExit) {
+ var called = 0;
+ var output_file = temp.path({suffix: ".txt"});
+ var output_stream = new BufferStream({encoding: "utf8", size: "flexible"});
+ var prefix = "# Hello\n";
+
+ // Seed the file so there is existing content to append to
+ fs.writeFileSync(output_file, prefix, "utf-8");
+
+ // Reference run: header-less output to the in-memory stream
+ extract.cli(["--omit-header", path.join(testdir, "i18n.ajs")], function(err) {
+ called++;
+ assert.isUndefined(err);
+ }, output_stream);
+
+ // Run under test: append to the seeded file
+ extract.cli(["--append", "--output", output_file, path.join(testdir, "i18n.ajs")], function(err) {
+ called++;
+ assert.isUndefined(err);
+ });
+
+ // The file must hold the seed followed by the reference output
+ beforeExit(function() {
+ assert.equal(called, 2);
+ assert.equal(fs.readFileSync(output_file, "utf-8"), prefix + output_stream.toString());
+ fs.unlinkSync(output_file);
+ });
+};

0 comments on commit f78afd2

Please sign in to comment.