Permalink
Browse files

Initial template based rendering

  • Loading branch information...
deorus committed Feb 17, 2015
1 parent 637e07c commit 1a01813f038f0ec11660f46589a02ed8951249f9
Showing with 69 additions and 10 deletions.
  1. +17 −0 README.md
  2. +5 −0 pom.xml
  3. +47 −10 src/main/java/com/mapr/synth/Synth.java
@@ -465,3 +465,20 @@ There are three basic strategies supported:
* *OPTIMISTIC* With this convention, no quoting of strings is done. This should not normally be used since it is very easy to get unparseable data.

The default convention is DOUBLE_QUOTE.

Template-based Data Generation
=====================

This approach uses the FreeMarker templating engine to render custom templates. The variables available in the template are populated from the fields of the specified schema.

## Command-line options:

`-format TEMPLATE` to take the output format from a template

`-template file` path to a FreeMarker template file

`-schema file` to specify the schema (see above)

## Template notation

To print the value of a variable in the template, use the `${name.asText()}` placeholder, where `name` is the name of a field in the schema.
@@ -76,6 +76,11 @@
<artifactId>jackson-databind</artifactId>
<version>2.1.1</version>
</dependency>
<!-- FreeMarker template engine, used to render output when -format TEMPLATE is selected. -->
<dependency>
<groupId>org.freemarker</groupId>
<artifactId>freemarker</artifactId>
<version>2.3.21</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
@@ -24,17 +24,18 @@
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.mapr.synth.samplers.SchemaSampler;
import freemarker.template.Configuration;
import freemarker.template.Template;
import freemarker.template.TemplateException;
import freemarker.template.TemplateExceptionHandler;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.kohsuke.args4j.OptionDef;
import org.kohsuke.args4j.spi.IntOptionHandler;
import org.kohsuke.args4j.spi.Setter;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.io.*;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadMXBean;
import java.nio.file.Files;
@@ -64,14 +65,18 @@ public static void main(String[] args) throws IOException, CmdLineException, Int
"[ -count <number>G|M|K ] " +
"-schema schema-file " +
"[-quote DOUBLE_QUOTE|BACK_SLASH|OPTIMISTIC] " +
"[-format JSON|TSV|CSV ] " +
"[-format JSON|TSV|CSV|TEMPLATE ] " +
"[-threads n] " +
"[-output output-directory-name] ");
throw e;
}

Preconditions.checkArgument(opts.threads > 0 && opts.threads <= 2000,
"Must have at least one thread and no more than 2000");

// A template file is only mandatory when TEMPLATE output was requested; every other
// format (including the default CSV) must pass this check without a -template option.
Preconditions.checkArgument(opts.format != Format.TEMPLATE || opts.template != null,
"Please specify a template file");

if (opts.threads > 1) {
Preconditions.checkArgument(!"-".equals(opts.output),
"If more than on thread is used, you have to use -output to set the output directory");
@@ -226,9 +231,30 @@ public static void header(Format format, List<String> names, PrintStream out) {
}

public static int generateFile(Options opts, SchemaSampler s, PrintStream out, int count) {
for (int i = 0; i < count; i++) {
format(opts.format, opts.quote, s.getFieldNames(), s.sample(), out);
if (opts.format == Format.TEMPLATE) {
Configuration cfg = new Configuration(Configuration.VERSION_2_3_21);
cfg.setDefaultEncoding("UTF-8");
cfg.setTemplateExceptionHandler(TemplateExceptionHandler.RETHROW_HANDLER);

Template template = null;
try {
cfg.setDirectoryForTemplateLoading(opts.template.getParentFile());
template = cfg.getTemplate(opts.template.getName());
} catch (IOException e) {
e.printStackTrace();
}

PrintWriter writer = new PrintWriter(out);
for (int i = 0; i < count; i++) {
formatTemplate(opts.format, opts.quote, s.getFieldNames(), s.sample(), template, writer);
}

} else {
for (int i = 0; i < count; i++) {
format(opts.format, opts.quote, s.getFieldNames(), s.sample(), out);
}
}

return count;
}

@@ -270,10 +296,18 @@ public ThreadReport() {
}
}


// Joins values with a comma separator.
static Joiner withCommas = Joiner.on(",");
// Joins values with a tab separator.
static Joiner withTabs = Joiner.on("\t");

/**
 * Renders a single sampled record through a FreeMarker template.
 *
 * <p>{@code format}, {@code quoteConvention} and {@code names} are not used by this method;
 * the record in {@code fields} is passed to the template as its data model.
 *
 * @param format          unused
 * @param quoteConvention unused
 * @param names           unused
 * @param fields          the sampled record handed to the template
 * @param temp            the template to render
 * @param writer          destination of the rendered text; not flushed or closed here
 * @throws IllegalStateException if the template fails to render or the output cannot be written
 */
private static void formatTemplate(Format format, Quote quoteConvention, List<String> names, JsonNode fields, Template temp, PrintWriter writer) {
    try {
        temp.process(fields, writer);
    } catch (TemplateException e) {
        // A template error would otherwise repeat for every record and leave truncated
        // output behind, so stop with the cause attached rather than print and continue.
        throw new IllegalStateException("Failed to render template " + temp.getName(), e);
    } catch (IOException e) {
        throw new IllegalStateException("Failed to write output of template " + temp.getName(), e);
    }
}

private static void format(Format format, Quote quoteConvention, List<String> names, JsonNode fields, PrintStream out) {
switch (format) {
@@ -309,7 +343,7 @@ private static void printDelimited(Quote quoteConvention, List<String> names, Js
}

/**
 * Output formats that can be selected with the {@code -format} option.
 * {@code TEMPLATE} output is produced by rendering each record through a FreeMarker template.
 */
public static enum Format {
JSON, TSV, CSV
JSON, TSV, CSV, TEMPLATE
}

public static enum Quote {
@@ -326,9 +360,12 @@ private static void printDelimited(Quote quoteConvention, List<String> names, Js
@Option(name = "-count", handler = SizeParser.class)
int count = 1000;

@Option(name = "-schema")
@Option(name = "-schema", required = false)
File schema;

@Option(name = "-template", required = false)
File template;

@Option(name = "-format")
Format format = Format.CSV;

0 comments on commit 1a01813

Please sign in to comment.