Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Jonathan Wolter
committed
Dec 11, 2011
1 parent
670f3e1
commit 1729c9f
Showing
8 changed files
with
1,484 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
# Fetch gems over TLS rather than plain HTTP.
source "https://rubygems.org"

gem 'nokogiri' # XML parsing and manipulation of word/document.xml
gem 'rake'
# The specs load RSpec with `require 'rspec'`, so auto-require the same entry point.
gem "rspec", :require => 'rspec'
gem 'zipruby' # this gem because rubyzip does not support in-memory zip file modification
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
require 'rubygems' | ||
require 'nokogiri' | ||
require 'zipruby' | ||
|
||
# Debug tracing hook called throughout the templating code.
# Tracing is switched off: the message is discarded and nil is returned.
# Replace the body with `puts str` to see the trace.
def log(str)
  nil
end
|
||
module DocxTemplater | ||
# Fills a WordprocessingML document (the contents of word/document.xml) with values.
#
# Supported markup inside the document:
#   $KEY$                                - replaced by the XML-escaped scalar stored under :key
#   #BEGIN_ROW:KEY# ... #END_ROW:KEY#    - marker table rows; the rows between them are repeated
#                                          once per element of the array stored under :key
#   $EACH:FIELD$                         - inside repeated rows, replaced by element[:field]
#   #SUM:KEY#                            - replaced by the number of elements in the array
class TemplateProcesser
  attr_reader :data

  # Characters that must be escaped before text is spliced into XML, with their entities.
  XML_ESCAPES = { '&' => '&amp;', '>' => '&gt;', '<' => '&lt;' }.freeze

  # data is expected to be a hash of symbols => string or arrays of hashes.
  def initialize(data)
    @data = data
  end

  # Naive and inefficient templating: the document is scanned once per data key.
  #
  # document - String of XML to template. It is not modified, so frozen strings are accepted.
  #
  # Returns the rendered XML as a String.
  # Raises RuntimeError when the row markers for an Array value cannot be located.
  def render(document)
    data.each do |key, value|
      token = key.to_s.upcase
      if value.is_a?(Array)
        document = enter_multiple_values(document, key)
        document = document.gsub("#SUM:#{token}#") { value.count.to_s }
      else
        # Block form: the replacement is taken literally, so backslash sequences
        # in the value (e.g. "\0") are not interpreted as back-references.
        document = document.gsub("$#{token}$") { safe(value) }
      end
    end
    document
  end

  private

  # Escapes &, > and < so arbitrary text can be embedded in XML character data.
  def safe(text)
    text.to_s.gsub(/[&<>]/) { |char| XML_ESCAPES[char] }
  end

  # Expands the rows between the begin/end marker rows for +key+, once per element of data[key].
  # Returns the whole document serialized back to a String.
  def enter_multiple_values(document, key)
    log "enter_multiple_values for: #{key}"
    # TODO ideally we would not re-parse xml doc every time
    xml = Nokogiri::XML(document)

    # Often these tags in Word are broken up with various xml entries. Probably need to
    # manually fix word/document.xml before saving the template.
    begin_row = "#BEGIN_ROW:#{key.to_s.upcase}#"
    end_row = "#END_ROW:#{key.to_s.upcase}#"
    begin_row_template = find_marker_row(xml, begin_row)
    end_row_template = find_marker_row(xml, end_row)
    log "begin_row_template: #{begin_row_template.to_s}"
    log "end_row_template: #{end_row_template.to_s}"
    raise "unmatched template markers: #{begin_row} nil: #{begin_row_template.nil?}, #{end_row} nil: #{end_row_template.nil?}. This could be because word messed with format, or xml became invalid. See README." unless begin_row_template && end_row_template

    row_templates = rows_between(begin_row_template, end_row_template, begin_row, end_row)
    log "row_templates: (#{row_templates.count}) #{row_templates.map(&:to_s).inspect}"

    # Every new row is inserted directly after the begin marker, so both the data and the
    # template rows are walked back-to-front to come out in document order.
    data[key].reverse.each do |each_data|
      log "each_data: #{each_data.inspect}"

      # dup so we have new nodes to append
      row_templates.map(&:dup).each do |new_row|
        log " new_row: #{new_row}"
        # change all the internals of the new node, even if we did not template
        new_row.inner_html = fill_row(new_row.inner_html, each_data)

        # add this row after the template's start
        begin_row_template.add_next_sibling(new_row)
      end
    end

    # delete unwanted template rows from document
    (row_templates + [begin_row_template, end_row_template]).each(&:unlink)
    xml.to_s
  end

  # Returns the first <w:tr> whose text contains +marker+, or nil when there is none.
  def find_marker_row(xml, marker)
    xml.xpath("//w:tr[contains(., '#{marker}')]", xml.root.namespaces).first
  end

  # Collects the sibling nodes strictly between the two marker rows, last one first.
  # Raises instead of walking past the final sibling when the end row never shows up.
  def rows_between(begin_row_template, end_row_template, begin_row, end_row)
    collected = []
    row = begin_row_template.next_sibling
    until row == end_row_template
      raise "#{end_row} was not found after #{begin_row} among its sibling rows. Both markers must be rows of the same table, in that order." if row.nil?
      collected.unshift(row)
      row = row.next_sibling
    end
    collected
  end

  # Replaces each $EACH:FIELD$ placeholder in +innards+ with the escaped each_data[:field].
  def fill_row(innards, each_data)
    innards.gsub(/\$EACH:([^\$]+)\$/) do
      each_key = Regexp.last_match(1)
      log " each_key: #{each_key}"
      safe(each_data[each_key.downcase.to_sym])
    end
  end
end
|
||
# Creates a new word document from an existing docx file. (You may need to modify that docx since word
# may munge your templating markup with in-between XML nodes.)
#
# A docx file is a zip archive. Every entry of the template archive is copied into a new
# in-memory archive; only word/document.xml is run through the TemplateProcesser.
class DocxCreator
  attr_reader :template_path, :data, :template_parser

  # template_path - path of the docx file used as the template.
  # data          - values handed to TemplateProcesser (hash of symbols => string or arrays of hashes).
  def initialize(template_path, data)
    @template_path = template_path
    @data = data
    @template_parser = TemplateProcesser.new(data)
  end

  # Renders the template and writes the resulting docx to +file_name+.
  # The default name carries a timestamp with minute resolution.
  def generate_docx_file(file_name = "output_#{Time.now.strftime("%Y-%m-%d_%H%M")}.docx")
    buffer = generate_docx_bytes
    # Binary mode: the buffer is zip data, which must not go through newline/encoding translation.
    File.open(file_name, 'wb') { |f| f.write(buffer) }
  end

  # Renders the template and returns the bytes of the resulting docx as a String.
  def generate_docx_bytes
    buffer = ''

    # Open the existing template file (no temp files created, just read it)
    Zip::Archive.open(template_path) do |template|
      # Then create a new file with the output kept in-memory.
      Zip::Archive.open_buffer(buffer, Zip::CREATE) do |archive|
        template.num_files.times do |index|
          entry_name = template.get_name(index)
          template.fopen(entry_name) do |entry|
            archive.add_buffer(entry_name, copy_or_template(entry_name, entry))
          end
        end
      end
    end
    buffer
  end

  private

  # Returns the contents to store for +entry_name+: the rendered document for
  # word/document.xml, the unchanged contents for every other entry.
  def copy_or_template(entry_name, f)
    contents = f.read
    # Inside the word document archive is one file with contents of the actual document. Modify it.
    entry_name == 'word/document.xml' ? template_parser.render(contents) : contents
  end
end
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
require 'rspec' | ||
require 'render_docx_template' | ||
require 'template_processor_spec' | ||
|
||
# End-to-end check: renders the example template into a docx file and inspects the archive.
describe "integration test" do
  let(:data) { DocxTemplater::TestData::DATA }
  let(:input_file) { '../ExampleTemplate.docx' }
  let(:output_file) { 'spec/tmp/IntegrationTestOutput.docx' }
  # Start from a clean slate; there is nothing to delete on the very first run.
  before { File.delete(output_file) if File.exist?(output_file) }

  context "should process in incoming docx" do
    it "generates a valid zip file (.docx)" do
      DocxTemplater::DocxCreator.new(input_file, data).generate_docx_file(output_file)

      # Opening the archive raises if the output is not a readable zip file.
      archive = Zip::Archive.open(output_file)
      archive.close

      # `open` hands the file to the default application on macOS only, so skip it elsewhere.
      if RUBY_PLATFORM =~ /darwin/
        puts "************************************"
        puts " >>> Only will work on mac <<<"
        puts "NOW attempting to open created file in Word."
        cmd = "open #{output_file}"
        puts " will run '#{cmd}'"
        puts "************************************"

        system cmd
      end
    end

    it "generates a file with the same contents as the input docx" do
      input_entries = Zip::Archive.open(input_file) { |z| z.map(&:name) }
      DocxTemplater::DocxCreator.new(input_file, data).generate_docx_file(output_file)
      output_entries = Zip::Archive.open(output_file) { |z| z.map(&:name) }

      input_entries.should == output_entries
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
require 'rspec' | ||
require 'render_docx_template' | ||
|
||
module DocxTemplater
  # Sample values shared by the specs.
  module TestData
    # Scalars are a mix of strings, symbols and a float; :roster and :event_reports are
    # arrays of hashes. The key spellings (:senority, :attendence) are kept exactly as they are.
    DATA = {
      :teacher => "Priya Vora",
      :building => "Building #14",
      :classroom => :'Rm 202',
      :district => "Washington County Public Schools",
      :senority => 12.25,
      :roster => [
        ['Sally', 12, '100%'],
        [:Xiao, 10, '94%'],
        ['Bryan', 13, '100%'],
        ['Larry', 11, '90%'],
        ['Kumar', 12, '76%'],
        ['Amber', 11, '100%'],
        ['Isaiah', 12, '89%'],
        ['Omar', 12, '99%'],
        ['Xi', 11, '20%'],
        ['Noushin', 12, '100%']
      ].map { |name, age, attendence| {:name => name, :age => age, :attendence => attendence} },
      :event_reports => [
        ['Science Museum Field Trip', 'PTA sponsored event. Spoke to Astronaut with HAM radio.'],
        ['Wilderness Center Retreat', '2 days hiking for charity:water fundraiser, $10,200 raised.']
      ].map { |name, notes| {:name => name, :notes => notes} },
      :created_at => "11-12-03 02:01"
    }
  end
end
|
||
# Unit specs for the templating of word/document.xml.
describe DocxTemplater::TemplateProcesser do
  let(:data) { Marshal.load(Marshal.dump(DocxTemplater::TestData::DATA)) } # deep copy
  let(:xml) { File.read('spec/word/document.xml') }
  let(:parser) { DocxTemplater::TemplateProcesser.new(data) }

  # Builds a minimal WordprocessingML document holding one table with a
  # single-cell row per marker string, in the given order.
  def table_with_marker_rows(*markers)
    rows = markers.map do |marker|
      "<w:tr><w:tc>\n<w:p>\n<w:r><w:t>#{marker}</w:t></w:r>\n</w:p>\n</w:tc></w:tr>"
    end
    head = [
      '<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">',
      '<w:body>',
      '<w:tbl>'
    ]
    tail = ['</w:tbl>', '</w:body>', '</w:document>']
    (head + rows + tail).join("\n") + "\n"
  end

  context "valid xml" do
    it "should render and still be valid XML" do
      Nokogiri::XML.parse(xml).should be_xml
      out = parser.render(xml)
      Nokogiri::XML.parse(out).should be_xml
    end

    it "should accept non-ascii characters" do
      data[:teacher] = "老师"
      out = parser.render(xml)
      out.should include("老师")
      Nokogiri::XML.parse(out).should be_xml
    end

    it "should escape as necessary invalid xml characters" do
      data[:building] = "23rd & A #1 floor"
      data[:classroom] = "--> 201 <!--"
      data[:roster][0][:name] = "<#Ai & Bo>"
      out = parser.render(xml)

      Nokogiri::XML.parse(out).should be_xml
      # The raw values must show up in their escaped form.
      out.should include("23rd &amp; A #1 floor")
      out.should include("--&gt; 201 &lt;!--")
      out.should include("&lt;#Ai &amp; Bo&gt;")
    end
  end

  context "unmatched begin and end row templates" do
    it "should not raise" do
      xml = table_with_marker_rows(
        "#BEGIN_ROW:#{:roster.to_s.upcase}#",
        "#END_ROW:#{:roster.to_s.upcase}#",
        "#BEGIN_ROW:#{:event_reports.to_s.upcase}#",
        "#END_ROW:#{:event_reports.to_s.upcase}#"
      )
      expect { parser.render(xml) }.to_not raise_error
    end

    it "should raise an exception" do
      # No END_ROW marker for the event reports.
      xml = table_with_marker_rows(
        "#BEGIN_ROW:#{:roster.to_s.upcase}#",
        "#END_ROW:#{:roster.to_s.upcase}#",
        "#BEGIN_ROW:#{:event_reports.to_s.upcase}#"
      )
      expect { parser.render(xml) }.to raise_error(/#END_ROW:EVENT_REPORTS# nil: true/)
    end
  end

  it "should replace all simple keys with values" do
    scalar_keys = data.reject { |_key, value| value.is_a?(Array) }.keys
    scalar_keys.each do |key|
      xml.should include("$#{key.to_s.upcase}$")
      xml.should_not include(data[key].to_s)
    end
    out = parser.render(xml)

    # Iterate over the keys (not key/value pairs) and use the upcased placeholder,
    # otherwise these checks pass without testing anything.
    scalar_keys.each do |key|
      out.should_not include("$#{key.to_s.upcase}$")
      out.should include(data[key].to_s)
    end
  end

  it "should replace all array keys with values" do
    xml.should include("#BEGIN_ROW:")
    xml.should include("#END_ROW:")
    xml.should include("$EACH:")

    out = parser.render(xml)

    out.should_not include("#BEGIN_ROW:")
    out.should_not include("#END_ROW:")
    out.should_not include("$EACH:")

    [:roster, :event_reports].each do |key|
      data[key].each do |row|
        row.values.map(&:to_s).each do |row_value|
          out.should include(row_value)
        end
      end
    end
  end

  it "shold render students names in the same order as the data" do
    out = parser.render(xml)
    out.should include('Sally')
    out.should include('Kumar')
    out.index('Kumar').should > out.index('Sally')
  end

  it "shold render event reports names in the same order as the data" do
    out = parser.render(xml)
    out.should include('Science Museum Field Trip')
    out.should include('Wilderness Center Retreat')
    out.index('Wilderness Center Retreat').should > out.index('Science Museum Field Trip')
  end

  it "should render 2-line event reports in same order as docx" do
    event_reports_starting_at = xml.index("#BEGIN_ROW:EVENT_REPORTS#")
    event_reports_starting_at.should_not be_nil
    name_at = xml.index("$EACH:NAME$", event_reports_starting_at)
    notes_at = xml.index("$EACH:NOTES$", event_reports_starting_at)
    name_at.should > event_reports_starting_at
    notes_at.should > event_reports_starting_at
    notes_at.should > name_at

    out = parser.render(xml)
    out.index('PTA sponsored event. Spoke to Astronaut with HAM radio.').should > out.index('Science Museum Field Trip')
  end

  it "should render sums of input data" do
    xml.should include("#SUM")
    out = parser.render(xml)
    out.should_not include("#SUM")
    out.should include("#{data[:roster].count} Students")
    out.should include("#{data[:event_reports].count} Events")
  end
end
Oops, something went wrong.