Skip to content

Commit

Permalink
fixing tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Jonathan Wolter committed Dec 11, 2011
1 parent 670f3e1 commit 1729c9f
Show file tree
Hide file tree
Showing 8 changed files with 1,484 additions and 0 deletions.
Binary file added ExampleTemplate.docx
Binary file not shown.
Binary file added ExampleTemplate_RenderedOutput.docx
Binary file not shown.
1 change: 1 addition & 0 deletions Gemfile
@@ -1,5 +1,6 @@
# Fetch gems over TLS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

gem 'nokogiri'
gem 'rake'
# Gemfile.lock resolves rspec to 2.7.0, whose load path is 'rspec'; 'spec' was the
# RSpec 1.x entry point and makes Bundler.require raise LoadError.
gem "rspec", :require => 'rspec'
gem 'zipruby' # this gem because rubyzip does not support in-memory zip file modification
2 changes: 2 additions & 0 deletions Gemfile.lock
Expand Up @@ -3,6 +3,7 @@ GEM
specs:
diff-lcs (1.1.3)
nokogiri (1.5.0)
rake (0.9.2.2)
rspec (2.7.0)
rspec-core (~> 2.7.0)
rspec-expectations (~> 2.7.0)
Expand All @@ -18,5 +19,6 @@ PLATFORMS

DEPENDENCIES
nokogiri
rake
rspec
zipruby
131 changes: 131 additions & 0 deletions render_docx_template.rb
@@ -0,0 +1,131 @@
require 'rubygems'
require 'nokogiri'
require 'zipruby'

# Debug logging hook used by the templater.
#
# Silent by default. Set the DOCX_TEMPLATER_DEBUG environment variable to print
# each message to stdout instead of editing this file.
#
# str - the message to print.
#
# Always returns nil.
def log(str)
  puts str if ENV['DOCX_TEMPLATER_DEBUG']
end

module DocxTemplater
# Fills the XML of a Word document (the contents of word/document.xml) with values
# taken from a data hash.
#
# Markers understood, where KEY is the upcased data key:
#   $KEY$            - replaced by the XML-escaped value (non-array values)
#   #SUM:KEY#        - replaced by the number of entries (array values)
#   #BEGIN_ROW:KEY#  - table row opening a repeated section (array values)
#   #END_ROW:KEY#    - table row closing that section
#   $EACH:FIELD$     - inside a repeated row, replaced by entry[:field]
class TemplateProcesser
  attr_reader :data

  # data is expected to be a hash of symbols => string or arrays of hashes.
  def initialize(data)
    @data = data
  end

  # naive and inefficient templating.
  #
  # document - String with the template XML. It is never modified, so frozen or
  #            shared strings are safe to pass in.
  #
  # Returns the rendered String.
  # Raises RuntimeError when the row markers of an array key cannot be located.
  def render(document)
    rendered = document
    data.each do |key, value|
      marker_key = key.to_s.upcase
      if value.is_a?(Array)
        rendered = enter_multiple_values(rendered, key)
        rendered = rendered.gsub("#SUM:#{marker_key}#") { value.count.to_s }
      else
        # Block form: a replacement *string* would have its backslashes
        # interpreted (\0, \1, \\ ...), corrupting values that contain them.
        rendered = rendered.gsub("$#{marker_key}$") { safe(value) }
      end
    end
    rendered
  end

  private

  # Escapes the characters that are not allowed verbatim in XML text.
  # '&' is handled first so the entities produced for '<' and '>' stay intact.
  def safe(text)
    text.to_s.gsub('&', '&amp;').gsub('>', '&gt;').gsub('<', '&lt;')
  end

  # Expands the table rows found between the BEGIN_ROW and END_ROW marker rows of
  # +key+ once per entry of data[key], then removes the marker and template rows.
  #
  # Returns the re-serialized document as a String.
  def enter_multiple_values(document, key)
    log "enter_multiple_values for: #{key}"
    # TODO ideally we would not re-parse xml doc every time
    xml = Nokogiri::XML(document)

    # Often these tags in Word are broken up with various xml entries. Probably need to manually fix word/document.xml before saving the template.
    begin_row = "#BEGIN_ROW:#{key.to_s.upcase}#"
    end_row = "#END_ROW:#{key.to_s.upcase}#"
    begin_row_template = xml.xpath("//w:tr[contains(., '#{begin_row}')]", xml.root.namespaces).first
    end_row_template = xml.xpath("//w:tr[contains(., '#{end_row}')]", xml.root.namespaces).first
    log "begin_row_template: #{begin_row_template.to_s}"
    log "end_row_template: #{end_row_template.to_s}"
    raise "unmatched template markers: #{begin_row} nil: #{begin_row_template.nil?}, #{end_row} nil: #{end_row_template.nil?}. This could be because word messed with format, or xml became invalid. See README." unless begin_row_template && end_row_template

    # Collect the siblings between the two marker rows, last one first.
    row_templates = []
    row = begin_row_template.next_sibling
    until row == end_row_template
      # Running out of siblings means the END row is not a later sibling of the
      # BEGIN row (same row, wrong order, or another table); fail with a clear message.
      raise "#{end_row} must be in a table row that follows #{begin_row} in the same table. See README." if row.nil?
      row_templates.unshift(row)
      row = row.next_sibling
    end
    log "row_templates: (#{row_templates.count}) #{row_templates.map(&:to_s).inspect}"

    # Every copy is inserted directly after the BEGIN row, so both the entries and
    # the rows are walked back to front to make them come out in the right order.
    data[key].reverse_each do |each_data|
      log "each_data: #{each_data.inspect}"

      # dup so we have new nodes to append
      row_templates.map(&:dup).each do |new_row|
        log " new_row: #{new_row}"
        # Single pass with the block form: values are inserted verbatim (no
        # backslash interpretation) and are not re-scanned for further markers.
        innards = new_row.inner_html.gsub(/\$EACH:([^\$]+)\$/) do
          each_key = Regexp.last_match(1)
          log " each_key: #{each_key}"
          safe(each_data[each_key.downcase.to_sym])
        end
        # change all the internals of the new node, even if we did not template
        new_row.inner_html = innards

        # add this row after the template's start
        begin_row_template.add_next_sibling(new_row)
      end
    end

    # delete unwanted template rows from document
    (row_templates + [begin_row_template, end_row_template]).each(&:unlink)
    xml.to_s
  end
end

# Creates a new word document from an existing docx file. (You may need to modify that docx since word
# may munge your templating markup with in-between XML nodes.)
class DocxCreator
  attr_reader :template_path, :data, :template_parser

  # template_path - path of the .docx file used as the template.
  # data          - hash of template values; also handed to a TemplateProcesser.
  def initialize(template_path, data)
    @template_path = template_path
    # Back the :data reader declared above; without this it always returned nil.
    @data = data
    @template_parser = TemplateProcesser.new(data)
  end

  # Renders the template and writes the resulting archive to +file_name+.
  #
  # Returns whatever the File.open block returns (the result of IO#write).
  def generate_docx_file(file_name = "output_#{Time.now.strftime("%Y-%m-%d_%H%M")}.docx")
    buffer = generate_docx_bytes
    # 'wb': a .docx is a zip archive, so no newline or encoding translation may be applied.
    File.open(file_name, 'wb') { |f| f.write(buffer) }
  end

  # Builds the rendered archive in memory.
  #
  # Returns the String buffer the output archive was created in.
  def generate_docx_bytes
    buffer = ''

    # Open the existing template file (no temp files created, just read it)
    Zip::Archive.open(template_path) do |template|
      n_entries = template.num_files

      # Then create a new file with the output kept in-memory.
      Zip::Archive.open_buffer(buffer, Zip::CREATE) do |archive|
        n_entries.times do |i|
          entry_name = template.get_name(i)
          template.fopen(entry_name) do |f|
            archive.add_buffer(entry_name, copy_or_template(entry_name, f))
          end
        end
      end
    end
    buffer
  end

  private

  # Returns the contents of one archive entry; only 'word/document.xml' is run
  # through the template parser, every other entry is copied unchanged.
  def copy_or_template(entry_name, f)
    contents = f.read
    # Inside the word document archive is one file with contents of the actual document. Modify it.
    entry_name == 'word/document.xml' ? template_parser.render(contents) : contents
  end
end
end
36 changes: 36 additions & 0 deletions spec/integration_spec.rb
@@ -0,0 +1,36 @@
require 'rspec'
require 'render_docx_template'
require 'template_processor_spec'

# End-to-end check: renders ExampleTemplate.docx with the shared test data and
# inspects the archive that comes out.
describe "integration test" do
  let(:data) { DocxTemplater::TestData::DATA }
  # Both paths are anchored to this spec's directory, so the suite behaves the
  # same from any working directory.
  let(:input_file) { File.expand_path('../ExampleTemplate.docx', File.dirname(__FILE__)) }
  let(:output_file) { File.expand_path('tmp/IntegrationTestOutput.docx', File.dirname(__FILE__)) }

  before do
    # On a clean checkout neither the output directory nor a previous output exists.
    output_dir = File.dirname(output_file)
    Dir.mkdir(output_dir) unless File.directory?(output_dir)
    File.delete(output_file) if File.exist?(output_file)
  end

  context "should process in incoming docx" do
    it "generates a valid zip file (.docx)" do
      DocxTemplater::DocxCreator.new(input_file, data).generate_docx_file(output_file)

      # Opening the archive raises if the generated file is not a readable zip.
      archive = Zip::Archive.open(output_file)
      archive.close

      # Argument list instead of a shell string, so paths with spaces still work.
      cmd = ['open', output_file]
      puts "************************************"
      puts " >>> Only will work on mac <<<"
      puts "NOW attempting to open created file in Word."
      puts " will run '#{cmd.join(' ')}'"
      puts "************************************"

      system(*cmd)
    end

    it "generates a file with the same contents as the input docx" do
      input_entries = Zip::Archive.open(input_file) { |z| z.map(&:name) }
      DocxTemplater::DocxCreator.new(input_file, data).generate_docx_file(output_file)
      output_entries = Zip::Archive.open(output_file) { |z| z.map(&:name) }

      input_entries.should == output_entries
    end
  end
end
192 changes: 192 additions & 0 deletions spec/template_processor_spec.rb
@@ -0,0 +1,192 @@
require 'rspec'
require 'render_docx_template'

module DocxTemplater
  # Fixture values shared by the specs. Plain keys are rendered as single
  # values; :roster and :event_reports are arrays of per-row hashes.
  module TestData
    # One [name, age, attendence] triple per student, expanded into hashes below.
    students = [
      ['Sally', 12, '100%'],
      [:Xiao, 10, '94%'],
      ['Bryan', 13, '100%'],
      ['Larry', 11, '90%'],
      ['Kumar', 12, '76%'],
      ['Amber', 11, '100%'],
      ['Isaiah', 12, '89%'],
      ['Omar', 12, '99%'],
      ['Xi', 11, '20%'],
      ['Noushin', 12, '100%']
    ].map do |name, age, attendence|
      { :name => name, :age => age, :attendence => attendence }
    end

    # One [name, notes] pair per event report.
    events = [
      ['Science Museum Field Trip', 'PTA sponsored event. Spoke to Astronaut with HAM radio.'],
      ['Wilderness Center Retreat', '2 days hiking for charity:water fundraiser, $10,200 raised.']
    ].map do |name, notes|
      { :name => name, :notes => notes }
    end

    DATA = {
      :teacher => "Priya Vora",
      :building => "Building #14",
      :classroom => :'Rm 202',
      :district => "Washington County Public Schools",
      :senority => 12.25,
      :roster => students,
      :event_reports => events,
      :created_at => "11-12-03 02:01"
    }
  end
end

# Unit specs for the XML templating step. Presence checks use the include
# matcher so a missing string is reported as a failed expectation instead of a
# NoMethodError on nil.
describe DocxTemplater::TemplateProcesser do
  let(:data) { Marshal.load(Marshal.dump(DocxTemplater::TestData::DATA)) } # deep copy
  let(:xml) { File.read('spec/word/document.xml') }
  let(:parser) { DocxTemplater::TemplateProcesser.new(data) }

  context "valid xml" do
    it "should render and still be valid XML" do
      Nokogiri::XML.parse(xml).should be_xml
      out = parser.render(xml)
      Nokogiri::XML.parse(out).should be_xml
    end

    it "should accept non-ascii characters" do
      data[:teacher] = "老师"
      out = parser.render(xml)
      out.should include("老师")
      Nokogiri::XML.parse(out).should be_xml
    end

    it "should escape as necessary invalid xml characters" do
      data[:building] = "23rd & A #1 floor"
      data[:classroom] = "--> 201 <!--"
      data[:roster][0][:name] = "<#Ai & Bo>"
      out = parser.render(xml)

      Nokogiri::XML.parse(out).should be_xml
      out.should include("23rd &amp; A #1 floor")
      out.should include("--&gt; 201 &lt;!--")
      out.should include("&lt;#Ai &amp; Bo&gt;")
    end
  end

  context "unmatched begin and end row templates" do
    it "should not raise" do
      # Well-formed fixture: every BEGIN_ROW marker has its END_ROW marker.
      xml = <<-EOF
        <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
          <w:body>
            <w:tbl>
              <w:tr><w:tc>
                <w:p>
                  <w:r><w:t>#BEGIN_ROW:#{:roster.to_s.upcase}#</w:t></w:r>
                </w:p>
              </w:tc></w:tr>
              <w:tr><w:tc>
                <w:p>
                  <w:r><w:t>#END_ROW:#{:roster.to_s.upcase}#</w:t></w:r>
                </w:p>
              </w:tc></w:tr>
              <w:tr><w:tc>
                <w:p>
                  <w:r><w:t>#BEGIN_ROW:#{:event_reports.to_s.upcase}#</w:t></w:r>
                </w:p>
              </w:tc></w:tr>
              <w:tr><w:tc>
                <w:p>
                  <w:r><w:t>#END_ROW:#{:event_reports.to_s.upcase}#</w:t></w:r>
                </w:p>
              </w:tc></w:tr>
            </w:tbl>
          </w:body>
        </w:document>
      EOF
      expect { parser.render(xml) }.to_not raise_error
    end

    it "should raise an exception" do
      # Same fixture without the END_ROW marker for event_reports.
      xml = <<-EOF
        <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
          <w:body>
            <w:tbl>
              <w:tr><w:tc>
                <w:p>
                  <w:r><w:t>#BEGIN_ROW:#{:roster.to_s.upcase}#</w:t></w:r>
                </w:p>
              </w:tc></w:tr>
              <w:tr><w:tc>
                <w:p>
                  <w:r><w:t>#END_ROW:#{:roster.to_s.upcase}#</w:t></w:r>
                </w:p>
              </w:tc></w:tr>
              <w:tr><w:tc>
                <w:p>
                  <w:r><w:t>#BEGIN_ROW:#{:event_reports.to_s.upcase}#</w:t></w:r>
                </w:p>
              </w:tc></w:tr>
            </w:tbl>
          </w:body>
        </w:document>
      EOF
      expect { parser.render(xml) }.to raise_error(/#END_ROW:EVENT_REPORTS# nil: true/)
    end
  end

  it "should replace all simple keys with values" do
    # Work on the key list: iterating the Hash with a single block parameter
    # yields [key, value] pairs, which made the post-render checks vacuous.
    scalar_keys = data.reject { |_key, value| value.is_a?(Array) }.keys
    scalar_keys.each do |key|
      xml.should include("$#{key.to_s.upcase}$")
      xml.should_not include(data[key].to_s)
    end
    out = parser.render(xml)

    scalar_keys.each do |key|
      out.should_not include("$#{key.to_s.upcase}$")
      out.should include(data[key].to_s)
    end
  end

  it "should replace all array keys with values" do
    xml.should include("#BEGIN_ROW:")
    xml.should include("#END_ROW:")
    xml.should include("$EACH:")

    out = parser.render(xml)

    out.should_not include("#BEGIN_ROW:")
    out.should_not include("#END_ROW:")
    out.should_not include("$EACH:")

    [:roster, :event_reports].each do |key|
      data[key].each do |row|
        row.values.map(&:to_s).each do |row_value|
          out.should include(row_value)
        end
      end
    end
  end

  it "shold render students names in the same order as the data" do
    out = parser.render(xml)
    out.should include('Sally')
    out.should include('Kumar')
    out.index('Kumar').should > out.index('Sally')
  end

  it "shold render event reports names in the same order as the data" do
    out = parser.render(xml)
    out.should include('Science Museum Field Trip')
    out.should include('Wilderness Center Retreat')
    out.index('Wilderness Center Retreat').should > out.index('Science Museum Field Trip')
  end

  it "should render 2-line event reports in same order as docx" do
    event_reports_starting_at = xml.index("#BEGIN_ROW:EVENT_REPORTS#")
    event_reports_starting_at.should_not be_nil
    xml.index("$EACH:NAME$", event_reports_starting_at).should > event_reports_starting_at
    xml.index("$EACH:NOTES$", event_reports_starting_at).should > event_reports_starting_at
    xml.index("$EACH:NOTES$", event_reports_starting_at).should > xml.index("$EACH:NAME$", event_reports_starting_at)

    out = parser.render(xml)
    out.index('PTA sponsored event. Spoke to Astronaut with HAM radio.').should > out.index('Science Museum Field Trip')
  end

  it "should render sums of input data" do
    xml.should include("#SUM")
    out = parser.render(xml)
    out.should_not include("#SUM")
    out.should include("#{data[:roster].count} Students")
    out.should include("#{data[:event_reports].count} Events")
  end
end

0 comments on commit 1729c9f

Please sign in to comment.