Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

Already on GitHub? Sign in to your account

Support Vietnamese when generating header ID #35

Closed
wants to merge 4 commits into
from
@@ -164,7 +164,21 @@ def extract_code_language!(attr)
# Uses the option +auto_id_prefix+: the value of this option is prepended to every generated
# ID.
def generate_id(str)
- gen_id = str.gsub(/^[^a-zA-Z]+/, '')
+ # icy (2013-feb-10): work on a copy of the string. A plain assignment
+ # such as `gen_id = str` would only alias the caller's string, so the
+ # in-place edits below would modify `str` itself and make `kramdown` fail.
+ gen_id = String.new(str)
+ if @options[:vietnamese_ids]
+ # Convert Vietnamese accents to non-accent version
+ # This will help the link readable
+ vntext_src = "áàảãạăắằẳẵặâấầẩẫậđéèẻẽẹêếềểễệíìỉĩịóòỏõọôốồổỗộơớờởỡợúùủũụưứừửữựýỳỷỹỵ"
+ vntext_dst = "aaaaaaaaaaaaaaaaadeeeeeeeeeeeiiiiioooooooooooooooouuuuuuuuuuuuyyyyy"
+ vntext_src += "ÁÀẢÃẠĂẮẰẲẴẶÂẤẦẨẪẬĐÉÈẺẼẸÊẾỀỂỄỆÍÌỈĨỊÓÒỎÕỌÔỐỒỔỖỘƠỚỜỞỠỢÚÙỦŨỤƯỨỪỬỮỰÝỲỶỸỴ"
+ vntext_dst += vntext_dst.upcase
+
+ gen_id.tr!(vntext_src, vntext_dst)
+ end
+ gen_id.gsub!(/^[^a-zA-Z]+/, '')
gen_id.tr!('^a-zA-Z0-9 -', '')
gen_id.tr!(' ', '-')
gen_id.downcase!
View
@@ -176,6 +176,23 @@ def self.simple_array_validator(val, name, size)
Used by: all converters
EOF
+ define(:vietnamese_ids, Boolean, true, <<EOF)
+Support Vietnamsese when generating header ID
+
+If this option is `true` (and when `auto_ids` is true) ID values in
+Vietnamese will be converted to a readable form (non-accent version
+of the origin text).
+
+This option should be used only if `auto_ids` is true.
+
+Example: "Đây-là-ví-dụ" will generate new ID `day-la-vi-du` which is very
+readable by Vietnamese. When this option is `false`, its ID should be
+`y-l-v-d` which is unreadable (non-sense ID).
+
+Default: true
+Used by: HTML/Latex convertor
+EOF
+
define(:auto_ids, Boolean, true, <<EOF)
Use automatic header ID generation
View
@@ -40,7 +40,7 @@ class TestFiles < Test::Unit::TestCase
next if !Kramdown::Converter.const_defined?(output_format[0..0].upcase + output_format[1..-1])
define_method('test_' + text_file.tr('.', '_') + "_to_#{output_format}") do
opts_file = File.join(File.dirname(text_file), 'options') if !File.exist?(opts_file)
- options = File.exist?(opts_file) ? YAML::load(File.read(opts_file)) : {:auto_ids => false, :footnote_nr => 1}
+ options = File.exist?(opts_file) ? YAML::load(File.read(opts_file)) : {:auto_ids => false, :footnote_nr => 1, :vietnamese_ids => false}
doc = Kramdown::Document.new(File.read(text_file), options)
assert_equal(File.read(output_file), doc.send("to_#{output_format}"))
end
@@ -57,14 +57,15 @@ class TestFiles < Test::Unit::TestCase
'test/testcases/span/03_codespan/highlighting.html', # bc of span elements inside code element
'test/testcases/block/04_header/with_auto_ids.html', # bc of auto_ids=true option
'test/testcases/block/04_header/header_type_offset.html', # bc of header_offset option
+ 'test/testcases/block/04_header/with_auto_ids_vietnamese.html', # bc of auto_ids=true option
]
Dir[File.dirname(__FILE__) + '/testcases/**/*.{html, htmlinput}'].each do |html_file|
next if EXCLUDE_HTML_FILES.any? {|f| html_file =~ /#{f}$/}
out_file = (html_file =~ /\.htmlinput$/ ? html_file.sub(/input$/, '') : html_file)
define_method('test_' + html_file.tr('.', '_') + "_to_html") do
opts_file = html_file.sub(/\.html$/, '.options')
opts_file = File.join(File.dirname(html_file), 'options') if !File.exist?(opts_file)
- options = File.exist?(opts_file) ? YAML::load(File.read(opts_file)) : {:auto_ids => false, :footnote_nr => 1}
+ options = File.exist?(opts_file) ? YAML::load(File.read(opts_file)) : {:auto_ids => false, :vietnamese_ids => false, :footnote_nr => 1}
doc = Kramdown::Document.new(File.read(html_file), options.merge(:input => 'html'))
assert_equal(tidy_output(File.read(out_file)), tidy_output(doc.to_html))
end
@@ -92,12 +93,14 @@ def tidy_output(out)
EXCLUDE_LATEX_FILES = ['test/testcases/span/01_link/image_in_a.text', # bc of image link
'test/testcases/span/01_link/imagelinks.text', # bc of image links
'test/testcases/span/04_footnote/markers.text', # bc of footnote in header
+ 'test/testcases/block/04_header/with_auto_ids_vietnamese.text', # bc of requirement of vietnamese support in latex
]
Dir[File.dirname(__FILE__) + '/testcases/**/*.text'].each do |text_file|
next if EXCLUDE_LATEX_FILES.any? {|f| text_file =~ /#{f}$/}
define_method('test_' + text_file.tr('.', '_') + "_to_latex_compilation") do
latex = Kramdown::Document.new(File.read(text_file),
:auto_ids => false, :footnote_nr => 1,
+ :vietnamese_ids => false,
:template => 'document').to_latex
result = IO.popen("latex -output-directory='#{Dir.tmpdir}' 2>/dev/null", 'r+') do |io|
io.write(latex)
@@ -125,6 +128,7 @@ def tidy_output(out)
'test/testcases/block/11_ial/simple.text', # bc of change of ordering of attributes in header
'test/testcases/span/extension/comment.text', # bc of comment text modifications (can this be avoided?)
'test/testcases/block/04_header/header_type_offset.text', # bc of header_offset being applied twice
+ 'test/testcases/block/04_header/with_auto_ids_vietnamese.text', # bc of auto_ids = true
]
Dir[File.dirname(__FILE__) + '/testcases/**/*.text'].each do |text_file|
next if EXCLUDE_TEXT_FILES.any? {|f| text_file =~ /#{f}$/}
@@ -133,7 +137,7 @@ def tidy_output(out)
html_file += '.19' if RUBY_VERSION >= '1.9' && File.exist?(html_file + '.19')
opts_file = text_file.sub(/\.text$/, '.options')
opts_file = File.join(File.dirname(text_file), 'options') if !File.exist?(opts_file)
- options = File.exist?(opts_file) ? YAML::load(File.read(opts_file)) : {:auto_ids => false, :footnote_nr => 1}
+ options = File.exist?(opts_file) ? YAML::load(File.read(opts_file)) : {:auto_ids => false, :vietnamese_ids => false, :footnote_nr => 1}
kdtext = Kramdown::Document.new(File.read(text_file), options).to_kramdown
html = Kramdown::Document.new(kdtext, options).to_html
assert_equal(tidy_output(File.read(html_file)), tidy_output(html))
@@ -158,14 +162,15 @@ def tidy_output(out)
'test/testcases/block/04_header/with_auto_ids.html', # bc of auto_ids=true option
'test/testcases/block/04_header/header_type_offset.html', # bc of header_offset option
'test/testcases/block/16_toc/toc_exclude.html', # bc of different attribute ordering
+ 'test/testcases/block/04_header/with_auto_ids_vietnamese.html', # need utf8 in tidy, auto_ids = true
]
Dir[File.dirname(__FILE__) + '/testcases/**/*.html'].each do |html_file|
next if EXCLUDE_HTML_KD_FILES.any? {|f| html_file =~ /#{f}$/}
define_method('test_' + html_file.tr('.', '_') + "_to_kramdown_to_html") do
- kd = Kramdown::Document.new(File.read(html_file), :input => 'html', :auto_ids => false, :footnote_nr => 1).to_kramdown
+ kd = Kramdown::Document.new(File.read(html_file), :input => 'html', :vietnamse_ids => false, :auto_ids => false, :footnote_nr => 1).to_kramdown
opts_file = html_file.sub(/\.html$/, '.options')
opts_file = File.join(File.dirname(html_file), 'options') if !File.exist?(opts_file)
- options = File.exist?(opts_file) ? YAML::load(File.read(opts_file)) : {:auto_ids => false, :footnote_nr => 1}
+ options = File.exist?(opts_file) ? YAML::load(File.read(opts_file)) : {:auto_ids => false, :vietnamse_ids => false, :footnote_nr => 1}
doc = Kramdown::Document.new(kd, options)
assert_equal(tidy_output(File.read(html_file)), tidy_output(doc.to_html))
end
@@ -177,7 +182,7 @@ def tidy_output(out)
# Generate test methods for asserting that converters don't modify the document tree.
Dir[File.dirname(__FILE__) + '/testcases/**/*.text'].each do |text_file|
opts_file = text_file.sub(/\.text$/, '.options')
- options = File.exist?(opts_file) ? YAML::load(File.read(opts_file)) : {:auto_ids => false, :footnote_nr => 1}
+ options = File.exist?(opts_file) ? YAML::load(File.read(opts_file)) : {:auto_ids => false, :vietnamese_ids => false, :footnote_nr => 1}
(Kramdown::Converter.constants.map {|c| c.to_sym} - [:Base, :RemoveHtmlTags]).each do |conv_class|
define_method("test_whether_#{conv_class}_modifies_tree_with_file_#{text_file.tr('.', '_')}") do
doc = Kramdown::Document.new(File.read(text_file), options)
@@ -1 +1,2 @@
:auto_ids: true
+:vietnamse_ids: false
@@ -0,0 +1 @@
+<h1 id="day-la-vi-du-voi-tieng-viet">Đây là ví dụ với tiếng Việt</h1>
@@ -0,0 +1,2 @@
+:auto_ids: true
+:vietnamese_ids: true
@@ -0,0 +1 @@
+# Đây là ví dụ với tiếng Việt