Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Added new option transliterated_header_ids

  • Loading branch information...
commit a2bcbabd56fa195a6e1664f25996cd7d1a03047a 1 parent b9f220c
Thomas Leitner authored
2  doc/converter/html.page
View
@@ -151,7 +151,7 @@ Here is an example:
The HTML converter supports the following options:
-{options: {items: [auto_ids, template, footnote_nr, entity_output, smart_quotes, toc_levels, enable_coderay, coderay_wrap, coderay_line_numbers, coderay_line_number_start, coderay_tab_width, coderay_bold_every, coderay_css, coderay_default_lang]}}
+{options: {items: [auto_ids, transliterated_header_ids, template, footnote_nr, entity_output, smart_quotes, toc_levels, enable_coderay, coderay_wrap, coderay_line_numbers, coderay_line_number_start, coderay_tab_width, coderay_bold_every, coderay_css, coderay_default_lang]}}
{include_file: doc/links.markdown}
2  doc/converter/latex.page
View
@@ -99,7 +99,7 @@ lists; code blocks that use the `listings` environment; tables; and math blocks.
The LaTeX converter supports the following options:
-{options: {items: [auto_ids, toc_levels, latex_headers, smart_quotes, template]}}
+{options: {items: [auto_ids, transliterated_header_ids, toc_levels, latex_headers, smart_quotes, template]}}
{include_file: doc/links.markdown}
2  lib/kramdown/converter/base.rb
View
@@ -8,6 +8,7 @@
#
require 'erb'
+require 'kramdown/utils'
module Kramdown
@@ -152,6 +153,7 @@ def extract_code_language!(attr)
# Uses the option +auto_id_prefix+: the value of this option is prepended to every generated
# ID.
def generate_id(str)
+ str = ::Kramdown::Utils::Unidecoder.decode(str) if @options[:transliterated_header_ids]
gen_id = str.gsub(/^[^a-zA-Z]+/, '')
gen_id.tr!('^a-zA-Z0-9 -', '')
gen_id.tr!(' ', '-')
12 lib/kramdown/options.rb
View
@@ -188,6 +188,18 @@ def self.simple_array_validator(val, name, size)
Used by: HTML/Latex converter
EOF
+ define(:transliterated_header_ids, Boolean, false, <<EOF)
+Transliterate the header text before generating the ID
+
+Only ASCII characters are used in headers IDs. This is not good for
+languages with many non-ASCII characters. By enabling this option
+the header text is transliterated to ASCII as good as possible so that
+the resulting header ID is more useful.
+
+Default: false
+Used by: HTML/Latex converter
+EOF
+
define(:parse_block_html, Boolean, false, <<EOF)
Process kramdown syntax in block HTML tags
1  lib/kramdown/utils.rb
View
@@ -18,6 +18,7 @@ module Utils
autoload :Entities, 'kramdown/utils/entities'
autoload :Html, 'kramdown/utils/html'
autoload :OrderedHash, 'kramdown/utils/ordered_hash'
+ autoload :Unidecoder, 'kramdown/utils/unidecoder'
# Treat +name+ as if it were snake cased (e.g. snake_case) and camelize it (e.g. SnakeCase).
def self.camelize(name)
43 lib/kramdown/utils/unidecoder.rb
View
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+#
+#--
+# Copyright (C) 2009-2013 Thomas Leitner <t_leitner@gmx.at>
+#
+# This file is part of kramdown which is licensed under the MIT.
+#++
+#
+# This file is based on code originally from the Stringex library and needs the data files from
+# Stringex to work correctly.
+
+module Kramdown
+ module Utils
+
+ # Provides the ability to transliterate Unicode strings into plain ASCII ones.
+ module Unidecoder
+
+ if RUBY_VERSION <= '1.8.6'
+ def self.decode(string)
+ string
+ end
+ else
+
+ require 'stringex/unidecoder' # dummy require so that we can get at the data files
+
+ # Transliterate string from Unicode into ASCII.
+ def self.decode(string)
+ string.gsub(/[^\x00-\x7f]/u) do |codepoint|
+ begin
+ unpacked = codepoint.unpack("U")[0]
+ Stringex::Unidecoder::CODEPOINTS["x%02x" % (unpacked >> 8)][unpacked & 255]
+ rescue
+ "?"
+ end
+ end
+ end
+
+ end
+
+ end
+
+ end
+end
7 test/test_files.rb
View
@@ -16,8 +16,12 @@
class TestFiles < Test::Unit::TestCase
+ EXCLUDE_KD_FILES = [('test/testcases/block/04_header/with_auto_ids.text' if RUBY_VERSION <= '1.8.6'), # bc of dep stringex not working
+ ].compact
+
# Generate test methods for kramdown-to-xxx conversion
Dir[File.dirname(__FILE__) + '/testcases/**/*.text'].each do |text_file|
+ next if EXCLUDE_KD_FILES.any? {|f| text_file =~ /#{f}$/}
basename = text_file.sub(/\.text$/, '')
opts_file = text_file.sub(/\.text$/, '.options')
(Dir[basename + ".*"] - [text_file, opts_file]).each do |output_file|
@@ -114,7 +118,8 @@ def tidy_output(out)
'test/testcases/block/11_ial/simple.text', # bc of change of ordering of attributes in header
'test/testcases/span/extension/comment.text', # bc of comment text modifications (can this be avoided?)
'test/testcases/block/04_header/header_type_offset.text', # bc of header_offset being applied twice
- ]
+ ('test/testcases/block/04_header/with_auto_ids.text' if RUBY_VERSION <= '1.8.6'), # bc of dep stringex not working
+ ].compact
Dir[File.dirname(__FILE__) + '/testcases/**/*.text'].each do |text_file|
next if EXCLUDE_TEXT_FILES.any? {|f| text_file =~ /#{f}$/}
define_method('test_' + text_file.tr('.', '_') + "_to_kramdown_to_html") do
2  test/testcases/block/04_header/with_auto_ids.html
View
@@ -17,3 +17,5 @@ <h1 id="section-1">33333</h1>
<h2 id="hallo-2">hallO</h2>
<h1>Header without ID</h1>
+
+<h1 id="transliterated-day-la-vi-du">Transliterated: Đây-là-ví-dụ</h1>
1  test/testcases/block/04_header/with_auto_ids.options
View
@@ -1 +1,2 @@
:auto_ids: true
+:transliterated_header_ids: true
2  test/testcases/block/04_header/with_auto_ids.text
View
@@ -20,3 +20,5 @@ Not now
# Header without ID
{: id=""}
+
+# Transliterated: Đây-là-ví-dụ
Please sign in to comment.
Something went wrong with that request. Please try again.