Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
6337565
commit 79639d7
Showing
7 changed files
with
419 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
module Qiita
  module Markdown
    module Filters
      # Flattens a document into simple inline markup: complex elements
      # (mostly block-level ones) are unwrapped, and elements whose contents
      # are themselves complex are dropped together with their contents.
      #
      # The approach resembles the `Sanitize` filter, but the `sanitize` gem is
      # deliberately not used here:
      #
      # * Each filter should handle only its own responsibility, and this
      #   filter is _not_ about sanitization.
      #
      # * The `sanitize` gem automatically adds extra transformers even when
      #   only a few elements need cleaning up, and those would run again in
      #   the `Sanitize` filter later.
      #   https://github.com/rgrove/sanitize/blob/v3.1.2/lib/sanitize.rb#L77-L100
      class Simplify < HTML::Pipeline::Filter
        # Element names that are left in place by #clean_complex_markups.
        SIMPLE_ELEMENTS = %w(a b code em i ins q s samp span strike strong sub sup var)

        # Element names that are removed along with everything inside them.
        COMPLEX_CONTENT_ELEMENTS = %w(table)

        # Runs both simplification passes and returns the mutated document.
        #
        # @return [Object] the filter's `doc`, modified in place
        def call
          remove_complex_contents
          clean_complex_markups
          doc
        end

        private

        # Drops every element listed in COMPLEX_CONTENT_ELEMENTS, including
        # its contents.
        def remove_complex_contents
          doc.search(COMPLEX_CONTENT_ELEMENTS.join(",")).each do |element|
            element.remove
          end
        end

        # Unwraps every element that is not listed in SIMPLE_ELEMENTS: the
        # element is replaced by its own children, so its contents survive.
        def clean_complex_markups
          doc.traverse do |node|
            if node.element? && !SIMPLE_ELEMENTS.include?(node.name)
              node.replace(node.children)
            end
          end
        end
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
module Qiita
  module Markdown
    module Filters
      # A filter for truncating a document without breaking the document
      # structure.
      #
      # The `:length` and `:omission` options can be passed through the
      # `:truncate` context; anything not given falls back to DEFAULT_OPTIONS.
      #
      # @example
      #   Truncate.new(doc, truncate: { length: 50, omission: '... (continued)' })
      class Truncate < HTML::Pipeline::Filter
        DEFAULT_OPTIONS = {
          length: 100,
          omission: "…".freeze
        }.freeze

        # Walks the document, counting text characters. The text node in which
        # the limit is crossed is cut and suffixed with the omission; every
        # node visited after that point is removed.
        #
        # @return [Object] the filter's `doc`, modified in place
        def call
          @current_length = 0
          @previous_char_was_blank = false

          traverse(doc) do |node|
            if exceeded?
              node.remove
            elsif node.text?
              process_text_node(node)
            end
          end

          doc
        end

        private

        # Traverse the given node recursively in the depth-first order,
        # yielding a node _before_ its children.
        # Note that we cannot use Nokogiri::XML::Node#traverse
        # since it traverses the node's descendants _before_ the node itself.
        # https://github.com/sparklemotion/nokogiri/blob/v1.6.6.2/lib/nokogiri/xml/node.rb#L571-L574
        def traverse(node, &block)
          block.call(node)

          node.children.each do |child_node|
            traverse(child_node, &block)
          end
        end

        # @return [Boolean] whether the counted length has passed the limit
        def exceeded?
          @current_length > max_length
        end

        # Counts the characters of a text node and, once the limit is passed,
        # rewrites the node's content to a truncated prefix plus the omission.
        #
        # A blank character directly following another blank character is not
        # counted, so a run of whitespace counts as a single character.
        def process_text_node(node)
          text = node.content

          text.each_char.with_index do |char, index|
            current_char_is_blank = char.strip.empty?

            unless @previous_char_was_blank && current_char_is_blank
              @current_length += 1
            end

            @previous_char_was_blank = current_char_is_blank

            next unless exceeded?

            # Make room for the omission, but never let the kept length go
            # negative: a negative range bound would be interpreted by
            # String#slice as an offset from the END of the string and would
            # keep almost the whole node instead of cutting it.
            kept_length = [index - omission.size, 0].max
            node.content = text.slice(0, kept_length) + omission
            break
          end
        end

        # @return [Integer] the configured maximum length
        def max_length
          options[:length]
        end

        # @return [String] the configured omission, or an empty string when the
        #   option is explicitly nil
        def omission
          options[:omission] || "".freeze
        end

        # @return [Hash] DEFAULT_OPTIONS overridden by `context[:truncate]`
        def options
          @options ||= DEFAULT_OPTIONS.merge(context[:truncate] || {})
        end
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
module Qiita
  module Markdown
    # A processor for rendering a summary of markdown document. This simplifies
    # a document by removing complex markups and also truncates it to a
    # specific length without breaking the document structure.
    class SummaryProcessor < Processor
      # Filters used when none have been customized. Frozen so that the shared
      # list can never be altered through a single processor instance.
      DEFAULT_FILTERS = [
        Filters::Redcarpet,
        Filters::Simplify,
        HTML::Pipeline::EmojiFilter,
        Filters::Mention,
        Filters::Sanitize,
        Filters::Truncate
      ].freeze

      # Returns this processor's own filter list. It starts as a copy of
      # DEFAULT_FILTERS, so changes made to it stay local to this instance
      # instead of leaking into every other processor via the constant.
      #
      # @note Modify filters if you want.
      # @return [Array<HTML::Pipeline::Filter>]
      def filters
        # `dup` (unlike `clone`) yields an unfrozen copy that callers may edit.
        @filters ||= DEFAULT_FILTERS.dup
      end
    end
  end
end
Oops, something went wrong.