Skip to content

Commit

Permalink
nokogiri: switch to the nokogumbo-based html5 parser.
Browse files Browse the repository at this point in the history
  • Loading branch information
evazion committed Aug 31, 2021
1 parent 49d18e6 commit 38c9559
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 7 deletions.
10 changes: 5 additions & 5 deletions app/logical/d_text.rb
Expand Up @@ -49,7 +49,7 @@ def self.preprocess(dtext_messages)
# @param artists [Array<Artist>]
# @return [String] the HTML output
def self.postprocess(html, wiki_pages, tags, artists)
- fragment = Nokogiri::HTML.fragment(html)
+ fragment = Nokogiri::HTML5.fragment(html)

fragment.css("a.dtext-wiki-link").each do |node|
path = Addressable::URI.parse(node["href"]).path
Expand Down Expand Up @@ -174,7 +174,7 @@ def self.parse_mentions(text)
# @return [Array<String>] the list of wiki page names
def self.parse_wiki_titles(text)
html = DTextRagel.parse(text)
- fragment = Nokogiri::HTML.fragment(html)
+ fragment = Nokogiri::HTML5.fragment(html)

titles = fragment.css("a.dtext-wiki-link").map do |node|
title = node["href"][%r{\A/wiki_pages/(.*)\z}i, 1]
Expand All @@ -191,7 +191,7 @@ def self.parse_wiki_titles(text)
# @return [Array<String>] the list of external URLs
def self.parse_external_links(text)
html = DTextRagel.parse(text)
- fragment = Nokogiri::HTML.fragment(html)
+ fragment = Nokogiri::HTML5.fragment(html)

links = fragment.css("a.dtext-external-link").map { |node| node["href"] }
links.uniq
Expand Down Expand Up @@ -326,7 +326,7 @@ def self.to_markdown(dtext)
# @param html [String] the HTML input
# @return [String] the Markdown output
def self.html_to_markdown(html)
- html = Nokogiri::HTML.fragment(html)
+ html = Nokogiri::HTML5.fragment(html)

html.children.map do |node|
case node.name
Expand All @@ -349,7 +349,7 @@ def self.html_to_markdown(html)
# @param inline [Boolean] if true, convert <img> tags to plaintext
# @return [String] the DText output
def self.from_html(text, inline: false, &block)
- html = Nokogiri::HTML.fragment(text)
+ html = Nokogiri::HTML5.fragment(text)

dtext = html.children.map do |element|
block.call(element) if block.present?
Expand Down
2 changes: 1 addition & 1 deletion app/logical/sources/strategies/deviant_art.rb
Expand Up @@ -269,7 +269,7 @@ def page
resp = http.cache(1.minute).get(page_url_from_image_url, follow: {max_hops: 1})

if resp.status.success?
- Nokogiri::HTML(resp.body.to_s)
+ resp.parse
# the work was deleted
elsif resp.code == 404
nil
Expand Down
2 changes: 1 addition & 1 deletion app/logical/sources/strategies/tumblr.rb
Expand Up @@ -183,7 +183,7 @@ def find_largest(url, sizes: SIZES)
end

def inline_images
- html = Nokogiri::HTML.fragment(artist_commentary_desc)
+ html = Nokogiri::HTML5.fragment(artist_commentary_desc)
html.css("img").map { |node| node["src"] }
end

Expand Down

0 comments on commit 38c9559

Please sign in to comment.