Skip to content

Commit

Permalink
FIX: wasn't extracting links to quoted posts
Browse files Browse the repository at this point in the history
  • Loading branch information
ZogStriP committed Feb 6, 2017
1 parent ceee2a5 commit ba11548
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 44 deletions.
4 changes: 2 additions & 2 deletions Gemfile.lock
Expand Up @@ -206,7 +206,7 @@ GEM
omniauth-twitter (1.3.0)
omniauth-oauth (~> 1.1)
rack
onebox (1.7.8)
onebox (1.7.9)
fast_blank (>= 1.0.0)
htmlentities (~> 4.3.4)
moneta (~> 0.8)
Expand Down Expand Up @@ -476,4 +476,4 @@ DEPENDENCIES
unicorn

BUNDLED WITH
1.14.2
1.14.3
9 changes: 9 additions & 0 deletions lib/onebox/discourse_onebox_sanitize_config.rb
@@ -0,0 +1,9 @@
class Sanitize
module Config

DISCOURSE_ONEBOX ||= freeze_config merge(ONEBOX,
attributes: merge(ONEBOX[:attributes], 'aside' => [:data])
)

end
end
2 changes: 1 addition & 1 deletion lib/onebox/engine/discourse_local_onebox.rb
Expand Up @@ -69,7 +69,7 @@ def topic_html(route)
first_post = topic.ordered_posts.first

args = {
topic: topic.id,
topic_id: topic.id,
avatar: PrettyText.avatar_img(topic.user.avatar_template, "tiny"),
original_url: @url,
title: PrettyText.unescape_emoji(CGI::escapeHTML(topic.title)),
Expand Down
2 changes: 1 addition & 1 deletion lib/onebox/templates/discourse_topic_onebox.hbs
@@ -1,4 +1,4 @@
<aside class='quote topic-onebox' data-post="1" data-topic="{{topic}}">
<aside class='quote' data-post="1" data-topic="{{topic_id}}">
<div class='title'>
{{{avatar}}}
<a href="{{original_url}}">{{{title}}}</a> {{{category_html}}}
Expand Down
5 changes: 3 additions & 2 deletions lib/oneboxer.rb
@@ -1,3 +1,4 @@
require_dependency "./lib/onebox/discourse_onebox_sanitize_config"
Dir["#{Rails.root}/lib/onebox/engine/*_onebox.rb"].sort.each { |f| require f }

module Oneboxer
Expand Down Expand Up @@ -142,8 +143,8 @@ def self.onebox_raw(url)
Rails.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do
uri = URI(url) rescue nil
return blank_onebox if uri.blank? || SiteSetting.onebox_domains_blacklist.include?(uri.hostname)

r = Onebox.preview(url, cache: {}, max_width: 695)
options = { cache: {}, max_width: 695, sanitize_config: Sanitize::Config::DISCOURSE_ONEBOX }
r = Onebox.preview(url, options)
{ onebox: r.to_s, preview: r.try(:placeholder_html).to_s }
end
rescue => e
Expand Down
46 changes: 19 additions & 27 deletions lib/pretty_text.rb
Expand Up @@ -270,44 +270,36 @@ def self.add_rel_nofollow_to_user_content(doc)
end
end

class DetectedLink
attr_accessor :is_quote, :url

def initialize(url, is_quote=false)
@url = url
@is_quote = is_quote
end
end

class DetectedLink < Struct.new(:url, :is_quote); end

def self.extract_links(html)
links = []
doc = Nokogiri::HTML.fragment(html)

# remove href inside quotes & elided part
doc.css("aside.quote:not(.topic-onebox) a, .elided a").each { |l| l["href"] = "" }
doc.css("aside.quote a, .elided a").each { |a| a["href"] = "" }

# extract all links from the post
doc.css("a").each { |l|
unless l["href"].blank? || "#".freeze == l["href"][0]
links << DetectedLink.new(l["href"])
# extract all links
doc.css("a").each do |a|
if a["href"].present? && a["href"][0] != "#".freeze
links << DetectedLink.new(a["href"], false)
end
}

# extract links to quotes
doc.css("aside.quote[data-topic]").each do |a|
topic_id = a['data-topic']
end

url = "/t/topic/#{topic_id}"
if post_number = a['data-post']
url << "/#{post_number}"
# extract quotes
doc.css("aside.quote[data-topic]").each do |aside|
if aside["data-topic"].present?
url = "/t/topic/#{aside["data-topic"]}"
url << "/#{aside["data-post"]}" if aside["data-post"].present?
links << DetectedLink.new(url, true)
end

links << DetectedLink.new(url, true)
end

# Extract Youtube links
doc.css("div[data-youtube-id]").each do |d|
links << DetectedLink.new("https://www.youtube.com/watch?v=#{d['data-youtube-id']}", false)
# extract Youtube links
doc.css("div[data-youtube-id]").each do |div|
if div["data-youtube-id"].present?
links << DetectedLink.new("https://www.youtube.com/watch?v=#{div['data-youtube-id']}", false)
end
end

links
Expand Down
11 changes: 5 additions & 6 deletions spec/components/pretty_text_spec.rb
Expand Up @@ -225,12 +225,11 @@ def extract_urls(text)
<a href='http://body_and_quote.com'>http://useless2.com</a>
")

expect(links.map{|l| [l.url, l.is_quote]}.to_a.sort).to eq(
[["http://body_only.com",false],
["http://body_and_quote.com", false],
["/t/topic/1234",true]
].sort
)
expect(links.map { |l| [l.url, l.is_quote] }.sort).to eq([
["http://body_only.com", false],
["http://body_and_quote.com", false],
["/t/topic/1234", true],
].sort)
end

it "should not preserve tags in code blocks" do
Expand Down
20 changes: 15 additions & 5 deletions spec/jobs/process_post_spec.rb
Expand Up @@ -9,9 +9,7 @@

context 'with a post' do

let(:post) do
Fabricate(:post)
end
let(:post) { Fabricate(:post) }

it 'does not erase posts when CookedPostProcessor malfunctions' do
# Look kids, an actual reason why you want to use mocks
Expand All @@ -35,7 +33,6 @@
end

it 'processes posts' do

post = Fabricate(:post, raw: "<img src='#{Discourse.base_url_no_prefix}/awesome/picture.png'>")
expect(post.cooked).to match(/http/)

Expand All @@ -51,7 +48,20 @@
expect { Jobs::ProcessPost.new.execute(post_id: post.id) }.to change { TopicLink.count }.by(1)
end

end
it "extracts links to quoted posts" do
quoted_post = Fabricate(:post, raw: "This is a post with a link to https://www.discourse.org", post_number: 42)
post.update_columns(raw: "This quote is the best\n\n[quote=\"#{quoted_post.user.username}, topic:#{quoted_post.topic_id}, post:#{quoted_post.post_number}\"]#{quoted_post.excerpt}[/quote]")
# when creating a quote, we also create the reflexion link
expect { Jobs::ProcessPost.new.execute(post_id: post.id) }.to change { TopicLink.count }.by(2)
end

it "extracts links to oneboxed topics" do
oneboxed_post = Fabricate(:post)
post.update_columns(raw: "This post is the best\n\n#{oneboxed_post.full_url}")
# when creating a quote, we also create the reflexion link
expect { Jobs::ProcessPost.new.execute(post_id: post.id) }.to change { TopicLink.count }.by(2)
end

end

end

0 comments on commit ba11548

Please sign in to comment.