From 70aa8d5961b6e742c424d25fed6d4615a0d7e42b Mon Sep 17 00:00:00 2001 From: Arpit Jalan Date: Thu, 3 Aug 2017 12:45:03 +0530 Subject: [PATCH] FIX: do not use canonical link for Discourse topic/post https://meta.discourse.org/t/incorrect-or-failing-oneboxes-for-links-to-other-discourse-instances/67351/7?u=techapj --- lib/onebox/engine/standard_embed.rb | 15 +- lib/onebox/version.rb | 2 +- spec/fixtures/discourse_topic.response | 344 ++++++++++++++++++ spec/fixtures/discourse_topic_reply.response | 344 ++++++++++++++++++ .../engine/whitelisted_generic_onebox_spec.rb | 43 ++- 5 files changed, 730 insertions(+), 18 deletions(-) create mode 100644 spec/fixtures/discourse_topic.response create mode 100644 spec/fixtures/discourse_topic_reply.response diff --git a/lib/onebox/engine/standard_embed.rb b/lib/onebox/engine/standard_embed.rb index 51bdc429..8ada8b7a 100644 --- a/lib/onebox/engine/standard_embed.rb +++ b/lib/onebox/engine/standard_embed.rb @@ -58,11 +58,13 @@ def html_doc response = (Onebox::Helpers.fetch_response(url, nil, nil, headers) rescue nil) doc = Nokogiri::HTML(response) - # prefer canonical link - canonical_link = doc.at('//link[@rel="canonical"]/@href') - if canonical_link && "#{URI(canonical_link).host}#{URI(canonical_link).path}" != "#{URI(url).host}#{URI(url).path}" - response = (Onebox::Helpers.fetch_response(canonical_link, nil, nil, headers) rescue nil) - doc = Nokogiri::HTML(response) if response + unless skip_canonical_link + # prefer canonical link + canonical_link = doc.at('//link[@rel="canonical"]/@href') + if canonical_link && "#{URI(canonical_link).host}#{URI(canonical_link).path}" != "#{URI(url).host}#{URI(url).path}" + response = (Onebox::Helpers.fetch_response(canonical_link, nil, nil, headers) rescue nil) + doc = Nokogiri::HTML(response) if response + end end @html_doc = doc @@ -123,6 +125,9 @@ def get_twitter twitter end + def skip_canonical_link + WhitelistedGenericOnebox.probable_discourse(URI(url)) + end end end end diff --git a/lib/onebox/version.rb b/lib/onebox/version.rb index 16161192..7ba2b786 100644 --- a/lib/onebox/version.rb +++ b/lib/onebox/version.rb @@ -1,3 +1,3 @@ module Onebox - VERSION = "1.8.16" + VERSION = "1.8.17" end diff --git a/spec/fixtures/discourse_topic.response b/spec/fixtures/discourse_topic.response new file mode 100644 index 00000000..cae4899e --- /dev/null +++ b/spec/fixtures/discourse_topic.response @@ -0,0 +1,344 @@ + + + + + + Congratulations, most stars in 2013 GitHub Octoverse! - praise - Discourse Meta + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + diff --git a/spec/fixtures/discourse_topic_reply.response b/spec/fixtures/discourse_topic_reply.response new file mode 100644 index 00000000..69d227ff --- /dev/null +++ b/spec/fixtures/discourse_topic_reply.response @@ -0,0 +1,344 @@ + + + + + + Congratulations, most stars in 2013 GitHub Octoverse! - praise - Discourse Meta + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + diff --git a/spec/lib/onebox/engine/whitelisted_generic_onebox_spec.rb b/spec/lib/onebox/engine/whitelisted_generic_onebox_spec.rb index 12c63240..65043874 100644 --- a/spec/lib/onebox/engine/whitelisted_generic_onebox_spec.rb +++ b/spec/lib/onebox/engine/whitelisted_generic_onebox_spec.rb @@ -97,20 +97,39 @@ def generic_html end end - describe "uses canonical link" do - let(:mobile_url) { "http://m.imdb.com/title/tt0944947" } - let(:canonical_url) { "http://www.imdb.com/title/tt0944947/" } - before do - fake(mobile_url, response('imdb_mobile')) - fake(canonical_url, response('imdb')) + describe 'canonical link' do + context 'uses canonical link if available' do + let(:mobile_url) { "http://m.imdb.com/title/tt0944947" } + let(:canonical_url) { "http://www.imdb.com/title/tt0944947/" } + before do + fake(mobile_url, response('imdb_mobile')) + fake(canonical_url, response('imdb')) + end + + it 'fetches opengraph data from canonical link' do + onebox = described_class.new(mobile_url) + expect(onebox.to_html).not_to be_nil + expect(onebox.to_html).to include("Game of Thrones") + expect(onebox.to_html).to include("Nine noble families fight for control over the mythical lands of Westeros") + expect(onebox.to_html).to include("https://images-na.ssl-images-amazon.com/images/M/MV5BMjE3NTQ1NDg1Ml5BMl5BanBnXkFtZTgwNzY2NDA0MjI@._V1_UY1200_CR90,0,630,1200_AL_.jpg") + end end - it 'fetches opengraph data from canonical link' do - onebox = described_class.new(mobile_url) - expect(onebox.to_html).not_to be_nil - expect(onebox.to_html).to include("Game of Thrones") - expect(onebox.to_html).to include("Nine noble families fight for control over the mythical lands of Westeros") - expect(onebox.to_html).to include("https://images-na.ssl-images-amazon.com/images/M/MV5BMjE3NTQ1NDg1Ml5BMl5BanBnXkFtZTgwNzY2NDA0MjI@._V1_UY1200_CR90,0,630,1200_AL_.jpg") + context 'does not use canonical link for Discourse topics' do + let(:discourse_topic_url) { "https://meta.discourse.org/t/congratulations-most-stars-in-2013-github-octoverse/12483" } + let(:discourse_topic_reply_url) { "https://meta.discourse.org/t/congratulations-most-stars-in-2013-github-octoverse/12483/2" } + before do + fake(discourse_topic_url, response('discourse_topic')) + fake(discourse_topic_reply_url, response('discourse_topic_reply')) + end + + it 'fetches opengraph data from original link' do + onebox = described_class.new(discourse_topic_reply_url) + expect(onebox.to_html).not_to be_nil + expect(onebox.to_html).to include("Congratulations, most stars in 2013 GitHub Octoverse!") + expect(onebox.to_html).to include("Thanks for that link and thank you -- and everyone else who is contributing to the project!") + expect(onebox.to_html).to include("https://cdn-enterprise.discourse.org/meta/user_avatar/meta.discourse.org/codinghorror/200/5297_1.png") + end end end