Skip to content

Commit

Permalink
Only block domains at the final destination (#15689)
Browse files Browse the repository at this point in the history
In an earlier PR, we decided that we only want to block a domain if 
the blocked domain in the SiteSetting is the final destination (/t/59305). That 
PR used `FinalDestination#get`. `resolve` however is used several places
 but blocks domains along the redirect chain when certain options are provided.

This commit changes the default options for `resolve` to not do that. Existing
users of `FinalDestination#resolve` are
- `Oneboxer#external_onebox`
- our onebox helper `fetch_html_doc`, which is used in amazon, standard embed 
and youtube
  - these folks already go through `Oneboxer#external_onebox` which already
  blocks correctly
  • Loading branch information
nattsw committed Jan 31, 2022
1 parent 30454b3 commit aac9f43
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 40 deletions.
2 changes: 1 addition & 1 deletion lib/final_destination.rb
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ def initialize(url, opts = nil)
@opts[:max_redirects] ||= 5
@opts[:lookup_ip] ||= lambda { |host| FinalDestination.lookup_ip(host) }

@ignored = @opts[:ignore_hostnames] || []
@limit = @opts[:max_redirects]

@ignored = []
if @limit > 0
ignore_redirects = [Discourse.base_url_no_prefix]

Expand Down
8 changes: 1 addition & 7 deletions lib/inline_oneboxer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def self.lookup(url, opts = nil)
if uri.present? &&
uri.hostname.present? &&
(always_allow || allowed_domains.include?(uri.hostname)) &&
!domain_is_blocked?(uri.hostname)
!Onebox::DomainChecker.is_blocked?(uri.hostname)
title = RetrieveTitle.crawl(url)
title = nil if title && title.length < MIN_TITLE_LENGTH
return onebox_for(url, title, opts)
Expand All @@ -73,12 +73,6 @@ def self.lookup(url, opts = nil)

private

def self.domain_is_blocked?(hostname)
SiteSetting.blocked_onebox_domains&.split('|').any? do |blocked|
hostname == blocked || hostname.end_with?(".#{blocked}")
end
end

def self.onebox_for(url, title, opts)
title = title && Emoji.gsub_emoji_to_unicode(title)
if title && opts[:post_number]
Expand Down
11 changes: 11 additions & 0 deletions lib/onebox/domain_checker.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# frozen_string_literal: true

module Onebox
class DomainChecker
def self.is_blocked?(hostname)
SiteSetting.blocked_onebox_domains&.split('|').any? do |blocked|
hostname == blocked || hostname.end_with?(".#{blocked}")
end
end
end
end
7 changes: 1 addition & 6 deletions lib/oneboxer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -379,10 +379,6 @@ def self.local_category_html(url, route)
end
end

def self.blocked_domains
SiteSetting.blocked_onebox_domains.split("|")
end

def self.preserve_fragment_url_hosts
@preserve_fragment_url_hosts ||= ['http://github.com']
end
Expand Down Expand Up @@ -420,7 +416,7 @@ def self.external_onebox(url, available_strategies = nil)
return error_box
end

return blank_onebox if uri.blank? || blocked_domains.any? { |hostname| uri.hostname.match?(hostname) }
return blank_onebox if uri.blank? || Onebox::DomainChecker.is_blocked?(uri.hostname)

onebox_options = {
max_width: 695,
Expand Down Expand Up @@ -538,7 +534,6 @@ def self.redis_oneboxer_strategy_key(hostname)
def self.get_final_destination_options(url, strategy = nil)
fd_options = {
ignore_redirects: ignore_redirects,
ignore_hostnames: blocked_domains,
force_get_hosts: force_get_hosts,
force_custom_user_agent_hosts: force_custom_user_agent_hosts,
preserve_fragment_url_hosts: preserve_fragment_url_hosts,
Expand Down
2 changes: 1 addition & 1 deletion lib/retrieve_title.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def self.fetch_title(url)
encoding = nil

fd.get do |_response, chunk, uri|
if (uri.present? && InlineOneboxer.domain_is_blocked?(uri.hostname))
if (uri.present? && Onebox::DomainChecker.is_blocked?(uri.hostname))
throw :done
end

Expand Down
82 changes: 59 additions & 23 deletions spec/components/oneboxer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -160,28 +160,54 @@ def preview(url, user = nil, category = nil, topic = nil)
end
end

it "does not crawl blocklisted URLs" do
SiteSetting.blocked_onebox_domains = "git.*.com|bitbucket.com"
url = 'https://github.com/discourse/discourse/commit/21b562852885f883be43032e03c709241e8e6d4f'
stub_request(:head, 'https://discourse.org/').to_return(status: 302, body: "", headers: { location: url })
context ".external_onebox" do
html = <<~HTML
<html>
<head>
<meta property="og:title" content="Cats">
<meta property="og:description" content="Meow">
</head>
<body>
<p>body</p>
</body>
<html>
HTML

expect(Oneboxer.external_onebox(url)[:onebox]).to be_empty
expect(Oneboxer.external_onebox('https://discourse.org/')[:onebox]).to be_empty
end
context "blacklisted domains" do

it "does not consider ignore_redirects domains as blocklisted" do
url = 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/'
stub_request(:head, url).to_return(status: 200, body: "", headers: {})
stub_request(:get, url).to_return(status: 200, body: "", headers: {})
it "does not return a onebox if redirect uri final destination is in blacklist" do
SiteSetting.blocked_onebox_domains = "kitten.com"

expect(Oneboxer.external_onebox(url)[:onebox]).to be_present
end
stub_request(:get, "http://cat.com/meow").to_return(status: 301, body: "", headers: { "location" => "https://kitten.com" })
stub_request(:head, "http://cat.com/meow").to_return(status: 301, body: "", headers: { "location" => "https://kitten.com" })

stub_request(:get, "https://kitten.com").to_return(status: 200, body: html, headers: {})
stub_request(:head, "https://kitten.com").to_return(status: 200, body: "", headers: {})

expect(Oneboxer.external_onebox("http://cat.com/meow")[:onebox]).to be_empty
expect(Oneboxer.external_onebox("https://kitten.com")[:onebox]).to be_empty
end

it "returns onebox if 'midway redirect' is blocked but final redirect uri is not blocked" do
SiteSetting.blocked_onebox_domains = "middle.com"

it "censors external oneboxes" do
Fabricate(:watched_word, action: WatchedWord.actions[:censor], word: "bad word")
stub_request(:get, "https://cat.com/start").to_return(status: 301, body: "a", headers: { "location" => "https://middle.com/midway" })
stub_request(:head, "https://cat.com/start").to_return(status: 301, body: "a", headers: { "location" => "https://middle.com/midway" })

url = 'https://example.com/'
stub_request(:any, url).to_return(status: 200, body: <<~HTML, headers: {})
stub_request(:head, "https://middle.com/midway").to_return(status: 301, body: "b", headers: { "location" => "https://cat.com/end" })

stub_request(:get, "https://cat.com/end").to_return(status: 200, body: html)
stub_request(:head, "https://cat.com/end").to_return(status: 200, body: "", headers: {})

expect(Oneboxer.external_onebox("https://cat.com/start")[:onebox]).to be_present
end
end

it "censors external oneboxes" do
Fabricate(:watched_word, action: WatchedWord.actions[:censor], word: "bad word")

url = 'https://example.com/'
stub_request(:any, url).to_return(status: 200, body: <<~HTML, headers: {})
<html>
<head>
<meta property="og:title" content="title with bad word">
Expand All @@ -191,13 +217,23 @@ def preview(url, user = nil, category = nil, topic = nil)
<p>content with bad word</p>
</body>
<html>
HTML
HTML

onebox = Oneboxer.external_onebox(url)
expect(onebox[:onebox]).to include('title with')
expect(onebox[:onebox]).not_to include('bad word')
expect(onebox[:preview]).to include('title with')
expect(onebox[:preview]).not_to include('bad word')
end

onebox = Oneboxer.external_onebox(url)
expect(onebox[:onebox]).to include('title with')
expect(onebox[:onebox]).not_to include('bad word')
expect(onebox[:preview]).to include('title with')
expect(onebox[:preview]).not_to include('bad word')
it "returns onebox" do
SiteSetting.blocked_onebox_domains = "not.me"

stub_request(:get, "https://its.me").to_return(status: 200, body: html)
stub_request(:head, "https://its.me").to_return(status: 200, body: "", headers: {})

expect(Oneboxer.external_onebox("https://its.me")[:onebox]).to be_present
end
end

it "uses the Onebox custom user agent on specified hosts" do
Expand Down
2 changes: 0 additions & 2 deletions spec/components/retrieve_title_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,6 @@
stub_request(:get, "https://wikipedia.com/amazing")
.to_return(status: 200, body: "<html><title>very amazing</title>", headers: {})

IPSocket.stubs(:getaddress).returns('100.2.3.4')
expect(RetrieveTitle.crawl("http://foobar.com/amazing")).to eq(nil)
end

Expand All @@ -126,7 +125,6 @@
stub_request(:get, "https://cat.com/meow")
.to_return(status: 200, body: "<html><title>very amazing</title>", headers: {})

IPSocket.stubs(:getaddress).returns('100.2.3.4')
expect(RetrieveTitle.crawl("http://foobar.com/amazing")).to eq("very amazing")
end
end
Expand Down
27 changes: 27 additions & 0 deletions spec/lib/onebox/domain_checker_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# frozen_string_literal: true

require "rails_helper"

describe Onebox::DomainChecker do
describe '.is_blocked?' do
before do
SiteSetting.blocked_onebox_domains = "api.cat.org|kitten.cloud"
end

describe "returns true when entirely matched" do
it { expect(described_class.is_blocked?("api.cat.org")).to be(true) }
it { expect(described_class.is_blocked?("kitten.cloud")).to be(true) }
it { expect(described_class.is_blocked?("api.dog.org")).to be(false) }
it { expect(described_class.is_blocked?("puppy.cloud")).to be(false) }
end

describe "returns true when ends with .<domain>" do
it { expect(described_class.is_blocked?("dev.api.cat.org")).to be(true) }
it { expect(described_class.is_blocked?(".api.cat.org")).to be(true) }
it { expect(described_class.is_blocked?("dev.kitten.cloud")).to be(true) }
it { expect(described_class.is_blocked?(".kitten.cloud")).to be(true) }
it { expect(described_class.is_blocked?("xapi.cat.org")).to be(false) }
it { expect(described_class.is_blocked?("xkitten.cloud")).to be(false) }
end
end
end

1 comment on commit aac9f43

@discoursebot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This commit has been mentioned on Discourse Meta. There might be relevant details there:

https://meta.discourse.org/t/users-not-able-to-login-in-when-invitation-has-expired/216814/8

Please sign in to comment.