Skip to content

Commit

Permalink
add image limit config ; add fallback mechanism. Fixes #15
Browse files Browse the repository at this point in the history
  • Loading branch information
gottfrois committed May 4, 2014
1 parent 05e6326 commit 2103691
Show file tree
Hide file tree
Showing 11 changed files with 187 additions and 90 deletions.
4 changes: 4 additions & 0 deletions lib/generators/templates/initializer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,8 @@
# Regex of words considered negative to rate website description.
#
# config.negative_regex = /combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget|modal/i

# Numbers of images to fetch. Fetching too many images will be slow.
#
# config.image_limit = 5
end
5 changes: 0 additions & 5 deletions lib/link_thumbnailer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,3 @@ def scraper
gem 'link_thumbnailer'
EOC

if defined?(Rails)
# require 'link_thumbnailer/engine'
# require 'link_thumbnailer/railtie'
end
4 changes: 3 additions & 1 deletion lib/link_thumbnailer/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ class Configuration

attr_accessor :redirect_limit, :blacklist_urls, :user_agent,
:verify_ssl, :http_timeout, :attributes, :graders,
:description_min_length, :positive_regex, :negative_regex
:description_min_length, :positive_regex, :negative_regex,
:image_limit

# Create a new instance.
#
Expand All @@ -50,6 +51,7 @@ def initialize
@description_min_length = 25
@positive_regex = /article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i
@negative_regex = /combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget|modal/i
@image_limit = 5
end

end
Expand Down
44 changes: 17 additions & 27 deletions lib/link_thumbnailer/scraper.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
require 'delegate'
require 'active_support/core_ext/object/blank'
require 'active_support/inflector'

require 'link_thumbnailer/models/website'
Expand Down Expand Up @@ -27,42 +28,31 @@ def initialize(source, url)

def call
config.attributes.each do |name|
scraper = scraper_class(name).new(document)
scraper.call(website, name.to_s)
scrapers.each do |scraper_prefix|
scraper = scraper_class(scraper_prefix, name).new(document)
scraper.call(website, name.to_s) if scraper.applicable?

break unless website.send(name).blank?
end
end

website
end

private

def scraper_class(name)
scraper_class_name(name.to_s.camelize).constantize
rescue NameError
raise ::LinkThumbnailer::ScraperInvalid, "scraper named '#{name}' does not exists."
end

def scraper_class_name(class_name)
if opengraph?
"::LinkThumbnailer::Scrapers::Opengraph::#{class_name}"
else
"::LinkThumbnailer::Scrapers::Default::#{class_name}"
end
def scrapers
[
"::LinkThumbnailer::Scrapers::Opengraph",
"::LinkThumbnailer::Scrapers::Default"
]
end

def opengraph?
meta.any? do |node|
opengraph_node?(node)
end
end

def opengraph_node?(node)
node.attribute('name').to_s.start_with?('og:') ||
node.attribute('property').to_s.start_with?('og:')
end

def meta
document.css('meta')
def scraper_class(prefix, name)
name = name.to_s.camelize
"#{prefix}::#{name}".constantize
rescue NameError
raise ::LinkThumbnailer::ScraperInvalid, "scraper named '#{prefix}::#{name}' does not exists."
end

def parser
Expand Down
4 changes: 4 additions & 0 deletions lib/link_thumbnailer/scrapers/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ def call(website, attribute_name)
website
end

def applicable?
true
end

private

def value
Expand Down
2 changes: 1 addition & 1 deletion lib/link_thumbnailer/scrapers/default/images.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class Images < ::LinkThumbnailer::Scrapers::Default::Base
private

def value
abs_urls.map { |uri| modelize(uri) }
abs_urls.each_with_index.take_while { |_, i| i < 5 }.map { |e| modelize(e.first) }
end

def urls
Expand Down
13 changes: 13 additions & 0 deletions lib/link_thumbnailer/scrapers/opengraph/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ module Scrapers
module Opengraph
class Base < ::LinkThumbnailer::Scrapers::Base

def applicable?
meta.any? { |node| opengraph_node?(node) }
end

private

def value
Expand All @@ -24,6 +28,15 @@ def attribute
"og:#{attribute_name}"
end

def opengraph_node?(node)
node.attribute('name').to_s.start_with?('og:') ||
node.attribute('property').to_s.start_with?('og:')
end

def meta
document.css('meta')
end

end
end
end
Expand Down
1 change: 0 additions & 1 deletion lib/link_thumbnailer/scrapers/opengraph/image.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
require 'pry'
require 'link_thumbnailer/scrapers/opengraph/base'

module LinkThumbnailer
Expand Down
17 changes: 11 additions & 6 deletions spec/configuration_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,17 @@

let(:instance) { described_class.new }

it { expect(instance.redirect_limit).to eq(3) }
it { expect(instance.user_agent).to eq('link_thumbnailer') }
it { expect(instance.verify_ssl).to eq(true) }
it { expect(instance.http_timeout).to eq(5) }
it { expect(instance.blacklist_urls).to_not be_empty }
it { expect(instance.attributes).to_not be_empty }
it { expect(instance.redirect_limit).to eq(3) }
it { expect(instance.user_agent).to eq('link_thumbnailer') }
it { expect(instance.verify_ssl).to eq(true) }
it { expect(instance.http_timeout).to eq(5) }
it { expect(instance.blacklist_urls).to_not be_empty }
it { expect(instance.attributes).to_not be_empty }
it { expect(instance.graders).to_not be_empty }
it { expect(instance.description_min_length).to eq(25) }
it { expect(instance.positive_regex).to_not be_nil }
it { expect(instance.negative_regex).to_not be_nil }
it { expect(instance.image_limit).to eq(5) }

describe '.config' do

Expand Down
89 changes: 40 additions & 49 deletions spec/scraper_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,87 +15,78 @@

describe '#call' do

let(:scraper_class) { double('scraper_class') }
let(:scraper) { double('scraper') }
let(:attributes) { [:foo] }
let(:action) { instance.call }
let(:prefix_1) { 'prefix_1' }
let(:prefix_2) { 'prefix_2' }
let(:scraper_class) { double('scraper_class') }
let(:applicable_scraper) { double(applicable?: true) }
let(:not_applicable_scraper) { double(applicable?: false) }
let(:attributes) { [:bar] }
let(:scrapers) { [prefix_1, prefix_2] }
let(:action) { instance.call }

before do
instance.stub_chain(:config, :attributes).and_return(attributes)

expect(scraper).to receive(:call).with(website, 'foo')
expect(scraper_class).to receive(:new).with(document).and_return(scraper)
expect(instance).to receive(:scraper_class).with(:foo).and_return(scraper_class)
instance.stub(:scrapers).and_return(scrapers)
end

it { expect(action).to eq(website) }

end

describe '#scraper_class_name' do

let(:class_name) { 'Foo' }
let(:action) { instance.send(:scraper_class_name, class_name) }

context 'when opengraph' do
context 'when first scraper not applicable' do

before do
instance.stub(:opengraph?).and_return(true)
expect(website).to receive(:bar).and_return('bar')
expect(applicable_scraper).to receive(:call).with(website, 'bar')
expect(scraper_class).to receive(:new).with(document).and_return(applicable_scraper)
expect(instance).to receive(:scraper_class).with(prefix_1, :bar).and_return(scraper_class)
end

it { expect(action).to eq("::LinkThumbnailer::Scrapers::Opengraph::#{class_name}") }
it { expect(action).to eq(website) }

end

context 'by default' do
context 'when first scraper applicable' do

before do
instance.stub(:opengraph?).and_return(false)
expect(website).to receive(:bar).and_return('bar')
expect(not_applicable_scraper).to_not receive(:call).with(website, 'bar')
expect(scraper_class).to receive(:new).with(document).and_return(not_applicable_scraper)
expect(instance).to receive(:scraper_class).with(prefix_1, :bar).and_return(scraper_class)
end

it { expect(action).to eq("::LinkThumbnailer::Scrapers::Default::#{class_name}") }
it { expect(action).to eq(website) }

end

end

describe '#opengraph?' do

let(:node) { double('node') }
let(:meta) { [node, node] }
let(:action) { instance.send(:opengraph?) }
context 'when all scrapers applicable and first one return a result' do

before do
instance.stub(:meta).and_return(meta)
end

context 'when all node is an opengraph' do
let(:valid_scraper) { double('scraper', applicable?: true) }
let(:not_valid_scraper) { double('scraper', applicable?: true) }

before do
instance.stub(:opengraph_node?).and_return(true, true)
expect(website).to receive(:bar).once.and_return('bar')
expect(valid_scraper).to receive(:call).once.with(website, 'bar')
expect(not_valid_scraper).to_not receive(:call).with(website, 'bar')
expect(scraper_class).to receive(:new).with(document).once.and_return(valid_scraper)
expect(instance).to receive(:scraper_class).with(prefix_1, :bar).and_return(scraper_class)
end

it { expect(action).to be_true }
it { expect(action).to eq(website) }

end

context 'when any node is an opengraph' do

before do
instance.stub(:opengraph_node?).and_return(true, false)
end

it { expect(action).to be_true }

end
context 'when all scrapers applicable but first one do not return any result' do

context 'when no node is an opengraph' do
let(:valid_scraper) { double('scraper', applicable?: true) }
let(:not_valid_scraper) { double('scraper', applicable?: true) }

before do
instance.stub(:opengraph_node?).and_return(false, false)
expect(website).to receive(:bar).and_return('', 'bar')
expect(valid_scraper).to receive(:call).once.with(website, 'bar')
expect(not_valid_scraper).to receive(:call).once.with(website, 'bar')
expect(scraper_class).to receive(:new).with(document).and_return(not_valid_scraper, valid_scraper)
expect(instance).to receive(:scraper_class).with(prefix_1, :bar).and_return(scraper_class)
expect(instance).to receive(:scraper_class).with(prefix_2, :bar).and_return(scraper_class)
end

it { expect(action).to be_false }
it { expect(action).to eq(website) }

end

Expand Down
94 changes: 94 additions & 0 deletions spec/scrapers/opengraph/base_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
require 'spec_helper'

describe LinkThumbnailer::Scrapers::Opengraph::Base do

let(:node) { double('node') }
let(:document) { double('document') }
let(:instance) { described_class.new(document) }

describe '#applicable?' do

let(:meta) { [node, node] }
let(:action) { instance.applicable? }

before do
instance.stub(:meta).and_return(meta)
end

context 'when all node is an opengraph' do

before do
instance.stub(:opengraph_node?).and_return(true, true)
end

it { expect(action).to be_true }

end

context 'when any node is an opengraph' do

before do
instance.stub(:opengraph_node?).and_return(true, false)
end

it { expect(action).to be_true }

end

context 'when no node is an opengraph' do

before do
instance.stub(:opengraph_node?).and_return(false, false)
end

it { expect(action).to be_false }

end

end

describe '#opengraph_node?' do

let(:action) { instance.send(:opengraph_node?, node) }

before do
node.stub(:attribute).with('name').and_return(attribute_from_name)
end

context 'with attribute from name valid' do

let(:attribute_from_name) { 'og:foo' }

it { expect(action).to be_true }

end

context 'with attribute from name not valid' do

let(:attribute_from_name) { 'foo' }

before do
node.stub(:attribute).with('property').and_return(attribute_from_property)
end

context 'and attribute from property valid' do

let(:attribute_from_property) { 'og:bar' }

it { expect(action).to be_true }

end

context 'and attribute from property not valid' do

let(:attribute_from_property) { 'bar' }

it { expect(action).to be_false }

end

end

end

end

0 comments on commit 2103691

Please sign in to comment.