Permalink
Browse files

Fixed regular expression to account for malformed Wikimedia image links.

  • Loading branch information...
aantix committed Dec 27, 2011
1 parent 0c27739 commit c59e9cd7fef62bab87adeb64dc9b6b04d13b28e8
Showing with 82 additions and 47 deletions.
  1. +1 −2 .bundle/config
  2. +2 −0 Gemfile
  3. +72 −41 Gemfile.lock
  4. +1 −1 Rakefile
  5. +6 −3 lib/sex_it_up.rb
View
@@ -1,2 +1 @@
----
-BUNDLE_DISABLE_SHARED_GEMS: "1"
+--- {}
View
@@ -1,3 +1,5 @@
+source :rubygems
+
gem "activerecord", ">= 3.0.1"
gem "actionpack", ">= 3.0.1"
gem "paperclip", ">= 2.3.5"
View
@@ -1,53 +1,72 @@
GEM
+ remote: http://rubygems.org/
specs:
- abstract (1.0.0)
- actionpack (3.0.3)
- activemodel (= 3.0.3)
- activesupport (= 3.0.3)
- builder (~> 2.1.2)
- erubis (~> 2.6.6)
- i18n (~> 0.4)
- rack (~> 1.2.1)
- rack-mount (~> 0.6.13)
- rack-test (~> 0.5.6)
- tzinfo (~> 0.3.23)
- activemodel (3.0.3)
- activesupport (= 3.0.3)
- builder (~> 2.1.2)
- i18n (~> 0.4)
- activerecord (3.0.3)
- activemodel (= 3.0.3)
- activesupport (= 3.0.3)
- arel (~> 2.0.2)
- tzinfo (~> 0.3.23)
- activesupport (3.0.3)
- arel (2.0.3)
- builder (2.1.2)
- diff-lcs (1.1.2)
- erubis (2.6.6)
- abstract (>= 1.0.0)
+ actionpack (3.1.3)
+ activemodel (= 3.1.3)
+ activesupport (= 3.1.3)
+ builder (~> 3.0.0)
+ erubis (~> 2.7.0)
+ i18n (~> 0.6)
+ rack (~> 1.3.5)
+ rack-cache (~> 1.1)
+ rack-mount (~> 0.8.2)
+ rack-test (~> 0.6.1)
+ sprockets (~> 2.0.3)
+ activemodel (3.1.3)
+ activesupport (= 3.1.3)
+ builder (~> 3.0.0)
+ i18n (~> 0.6)
+ activerecord (3.1.3)
+ activemodel (= 3.1.3)
+ activesupport (= 3.1.3)
+ arel (~> 2.2.1)
+ tzinfo (~> 0.3.29)
+ activesupport (3.1.3)
+ multi_json (~> 1.0)
+ arel (2.2.1)
+ builder (3.0.0)
+ cocaine (0.2.1)
+ diff-lcs (1.1.3)
+ domain_name (0.5.1)
+ unf (~> 0.0.3)
+ erubis (2.7.0)
factory_girl (1.3.2)
git (1.2.5)
google-search (1.0.2)
json
- i18n (0.4.1)
- jeweler (1.5.1)
+ hike (1.2.1)
+ i18n (0.6.0)
+ jeweler (1.5.2)
bundler (~> 1.0.0)
git (>= 1.2.5)
rake
- json (1.4.6)
- mechanize (1.0.0)
- nokogiri (>= 1.2.1)
- nokogiri (1.4.3.1)
- paperclip (2.3.5)
- activerecord
- activesupport
- rack (1.2.1)
- rack-mount (0.6.13)
+ json (1.6.4)
+ mechanize (2.1)
+ domain_name (~> 0.5, >= 0.5.1)
+ net-http-digest_auth (~> 1.1, >= 1.1.1)
+ net-http-persistent (~> 2.3, >= 2.3.2)
+ nokogiri (~> 1.4)
+ ntlm-http (~> 0.1, >= 0.1.1)
+ webrobots (~> 0.0, >= 0.0.9)
+ mime-types (1.17.2)
+ multi_json (1.0.4)
+ net-http-digest_auth (1.2)
+ net-http-persistent (2.3.3)
+ nokogiri (1.5.0)
+ ntlm-http (0.1.1)
+ paperclip (2.4.5)
+ activerecord (>= 2.3.0)
+ activesupport (>= 2.3.2)
+ cocaine (>= 0.0.2)
+ mime-types
+ rack (1.3.5)
+ rack-cache (1.1)
+ rack (>= 0.4)
+ rack-mount (0.8.3)
rack (>= 1.0.0)
- rack-test (0.5.6)
+ rack-test (0.6.1)
rack (>= 1.0)
- rake (0.8.7)
+ rake (0.9.2.2)
rspec (2.1.0)
rspec-core (~> 2.1.0)
rspec-expectations (~> 2.1.0)
@@ -58,8 +77,20 @@ GEM
rspec-mocks (2.1.0)
rspec-rails (2.1.0)
rspec (~> 2.1.0)
- sqlite3-ruby (1.3.1)
- tzinfo (0.3.23)
+ sprockets (2.0.3)
+ hike (~> 1.2)
+ rack (~> 1.0)
+ tilt (~> 1.1, != 1.3.0)
+ sqlite3 (1.3.5)
+ sqlite3-ruby (1.3.3)
+ sqlite3 (>= 1.3.3)
+ tilt (1.3.3)
+ tzinfo (0.3.31)
+ unf (0.0.4)
+ unf_ext
+ unf_ext (0.0.4)
+ webrobots (0.0.12)
+ nokogiri (>= 1.4.4)
PLATFORMS
ruby
View
@@ -18,7 +18,7 @@ Jeweler::Tasks.new do |gem|
gem.homepage = "http://github.com/aantix/sex_it_up"
gem.authors = ["Jim Jones"]
gem.require_path = 'lib'
- gem.version = "1.0.0"
+ gem.version = "1.1.0"
gem.add_development_dependency "paperclip", ">= 2.3.5"
gem.add_development_dependency "mechanize", ">= 1.0.0"
gem.add_development_dependency "google-search", ">= 1.0.2"
View
@@ -49,7 +49,7 @@ def self.cache_search(query, term)
Google::Search::Web.new(:query => query).each do |result|
page = agent.get(result.uri)
image = find_image_link(page)
-
+
cache(term, image.href) unless image.nil?
num_results+=1
@@ -59,7 +59,7 @@ def self.cache_search(query, term)
end
def self.find_image_link(page)
- page.links.detect {|link| link.href =~ /http:\/\/upload.wikimedia.org\/wikipedia\/commons\/\w\// }
+ page.links.detect {|link| link.href =~ /upload.wikimedia.org\/wikipedia\/commons\/\w\/(.)+(.jpg|.png|.jpeg)$/i}
end
def self.agent
@@ -70,11 +70,14 @@ def self.agent
end
def self.cache(search_term, img_url)
+ # Some URLs are prefixed only with a double slash, omitting the protocol.
+ # This causes an exception when the URL goes through URI.parse.
+ img_url.gsub!(/^\/+/,'http://')
+
# No need to re-retrieve file if already done so
image = find_by_image_original_url(img_url)
return image unless image.nil?
- #image = open img_url
image = open(URI.parse(img_url))
# The original_filename passed in from a file open is unintelligible;

0 comments on commit c59e9cd

Please sign in to comment.