Skip to content

Commit

Permalink
Port from my gist + bundleify
Browse files Browse the repository at this point in the history
  • Loading branch information
jamiew committed Dec 6, 2011
0 parents commit a5cd8c3
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -0,0 +1,3 @@
.bundle
.DS_Store
.gems
3 changes: 3 additions & 0 deletions Gemfile
@@ -0,0 +1,3 @@
# Fetch gems from the official RubyGems index over HTTPS. The old
# `source :rubygems` shorthand is deprecated and resolves to plain HTTP.
source 'https://rubygems.org'

# HTTP client the downloader script uses for the API request and the photo downloads.
gem 'mechanize'
19 changes: 19 additions & 0 deletions Gemfile.lock
@@ -0,0 +1,19 @@
GEM
remote: http://rubygems.org/
specs:
mechanize (2.0.1)
net-http-digest_auth (~> 1.1, >= 1.1.1)
net-http-persistent (~> 1.8)
nokogiri (~> 1.4)
webrobots (~> 0.0, >= 0.0.9)
net-http-digest_auth (1.2)
net-http-persistent (1.9)
nokogiri (1.5.0)
webrobots (0.0.12)
nokogiri (>= 1.4.4)

PLATFORMS
ruby

DEPENDENCIES
mechanize
51 changes: 51 additions & 0 deletions tumblr-photo-downloader.rb
@@ -0,0 +1,51 @@
require 'rubygems'
require 'bundler'
require 'fileutils'
Bundler.require


# Downloads every full-size (max-width 1280) photo of a Tumblr blog into a
# local directory named after the blog, paging through the blog's XML read
# API `num` posts at a time.
#
# Usage: ruby tumblr-photo-downloader.rb [subdomain]

# Your Tumblr subdomain, e.g. "jamiew" for "jamiew.tumblr.com".
# Taken from the first command-line argument; defaults to "doctorwho".
site = ARGV[0] || "doctorwho"

# Photos are saved into ./<site>/
FileUtils.mkdir_p(site)

concurrency = 8 # downloads run in parallel per batch
num = 50        # posts requested per API page
start = 0       # offset of the first post on the current page

loop do
  puts "start=#{start}"

  url = "http://#{site}.tumblr.com/api/read?type=photo&num=#{num}&start=#{start}"
  page = Mechanize.new.get(url)
  doc = Nokogiri::XML.parse(page.body)

  # Each photo is listed once per available size; keep only the largest.
  images = (doc/'post photo-url').select { |x| x['max-width'].to_i == 1280 }
  # The same URL can be listed more than once on a page; de-duplicate so two
  # threads never fetch and write the same file at the same time.
  image_urls = images.map { |x| x.content }.uniq

  # Download in batches of `concurrency`, waiting for a whole batch to finish
  # before starting the next one.
  image_urls.each_slice(concurrency) do |group|
    threads = group.map do |image_url|
      Thread.new do
        puts "Saving photo #{image_url}"
        begin
          # One agent per thread: a Mechanize instance is not shared across threads.
          file = Mechanize.new.get(image_url)
          # Drop any query string so it does not end up in the file name.
          filename = File.basename(file.uri.to_s.split('?')[0])
          file.save_as("#{site}/#{filename}")
        rescue StandardError => e
          # Best effort: a failed download (HTTP error, timeout, socket error)
          # is reported and skipped instead of aborting the whole run at join.
          puts "Error getting file, #{e}"
        end
      end
    end
    threads.each(&:join)
  end

  puts "#{images.count} images found (num=#{num})"

  # Paging is measured in posts, not images, so decide on the post count:
  # a page with fewer than `num` posts is the last one.
  post_count = (doc/'post').count
  if post_count < num
    puts "our work here is done"
    break
  else
    start += num
  end
end

0 comments on commit a5cd8c3

Please sign in to comment.