Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
branch: master
Fetching contributors…

Cannot retrieve contributors at this time

25 lines (20 sloc) 0.515 kb
require 'rubygems'
require 'open-uri'
require 'hpricot'
# Grab the first 2000 stories from twssstories.com (10 per page, 200 pages)
# and write the quoted part of each story, one per line, to
# ../../data/twss.txt (resolved relative to this file).
domain = "http://twssstories.com"
output_path = File.expand_path("../../data/twss.txt", __FILE__)

# Block form of File.open: the file is closed even if a request or the HTML
# parser raises part-way through the crawl.
File.open(output_path, "w") do |f|
  200.times do |i|
    url = "#{domain}/node?page=#{i}"
    puts url

    # URI#read comes from open-uri. Calling Kernel#open with a URL string
    # stopped working in Ruby 3.0, so fetch through the URI object instead.
    doc = Hpricot(URI.parse(url).read)

    doc.search('div.content p') do |story|
      # now pull out the good stuff: the text captured between double quotes
      # (the match is greedy, so it runs to the last quote it can reach).
      f.puts Regexp.last_match(1) if story.to_plain_text =~ /\"(.*)?\"/
    end

    # Flush after every page so already-fetched stories survive an abort.
    f.flush
    # Random pause of up to 3 seconds between page requests.
    sleep rand * 3.0
  end
end
Jump to Line
Something went wrong with that request. Please try again.