Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
chrislo committed Sep 3, 2012
0 parents commit b67373a
Show file tree
Hide file tree
Showing 10 changed files with 1,397 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -0,0 +1,4 @@
rdoc/*
pkg/*
/Gemfile.lock
/.rbenv-version
7 changes: 7 additions & 0 deletions AUTHORS
@@ -0,0 +1,7 @@
AUTHORS

British Broadcasting Corporation
--------------------------------

- Chris Lowis <chris.lowis at bbc.co.uk>

556 changes: 556 additions & 0 deletions COPYING

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions Gemfile
@@ -0,0 +1,3 @@
# Resolve gems from the official RubyGems index over HTTPS. The
# `:rubygems` symbol shorthand is deprecated by Bundler because it
# points at the plain-HTTP endpoint.
source "https://rubygems.org"

# Take the dependency list from the project's gemspec rather than
# repeating it here.
gemspec
27 changes: 27 additions & 0 deletions README.rdoc
@@ -0,0 +1,27 @@
= On This Day

A simple Ruby parser for Wikipedia's "On this day" box on the
(English-language) homepage.

== Usage

Install the gem

gem install onthisday

Then fetch today's "On this day" items

@onthisday = OnThisDay::Parser.new
items = @onthisday.items

Items have text

items.first.text #=> "French Revolution: Meeting on a tennis court near the Palace of Versailles, members of France's Third Estate took the Tennis Court Oath, pledging not to separate until a new constitution was established."

A year

items.first.year #=> 1789

And associated topics (other related Wikipedia pages)

items.first.topics #=> ['French_Revolution', 'Palace_of_Versailles', 'Estates_of_the_realm', 'Tennis_Court_Oath', 'Constitution']
79 changes: 79 additions & 0 deletions Rakefile
@@ -0,0 +1,79 @@
require "rubygems"
require "rubygems/package_task"
require "rdoc/task"

require "rake/testtask"

# Test task: adds test/ to the load path and runs every *_test.rb
# file found beneath it, with verbose output switched on.
Rake::TestTask.new do |test_task|
  test_task.verbose = true
  test_task.libs.push("test")
  test_task.test_files = FileList.new("test/**/*_test.rb")
end

# A bare `rake` invocation runs the tests.
task(:default => ["test"])

# This builds the actual gem. For details of what all these options
# mean, and other ones you can add, check the documentation here:
#
# https://guides.rubygems.org/specification-reference/
#
# The gem specification. It is handed to Gem::PackageTask and is also
# serialised to "<name>.gemspec" by the :gemspec task in this Rakefile.
spec = Gem::Specification.new do |s|

  # Change these as appropriate
  s.name     = "onthisday"
  s.version  = "0.1.0"
  s.summary  = "A little parser for Wikipedia's 'On This Day' content block"
  s.author   = "Chris Lowis"
  s.email    = "chris.lowis@gmail.com"
  s.homepage = "http://github.com/bbcrd/onthisday"

  # `s.has_rdoc = true` used to be set here. The setter is deprecated
  # and has no effect, so it has been dropped to keep the Rakefile
  # loadable on RubyGems versions that no longer define it.

  # Add any extra files to include in the gem (like your README)
  s.files         = %w(Gemfile README.rdoc) + Dir.glob("{test,lib}/**/*")
  s.require_paths = ["lib"]

  # If you want to depend on other gems, add them here, along with any
  # relevant versions
  s.add_dependency("nokogiri")
  s.add_dependency("rest-client")

  # If your tests use any gems, include them here
  s.add_development_dependency("webmock")
end

# This task actually builds the gem. We also regenerate a static
# .gemspec file, which is useful if something (i.e. GitHub) will
# be automatically building a gem for this project. If you're not
# using GitHub, edit as appropriate.
#
# To publish your gem online, install the 'gemcutter' gem; Read more
# about that here: http://gemcutter.org/pages/gem_docs
# The block must stay even though it only re-assigns the same spec:
# the packaging tasks are defined as part of yielding to it.
Gem::PackageTask.new(spec) { |package_task| package_task.gem_spec = spec }

desc "Build the gemspec file #{spec.name}.gemspec"
task :gemspec do
  # Write the specification, serialised as Ruby source, next to this Rakefile.
  gemspec_path = "#{File.dirname(__FILE__)}/#{spec.name}.gemspec"
  File.open(gemspec_path, "w") do |out|
    out << spec.to_ruby
  end
end

# Packaging regenerates the static .gemspec first. Drop the line below
# if you do not want that. A checked-in gemspec is handy when:
# - bundler pulls the gem from a git source
# - the gem is built without rake (i.e. gem build blah.gemspec)
# - maybe others?
task(:package => :gemspec)

# Generate documentation
# RDoc task: README.rdoc is the main page and every Ruby file under
# lib/ is documented; output goes to the rdoc/ directory.
RDoc::Task.new do |rd|
  rd.main = "README.rdoc"
  # The lib glob was previously included a second time on its own line;
  # one include is enough.
  rd.rdoc_files.include("README.rdoc", "lib/**/*.rb")
  rd.rdoc_dir = "rdoc"
end

desc 'Clear out RDoc and generated packages'
task :clean => [:clobber_rdoc, :clobber_package] do
  # rm_f rather than rm: the generated gemspec may not exist yet (for
  # example on a fresh checkout), and a clean must not fail in that case.
  rm_f "#{spec.name}.gemspec"
end
79 changes: 79 additions & 0 deletions lib/onthisday.rb
@@ -0,0 +1,79 @@
# encoding: utf-8

require 'rest_client'
require 'nokogiri'

module OnThisDay
# One entry of the "On this day" list, wrapping the parsed node of a
# single list item. Building an Item mutates the wrapped node: noprint
# boilerplate is emptied and the leading year link is removed.
class Item
  # A link title that is exactly a four-digit year, e.g. "1879".
  YEAR_TITLE = /\A\d{4}\z/

  # element - the parsed node for one list item. It must respond to
  #           xpath, inner_text and inner_html.
  def initialize(element)
    @element = element
    @year = nil
    remove_noprint_elements!
    set_and_remove_year!
  end

  # Empty every descendant whose class attribute starts with
  # "noprint". This removes the boilerplate Wikinews, Obituaries etc.
  #
  # The leading "." keeps the search inside this item; without it the
  # expression is evaluated against the whole document.
  def remove_noprint_elements!
    @element.xpath('.//*[starts-with(@class,"noprint")]').each do |node|
      node.children.remove
    end
  end

  # Returns the item's year as an Integer, or 0 when no year link was found.
  def year
    @year.to_i
  end

  # Look through the links that are direct children of the item. The
  # first one whose title looks like a year, e.g. "1879", sets the
  # year of this item and is removed from the node.
  #
  # Only the first match is used, so a year that is linked later in
  # the sentence stays in the text and does not overwrite the year.
  def set_and_remove_year!
    @element.xpath('./a').each do |node|
      # Links without a title attribute yield nil; treat them as "".
      title = node['title'].to_s
      next unless title =~ YEAR_TITLE

      @year = title
      node.remove
      break
    end
  end

  # Plain text of the item with the " – " separator stripped.
  def text
    @element.inner_text.gsub(' – ','')
  end

  # Inner HTML of the item with the " – " separator stripped.
  def html
    @element.inner_html.gsub(' – ','')
  end

  # Recursively search for all a elements in this element and return
  # their targets with the leading /wiki/ removed. Anchors without an
  # href attribute are skipped.
  def topics
    hrefs = @element.xpath('.//a').map { |a| a.attr('href') }.compact
    hrefs.map { |href| href.sub(%r{\A/wiki/}, '') }
  end
end

# Downloads the English Wikipedia main page and turns each entry of
# its "On this day" list into an Item.
class Parser
  def initialize
  end

  # One Item per <li> directly under the list in the div with id "mp-otd".
  def items
    list_entries = doc.xpath("//div[@id='mp-otd']/ul/li")
    list_entries.map { |entry| Item.new(entry) }
  end

  # Parses the page content afresh on every call.
  def doc
    Nokogiri::HTML(content)
  end

  # Address of the page that is fetched.
  def wikipedia_url
    "http://en.wikipedia.org/wiki/Main_Page"
  end

  # The page body. It is fetched on first use and cached afterwards;
  # the proxy setting is re-applied from the environment on every call.
  def content
    RestClient.proxy = ENV['http_proxy']
    @content = RestClient.get(wikipedia_url) unless @content
    @content
  end
end
end
34 changes: 34 additions & 0 deletions onthisday.gemspec
@@ -0,0 +1,34 @@
# -*- encoding: utf-8 -*-

# Static gem specification. The Rakefile's :gemspec task writes
# spec.to_ruby to a file with this name, so hand edits made here
# should also be made in the Rakefile.
#
# NOTE(review): the Rakefile in this commit declares version 0.1.0
# while this file says 0.1.4 - confirm which one is current.
Gem::Specification.new do |s|
  s.name = "onthisday"
  s.version = "0.1.4"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Chris Lowis"]
  s.date = "2012-06-20"
  s.email = "chris.lowis@gmail.com"
  s.files = ["Gemfile", "README.rdoc", "test/fixtures", "test/fixtures/main_page_20120620.html", "test/onthisday_test.rb", "lib/onthisday.rb"]
  s.homepage = "http://github.com/bbcrd/onthisday"
  s.require_paths = ["lib"]
  s.rubygems_version = "1.8.24"
  # The summary previously described a different gem ('In the News');
  # it now matches the Rakefile and README.
  s.summary = "A little parser for Wikipedia's 'On This Day' content block"

  # The branches below keep the layout RubyGems generates: typed
  # runtime/development dependencies where the RubyGems version is at
  # least 1.2.0, plain add_dependency otherwise.
  if s.respond_to? :specification_version then
    s.specification_version = 3

    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
      s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
      s.add_runtime_dependency(%q<rest-client>, [">= 0"])
      s.add_development_dependency(%q<webmock>, [">= 0"])
    else
      s.add_dependency(%q<nokogiri>, [">= 0"])
      s.add_dependency(%q<rest-client>, [">= 0"])
      s.add_dependency(%q<webmock>, [">= 0"])
    end
  else
    s.add_dependency(%q<nokogiri>, [">= 0"])
    s.add_dependency(%q<rest-client>, [">= 0"])
    s.add_dependency(%q<webmock>, [">= 0"])
  end
end

0 comments on commit b67373a

Please sign in to comment.