Permalink
Browse files

Initial scraper

  • Loading branch information...
tmtmtmtm committed Jul 30, 2015
0 parents commit da3f9439a9cd64f88cc05f6a823caaf70ae6cce8
Showing with 102 additions and 0 deletions.
  1. +6 −0 .gitignore
  2. +16 −0 Gemfile
  3. +55 −0 Gemfile.lock
  4. +1 −0 README.md
  5. +24 −0 scraper.rb
@@ -0,0 +1,6 @@
.cache/*
*.swp
*.sqlite
16 Gemfile
@@ -0,0 +1,16 @@
# It's easy to add more libraries or choose different versions. Any libraries
# specified here will be installed and made available to your morph.io scraper.
# Find out more: https://morph.io/documentation/ruby

source "https://rubygems.org"

ruby "2.0.0"

# Morph-patched scraperwiki fork (saves to the morph.io default SQLite DB).
gem "scraperwiki", git: "https://github.com/openaustralia/scraperwiki-ruby.git", branch: "morph_defaults"
gem "execjs"
gem "pry"
gem "colorize"
gem "nokogiri"
gem "open-uri-cached"
gem "fuzzy_match"
# Quoting normalized to double quotes for consistency with the rest of the file.
gem "wikidata-client", "~> 0.0.7", require: "wikidata"
@@ -0,0 +1,55 @@
GIT
remote: https://github.com/openaustralia/scraperwiki-ruby.git
revision: fc50176812505e463077d5c673d504a6a234aa78
branch: morph_defaults
specs:
scraperwiki (3.0.1)
httpclient
sqlite_magic
GEM
remote: https://rubygems.org/
specs:
coderay (1.1.0)
colorize (0.7.7)
excon (0.45.4)
execjs (2.5.2)
faraday (0.9.1)
multipart-post (>= 1.2, < 3)
faraday_middleware (0.10.0)
faraday (>= 0.7.4, < 0.10)
fuzzy_match (2.1.0)
hashie (3.4.2)
httpclient (2.6.0.1)
method_source (0.8.2)
mini_portile (0.6.2)
multipart-post (2.0.0)
nokogiri (1.6.6.2)
mini_portile (~> 0.6.0)
open-uri-cached (0.0.5)
pry (0.10.1)
coderay (~> 1.1.0)
method_source (~> 0.8.1)
slop (~> 3.4)
slop (3.6.0)
sqlite3 (1.3.10)
sqlite_magic (0.0.3)
sqlite3
wikidata-client (0.0.7)
excon (~> 0.40)
faraday (~> 0.9)
faraday_middleware (~> 0.9)
hashie (~> 3.3)
PLATFORMS
ruby
DEPENDENCIES
colorize
execjs
fuzzy_match
nokogiri
open-uri-cached
pry
scraperwiki!
wikidata-client (~> 0.0.7)
@@ -0,0 +1 @@
This is a scraper that runs on [Morph](https://morph.io). To get started, [see the documentation](https://morph.io/documentation).
@@ -0,0 +1,24 @@
#!/usr/bin/env ruby
# encoding: utf-8
# FIX: shebang was `#!/bin/env ruby`; `env` lives in /usr/bin on virtually
# all systems, so the script would not run directly.

require 'colorize'
require 'csv'
require 'json'
# NOTE(review): the script calls `open(URL)` below, which needs open-uri.
# The Gemfile declares open-uri-cached but it was never required here —
# presumably an oversight; without it `open` treats the URL as a file path.
require 'open-uri-cached'
require 'pry'
require 'scraperwiki'
# Fetch a CSV of TDs (headers: person_id, uri, first_name, last_name, ...),
# reshape each row into the fields we store, and upsert into SQLite.
#
# file - URL or local path of the CSV to process.
# term - term identifier stamped on every row; defaults to '31'
#        (presumably the 31st Dáil — confirm against the source site).
#
# Returns the processed CSV::Table.
def reprocess(file, term: '31')
  csv = CSV.table(open(file))
  csv.each do |td|
    # CSV::Row#delete returns the [header, value] pair; keep just the value.
    td[:id] = (td.delete :person_id).last
    td[:source] = (td.delete :uri).last
    td[:name] = "%s %s" % [td[:first_name], td[:last_name]]
    td[:sort_name] = "%s, %s" % [td[:last_name], td[:first_name]]
    td[:term] = term
    # Upsert keyed on (id, term) so re-runs update rather than duplicate rows.
    ScraperWiki.save_sqlite([:id, :term], td)
  end
end
# Entry point: pull the current TD list from KildareStreet as CSV and store it.
reprocess('https://www.kildarestreet.com/tds/?f=csv')

0 comments on commit da3f943

Please sign in to comment.