/
scraper.rb
39 lines (33 loc) · 1.29 KB
/
scraper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
require 'scraperwiki'
require 'nokogiri'
require 'mechanize'
require 'open-uri'
require 'date'
require 'json'
require 'yaml'
# FHRS (Food Hygiene Rating Scheme) open-data feed for local authority 705.
url = "http://ratings.food.gov.uk/OpenDataFiles/FHRS705en-GB.xml"
# Kernel#open on a URL string was deprecated in Ruby 2.7 and removed in 3.0;
# URI.open (provided by the open-uri require above) is the supported form
# and behaves identically here.
doc = Nokogiri::XML(URI.open(url))
inspections = []
doc.search('EstablishmentDetail').each do |establishment|
  # Flatten each child element into a name => text hash. NOTE(review):
  # whitespace text nodes also land under a "text" key, exactly as in the
  # original inject version — downstream code only reads known keys.
  details = establishment.children.each_with_object({}) do |el, hsh|
    hsh[el.name] = el.inner_text
  end
  # Geocode is a nested element, so pull lat/lng out explicitly.
  # Nokogiri's search(...).inner_text returns "" when the nodes are absent
  # and does not raise, so the old `rescue nil` modifiers were dead weight
  # that would only have masked real errors.
  details["lat"] = establishment.search('Geocode Latitude').inner_text
  details["lng"] = establishment.search('Geocode Longitude').inner_text
  inspections << details
end
# Normalise each raw inspection hash into the schema ScraperWiki stores,
# keyed on the FHRS establishment id.
inspections.each do |i|
  details = {}
  details[:id] = i["FHRSID"]
  details[:councilid] = i["LocalAuthorityBusinessID"]
  # Establishments awaiting inspection can have a blank/absent RatingDate;
  # Date.parse raises ArgumentError on "" (which would abort the whole
  # scrape mid-run), so guard before parsing and store nil instead.
  raw_date = i["RatingDate"].to_s.strip
  details[:date] = raw_date.empty? ? nil : Date.parse(raw_date)
  details[:name] = i["BusinessName"]
  details[:link] = "http://ratings.food.gov.uk/business/en-GB/#{details[:id]}"
  # Join only the non-empty address parts; PostCode is also kept separately.
  address = [i["AddressLine1"], i["AddressLine2"], i["AddressLine3"], i["AddressLine4"], i["PostCode"]].compact.reject { |s| s.empty? }
  details[:address] = address.join(", ")
  details[:postcode] = i["PostCode"]
  details[:rating] = i["RatingValue"]
  details[:type] = i["BusinessType"]
  # RSS-style timestamp; nil when there is no rating date to format.
  details[:rss_date] = details[:date] ? details[:date].strftime("%A, %d %b %Y %H:%M:%S %Z") : nil
  details[:lat] = i["lat"]
  details[:lng] = i["lng"]
  ScraperWiki.save(["id"], details)
end