Skip to content

Commit

Permalink
Build a record for each individual offence, and geocode
Browse files Browse the repository at this point in the history
  • Loading branch information
auxesis committed Dec 18, 2016
1 parent c44f740 commit 613211d
Showing 1 changed file with 33 additions and 31 deletions.
64 changes: 33 additions & 31 deletions scraper.rb
Expand Up @@ -19,32 +19,17 @@ def scrub!
end
end

# Coerces a date-like value into a plain Date.
#
# value - a DateTime, Date or String (subclasses are accepted too).
#
# Returns a Date: DateTime values are truncated with #to_date, Strings are
# parsed with Date.parse, and Date values are returned unchanged.
# Raises RuntimeError for any other kind of value, and Date::Error
# (an ArgumentError) when a String cannot be parsed.
def normalise_date(value)
  case value
  # DateTime is a subclass of Date, so it has to be matched first.
  when DateTime then value.to_date
  when Date     then value
  when String   then Date.parse(value)
  else
    puts "[debug] Unhandled date: #{value.inspect}"
    # A bare `raise` here would only report "unhandled exception"; keep the
    # RuntimeError class but say what was actually rejected.
    raise "Unhandled date: #{value.inspect}"
  end
end

# Builds an identifier for a record by hashing its contents.
#
# record - an Enumerable (a Hash of field name => value at the visible call
#          site). Each element is converted with #to_s, the pieces are joined
#          with single spaces, and the result is passed through String#to_md5.
#
# Returns whatever String#to_md5 returns.
# NOTE(review): to_md5 is not defined in the visible part of the file;
# presumably an MD5 hex digest helper patched onto String - confirm.
def generate_id(record)
  record.map(&:to_s).join(' ').to_md5
end

def geocode(prosecution)
@addresses ||= {}

address = prosecution['address']
address = [
prosecution['address'],
prosecution['county'],
prosecution['postcode'],
prosecution['business_address'],
'Canberra',
'ACT'
].join(', ')

if @addresses[address]
Expand All @@ -55,7 +40,7 @@ def geocode(prosecution)
a = Geokit::Geocoders::GoogleGeocoder.geocode(address)

if !a.lat && !a.lng
a = Geokit::Geocoders::GoogleGeocoder.geocode(prosecution['postcode'])
puts "[debug] Couldn't geocode #{address}"
end

location = {
Expand All @@ -71,7 +56,7 @@ def geocode(prosecution)

# Returns the `id` of every row already stored in the `data` table.
#
# The list is memoised in @cached after the first successful query, so the
# database is only read once per run. When the query raises
# SqliteMagic::NoSuchTable an empty array is returned and nothing is cached.
def existing_record_ids
  # Only the `id` column is queried: the earlier `link` lookup was discarded
  # immediately and only cost an extra round-trip.
  @cached ||= ScraperWiki.select('id from data').map { |r| r['id'] }
rescue SqliteMagic::NoSuchTable
  []
end
Expand Down Expand Up @@ -204,17 +189,16 @@ def finalise_record!
values = @record.delete('Date of Offence') || []
values.compact!
values.map!(&:strip).reject! {|v| v.blank?}
values.map! {|v| Date.parse(v)}
@record['offence_dates'] = values

# TODO(auxesis): split offences into individual records
# Offence Proven
values = @record.delete('Offence Proven') || []
values.compact!
values.map!(&:strip).reject! {|v| v.blank?}
values.reject! {|v| v =~ /Total \(\d+\) Charge/i}
@record['offence_proven'] = values

# TODO(auxesis): split offences into individual records
# Imposed Penalty
values = @record.delete('Imposed Penalty') || []
values.compact!
Expand All @@ -225,7 +209,7 @@ def finalise_record!
values = @record.delete('Removal date') || []
values.compact!
values.map!(&:strip).reject! {|v| v.blank?}
@record['removal_date'] = values.join(' ')
@record['removal_date'] = Date.parse(values.join(' '))

# Notes
values = @record.delete('Notes') || []
Expand Down Expand Up @@ -269,10 +253,30 @@ def clean_imposed_penalties!
end
end

# Expands each scraped record into one prosecution per proven offence.
#
# records - an Array of Hashes carrying 'prosecution_details',
#           'business_address', 'offence_dates', 'offence_proven',
#           'imposed_penalties', 'removal_date' and 'notes'.
#
# Offences are paired positionally with penalties; an offence without a
# matching penalty gets a nil 'imposed_penalty'. Every prosecution of a record
# shares that record's first offence date. Missing (nil) offence, penalty or
# date lists are treated as empty rather than raising.
#
# Returns a flat Array of prosecution Hashes, each with an 'id' computed by
# generate_id from the other fields.
def split_records_into_multiple_prosecutions(records)
  records.flat_map do |record|
    offences     = Array(record['offence_proven'])
    penalties    = Array(record['imposed_penalties'])
    offence_date = Array(record['offence_dates']).first

    offences.zip(penalties).map do |offence, penalty|
      prosecution = {
        'business_name'    => record['prosecution_details'],
        'business_address' => record['business_address'],
        'offence_date'     => offence_date,
        'offence'          => offence,
        'imposed_penalty'  => penalty,
        'removal_date'     => record['removal_date'],
        'notes'            => record['notes']
      }
      # The id is derived before it is stored, so it never hashes itself.
      prosecution['id'] = generate_id(prosecution)
      prosecution
    end
  end
end

def add_to_record(column, value)
Expand Down Expand Up @@ -328,15 +332,13 @@ def fetch_and_build_prosecutions
# Entry point: fetches prosecutions, keeps those whose 'id' is not already
# stored, geocodes them, and saves them keyed on 'id'.
def main
  prosecutions = fetch_and_build_prosecutions
  puts "### Found #{prosecutions.size} notices"

  # Look the stored ids up once instead of once per prosecution.
  known_ids = existing_record_ids
  new_prosecutions = prosecutions.reject { |r| known_ids.include?(r['id']) }
  puts "### There are #{new_prosecutions.size} new prosecutions"
  new_prosecutions.map! { |p| geocode(p) }

  # Serialise. 'id' is the only unique key; the records built here carry no
  # 'link' field.
  ScraperWiki.save_sqlite(['id'], new_prosecutions)

  puts "Done"
end
Expand Down

0 comments on commit 613211d

Please sign in to comment.