Update scraper.rb to parse dates in DD.MM.YY format

jamasalbertastash · Aug 9, 2023 · 5706a21 · 5706a21
1 parent 385149e
commit 5706a21
Showing 1 changed file with 9 additions and 6 deletions.
diff --git a/scraper.rb b/scraper.rb
@@ -7,33 +7,36 @@ def scrape_page(page, comment_url)
   table = page.at("table")
 
   table.search("tr")[1..-1].each do |tr|
-    day, month, year = tr.search("td")[3].inner_text.gsub(/[[:space:]]/, ' ').split(" ")
+    date_str = tr.search("td")[3].inner_text.gsub(/[[:space:]]/, ' ')
+
+    # Split the date string on periods
+    day, month, year = date_str.split(".")
 
     # Print the date values for debugging
     puts "Parsed Date: Day: #{day}, Month: #{month}, Year: #{year}"
 
-    month_i = Date::MONTHNAMES.index(month)
-
     # Guard clause
-    unless day && month_i && year
+    unless day && month && year
       puts "Invalid date values. Skipping..."
       next
     end
 
+    # Adjust the year to YYYY format
+    year = "20" + year if year.length == 2
+
     record = {
       "info_url" => tr.search("td a")[0].attributes['href'].to_s,
       "comment_url" => comment_url,
       "council_reference" => tr.search("td")[0].inner_text,
       "description" => tr.search("td")[1].inner_text,
       "address" => tr.search("td")[2].inner_text + ", VIC",
-      "on_notice_to" => Date.new(year.to_i, month_i, day.to_i).to_s,
+      "on_notice_to" => Date.new(year.to_i, month.to_i, day.to_i).to_s,
       "date_scraped" => Date.today.to_s
     }
 
     # Check if record already exists
     if (ScraperWiki.select("* from data where `council_reference`='#{record['council_reference']}'").empty? rescue true)
       puts "Saving record " + record['council_reference'] + ", " + record['address']
-#      puts record
       ScraperWiki.save_sqlite(['council_reference'], record)
     else
       puts "Skipping already saved record " + record['council_reference']