Skip to content

Commit

Permalink
Update for new platform
Browse files (browse the repository at this point in the history)
- Switch to Torann/dom-parser
- update dependencies
  • Loading branch information
jamezpolley committed May 7, 2019
1 parent 8a04a1c commit 40d6462
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 27 deletions.
4 changes: 2 additions & 2 deletions composer.json
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,7 +11,7 @@
"ext-pdo_sqlite": "*",
"ext-gd": "*",
"ext-mbstring": "*",
"byjg/pgbrowser" : "1.0.*",
"sunra/php-simple-html-dom-parser": "1.5.2"
"byjg/pgbrowser" : "*",
"torann/dom-parser": "*"
}
}
42 changes: 23 additions & 19 deletions composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions scraper.php
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,7 +3,7 @@
require_once 'vendor/openaustralia/scraperwiki/scraperwiki.php';

use PGuardiario\PGBrowser;
use Sunra\PhpSimple\HtmlDomParser;
use Torann\DomParser\HtmlDom;

date_default_timezone_set('Australia/Sydney');

Expand All @@ -29,7 +29,7 @@
$full_url = $url_base. '?' .$url_query. '&d=' .$period;
$page = $browser->get($full_url);

$page_dom = HtmlDomParser::str_get_html($page->html);
$page_dom = HtmlDom::fromString($page->html);
$results = $page_dom->find("div[class=result]");

foreach ($results as $result) {
Expand All @@ -38,20 +38,20 @@

// getting detail page
$page2 = $browser->get($info_url);
$page2_dom = HtmlDomParser::str_get_html($page2->html);
$page2_dom = HtmlDom::fromString($page2->html);
$divs = $page2_dom->find("div[class=detailleft]");

foreach ($divs as $div) {
switch ($div->plaintext) {
case 'Description:' :
$description = trim($div->next_sibling()->plaintext);
$description = trim($div->nextSibling()->plaintext);
break;
case 'Properties:' :
$address = explode("\n", $div->next_sibling()->plaintext);
$address = explode("\n", $div->nextSibling()->plaintext);
$address = trim(preg_replace('/\s+/', ' ', $address[0]));
break;
case 'Lodged:' :
$date_received = explode("/", $div->next_sibling()->plaintext);
$date_received = explode("/", $div->nextSibling()->plaintext);
$date_received = $date_received[2] . '-' . $date_received[1] . '-' . $date_received[0];
break;
}
Expand Down

0 comments on commit 40d6462

Please sign in to comment.