Skip to content

Commit

Permalink
Added: Begun webcrawler verifier.
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexander Krasnoyarov committed Oct 27, 2016
1 parent b1e5d95 commit 7f69a57
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
- Added: `Range` helper.
- Added: `Steeler` webcrawler verifier.
- Added: `Twitter` webcrawler verifier.
- Added: `Begun` webcrawler verifier.
- Chore: refactoring.
- Chore: integrate `coveralls.io` and add badge `Coverage Status`.
- Chore: add badge `Dependency Status`.
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ if (!empty($_SERVER['HTTP_USER_AGENT']) && !empty($_SERVER['REMOTE_ADDR'])) {

- **Apple**: `Applebot`.
- **Baidu**: `Baiduspider`.
- **Begun**: `BegunAdvertising`.
- **Bing**: `bingbot`, `msnbot`, `adidxbot`, `BingPreview`.
- **Deuse**: `DeuSu`.
- **Exalead**: `Exabot`.
Expand Down
14 changes: 14 additions & 0 deletions src/DNS/HostVerifier.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?php
namespace WebcrawlerVerifier\DNS;

use WebcrawlerVerifier\Helper\StringHelper as StringHelper;

class HostVerifier
{
    /**
     * Tells whether a host name matches at least one of the allowed host names.
     *
     * Each entry of $allowedHostNames is compared against $host through
     * StringHelper::endsWith(); the host is accepted as soon as any entry
     * yields a result other than false.
     *
     * NOTE(review): whether the suffix match respects DNS label boundaries
     * (e.g. "notbegun.ru" vs. "begun.ru") depends on StringHelper::endsWith()
     * and on how callers write the allowed names - confirm.
     *
     * @param $host string the host name to check
     * @param $allowedHostNames array the accepted host names
     * @return bool true if at least one allowed host name matches, otherwise false
     */
    public static function verify($host, $allowedHostNames)
    {
        $matchesHost = function ($candidate) use ($host) {
            $outcome = StringHelper::endsWith($candidate, $host);

            return false !== $outcome;
        };

        $matchingNames = array_filter($allowedHostNames, $matchesHost);

        // A non-empty filter result means at least one allowed name matched
        return (bool) $matchingNames;
    }
}
8 changes: 1 addition & 7 deletions src/DNS/ReverseVerifier.php
Original file line number Diff line number Diff line change
@@ -1,19 +1,13 @@
<?php
namespace WebcrawlerVerifier\DNS;

use WebcrawlerVerifier\Helper\StringHelper as StringHelper;

class ReverseVerifier
{
    /**
     * Forward-confirmed reverse DNS check for an IP address.
     *
     * The IP is resolved to a host name, the host name is checked against
     * $allowedHostNames through HostVerifier::verify(), and the host name is
     * then resolved back to an IP which must be identical to the input.
     *
     * @param $ip string the IP address to check
     * @param $allowedHostNames array the host names handed to HostVerifier::verify()
     * @return bool true if the host name is allowed and resolves back to $ip, otherwise false
     */
    public static function verify($ip, $allowedHostNames)
    {
        $host = gethostbyaddr($ip);

        // gethostbyaddr() yields false for malformed input; there is no host
        // name to verify in that case
        if ($host === false) {
            return false;
        }

        // Skip the forward lookup when the host name is already rejected
        if (!HostVerifier::verify($host, $allowedHostNames)) {
            return false;
        }

        $ipAfterLookup = gethostbyname($host);

        return $ipAfterLookup === $ip;
    }
}
20 changes: 20 additions & 0 deletions src/Webcrawler/BegunWebcrawlerVerifier.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?php
namespace WebcrawlerVerifier\Webcrawler;

use WebcrawlerVerifier\DNS\HostVerifier;

class BegunWebcrawlerVerifier implements WebcrawlerVerifierInterface
{
    protected $allowedHostNames = ['begun.ru'];

    /**
     * Checks whether the reverse DNS name of the given IP address matches one of the allowed hosts
     *
     * Only the reverse lookup is performed here: the resolved host name is handed to
     * HostVerifier::verify() and is not resolved back to an IP address.
     *
     * @param $ip string the IP address to check
     * @return bool true if the resolved host name matches any of the allowed hosts, otherwise false
     */
    public function verify($ip)
    {
        $host = gethostbyaddr($ip);

        // gethostbyaddr() yields false for malformed input; treat it as unverified
        // instead of forwarding a non-string into the host name comparison
        if ($host === false) {
            return false;
        }

        return HostVerifier::verify($host, $this->allowedHostNames);
    }
}
1 change: 1 addition & 0 deletions src/WebcrawlerVerifier.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class WebcrawlerVerifier
protected $webcrawlerVerifiers = [
'Applebot' => 'WebcrawlerVerifier\Webcrawler\AppleWebcrawlerVerifier',
'Baiduspider' => 'WebcrawlerVerifier\Webcrawler\BaiduWebcrawlerVerifier',
'BegunAdvertising' => 'WebcrawlerVerifier\Webcrawler\BegunWebcrawlerVerifier',
'bingbot' => 'WebcrawlerVerifier\Webcrawler\BingWebcrawlerVerifier',
'msnbot' => 'WebcrawlerVerifier\Webcrawler\BingWebcrawlerVerifier',
'adidxbot' => 'WebcrawlerVerifier\Webcrawler\BingWebcrawlerVerifier',
Expand Down
15 changes: 15 additions & 0 deletions tests/WebcrawlerVerifierTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,21 @@ public function testVerifyBaiduWebcrawler()
));
}

/**
 * The Begun user agent is reported as UNVERIFIED for the private address
 * 192.168.0.1 and as VERIFIED for 91.192.149.231.
 */
public function testVerifyBegunWebcrawler()
{
    $verifier = new \WebcrawlerVerifier\WebcrawlerVerifier();
    $begunAgent = 'Mozilla/5.0 (compatible; BegunAdvertising/3.0; +http://begun.ru/begun/technology/indexer/)';

    $privateAddressResult = $verifier->verify($begunAgent, '192.168.0.1');
    $this->assertEquals($verifier::UNVERIFIED, $privateAddressResult);

    $crawlerAddressResult = $verifier->verify($begunAgent, '91.192.149.231');
    $this->assertEquals($verifier::VERIFIED, $crawlerAddressResult);
}

public function testVerifyBingWebcrawler()
{
$webcrawlerVerifier = new \WebcrawlerVerifier\WebcrawlerVerifier();
Expand Down

0 comments on commit 7f69a57

Please sign in to comment.