From 1c96f2f61bd70d8f441f59525a4ea437ea93a03b Mon Sep 17 00:00:00 2001 From: Andreas Gohr Date: Wed, 13 Jan 2016 11:39:58 +0100 Subject: [PATCH] ignore actions triggered by bots Despite the voting being triggered by JavaScript, some search bots do execute the votes. This patch introduces CrawlerDetect from https://github.com/JayBizzle/Crawler-Detect to ignore such votes --- CrawlerDetect.php | 423 ++++++++++++++++++++++++++++++++++++++++++++++ helper.php | 7 + plugin.info.txt | 2 +- 3 files changed, 431 insertions(+), 1 deletion(-) create mode 100644 CrawlerDetect.php diff --git a/CrawlerDetect.php b/CrawlerDetect.php new file mode 100644 index 0000000..8882fbd --- /dev/null +++ b/CrawlerDetect.php @@ -0,0 +1,423 @@ +setHttpHeaders($headers); + $this->setUserAgent($userAgent); + } + + /** + * Set HTTP headers. + * + * @param array $httpHeaders + */ + public function setHttpHeaders($httpHeaders = null) + { + // use global _SERVER if $httpHeaders aren't defined + if (!is_array($httpHeaders) || !count($httpHeaders)) { + $httpHeaders = $_SERVER; + } + // clear existing headers + $this->httpHeaders = array(); + // Only save HTTP headers. In PHP land, that means only _SERVER vars that + // start with HTTP_. + foreach ($httpHeaders as $key => $value) { + if (substr($key, 0, 5) === 'HTTP_') { + $this->httpHeaders[$key] = $value; + } + } + } + + /** + * Return user agent headers. + * + * @return array + */ + public function getUaHttpHeaders() + { + return self::$uaHttpHeaders; + } + + /** + * Set the user agent. + * + * @param string $userAgent + */ + public function setUserAgent($userAgent = null) + { + if (false === empty($userAgent)) { + return $this->userAgent = $userAgent; + } else { + $this->userAgent = null; + foreach ($this->getUaHttpHeaders() as $altHeader) { + if (false === empty($this->httpHeaders[$altHeader])) { // @todo: should use getHttpHeader(), but it would be slow. + $this->userAgent .= $this->httpHeaders[$altHeader].' 
'; + } + } + + return $this->userAgent = (!empty($this->userAgent) ? trim($this->userAgent) : null); + } + } + + /** + * Build the user agent regex. + * + * @return string + */ + public function getRegex() + { + return '('.implode('|', self::$crawlers).')'; + } + + /** + * Build the replacement regex. + * + * @return string + */ + public function getIgnored() + { + return '('.implode('|', self::$ignore).')'; + } + + /** + * Check user agent string against the regex. + * + * @param string $userAgent + * + * @return bool + */ + public function isCrawler($userAgent = null) + { + $agent = is_null($userAgent) ? $this->userAgent : $userAgent; + + $agent = preg_replace('/'.$this->getIgnored().'/i', '', $agent); + + $result = preg_match('/'.$this->getRegex().'/i', $agent, $matches); + + if ($matches) { + $this->matches = $matches; + } + + return (bool) $result; + } + + /** + * Return the matches. + * + * @return array + */ + public function getMatches() + { + return $this->matches[0]; + } +} diff --git a/helper.php b/helper.php index 0b0ac66..0745aa9 100644 --- a/helper.php +++ b/helper.php @@ -140,6 +140,13 @@ public function rate($rate, $page) { $sqlite = $this->getDBHelper(); if(!$sqlite) return; + // ignore any bot accesses + if(!class_exists('Jaybizzle\CrawlerDetect\CrawlerDetect')){ + require (__DIR__ . '/CrawlerDetect.php'); + } + $CrawlerDetect = new Jaybizzle\CrawlerDetect\CrawlerDetect(); + if($CrawlerDetect->isCrawler()) return; + $translation = plugin_load('helper', 'translation'); if (!$translation) { $lang = ''; diff --git a/plugin.info.txt b/plugin.info.txt index e16a046..3c597d1 100644 --- a/plugin.info.txt +++ b/plugin.info.txt @@ -1,7 +1,7 @@ base rating author Andreas Gohr email gohr@cosmocode.de -date 2015-02-19 +date 2016-01-13 name rating plugin desc Allows rating a page url https://www.dokuwiki.org/plugin:rating