Skip to content

Commit

Permalink
Optimized for increased performance + better render compression
Browse files Browse the repository at this point in the history
  • Loading branch information
JanPetterMG committed Aug 10, 2016
1 parent 0591feb commit b215f8e
Show file tree
Hide file tree
Showing 21 changed files with 228 additions and 102 deletions.
8 changes: 4 additions & 4 deletions src/Client/Cache/MySQL/Manager.php
Expand Up @@ -290,17 +290,17 @@ public function cron($timeLimit, $workerID)
SELECT base
FROM robotstxt__cache1
WHERE worker = :workerID
ORDER BY nextUpdate DESC
LIMIT 10;
SQL
);
$query->bindParam(':workerID', $worker, PDO::PARAM_INT);
$query->execute();
if ($query->rowCount() > 0) {
while ($row = $query->fetch(PDO::FETCH_ASSOC)) {
if (!$this->push(new UriClient($row['base'], $this->curlOptions, $this->byteLimit))) {
throw new ClientException('Unable to update `' . $row['base'] . '`');
if ($this->push(new UriClient($row['base'], $this->curlOptions, $this->byteLimit))) {
$log[(string)microtime(true)] = $row['base'];
}
$log[(string)microtime(true)] = $row['base'];
}
}
}
Expand Down Expand Up @@ -338,7 +338,7 @@ public function clean($delay)
$delay = self::CACHE_TIME + $delay;
$query = $this->pdo->prepare(<<<SQL
DELETE FROM robotstxt__cache1
WHERE (worker = 0 OR worker IS NULL) AND nextUpdate < (UNIX_TIMESTAMP() - :delay);
WHERE nextUpdate < (UNIX_TIMESTAMP() - :delay);
SQL
);
$query->bindParam(':delay', $delay, PDO::PARAM_INT);
Expand Down
8 changes: 4 additions & 4 deletions src/Client/Directives/DelayCore.php
Expand Up @@ -77,10 +77,10 @@ public function getUserAgent()
*/
public function handle(PDO $pdo)
{
// Returns the delay handler for this directive; it is obtained from a
// DatabaseHandler's delayClient() on first use and kept in $this->handler.
// NOTE(review): this is a diff hunk rendered without +/- markers (header:
// 4 additions & 4 deletions). The `=== null` variant and the `isset`
// early-return variant below are two alternative bodies shown interleaved,
// not one sequence of statements.
if ($this->handler === null) {
$handler = new DatabaseHandler($pdo);
$this->handler = $handler->delayClient($this->base, $this->userAgent, $this->getValue());
if (isset($this->handler)) {
return $this->handler;
}
return $this->handler;
$handler = new DatabaseHandler($pdo);
return $this->handler = $handler->delayClient($this->base, $this->userAgent, $this->getValue());
}
}
2 changes: 1 addition & 1 deletion src/Client/Directives/UserAgentTools.php
Expand Up @@ -133,7 +133,7 @@ private function checkPath($directive, $uri)
self::DIRECTIVE_NO_INDEX => $this->handler->noIndex(),
self::DIRECTIVE_DISALLOW => $this->handler->disallow(),
self::DIRECTIVE_ALLOW => $this->handler->allow(),
] as $currentDirective => $handler) {
] as $currentDirective => &$handler) {
if ($handler->client()->isListed($uri)) {
if ($currentDirective === self::DIRECTIVE_NO_INDEX) {
return $directive === self::DIRECTIVE_DISALLOW;
Expand Down
90 changes: 44 additions & 46 deletions src/Parser/Directives/AllowParser.php
Expand Up @@ -54,6 +54,18 @@ class AllowParser implements ParserInterface, RobotsTxtInterface
*/
private $host;

/**
* Optimized for performance
* @var bool
*/
private $optimized = false;

/**
* Client cache
* @var AllowClient
*/
private $client;

/**
* AllowParser constructor
*
Expand Down Expand Up @@ -94,39 +106,37 @@ public function add($line)
*/
private function addPath($path)
{
// Appends $path to $this->path and reports whether it is present afterwards.
// NOTE(review): diff hunk without +/- markers. The pre-check loop, the
// isPath() guard and the eager removeOverlapping() call look like the
// removed variant; the rtrim() + first-character check that only clears
// $this->optimized looks like the added one. Confirm against the commit.
foreach ([
$path,
'/',
'*',
] as $testPath) {
if (in_array($testPath, $this->path)) {
return false;
}
}
if ($this->isPath($path)) {
// Strip trailing wildcards, then accept only paths that start with '/' or '*'.
$path = rtrim($path, '*');
if (in_array(mb_substr($path, 0, 1), [
'/',
'*',
])) {
$this->path[] = $path;
$this->removeOverlapping();
// Mark the list as needing a new overlap pass before it is rendered or exported.
// NOTE(review): $this->client is not reset here, while client() returns the
// cached instance once it is set; verify no path is added after client() was called.
$this->optimized = false;
}
return in_array($path, $this->path);
}

/**
* Check if path is valid
* Render
*
* @param string $path
* @param RenderHandler $handler
* @return bool
*/
private function isPath($path)
public function render(RenderHandler $handler)
{
// NOTE(review): diff hunk without +/- markers. The '*'/'?' replacement loop
// and the `return mb_strpos(...)` line appear to belong to the removed
// isPath() helper; the remaining statements are render() itself.
if (mb_strpos($path, '/') !== 0) {
foreach ([
'*',
'?',
] as $char) {
$path = str_replace($char, '/', $path);
}
// Run the overlap pass only when the optimized flag is not set.
if (!$this->optimized) {
$this->removeOverlapping();
}
// Sort the paths, render host and clean-param rules into a nested handler
// created at the same level, attach it inline, then emit every path.
sort($this->path);
$inline = new RenderHandler($handler->getLevel());
$this->host->render($inline);
$this->cleanParam->render($inline);
$handler->addInline($this->directive, $inline);
foreach ($this->path as $path) {
$handler->add($this->directive, $path);
}
return mb_strpos($path, '/') === 0;
return true;
}

/**
Expand All @@ -136,35 +146,18 @@ private function isPath($path)
*/
private function removeOverlapping()
{
// Removes every stored path that begins with another stored path (the added
// variant also tests the path with '*' replaced by '/'), then sets the
// optimized flag.
// NOTE(review): diff hunk without +/- markers. The by-value foreach headers,
// the single mb_strpos() test, the recursive restart and the whole
// pre-change render() method further down are interleaved with the added lines.
foreach ($this->path as $key1 => $path1) {
foreach ($this->path as $key2 => $path2) {
// NOTE(review): both loops bind by reference over the same array that is
// unset() inside the loop, and $path1/$path2 are never unset afterwards.
// The comparison only reads the values, so the references look unnecessary;
// confirm and consider iterating by value.
foreach ($this->path as $key1 => &$path1) {
foreach ($this->path as $key2 => &$path2) {
if ($key1 !== $key2 &&
mb_strpos($path1, $path2) === 0
(mb_strpos($path1, $path2) === 0 ||
mb_strpos(str_replace('*', '/', $path1), $path2) === 0
)
) {
unset($this->path[$key1]);
return $this->removeOverlapping();
}
}
}
return true;
}

/**
* Render
*
* @param RenderHandler $handler
* @return bool
*/
public function render(RenderHandler $handler)
{
sort($this->path);
$inline = new RenderHandler($handler->getLevel());
$this->host->render($inline);
$this->cleanParam->render($inline);
$handler->addInline($this->directive, $inline);
foreach ($this->path as $path) {
$handler->add($this->directive, $path);
}
// Record that the overlap pass has run so render()/client() can skip it.
$this->optimized = true;
return true;
}

Expand All @@ -175,6 +168,11 @@ public function render(RenderHandler $handler)
*/
public function client()
{
// Returns the AllowClient for the collected paths; the added variant builds
// it once and keeps it in $this->client.
// NOTE(review): diff hunk without +/- markers. The first `return new ...`
// line appears to be the removed pre-change body, replaced by the cached
// variant that follows it.
return new AllowClient($this->path, $this->host->client(), $this->cleanParam->client());
if (isset($this->client)) {
return $this->client;
} elseif (!$this->optimized) {
// Run the overlap pass before the first client is built.
$this->removeOverlapping();
}
return $this->client = new AllowClient($this->path, $this->host->client(), $this->cleanParam->client());
}
}
11 changes: 10 additions & 1 deletion src/Parser/Directives/CleanParamParser.php
Expand Up @@ -17,6 +17,12 @@
*/
class CleanParamParser extends CleanParamParserCore
{
/**
* Client cache
* @var CleanParamClient
*/
private $client;

/**
* CleanParamParser constructor.
*/
Expand All @@ -32,6 +38,9 @@ public function __construct()
*/
public function client()
{
// Returns a CleanParamClient for $this->cleanParam; the added variant
// creates it once and reuses the instance stored in $this->client.
// NOTE(review): diff hunk without +/- markers (header: 10 additions &
// 1 deletion). The first `return new ...` line appears to be the single
// deleted line, not live code ahead of the cache check.
return new CleanParamClient($this->cleanParam);
if (isset($this->client)) {
return $this->client;
}
return $this->client = new CleanParamClient($this->cleanParam);
}
}
6 changes: 3 additions & 3 deletions src/Parser/Directives/CleanParamParserCore.php
Expand Up @@ -42,12 +42,12 @@ public function add($line)
{
// split into parameter and path
$array = array_map('trim', mb_split('\s+', $line, 2));
// strip any invalid characters from path prefix
$path = '/';
if (isset($array[1])) {
// strip any invalid characters from path prefix
$uriParser = new UriParser(preg_replace('/[^A-Za-z0-9\.-\/\*\_]/', '', $array[1]));
$path = $uriParser->encode();
$path = rtrim($uriParser->encode(), '*');
}
$path = empty($path) ? '/' : $path;
$param = array_map('trim', explode('&', $array[0]));
foreach ($param as $key) {
$this->cleanParam[$key][] = $path;
Expand Down
11 changes: 10 additions & 1 deletion src/Parser/Directives/DelayParser.php
Expand Up @@ -37,6 +37,12 @@ class DelayParser implements ParserInterface, RobotsTxtInterface
*/
private $delay;

/**
* Client cache
* @var DelayClient
*/
private $client;

/**
* DelayParser constructor.
*
Expand Down Expand Up @@ -80,7 +86,10 @@ public function add($line)
*/
public function client($userAgent = self::USER_AGENT, $fallbackValue = 0)
{
// Returns a DelayClient built from the base, user agent, stored delay and fallback value.
// NOTE(review): diff hunk without +/- markers (header: 10 additions &
// 1 deletion). The first `return new ...` line appears to be the single
// deleted line.
return new DelayClient($this->base, $userAgent, $this->delay, $fallbackValue);
// NOTE(review): once set, the cached instance is returned whatever
// $userAgent / $fallbackValue are passed, so a later call with different
// arguments receives the client built for the first call. Confirm callers
// always pass the same values, or key the cache by the arguments.
if (isset($this->client)) {
return $this->client;
}
return $this->client = new DelayClient($this->base, $userAgent, $this->delay, $fallbackValue);
}

/**
Expand Down
11 changes: 10 additions & 1 deletion src/Parser/Directives/HostParser.php
Expand Up @@ -18,6 +18,12 @@
*/
class HostParser extends HostParserCore
{
/**
* Client cache
* @var HostClient
*/
private $client;

/**
* HostParser constructor.
*
Expand All @@ -36,7 +42,10 @@ public function __construct($base, $effective)
*/
public function client()
{
// Returns a HostClient for the base and effective URIs; only the first
// collected host (if any) is passed on.
// NOTE(review): diff hunk without +/- markers (header: 10 additions &
// 1 deletion). The first `return new ...` line appears to be the single
// deleted line, replaced by the cached variant below.
return new HostClient($this->base, $this->effective, isset($this->host[0]) ? [$this->host[0]] : []);
if (isset($this->client)) {
return $this->client;
}
return $this->client = new HostClient($this->base, $this->effective, isset($this->host[0]) ? [$this->host[0]] : []);
}

/**
Expand Down
11 changes: 10 additions & 1 deletion src/Parser/Directives/InlineCleanParamParser.php
Expand Up @@ -18,6 +18,12 @@
*/
class InlineCleanParamParser extends CleanParamParserCore
{
/**
* Client cache
* @var InlineCleanParamClient
*/
private $client;

/**
* InlineCleanParamParser constructor.
*/
Expand All @@ -33,6 +39,9 @@ public function __construct()
*/
public function client()
{
// Returns an InlineCleanParamClient for $this->cleanParam; the added variant
// creates it once and reuses the instance stored in $this->client.
// NOTE(review): diff hunk without +/- markers (header: 10 additions &
// 1 deletion). The first `return new ...` line appears to be the single
// deleted line.
return new InlineCleanParamClient($this->cleanParam);
if (isset($this->client)) {
return $this->client;
}
return $this->client = new InlineCleanParamClient($this->cleanParam);
}
}
11 changes: 10 additions & 1 deletion src/Parser/Directives/InlineHostParser.php
Expand Up @@ -18,6 +18,12 @@
*/
class InlineHostParser extends HostParserCore
{
/**
* Client cache
* @var InlineHostClient
*/
private $client;

/**
* InlineHostParser constructor.
*
Expand All @@ -36,7 +42,10 @@ public function __construct($base, $effective)
*/
public function client()
{
// Returns an InlineHostClient for the base and effective URIs and the full
// list of collected hosts; the added variant caches it in $this->client.
// NOTE(review): diff hunk without +/- markers (header: 10 additions &
// 1 deletion). The first `return new ...` line appears to be the single
// deleted line.
return new InlineHostClient($this->base, $this->effective, $this->host);
if (isset($this->client)) {
return $this->client;
}
return $this->client = new InlineHostClient($this->base, $this->effective, $this->host);
}

/**
Expand Down
11 changes: 10 additions & 1 deletion src/Parser/Directives/RequestRateParser.php
Expand Up @@ -33,6 +33,12 @@ class RequestRateParser implements ParserInterface, RobotsTxtInterface
*/
private $requestRates = [];

/**
* Client cache
* @var RequestRateClient
*/
private $client;

/**
* RequestRate constructor.
*
Expand Down Expand Up @@ -107,8 +113,11 @@ private function draftParseRate($string)
*/
public function client($userAgent = self::USER_AGENT, $fallbackValue = 0)
{
// Returns a RequestRateClient built from the base, user agent, the sorted
// request rates and the fallback value.
// NOTE(review): once set, the cached instance is returned whatever
// $userAgent / $fallbackValue are passed, and sort() is skipped. Confirm
// callers always pass the same values, or key the cache by the arguments.
if (isset($this->client)) {
return $this->client;
}
$this->sort();
// NOTE(review): diff hunk without +/- markers (header: 10 additions &
// 1 deletion). The plain `return new ...` line appears to be the single
// deleted line; the assigning return after it is its replacement.
return new RequestRateClient($this->base, $userAgent, $this->requestRates, $fallbackValue);
return $this->client = new RequestRateClient($this->base, $userAgent, $this->requestRates, $fallbackValue);
}

/**
Expand Down
11 changes: 10 additions & 1 deletion src/Parser/Directives/RobotVersionParser.php
Expand Up @@ -25,6 +25,12 @@ class RobotVersionParser implements ParserInterface, RobotsTxtInterface
*/
private $version;

/**
* Client cache
* @var RobotVersionClient
*/
private $client;

/**
* RobotVersionParser constructor.
*/
Expand Down Expand Up @@ -54,7 +60,10 @@ public function add($line)
*/
public function client()
{
// Returns a RobotVersionClient for $this->version; the added variant creates
// it once and reuses the instance stored in $this->client.
// NOTE(review): diff hunk without +/- markers (header: 10 additions &
// 1 deletion). The first `return new ...` line appears to be the single
// deleted line.
return new RobotVersionClient($this->version);
if (isset($this->client)) {
return $this->client;
}
return $this->client = new RobotVersionClient($this->version);
}

/**
Expand Down

0 comments on commit b215f8e

Please sign in to comment.