From ff0081ff7cdd51f60a1ac239fae9f8e1cb90e6e0 Mon Sep 17 00:00:00 2001 From: Hugo Maugey Date: Fri, 31 Mar 2017 15:46:05 +0200 Subject: [PATCH] Add log adn 404 500 --- example/prerender.php | 58 +++-- example/test.php | 26 +++ readme.md | 46 +++- src/Hug/PrerenderSpa/PrerenderSpa.php | 231 ++++++++++++++++++-- tests/Hug/PrerenderSpa/PrerenderSpaTest.php | 102 ++++++++- tests/data/404.html | 1 + tests/data/500.html | 1 + tests/data/logs/snapshot-31-03-2017.log | 2 + 8 files changed, 436 insertions(+), 31 deletions(-) create mode 100644 tests/data/404.html create mode 100644 tests/data/500.html create mode 100644 tests/data/logs/snapshot-31-03-2017.log diff --git a/example/prerender.php b/example/prerender.php index 5298909..1f6037f 100644 --- a/example/prerender.php +++ b/example/prerender.php @@ -5,23 +5,55 @@ use Hug\PrerenderSpa\PrerenderSpa as PrerenderSpa; use Hug\Http\Http as Http; -$url = $_REQUEST['URL']; -error_log('url : ' . $url); +# Where do you store prerender filesystem $output = __DIR__ . '/../data/'; +$log = true; + + +# .htaccess http://prerender.io/URL_TO_SNAP +$url = $_REQUEST['URL']; +# .htaccess http://prerender?URL_TO_SNAP +// $url = $_SERVER['QUERY_STRING']; + +# In special case you have to rewrite home URL +// if($url==='https://hugo.maugey.fr/index.php') +// $url = 'https://hugo.maugey.fr/index'; -# Get Snapshot -// $url = 'https://hugo.maugey.fr/developeur-web/HTML5'; -if(false !== $snapshot = PrerenderSpa::get_snapshot($url, $output) +// error_log('Prerender URL : ' . $url); + +$html = null; +$http_code = null; + +try { - Http::header_status(200); - # gzip ? - echo $snapshot; + # Get Snapshot + if(false !== $snapshot = PrerenderSpa::get_snapshot($url, $output) + { + $http_code = 200; + $html = $snapshot; + } + else + { + # Set Header status 404 Not Found + $http_code = 404; + $html = PrerenderSpa::get_404($output); + } + + # Log Snapshot Request to analyse traffic + if($log) + { + $ip = $_REQUEST['REMOTE_ADDR']; + $ua = $_SERVER['HTTP_USER_AGENT']; + PrerenderSpa::log_snapshot($ip, $ua, $url, $http_code, $output); + } + } -else +catch(\Exception $e) { - # Set Header status 404 Not Found - Http::header_status(404); - # include real 404 - echo '404'; + $http_code = 500; + $html = PrerenderSpa::get_500($output); } + +Http::header_status($http_code); +echo $html; diff --git a/example/test.php b/example/test.php index 3536f2c..34b0151 100644 --- a/example/test.php +++ b/example/test.php @@ -8,6 +8,7 @@ $prerender_auth = 'USER:PASS'; $output = __DIR__ .'/../data/'; + # Load Sitemap Urls // $urls = PrerenderSpa::get_sitemap_urls(__DIR__ . '/../data/sitemap.xml'); @@ -26,3 +27,28 @@ error_log(print_r($PrerenderSpa->report, true)); +# Get Your Personnalized 404 +// $page = PrerenderSpa::get_404($output); +// echo $page; + +# Set Your Personnalized 404 +// $html = 'coucou'; +// $set = PrerenderSpa::set_404($html, $output); +// echo var_dump($set); + +# Get Your Personnalized 500 +// $page = PrerenderSpa::get_500($output); +// echo $page; + +# Set Your Personnalized 500 +// $html = 'coucou'; +// $set = PrerenderSpa::set_500($html, $output); +// echo var_dump($set); + +# Log Snapshot Request +// $ip = '123.123.123.123'; +// $ua = 'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'; +// $url = 'http://test.com'; +// $http_code = 200; +// $log = PrerenderSpa::log_snapshot($ip, $ua, $url, $http_code, $output); +// echo var_dump($log); diff --git a/readme.md b/readme.md index b88aeaf..3c3a99a 100644 --- a/readme.md +++ b/readme.md @@ -39,7 +39,12 @@ $PrerenderSpa->prerender(); # Wait .... # Print Report error_log(print_r($PrerenderSpa->report, true)); -# Checkout for 404 +# Set Your Personnalized 404 +$html = 'My Personalized 404'; +PrerenderSpa::set_404($html, $output); +# Set Your Personnalized 500 +$html = 'My Personalized 500'; +PrerenderSpa::set_500($html, $output); ``` ### Second Step : Serve Webpage Snapshots to web crawlers @@ -52,6 +57,44 @@ Redirect search engine crawlers to prerender.php service ``` +Have a look at [prerender.php](example/prerender.php) +```php +# Where do you store prerender filesystem +$output = __DIR__ . '/../data/'; + +# .htaccess http://prerender.io/URL_TO_SNAP +$url = $_REQUEST['URL']; + +$html = null; +$http_code = null; + +try +{ + # Get Snapshot + if(false !== $snapshot = PrerenderSpa::get_snapshot($url, $output) + { + $http_code = 200; + $html = $snapshot; + } + else + { + # Set Header status 404 Not Found + $http_code = 404; + $html = PrerenderSpa::get_404($output); + } + +} +catch(\Exception $e) +{ + $http_code = 500; + $html = PrerenderSpa::get_500($output); +} + +Http::header_status($http_code); +echo $html; +``` + + ### Third Step : Generate Snapshot On Demand Repeat First Step by providing only URLs whose content has changed to optimize server from running headless browser snapshot service for nothing. @@ -69,7 +112,6 @@ composer exec phpunit Compress gzip saved HTML Archive snapshots -Log search engine visits ## Author diff --git a/src/Hug/PrerenderSpa/PrerenderSpa.php b/src/Hug/PrerenderSpa/PrerenderSpa.php index 6754a10..e967add 100644 --- a/src/Hug/PrerenderSpa/PrerenderSpa.php +++ b/src/Hug/PrerenderSpa/PrerenderSpa.php @@ -191,18 +191,25 @@ public static function get_sitemap_urls($filename) try { - $sitemap = file_get_contents($filename); - if($sitemap!==false) - { - $xml = new \SimpleXMLElement($sitemap); - - $urls = []; - - foreach ($xml->url as $url_list) - { - $urls[] = (string)$url_list->loc; - } - } + if(is_file($filename) && is_readable($filename)) + { + $sitemap = file_get_contents($filename); + if($sitemap!==false) + { + $xml = new \SimpleXMLElement($sitemap); + + $urls = []; + + foreach ($xml->url as $url_list) + { + $urls[] = (string)$url_list->loc; + } + } + } + else + { + error_log('PrerenderSpa get_sitemap_urls : ' . $filename . ' does not exist or is not writable ! '); + } } catch (\Exception $e) { @@ -221,13 +228,20 @@ public static function get_sitemap_urls($filename) */ public static function get_snapshot($url, $output) { - $html = ''; + $html = false; try { $url_file = PrerenderSpa::url_to_filename($url); - $url_file_path = $output . $url_file; - $html = file_get_contents($url_file_path); + $url_file_path = $output . 'snapshots' . DIRECTORY_SEPARATOR . $url_file; + if(is_file($url_file_path) && is_readable($url_file_path)) + { + $html = file_get_contents($url_file_path); + } + else + { + error_log('PrerenderSpa get_snapshot : File ' . $url_file_path . ' does not exist or not readable !'); + } } catch (\Exception $e) { @@ -237,6 +251,54 @@ public static function get_snapshot($url, $output) return $html; } + /** + * Log Snap Shot Request And by whom + * + * @param string $ip + * @param string $useragent + * @param string $url + * @param string $http_code + * @return bool $log + */ + public static function log_snapshot($ip, $ua, $url, $http_code, $output) + { + $log = false; + + try + { + $date = new \DateTime('now'); + $today = $date->format('d-m-Y'); + $now = $date->format('d-m-Y H:i:s'); + + $today_log = $output . 'logs' . DIRECTORY_SEPARATOR . 'snapshot-'.$today.'.log'; + + if(!is_file($today_log)) + { + file_put_contents($today_log, ''); + } + + if(is_file($today_log) && is_readable($today_log)) + { + $log_line = $now.';'.$ip.';'.$url.';'.$http_code.';'.$ua."\n"; + + if(file_put_contents($today_log, $log_line, FILE_APPEND)!==false) + { + $log = true; + } + } + else + { + error_log('PrerenderSpa log_snapshot : File ' . $today_log . ' does not exist or not readable !'); + } + } + catch (\Exception $e) + { + $log = false; + } + + return $log; + } + /** * Save report generated by prerender function * @@ -286,6 +348,144 @@ public function load_reports() return $reports; } + /** + * Set Custom 404 page + * + * @param string $html + * @param string $output + * @return bool $saved + */ + public static function set_404($html, $output) + { + $saved = false; + + $filename = $output . '404.html'; + try + { + if(file_put_contents($filename, $html)!==false) + { + $saved = true; + } + } + catch(\Exception $e) + { + error_log('PrerenderSpa set_404 : ' . $e->getMessage()); + } + + return $saved; + } + + /** + * Get Custom or default 404 page + * + * @param string $output + * @return string $_404 + */ + public static function get_404($output) + { + $_404 = ''; + $filename = $output . '404.html'; + if(is_file($filename) && is_readable($filename)) + { + $_404 = file_get_contents($filename); + } + else + { + $_404 = PrerenderSpa::get_default_404(); + } + return $_404; + } + + /** + * Get Default 404 page + * + * @return string $_404 + */ + public static function get_default_404() + { + $_404 = <<<'LABEL' + + + + 404 + + +404 + + +LABEL; + return $_404; + } + + /** + * Set Custom 500 page + * + * @param string $html + * @param string $output + * @return bool $saved + */ + public static function set_500($html, $output) + { + $saved = false; + + $filename = $output . '500.html'; + try + { + if(file_put_contents($filename, $html)!==false) + { + $saved = true; + } + } + catch(\Exception $e) + { + error_log('PrerenderSpa set_500 : ' . $e->getMessage()); + } + + return $saved; + } + + /** + * Get Custom or default 500 page + * + * @param string $output + * @return string $_500 + */ + public static function get_500($output) + { + $_500 = ''; + $filename = $output . '500.html'; + if(is_file($filename) && is_readable($filename)) + { + $_500 = file_get_contents($filename); + } + else + { + $_500 = PrerenderSpa::get_default_500(); + } + return $_500; + } + + /** + * Get Default 500 page + * + * @return string $_500 + */ + public static function get_default_500() + { + $_500 = <<<'LABEL' + + + + 500 + + +500 + + +LABEL; + return $_500; + } + /** * Archive older file (for recovery or comparison) @@ -297,4 +497,5 @@ public function load_reports() { } }*/ + } \ No newline at end of file diff --git a/tests/Hug/PrerenderSpa/PrerenderSpaTest.php b/tests/Hug/PrerenderSpa/PrerenderSpaTest.php index d85c39d..fdd36f2 100644 --- a/tests/Hug/PrerenderSpa/PrerenderSpaTest.php +++ b/tests/Hug/PrerenderSpa/PrerenderSpaTest.php @@ -53,7 +53,7 @@ public function testCannotGetSitemapUrlsWithInvalidSitemap() */ public function testCanGetSnapshotWithValidUrl() { - $test = PrerenderSpa::get_snapshot('https://hugo.maugey.fr/developeur-web/HTML5', $this->output . 'snapshots/'); + $test = PrerenderSpa::get_snapshot('https://hugo.maugey.fr/developeur-web/HTML5', $this->output); // . 'snapshots/' $this->assertInternalType('string', $test); } @@ -88,5 +88,105 @@ public function testCanUrlToFilenameWithValidUrl() $this->assertEquals('conversion-entités-html.html', $test); } + /* ************************************************* */ + /* ************ PrerenderSpa::log_snapshot ********* */ + /* ************************************************* */ + + /** + * + */ + public function testCanLogSnapshot() + { + $ip = '123.123.123.123'; + $ua = 'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'; + $url = 'http://test.com'; + $http_code = 200; + $test = PrerenderSpa::log_snapshot($ip, $ua, $url, $http_code, $this->output); + $this->assertInternalType('boolean', $test); + $this->assertTrue($test); + } + + /* ************************************************* */ + /* ************** PrerenderSpa::set_404 ************ */ + /* ************************************************* */ + + /** + * + */ + public function testCanSet404() + { + $html = 'My 404 Page'; + $test = PrerenderSpa::set_404($html, $this->output); + $this->assertInternalType('boolean', $test); + $this->assertTrue($test); + } + + /* ************************************************* */ + /* ************** PrerenderSpa::get_404 ************ */ + /* ************************************************* */ + + /** + * + */ + public function testCanGet404() + { + $test = PrerenderSpa::get_404($this->output); + $this->assertInternalType('string', $test); + } + + /* ************************************************* */ + /* ********** PrerenderSpa::get_default_404 ******** */ + /* ************************************************* */ + + /** + * + */ + public function testCanGetDefault404() + { + $test = PrerenderSpa::get_default_404(); + $this->assertInternalType('string', $test); + } + + /* ************************************************* */ + /* ************** PrerenderSpa::set_500 ************ */ + /* ************************************************* */ + + /** + * + */ + public function testCanSet500() + { + $html = 'My 500 Page'; + $test = PrerenderSpa::set_500($html, $this->output); + $this->assertInternalType('boolean', $test); + $this->assertTrue($test); + } + + /* ************************************************* */ + /* ************** PrerenderSpa::get_500 ************ */ + /* ************************************************* */ + + /** + * + */ + public function testCanGet500() + { + $test = PrerenderSpa::get_500($this->output); + $this->assertInternalType('string', $test); + } + + /* ************************************************* */ + /* ********** PrerenderSpa::get_default_500 ******** */ + /* ************************************************* */ + + /** + * + */ + public function testCanGetDefault500() + { + $test = PrerenderSpa::get_default_500(); + $this->assertInternalType('string', $test); + } + } diff --git a/tests/data/404.html b/tests/data/404.html new file mode 100644 index 0000000..4a8c5ab --- /dev/null +++ b/tests/data/404.html @@ -0,0 +1 @@ +My 404 Page \ No newline at end of file diff --git a/tests/data/500.html b/tests/data/500.html new file mode 100644 index 0000000..39e52b4 --- /dev/null +++ b/tests/data/500.html @@ -0,0 +1 @@ +My 500 Page \ No newline at end of file diff --git a/tests/data/logs/snapshot-31-03-2017.log b/tests/data/logs/snapshot-31-03-2017.log new file mode 100644 index 0000000..297186a --- /dev/null +++ b/tests/data/logs/snapshot-31-03-2017.log @@ -0,0 +1,2 @@ +31-03-2017 15:32:38;123.123.123.123;http://test.com;200;Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) +31-03-2017 15:34:17;123.123.123.123;http://test.com;200;Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)