diff --git a/composer.lock b/composer.lock
index 4376e0e1..b7aa2560 100644
--- a/composer.lock
+++ b/composer.lock
@@ -955,16 +955,16 @@
},
{
"name": "php-stubs/wordpress-stubs",
- "version": "v5.4.1",
+ "version": "v5.4.2",
"source": {
"type": "git",
"url": "https://github.com/php-stubs/wordpress-stubs.git",
- "reference": "726e541337276f1648ef860efcfe7298bce3a1c5"
+ "reference": "38b0963698ca5858658a5b09198062411f22932a"
},
"dist": {
"type": "zip",
- "url": "https://api.github.com/repos/php-stubs/wordpress-stubs/zipball/726e541337276f1648ef860efcfe7298bce3a1c5",
- "reference": "726e541337276f1648ef860efcfe7298bce3a1c5",
+ "url": "https://api.github.com/repos/php-stubs/wordpress-stubs/zipball/38b0963698ca5858658a5b09198062411f22932a",
+ "reference": "38b0963698ca5858658a5b09198062411f22932a",
"shasum": ""
},
"replace": {
@@ -991,7 +991,7 @@
"static analysis",
"wordpress"
],
- "time": "2020-04-29T23:43:44+00:00"
+ "time": "2020-06-11T14:56:54+00:00"
},
{
"name": "phpcompatibility/php-compatibility",
@@ -1264,16 +1264,16 @@
},
{
"name": "phpstan/phpstan",
- "version": "0.12.27",
+ "version": "0.12.29",
"source": {
"type": "git",
"url": "https://github.com/phpstan/phpstan.git",
- "reference": "2abbd3253e38a258137f647f4e5fdbcb13142c3e"
+ "reference": "9771daaf6b95c6313b908d0bcdee0afcd51f838a"
},
"dist": {
"type": "zip",
- "url": "https://api.github.com/repos/phpstan/phpstan/zipball/2abbd3253e38a258137f647f4e5fdbcb13142c3e",
- "reference": "2abbd3253e38a258137f647f4e5fdbcb13142c3e",
+ "url": "https://api.github.com/repos/phpstan/phpstan/zipball/9771daaf6b95c6313b908d0bcdee0afcd51f838a",
+ "reference": "9771daaf6b95c6313b908d0bcdee0afcd51f838a",
"shasum": ""
},
"require": {
@@ -1316,7 +1316,7 @@
"type": "tidelift"
}
],
- "time": "2020-06-08T21:28:12+00:00"
+ "time": "2020-06-14T14:10:59+00:00"
},
{
"name": "phpunit/php-code-coverage",
diff --git a/readme.txt b/readme.txt
index 10ddcd08..af5820e8 100755
--- a/readme.txt
+++ b/readme.txt
@@ -2,9 +2,9 @@
Contributors: leonstafford
Tags: security, performance, static
Requires at least: 3.2
-Tested up to: 5.4.1
+Tested up to: 5.4.2
Requires PHP: 7.3
-Stable tag: 6.6.18
+Stable tag: 6.6.19
Publish your website as static HTML for improved performance and security.
@@ -128,8 +128,10 @@ Everyone's WordPress hosting environment and configuration is unique, with diffe
== Changelog ==
-= 6.6.18 =
+= 6.6.19 =
+ * granular crawl and deploy progress indicators
+ * no more txt files polluting uploads dir, slowing things down
* progress indicator on WP_CLI generate cmd
* finally supporting UTF/multibyte URLs!
* preserve font hex values in parsed stylesheets
diff --git a/src/Archive.php b/src/Archive.php
index 776b0de9..12a41071 100755
--- a/src/Archive.php
+++ b/src/Archive.php
@@ -17,63 +17,13 @@ public function __construct() {
[ 'wpenv' ]
);
- $this->path = '';
- $this->name = '';
- }
-
- public function setToCurrentArchive() : void {
- $handle = fopen(
- $this->settings['wp_uploads_path'] .
- '/WP2STATIC-CURRENT-ARCHIVE.txt',
- 'r'
- );
-
- if ( ! is_resource( $handle ) ) {
- return;
- }
-
- $path = stream_get_line( $handle, 0 );
-
- if ( ! $path ) {
- return;
- }
-
- $this->path = $path;
+ $this->path = $this->settings['wp_uploads_path'] . '/static-html-output/';
$this->name = basename( $this->path );
}
- public function currentArchiveExists() : bool {
- return is_file(
- $this->settings['wp_uploads_path'] .
- '/WP2STATIC-CURRENT-ARCHIVE.txt'
- );
- }
-
public function create() : void {
- $this->name = $this->settings['wp_uploads_path'] .
- '/wp-static-html-output-' . time();
-
- $this->path = $this->name . '/';
- $this->name = basename( $this->path );
-
- if ( wp_mkdir_p( $this->path ) ) {
- $result = file_put_contents(
- $this->settings['wp_uploads_path'] .
- '/WP2STATIC-CURRENT-ARCHIVE.txt',
- $this->path
- );
-
- if ( ! $result ) {
- WsLog::l( 'USER WORKING DIRECTORY NOT WRITABLE' );
- }
-
- chmod(
- $this->settings['wp_uploads_path'] .
- '/WP2STATIC-CURRENT-ARCHIVE.txt',
- 0664
- );
- } else {
- WsLog::l( "Couldn't create archive directory at $this->path" );
+ if ( ! wp_mkdir_p( $this->path ) ) {
+ Logger::l( "Couldn't create archive directory at $this->path" );
}
}
}
diff --git a/src/ArchiveProcessor.php b/src/ArchiveProcessor.php
index 3c363c7e..4b294798 100755
--- a/src/ArchiveProcessor.php
+++ b/src/ArchiveProcessor.php
@@ -12,14 +12,9 @@ class ArchiveProcessor extends StaticHTMLOutput {
* @var Archive
*/
public $archive;
- /**
- * @var string
- */
- public $target_folder;
public function __construct() {
$this->archive = new Archive();
- $this->archive->setToCurrentArchive();
$this->loadSettings(
[
@@ -29,14 +24,13 @@ public function __construct() {
'processing',
'netlify',
'zip',
- 'folder',
]
);
}
public function renameWPDirectory( string $source, string $target ) : void {
if ( empty( $source ) || empty( $target ) ) {
- WsLog::l(
+ Logger::l(
'Failed trying to rename: ' .
'Source: ' . $source .
' to: ' . $target
@@ -54,7 +48,7 @@ public function renameWPDirectory( string $source, string $target ) : void {
$original_dir
);
} else {
- WsLog::l(
+ Logger::l(
'Trying to rename non-existent directory: ' .
$original_dir
);
@@ -146,89 +140,6 @@ public function put_safety_file( string $dirname ) : bool {
return true;
}
- public function copyStaticSiteToPublicFolder() : void {
- if ( $this->settings['selected_deployment_option'] === 'folder' ) {
- $target_folder = trim( $this->settings['targetFolder'] );
- $this->target_folder = $target_folder;
-
- if ( ! $target_folder ) {
- return;
- }
-
- // instantiate with safe defaults
- $directory_exists = true;
- $directory_empty = false;
- $dir_has_safety_file = false;
-
- // CHECK #1: directory exists or can be created
- $directory_exists = is_dir( $target_folder );
-
- if ( $directory_exists ) {
- $directory_empty = $this->dir_is_empty( $target_folder );
- } else {
- if ( wp_mkdir_p( $target_folder ) ) {
- if ( ! $this->put_safety_file( $target_folder ) ) {
- WsLog::l(
- 'Couldn\'t put safety file in ' .
- 'Target Directory' .
- $target_folder
- );
-
- die();
- }
- } else {
- WsLog::l(
- 'Couldn\'t create Target Directory: ' .
- $target_folder
- );
-
- die();
- }
- }
-
- // CHECK #2: check directory empty and add safety file
- if ( $directory_empty ) {
- if ( ! $this->put_safety_file( $target_folder ) ) {
- WsLog::l(
- 'Couldn\'t put safety file in ' .
- 'Target Directory' .
- $target_folder
- );
-
- die();
- }
- }
-
- $dir_has_safety_file =
- $this->dir_has_safety_file( $target_folder );
-
- if ( $directory_empty || $dir_has_safety_file ) {
- $this->recursive_copy(
- $this->archive->path,
- $this->target_folder
- );
-
- if ( ! $this->put_safety_file( $target_folder ) ) {
- WsLog::l(
- 'Couldn\'t put safety file in ' .
- 'Target Directory' .
- $target_folder
- );
-
- die();
- }
- } else {
- WsLog::l(
- 'Target Directory wasn\'t empty ' .
- 'or didn\'t contain safety file ' .
- $target_folder
- );
-
- die();
- }
- }
- }
-
public function createNetlifySpecialFiles() : void {
if ( $this->settings['selected_deployment_option'] !== 'netlify' ) {
return;
@@ -262,7 +173,7 @@ public function create_zip() : void {
$zip_archive = new ZipArchive();
if ( $zip_archive->open( $temp_zip, ZIPARCHIVE::CREATE ) !== true ) {
- WsLog::l( 'Could not create archive' );
+ Logger::l( 'Could not create archive' );
return;
}
@@ -284,7 +195,7 @@ public function create_zip() : void {
str_replace( $this->archive->path, '', $filename )
)
) {
- WsLog::l( 'Could not add file: ' . $filename );
+ Logger::l( 'Could not add file: ' . $filename );
return;
}
}
diff --git a/src/BitBucket.php b/src/BitBucket.php
index 60778e65..c24fe940 100755
--- a/src/BitBucket.php
+++ b/src/BitBucket.php
@@ -30,6 +30,10 @@ class BitBucket extends SitePublisher {
* @var string
*/
public $local_file_contents;
+ /**
+ * @var string
+ */
+ public $local_file;
public function __construct() {
$this->loadSettings( 'bitbucket' );
@@ -41,10 +45,6 @@ public function __construct() {
$this->api_base = 'https://api.bitbucket.org/2.0/repositories/';
- $this->previous_hashes_path =
- $this->settings['wp_uploads_path'] .
- '/WP2STATIC-BITBUCKET-PREVIOUS-HASHES.txt';
-
if ( defined( 'WP_CLI' ) ) {
return; }
}
@@ -66,12 +66,41 @@ public function upload_files() : void {
$lines = $this->getItemsToDeploy( $batch_size );
- $this->openPreviousHashesFile();
-
$this->files_data = [];
foreach ( $lines as $line ) {
- $this->addFileToBatchForCommitting( $line );
+ $this->local_file = $line->url;
+ $this->target_path = $line->remote_path;
+
+ $this->local_file = $this->archive->path . $this->local_file;
+
+ $deploy_queue_path = str_replace( $this->archive->path, '', $this->local_file );
+
+ if ( ! is_file( $this->local_file ) ) {
+ DeployQueue::removeURL( $deploy_queue_path );
+ return;
+ }
+
+ $this->local_file_contents = (string) file_get_contents( $this->local_file );
+
+ if ( ! $this->local_file_contents ) {
+ DeployQueue::removeURL( $deploy_queue_path );
+ return;
+ }
+
+ $cached_hash = DeployCache::fileIsCached( $deploy_queue_path );
+
+ if ( $cached_hash ) {
+ $current_hash = md5( $this->local_file_contents );
+
+ if ( $current_hash != $cached_hash ) {
+ $this->addFileToBatchForCommitting( $line );
+ }
+ } else {
+ $this->addFileToBatchForCommitting( $line );
+ }
+
+ DeployQueue::removeURL( $deploy_queue_path );
// NOTE: progress will indicate file preparation, not the transfer
$this->updateProgress();
@@ -79,8 +108,6 @@ public function upload_files() : void {
$this->sendBatchToBitbucket();
- $this->writeFilePathAndHashesToFile();
-
$this->pauseBetweenAPICalls();
if ( $this->uploadsCompleted() ) {
@@ -121,48 +148,25 @@ public function test_upload() : void {
$this->finalizeDeployment();
}
- public function addFileToBatchForCommitting( string $line ) : void {
- list($local_file, $this->target_path) = explode( ',', $line );
-
- $local_file = $this->archive->path . $local_file;
+ /**
+ * @param mixed $line local file and remote path to deploy
+ */
+ public function addFileToBatchForCommitting( $line ) : void {
$this->files_data['message'] = 'StaticHTMLOutput deployment';
+ $this->local_file = $line->url;
+ $this->target_path = $line->remote_path;
+ $this->local_file = $this->archive->path . $this->local_file;
- if ( ! is_file( $local_file ) ) {
- return; }
-
- $this->local_file_contents = (string) file_get_contents( $local_file );
+ $this->files_data[ '/' . rtrim( $this->target_path ) ] =
+ new CURLFile( $this->local_file );
+ }
- if ( ! $this->local_file_contents ) {
+ public function sendBatchToBitbucket() : void {
+ if ( ! $this->files_data ) {
return;
}
- if ( isset( $this->file_paths_and_hashes[ $this->target_path ] ) ) {
- $prev = $this->file_paths_and_hashes[ $this->target_path ];
- $current = crc32( $this->local_file_contents );
-
- if ( $prev != $current ) {
- $this->files_data[ '/' . rtrim( $this->target_path ) ] =
- new CURLFile( $local_file );
-
- $this->recordFilePathAndHashInMemory(
- $this->target_path,
- $this->local_file_contents
- );
- }
- } else {
- $this->files_data[ '/' . rtrim( $this->target_path ) ] =
- new CURLFile( $local_file );
-
- $this->recordFilePathAndHashInMemory(
- $this->target_path,
- $this->local_file_contents
- );
- }
-
- }
-
- public function sendBatchToBitbucket() : void {
$this->client = new Request();
$remote_path = $this->api_base . $this->settings['bbRepo'] . '/src';
@@ -184,6 +188,13 @@ public function sendBatchToBitbucket() : void {
$this->client->status_code,
[ 200, 201, 301, 302, 304 ]
);
+
+ foreach ( $this->files_data as $curl_file ) {
+ $deploy_queue_path =
+ str_replace( $this->archive->path, '', $curl_file->name );
+
+ DeployCache::addFile( $deploy_queue_path );
+ }
} catch ( StaticHTMLOutputException $e ) {
$this->handleException( $e );
}
diff --git a/src/BunnyCDN.php b/src/BunnyCDN.php
index 2968382f..70bb5d73 100755
--- a/src/BunnyCDN.php
+++ b/src/BunnyCDN.php
@@ -40,10 +40,6 @@ public function __construct() {
$this->api_base = 'https://storage.bunnycdn.com';
}
- $this->previous_hashes_path =
- $this->settings['wp_uploads_path'] .
- '/WP2STATIC-BUNNYCDN-PREVIOUS-HASHES.txt';
-
if ( defined( 'WP_CLI' ) ) {
return;
}
@@ -71,48 +67,46 @@ public function upload_files() : void {
$lines = $this->getItemsToDeploy( $batch_size );
- $this->openPreviousHashesFile();
-
foreach ( $lines as $line ) {
- list($this->local_file, $this->target_path) = explode( ',', $line );
+ $this->local_file = $line->url;
+ $this->target_path = $line->remote_path;
$this->local_file = $this->archive->path . $this->local_file;
+ $deploy_queue_path = str_replace( $this->archive->path, '', $this->local_file );
+
if ( ! is_file( $this->local_file ) ) {
- continue; }
+ DeployQueue::removeURL( $deploy_queue_path );
+ continue;
+ }
$this->local_file_contents = (string) file_get_contents( $this->local_file );
if ( ! $this->local_file_contents ) {
+ DeployQueue::removeURL( $deploy_queue_path );
continue;
}
- if ( isset( $this->file_paths_and_hashes[ $this->target_path ] ) ) {
- $prev = $this->file_paths_and_hashes[ $this->target_path ];
- $current = crc32( $this->local_file_contents );
+ $cached_hash = DeployCache::fileIsCached( $deploy_queue_path );
- if ( $prev != $current ) {
- $this->createFileInBunnyCDN();
+ if ( $cached_hash ) {
+ $current_hash = md5( $this->local_file_contents );
- $this->recordFilePathAndHashInMemory(
- $this->target_path,
- $this->local_file_contents
- );
+ if ( $current_hash != $cached_hash ) {
+ $this->createFileInBunnyCDN();
+ DeployCache::addFile( $deploy_queue_path );
}
} else {
$this->createFileInBunnyCDN();
- $this->recordFilePathAndHashInMemory(
- $this->target_path,
- $this->local_file_contents
- );
+ DeployCache::addFile( $deploy_queue_path );
}
+ DeployQueue::removeURL( $deploy_queue_path );
+
$this->updateProgress();
}
- $this->writeFilePathAndHashesToFile();
-
$this->pauseBetweenAPICalls();
if ( $this->uploadsCompleted() ) {
@@ -147,8 +141,8 @@ public function purge_all_cache() : void {
echo 'SUCCESS';
}
} catch ( StaticHTMLOutputException $e ) {
- WsLog::l( 'BUNNYCDN PURGE CACHE: error encountered' );
- WsLog::l( $e );
+ Logger::l( 'BUNNYCDN PURGE CACHE: error encountered' );
+ Logger::l( $e );
throw new StaticHTMLOutputException( $e );
}
}
@@ -178,8 +172,8 @@ public function test_deploy() : void {
}
} catch ( StaticHTMLOutputException $e ) {
- WsLog::l( 'BUNNYCDN TEST EXPORT: error encountered' );
- WsLog::l( $e );
+ Logger::l( 'BUNNYCDN TEST EXPORT: error encountered' );
+ Logger::l( $e );
throw new StaticHTMLOutputException( $e );
}
@@ -210,11 +204,10 @@ public function createFileInBunnyCDN() : void {
$result->HttpCode,
[ 200, 201, 301, 302, 304 ]
);
-
}
} catch ( StaticHTMLOutputException $e ) {
- WsLog::l( 'BUNNYCDN EXPORT: error encountered' );
- WsLog::l( $e );
+ Logger::l( 'BUNNYCDN EXPORT: error encountered' );
+ Logger::l( $e );
$this->handleException( $e );
}
}
diff --git a/src/CLI.php b/src/CLI.php
index 28d7efc3..4ed7a360 100755
--- a/src/CLI.php
+++ b/src/CLI.php
@@ -100,14 +100,13 @@ public function generate() : void {
$plugin = Controller::getInstance();
WP_CLI::log( 'Generating file list' );
- $plugin->generate_filelist_preview();
+ $plugin->detect_urls();
WP_CLI::log( 'Preparing for export' );
$plugin->prepare_for_export();
$site_crawler = new SiteCrawler();
$site_crawler->crawl_site();
- $site_crawler->crawl_discovered_links();
WP_CLI::log( 'Performing post process actions' );
$plugin->post_process_archive_dir();
diff --git a/src/CSSProcessor.php b/src/CSSProcessor.php
index c79d8747..279d1e7c 100755
--- a/src/CSSProcessor.php
+++ b/src/CSSProcessor.php
@@ -67,10 +67,6 @@ class CSSProcessor extends StaticHTMLOutput {
* @var string[]
*/
public $discovered_urls;
- /**
- * @var bool
- */
- public $harvest_new_urls;
/**
* @var string[]
*/
@@ -90,7 +86,7 @@ public function __construct(
bool $remove_wp_meta = false,
string $rewrite_rules = '',
string $base_url,
- string $selected_deployment_option = 'folder',
+ string $selected_deployment_option = 'zip',
string $wp_site_url,
string $wp_uploads_path
) {
@@ -126,7 +122,6 @@ public function processCSS( string $css_document, string $page_url ) : bool {
$css_parser = new Sabberworm\CSS\Parser( $this->raw_css );
$this->css_doc = $css_parser->parse();
$this->page_url = new Net_URL2( $page_url );
- $this->detectIfURLsShouldBeHarvested();
$this->discovered_urls = [];
$this->urls_to_rewrite = [];
@@ -213,8 +208,6 @@ public function processCSS( string $css_document, string $page_url ) : bool {
}
}
- $this->writeDiscoveredURLs();
-
return true;
}
@@ -358,20 +351,6 @@ public function rewriteSiteURLsToPlaceholder(
return $rewritten_source;
}
- public function detectIfURLsShouldBeHarvested() : void {
- if ( defined( 'WP_CLI' ) ) {
- if ( defined( 'CRAWLING_DISCOVERED' ) ) {
- return;
- } else {
- $this->harvest_new_urls = true;
- }
- } else {
- $ajax_method = filter_input( INPUT_POST, 'ajax_action' );
-
- $this->harvest_new_urls = $ajax_method === 'crawl_site';
- }
- }
-
public function addDiscoveredURL( string $url ) : void {
// only discover assets, not HTML/XML. etc
$extension = pathinfo( $url, PATHINFO_EXTENSION );
@@ -384,66 +363,45 @@ public function addDiscoveredURL( string $url ) : void {
$url = strtok( $url, '#' );
$url = trim( (string) strtok( (string) $url, '?' ) );
- if ( ! $url ) {
+ if ( trim( (string) $url ) === '' ) {
return;
}
- if ( $this->harvest_new_urls ) {
- if ( ! $this->isValidURL( $url ) ) {
- return;
- }
-
- if ( $this->isInternalLink( $url ) ) {
- // get FQU resolved to this page
- $url = $this->page_url->resolve( $url );
-
- $discovered_url_without_site_url =
- str_replace(
- rtrim( $this->wp_site_url, '/' ),
- '',
- $url
- );
-
- $discovered_url_without_site_url =
- str_replace(
- rtrim( $this->placeholder_url, '/' ),
- '',
- $discovered_url_without_site_url
- );
-
- if ( is_string( $discovered_url_without_site_url ) ) {
- $this->discovered_urls[] = $discovered_url_without_site_url;
- }
- }
+ if ( ! $url ) {
+ return;
}
- }
-
- public function writeDiscoveredURLs() : void {
- $ajax_method = filter_input( INPUT_POST, 'ajax_action' );
- if ( $ajax_method === 'crawl_again' ) {
+ if ( ! $this->isValidURL( $url ) ) {
return;
}
- if ( defined( 'WP_CLI' ) ) {
- if ( defined( 'CRAWLING_DISCOVERED' ) ) {
- return;
+ if ( $this->isInternalLink( $url ) ) {
+ // get FQU resolved to this page
+ $url = $this->page_url->resolve( $url );
+
+ $discovered_url_without_site_url =
+ str_replace(
+ rtrim( $this->wp_site_url, '/' ),
+ '',
+ $url
+ );
+
+ $discovered_url_without_site_url =
+ str_replace(
+ rtrim( $this->placeholder_url, '/' ),
+ '',
+ $discovered_url_without_site_url
+ );
+
+ if ( is_string( $discovered_url_without_site_url ) ) {
+ // ignore empty or root / (duct tapes issue with / being repeatedly added)
+ if ( trim( $discovered_url_without_site_url ) === '/' ) {
+ return;
+ }
+
+ $this->discovered_urls[] = $discovered_url_without_site_url;
}
}
-
- file_put_contents(
- $this->wp_uploads_path .
- '/WP-STATIC-DISCOVERED-URLS.txt',
- PHP_EOL .
- implode( PHP_EOL, array_unique( $this->discovered_urls ) ),
- FILE_APPEND | LOCK_EX
- );
-
- chmod(
- $this->wp_uploads_path .
- '/WP-STATIC-DISCOVERED-URLS.txt',
- 0664
- );
}
public function isValidURL( string $url ) : bool {
@@ -500,5 +458,20 @@ public function getProtocolRelativeURL( string $url ) : string {
return $this->destination_protocol_relative_url;
}
+
+ /**
+  * Returns the URLs collected in $this->discovered_urls: de-duplicated,
+  * with empty entries dropped, and sorted ascending.
+  *
+  * @return string[] Discovered URLs
+  */
+ public function getDiscoveredURLs() : array {
+     // array_filter() returns a new array; its result used to be discarded,
+     // so empty entries were never actually removed.
+     $discovered_urls = array_filter( array_unique( $this->discovered_urls ) );
+
+     // sort() re-indexes, closing the key gaps left by the two calls above.
+     sort( $discovered_urls );
+
+     return $discovered_urls;
+ }
}
diff --git a/src/Controller.php b/src/Controller.php
index a9d2a69d..9637d159 100755
--- a/src/Controller.php
+++ b/src/Controller.php
@@ -28,7 +28,7 @@ class Controller {
*/
public $wp_site;
- const VERSION = '6.6.18';
+ const VERSION = '6.6.19';
const OPTIONS_KEY = 'statichtmloutput-options';
const HOOK = 'statichtmloutput';
@@ -64,10 +64,10 @@ public static function init( string $bootstrap_file ) : Controller {
add_filter( 'custom_menu_order', '__return_true' );
add_filter( 'menu_order', [ 'StaticHTMLOutput\Controller', 'set_menu_order' ] );
}
+
return $instance;
}
-
/**
* Adjusts position of dashboard menu icons
*
@@ -109,7 +109,15 @@ public function setDefaultOptions() : void {
}
public function activate_for_single_site() : void {
+ // add_action( 'init', [ 'StaticHTMLOutput\Controller', 'add_custom_routes' ], 0 );
+
+ Logger::createTable();
$this->setDefaultOptions();
+ CrawlQueue::createTable();
+ CrawlLog::createTable();
+ DeployQueue::createTable();
+ DeployCache::createTable();
+ Exclusions::createTable();
}
/**
@@ -143,8 +151,6 @@ public static function activate( $network_wide ) : void {
}
public static function registerOptionsPage() : void {
- $plugins_url = plugin_dir_url( dirname( __FILE__ ) );
-
$page = add_menu_page(
'Static HTML',
'Static HTML',
@@ -178,7 +184,11 @@ public function finalize_deployment() : void {
echo 'SUCCESS';
}
- public function generate_filelist_preview() : void {
+ public function detect_urls() : void {
+ // clear CrawlQueue before rebuilding list
+ CrawlQueue::truncate();
+ CrawlLog::truncate();
+
$this->wp_site = new WPSite();
$target_settings = [
@@ -194,8 +204,6 @@ public function generate_filelist_preview() : void {
PostSettings::get( $target_settings );
}
- $plugin_hook = 'statichtmloutput';
-
$initial_file_list_count =
FilesHelper::buildInitialFileList(
true,
@@ -203,19 +211,32 @@ public function generate_filelist_preview() : void {
$this->wp_site->uploads_url,
$this->settings
);
-
- if ( ! defined( 'WP_CLI' ) ) {
- echo $initial_file_list_count;
- }
}
public static function renderOptionsPage() : void {
$instance = self::getInstance();
+ $instance->detect_urls();
$instance->wp_site = new WPSite();
$instance->current_archive = '';
$instance->view
->setTemplate( 'options-page-js' )
+ ->assign(
+ 'crawl_progress_url',
+ admin_url( 'admin.php?page=statichtmloutput&statichtmloutput-crawl-progress=1' )
+ )
+ ->assign(
+ 'deploy_progress_url',
+ admin_url( 'admin.php?page=statichtmloutput&statichtmloutput-deploy-progress=1' )
+ )
+ ->assign(
+ 'crawl_log_url',
+ admin_url( 'admin.php?page=statichtmloutput&statichtmloutput-crawl-log=1' )
+ )
+ ->assign(
+ 'export_log_url',
+ admin_url( 'admin.php?page=statichtmloutput&statichtmloutput-export-log=1' )
+ )
->assign( 'options', $instance->options )
->assign( 'wp_site', $instance->wp_site )
->assign( 'onceAction', self::HOOK . '-options' )
@@ -225,6 +246,7 @@ public static function renderOptionsPage() : void {
->setTemplate( 'options-page' )
->assign( 'wp_site', $instance->wp_site )
->assign( 'options', $instance->options )
+ ->assign( 'total_detected_urls', CrawlQueue::getTotal() )
->assign( 'onceAction', self::HOOK . '-options' )
->render();
}
@@ -247,15 +269,17 @@ public function save_options() : void {
public function prepare_for_export() : void {
$this->exporter = new Exporter();
- $this->exporter->pre_export_cleanup();
- $this->exporter->cleanup_leftover_archives();
- $this->exporter->initialize_cache_files();
+ // $this->exporter->cleanup_leftover_archives();
+ Logger::truncate();
- $archive = new Archive();
- $archive->create();
+ $this->detect_urls();
$this->logEnvironmentalInfo();
+ $archive = new Archive();
+ $archive->create();
+
+ // TODO: this is now just Inclusions/Exclusions task:
$this->exporter->generateModifiedFileList();
if ( ! defined( 'WP_CLI' ) ) {
@@ -265,7 +289,7 @@ public function prepare_for_export() : void {
public function reset_default_settings() : void {
if ( ! delete_option( 'statichtmloutput-options' ) ) {
- WsLog::l( 'Error resetting options to defaults' );
+ Logger::l( 'Error resetting options to defaults' );
echo 'ERROR';
}
@@ -282,7 +306,6 @@ public function post_process_archive_dir() : void {
// NOTE: renameWP Directories also doing same server publish
$processor->renameArchiveDirectories();
$processor->removeWPCruft();
- $processor->copyStaticSiteToPublicFolder();
$processor->create_zip();
if ( ! defined( 'WP_CLI' ) ) {
@@ -291,35 +314,7 @@ public function post_process_archive_dir() : void {
}
public function delete_deploy_cache() : void {
- $target_settings = [
- 'wpenv',
- ];
-
- if ( defined( 'WP_CLI' ) ) {
- $this->settings =
- DBSettings::get( $target_settings );
- } else {
- $this->settings =
- PostSettings::get( $target_settings );
- }
-
- $uploads_dir = $this->settings['wp_uploads_path'];
-
- $cache_files = [
- '/WP2STATIC-GITLAB-PREVIOUS-HASHES.txt',
- '/WP2STATIC-GITHUB-PREVIOUS-HASHES.txt',
- '/WP2STATIC-S3-PREVIOUS-HASHES.txt',
- '/WP2STATIC-BUNNYCDN-PREVIOUS-HASHES.txt',
- '/WP2STATIC-BITBUCKET-PREVIOUS-HASHES.txt',
- // Add to cleanup script when upgrading > 6.6.8
- // '/WP2STATIC-FTP-PREVIOUS-HASHES.txt',
- ];
-
- foreach ( $cache_files as $cache_file ) {
- if ( is_file( $uploads_dir . $cache_file ) ) {
- unlink( $uploads_dir . $cache_file );
- }
- }
+ DeployCache::truncate();
if ( ! defined( 'WP_CLI' ) ) {
echo 'SUCCESS';
@@ -346,30 +341,28 @@ public function logEnvironmentalInfo() : void {
$info[] = 'SERVER SOFTWARE ' . $_SERVER['SERVER_SOFTWARE'];
}
- WsLog::l( implode( PHP_EOL, $info ) );
+ Logger::l( implode( PHP_EOL, $info ) );
- WsLog::l( 'Active plugins:' );
+ Logger::l( 'Active plugins:' );
$active_plugins = get_option( 'active_plugins' );
foreach ( $active_plugins as $active_plugin ) {
- WsLog::l( $active_plugin );
+ Logger::l( $active_plugin );
}
- WsLog::l( 'Plugin options:' );
+ Logger::l( 'Plugin options:' );
$options = $this->options->getAllOptions( false );
foreach ( $options as $key => $value ) {
- WsLog::l( "{$value['Option name']}: {$value['Value']}" );
+ Logger::l( "{$value['Option name']}: {$value['Value']}" );
}
- WsLog::l( 'Installed extensions:' );
+ Logger::l( 'Installed extensions:' );
$extensions = get_loaded_extensions();
- foreach ( $extensions as $extension ) {
- WsLog::l( $extension );
- }
+ Logger::l( implode( ',', $extensions ) );
}
}
diff --git a/src/CrawlLog.php b/src/CrawlLog.php
new file mode 100644
index 00000000..b22fef04
--- /dev/null
+++ b/src/CrawlLog.php
@@ -0,0 +1,190 @@
+prefix . 'statichtmloutput_crawl_log';
+
+ $charset_collate = $wpdb->get_charset_collate();
+
+ /**
+ * Detected/discovered URLs added with initial status of 0
+ * and will be updated with response code after crawling
+ */
+ $sql = "CREATE TABLE $table_name (
+ id mediumint(9) NOT NULL AUTO_INCREMENT,
+ url VARCHAR(2083) NOT NULL,
+ note TEXT NOT NULL,
+ status SMALLINT DEFAULT 0 NOT NULL,
+ PRIMARY KEY (id)
+ ) $charset_collate;";
+
+ require_once ABSPATH . 'wp-admin/includes/upgrade.php';
+ dbDelta( $sql );
+ }
+
+ /**
+  * Add all Urls to log
+  *
+  * Falsy entries are skipped. Every remaining URL is stored rawurldecode()d,
+  * together with the shared note and status, using a single INSERT.
+  *
+  * @param string[] $urls List of URLs to log info for
+  * @param string $note Note stored alongside every URL
+  * @param int $status Status stored alongside every URL
+  */
+ public static function addUrls( array $urls, string $note, int $status = 0 ) : void {
+     global $wpdb;
+
+     $table_name = $wpdb->prefix . 'statichtmloutput_crawl_log';
+
+     $placeholders = [];
+     $values = [];
+
+     foreach ( $urls as $url ) {
+         if ( ! $url ) {
+             continue;
+         }
+
+         $placeholders[] = '(%s, %s, %d)';
+         $values[] = rawurldecode( $url );
+         $values[] = $note;
+         $values[] = $status;
+     }
+
+     // Nothing usable to insert: "INSERT ... VALUES" followed by no tuples
+     // is invalid SQL, and prepare() would be called without placeholders.
+     if ( ! $placeholders ) {
+         return;
+     }
+
+     $query_string =
+         'INSERT INTO ' . $table_name . ' (url, note, status) VALUES ' .
+         implode( ', ', $placeholders );
+     $query = $wpdb->prepare( $query_string, $values );
+
+     $wpdb->query( $query );
+ }
+
+ /**
+  * Lists the url column of every crawl log row, ordered by URL ascending.
+  *
+  * @return string[] All crawlable URLs
+  */
+ public static function getCrawlablePaths() : array {
+     global $wpdb;
+
+     $table_name = $wpdb->prefix . 'statichtmloutput_crawl_log';
+     $records = $wpdb->get_results( "SELECT url FROM $table_name ORDER by url ASC" );
+
+     $paths = [];
+
+     foreach ( $records as $record ) {
+         $paths[] = $record->url;
+     }
+
+     return $paths;
+ }
+
+ /**
+  * Get whole CrawlLog
+  *
+  * @return mixed[] Every crawl log row as returned by $wpdb->get_results(), ordered by URL
+  */
+ public static function getAll() : array {
+     global $wpdb;
+
+     $table_name = $wpdb->prefix . 'statichtmloutput_crawl_log';
+
+     $rows = $wpdb->get_results( "SELECT * FROM $table_name ORDER by url ASC" );
+
+     // get_results() is documented as array|object|null; anything that is
+     // not an array would violate the declared array return type.
+     return is_array( $rows ) ? $rows : [];
+ }
+
+ /**
+  * Get total crawlable URLs
+  *
+  * Counts every row in the crawl log table.
+  *
+  * @return int Total crawlable URLs
+  */
+ public static function getTotalCrawlableURLs() : int {
+     global $wpdb;
+
+     $table_name = $wpdb->prefix . 'statichtmloutput_crawl_log';
+
+     $total_crawl_log = $wpdb->get_var( "SELECT COUNT(*) FROM $table_name" );
+
+     // get_var() hands back a string, or null when there is no result;
+     // cast explicitly so the int return type always holds.
+     return (int) $total_crawl_log;
+ }
+
+ /**
+  * Get crawled URLs
+  *
+  * Counts the crawl log rows whose status is greater than 0.
+  *
+  * @return int Total crawled URLs
+  */
+ public static function getTotalCrawledURLs() : int {
+     global $wpdb;
+
+     $table_name = $wpdb->prefix . 'statichtmloutput_crawl_log';
+
+     $total_crawl_log =
+         $wpdb->get_var(
+             "SELECT COUNT(*) FROM $table_name WHERE status > 0"
+         );
+
+     // get_var() hands back a string, or null when there is no result;
+     // cast explicitly so the int return type always holds.
+     return (int) $total_crawl_log;
+ }
+
+ /**
+  * Clear Crawl Log via truncate or deletion
+  *
+  * Runs TRUNCATE TABLE first. If rows are still present afterwards, the
+  * failure is logged and the rows are removed with DELETE instead.
+  */
+ public static function truncate() : void {
+     global $wpdb;
+
+     $table_name = $wpdb->prefix . 'statichtmloutput_crawl_log';
+
+     $wpdb->query( "TRUNCATE TABLE $table_name" );
+
+     if ( self::getTotalCrawlableURLs() > 0 ) {
+         Logger::l( 'failed to truncate Crawl Log: try deleting instead' );
+
+         // Fallback promised by the docblock but previously never executed.
+         $wpdb->query( "DELETE FROM $table_name" );
+     }
+ }
+
+ /**
+  * Count URLs in Crawl Log
+  *
+  * @return int Number of rows in the crawl log table
+  */
+ public static function getTotal() : int {
+     global $wpdb;
+
+     $table_name = $wpdb->prefix . 'statichtmloutput_crawl_log';
+
+     $total = $wpdb->get_var( "SELECT count(*) FROM $table_name" );
+
+     // get_var() hands back a string, or null when there is no result;
+     // cast explicitly so the int return type always holds.
+     return (int) $total;
+ }
+
+ /**
+  * Update URL status
+  *
+  * Sets the status column on every crawl log row whose url equals $url.
+  *
+  * @param string $url URL to match against the url column
+  * @param int $status New status value
+  */
+ public static function updateStatus( string $url, int $status ) : void {
+     global $wpdb;
+
+     $new_values = [ 'status' => $status ];
+     $matching = [ 'url' => $url ];
+
+     $wpdb->update(
+         $wpdb->prefix . 'statichtmloutput_crawl_log',
+         $new_values,
+         $matching
+     );
+ }
+
+ /**
+  * Check if URL is in CrawlLog
+  *
+  * @param string $url URL to look up in the url column
+  * @return bool If URL exists
+  */
+ public static function hasUrl( string $url ) : bool {
+     global $wpdb;
+
+     $table_name = $wpdb->prefix . 'statichtmloutput_crawl_log';
+
+     // The URL is bound through prepare() rather than interpolated: crawled
+     // URLs may contain quotes, which broke the query and allowed injection.
+     $has_url = $wpdb->get_var(
+         $wpdb->prepare( "SELECT COUNT(*) FROM $table_name where url = %s", $url )
+     );
+
+     return (bool) $has_url;
+ }
+}
diff --git a/src/CrawlQueue.php b/src/CrawlQueue.php
new file mode 100644
index 00000000..fa442778
--- /dev/null
+++ b/src/CrawlQueue.php
@@ -0,0 +1,132 @@
+prefix . 'statichtmloutput_urls';
+
+ $charset_collate = $wpdb->get_charset_collate();
+
+ $sql = "CREATE TABLE $table_name (
+ id mediumint(9) NOT NULL AUTO_INCREMENT,
+ url VARCHAR(2083) NOT NULL,
+ PRIMARY KEY (id)
+ ) $charset_collate;";
+
+ require_once ABSPATH . 'wp-admin/includes/upgrade.php';
+ dbDelta( $sql );
+ }
+
+ /**
+  * Add all Urls to queue
+  *
+  * Falsy entries are skipped. Every remaining URL is stored rawurldecode()d
+  * using a single INSERT.
+  *
+  * @param string[] $urls List of URLs to crawl
+  */
+ public static function addUrls( array $urls ) : void {
+     global $wpdb;
+
+     $table_name = $wpdb->prefix . 'statichtmloutput_urls';
+
+     $placeholders = [];
+     $values = [];
+
+     foreach ( $urls as $url ) {
+         if ( ! $url ) {
+             continue;
+         }
+
+         $placeholders[] = '(%s)';
+         $values[] = rawurldecode( $url );
+     }
+
+     // Nothing usable to insert: "INSERT ... VALUES" followed by no tuples
+     // is invalid SQL, and prepare() would be called without placeholders.
+     if ( ! $placeholders ) {
+         return;
+     }
+
+     $query_string =
+         'INSERT INTO ' . $table_name . ' (url) VALUES ' .
+         implode( ', ', $placeholders );
+     $query = $wpdb->prepare( $query_string, $values );
+
+     $wpdb->query( $query );
+ }
+
+ /**
+  * Lists up to $limit queued URLs, ordered by URL ascending.
+  *
+  * @param int $limit Maximum number of URLs to return
+  * @return string[] All crawlable URLs
+  */
+ public static function getCrawlablePaths( int $limit = 500 ) : array {
+     global $wpdb;
+
+     $table_name = $wpdb->prefix . 'statichtmloutput_urls';
+     $records = $wpdb->get_results( "SELECT url FROM $table_name ORDER by url ASC LIMIT $limit" );
+
+     $paths = [];
+
+     foreach ( $records as $record ) {
+         $paths[] = $record->url;
+     }
+
+     return $paths;
+ }
+
+ /**
+  * Get total crawlable URLs
+  *
+  * Counts every row in the crawl queue table.
+  *
+  * @return int Total crawlable URLs
+  */
+ public static function getTotalCrawlableURLs() : int {
+     global $wpdb;
+
+     $table_name = $wpdb->prefix . 'statichtmloutput_urls';
+
+     $total_urls = $wpdb->get_var( "SELECT COUNT(*) FROM $table_name" );
+
+     // get_var() hands back a string, or null when there is no result;
+     // cast explicitly so the int return type always holds.
+     return (int) $total_urls;
+ }
+
+ /**
+  * Clear CrawlQueue via truncate or deletion
+  *
+  * Runs TRUNCATE TABLE first. If rows are still present afterwards, the
+  * failure is logged and the rows are removed with DELETE instead.
+  */
+ public static function truncate() : void {
+     global $wpdb;
+
+     $table_name = $wpdb->prefix . 'statichtmloutput_urls';
+
+     $wpdb->query( "TRUNCATE TABLE $table_name" );
+
+     if ( self::getTotalCrawlableURLs() > 0 ) {
+         Logger::l( 'failed to truncate CrawlQueue: try deleting instead' );
+
+         // Fallback promised by the docblock but previously never executed.
+         $wpdb->query( "DELETE FROM $table_name" );
+     }
+ }
+
+ /**
+  * Count URLs in Crawl Queue
+  *
+  * @return int Number of rows in the crawl queue table
+  */
+ public static function getTotal() : int {
+     global $wpdb;
+
+     $table_name = $wpdb->prefix . 'statichtmloutput_urls';
+
+     $total = $wpdb->get_var( "SELECT count(*) FROM $table_name" );
+
+     // get_var() hands back a string, or null when there is no result;
+     // cast explicitly so the int return type always holds.
+     return (int) $total;
+ }
+
+ /**
+  * Remove single URL from CrawlQueue
+  *
+  * Deletes every queue row whose url column equals $url.
+  *
+  * @param string $url URL to remove
+  */
+ public static function removeURL( string $url ) : void {
+     global $wpdb;
+
+     $table_name = $wpdb->prefix . 'statichtmloutput_urls';
+
+     // The delete() result was assigned to a local that was never read.
+     $wpdb->delete(
+         $table_name,
+         [ 'url' => $url ]
+     );
+ }
+}
diff --git a/src/DBSettings.php b/src/DBSettings.php
index f28e34b7..530ad445 100755
--- a/src/DBSettings.php
+++ b/src/DBSettings.php
@@ -49,11 +49,6 @@ public static function get( array $sets = [] ) {
'deployBatchSize',
];
- $key_sets['folder'] = [
- 'baseUrl-folder',
- 'targetFolder',
- ];
-
$key_sets['zip'] = [
'baseUrl-zip',
];
diff --git a/src/DeployCache.php b/src/DeployCache.php
new file mode 100644
index 00000000..e27e3c68
--- /dev/null
+++ b/src/DeployCache.php
@@ -0,0 +1,219 @@
+prefix . 'statichtmloutput_deploy_cache';
+
+ $charset_collate = $wpdb->get_charset_collate();
+
+ $sql = "CREATE TABLE $table_name (
+ path_hash CHAR(32) NOT NULL,
+ path VARCHAR(2083) NOT NULL,
+ file_hash CHAR(32) NOT NULL,
+ namespace VARCHAR(128) NOT NULL,
+ PRIMARY KEY (path_hash, namespace)
+ ) $charset_collate;";
+
+ require_once ABSPATH . 'wp-admin/includes/upgrade.php';
+ dbDelta( $sql );
+ }
+
+    /**
+     * Record a file and its content hash in the DeployCache
+     *
+     * The row is keyed by the MD5 of the file's full path inside the
+     * `static-html-output` uploads directory, together with the namespace.
+     * An existing row for the same key gets its file hash overwritten.
+     *
+     * @param string $local_path Path relative to the static-html-output directory
+     * @param string $namespace Cache namespace the entry belongs to
+     * @param string|null $file_hash MD5 of the file contents; when omitted the
+     *                               file is read from disk and hashed here
+     */
+    public static function addFile(
+        string $local_path,
+        string $namespace = self::DEFAULT_NAMESPACE,
+        ?string $file_hash = null
+    ) : void {
+        global $wpdb;
+
+        $deploy_cache_table = $wpdb->prefix . 'statichtmloutput_deploy_cache';
+
+        $target_settings = [
+            'wpenv',
+        ];
+
+        $settings = defined( 'WP_CLI' ) ?
+            DBSettings::get( $target_settings ) :
+            PostSettings::get( $target_settings );
+
+        $post_processed_dir = $settings['wp_uploads_path'] . '/static-html-output/';
+
+        $deployed_file = $post_processed_dir . $local_path;
+
+        $path_hash = md5( $deployed_file );
+
+        if ( ! $file_hash ) {
+            // check first so a missing file does not raise a PHP warning
+            if ( ! is_file( $deployed_file ) || ! is_readable( $deployed_file ) ) {
+                return;
+            }
+
+            $file_contents = file_get_contents( $deployed_file );
+
+            // unreadable and empty files are not cached; compare explicitly
+            // so a file containing just "0" is not mistaken for an empty one
+            if ( false === $file_contents || '' === $file_contents ) {
+                return;
+            }
+
+            $file_hash = md5( $file_contents );
+        }
+
+        $sql = "INSERT INTO {$deploy_cache_table} (path_hash,path,file_hash,namespace)" .
+            ' VALUES (%s,%s,%s,%s) ON DUPLICATE KEY UPDATE file_hash = %s, namespace = %s';
+
+        $sql = $wpdb->prepare(
+            // Insert values
+            $sql,
+            $path_hash,
+            $local_path,
+            $file_hash,
+            $namespace,
+            // Duplicate key values
+            $file_hash,
+            $namespace
+        );
+
+        $wpdb->query( $sql );
+    }
+
+    /**
+     * Checks if file can skip deployment
+     *  - uses hash of file and path's hash
+     *
+     * Looks for a DeployCache row matching the path hash, the file hash and
+     * the namespace. When no file hash is supplied, the file is read from the
+     * `static-html-output` uploads directory and hashed here.
+     *
+     * @param string $local_path Path relative to the static-html-output directory
+     * @param string $namespace Cache namespace to look in
+     * @param string|null $file_hash MD5 of the file contents, if already known
+     * @return null|string hash of file if cached; null when there is no
+     *                     matching row or the file is missing, unreadable or empty
+     */
+    public static function fileIsCached(
+        string $local_path,
+        string $namespace = self::DEFAULT_NAMESPACE,
+        ?string $file_hash = null
+    ) {
+        global $wpdb;
+
+        $target_settings = [
+            'wpenv',
+        ];
+
+        $settings = defined( 'WP_CLI' ) ?
+            DBSettings::get( $target_settings ) :
+            PostSettings::get( $target_settings );
+
+        $post_processed_dir = $settings['wp_uploads_path'] . '/static-html-output/';
+
+        $deployed_file = $post_processed_dir . $local_path;
+
+        $path_hash = md5( $deployed_file );
+
+        if ( ! $file_hash ) {
+            // check first so a missing file does not raise a PHP warning
+            if ( ! is_file( $deployed_file ) || ! is_readable( $deployed_file ) ) {
+                return null;
+            }
+
+            $file_contents = file_get_contents( $deployed_file );
+
+            // compare explicitly so a file containing just "0" is still
+            // hashed rather than treated as unreadable
+            if ( false === $file_contents || '' === $file_contents ) {
+                return null;
+            }
+
+            $file_hash = md5( $file_contents );
+        }
+
+        $table_name = $wpdb->prefix . 'statichtmloutput_deploy_cache';
+
+        $sql = $wpdb->prepare(
+            "SELECT file_hash FROM $table_name WHERE" .
+            ' path_hash = %s AND file_hash = %s AND namespace = %s LIMIT 1',
+            $path_hash,
+            $file_hash,
+            $namespace
+        );
+
+        $hash = $wpdb->get_var( $sql );
+
+        return $hash;
+    }
+
+    /**
+     * Remove every DeployCache row belonging to a namespace
+     *
+     * @param string $namespace Namespace whose cached entries are deleted
+     */
+    public static function truncate(
+        string $namespace = self::DEFAULT_NAMESPACE
+    ) : void {
+        global $wpdb;
+
+        Logger::l( 'Deleting DeployCache' );
+
+        $cache_table = $wpdb->prefix . 'statichtmloutput_deploy_cache';
+
+        $delete_query = $wpdb->prepare(
+            "DELETE FROM $cache_table WHERE namespace = %s",
+            $namespace
+        );
+
+        $wpdb->query( $delete_query );
+    }
+
+    /**
+     * Count Paths in Deploy Cache
+     *
+     * The database returns the count as a string (or null when the query
+     * fails); it is cast so the declared int return type always holds.
+     *
+     * @param string $namespace Namespace to count entries for
+     * @return int Number of cached paths, 0 when the query fails
+     */
+    public static function getTotal(
+        string $namespace = self::DEFAULT_NAMESPACE
+    ) : int {
+        global $wpdb;
+
+        $table_name = $wpdb->prefix . 'statichtmloutput_deploy_cache';
+
+        $sql = "SELECT count(*) FROM $table_name WHERE namespace = %s";
+        $sql = $wpdb->prepare( $sql, $namespace );
+        $total = $wpdb->get_var( $sql );
+
+        return (int) $total;
+    }
+
+    /**
+     * Count cached paths per namespace
+     *
+     * @return mixed[] map of namespace => row count as returned by the database
+     */
+    public static function getTotalsByNamespace() : array {
+        global $wpdb;
+
+        $cache_table = $wpdb->prefix . 'statichtmloutput_deploy_cache';
+
+        $namespace_rows = $wpdb->get_results(
+            "SELECT namespace, COUNT(*) AS count FROM $cache_table GROUP BY namespace"
+        );
+
+        $totals = [];
+
+        foreach ( $namespace_rows as $namespace_row ) {
+            $totals[ $namespace_row->namespace ] = $namespace_row->count;
+        }
+
+        return $totals;
+    }
+
+
+    /**
+     * List the paths cached for a namespace, sorted ascending
+     *
+     * @param string $namespace Namespace to list paths for
+     * @return string[] All cached paths
+     */
+    public static function getPaths(
+        string $namespace = self::DEFAULT_NAMESPACE
+    ) : array {
+        global $wpdb;
+
+        $cache_table = $wpdb->prefix . 'statichtmloutput_deploy_cache';
+
+        $cached_rows = $wpdb->get_results(
+            $wpdb->prepare(
+                "SELECT path FROM $cache_table WHERE namespace = %s",
+                $namespace
+            )
+        );
+
+        $paths = [];
+
+        foreach ( $cached_rows as $cached_row ) {
+            $paths[] = $cached_row->path;
+        }
+
+        // ordering is done in PHP rather than in the query
+        sort( $paths );
+
+        return $paths;
+    }
+}
diff --git a/src/DeployQueue.php b/src/DeployQueue.php
new file mode 100644
index 00000000..f3af45b1
--- /dev/null
+++ b/src/DeployQueue.php
@@ -0,0 +1,128 @@
+prefix . 'statichtmloutput_deploy_queue';
+
+ $charset_collate = $wpdb->get_charset_collate();
+
+ $sql = "CREATE TABLE $table_name (
+ id mediumint(9) NOT NULL AUTO_INCREMENT,
+ url VARCHAR(2083) NOT NULL,
+ remote_path VARCHAR(2083) NOT NULL,
+ PRIMARY KEY (id)
+ ) $charset_collate;";
+
+ require_once ABSPATH . 'wp-admin/includes/upgrade.php';
+ dbDelta( $sql );
+ }
+
+    /**
+     * Add a single URL and its remote path to the deploy queue
+     *
+     * Both values are URL-decoded before being stored. Nothing is queued
+     * when $url is empty.
+     *
+     * @param string $url URL to queue for deployment
+     * @param string $remote_path Destination path for the URL
+     */
+    public static function addUrl( string $url, string $remote_path ) : void {
+        if ( ! $url ) {
+            return;
+        }
+
+        global $wpdb;
+
+        $queue_table = $wpdb->prefix . 'statichtmloutput_deploy_queue';
+
+        $insert_query = $wpdb->prepare(
+            'INSERT INTO ' . $queue_table . ' (url, remote_path) VALUES (%s, %s)',
+            rawurldecode( $url ),
+            rawurldecode( $remote_path )
+        );
+
+        $wpdb->query( $insert_query );
+    }
+
+    /**
+     * Get all deployable URLs
+     *
+     * Rows are read in ascending URL order and capped at $limit entries.
+     * Each entry is a result row exposing the `url` and `remote_path` columns.
+     *
+     * @param int $limit Maximum number of rows to return
+     * @return mixed[] All deployable URLs and remote_paths
+     */
+    public static function getDeployablePaths( int $limit = 500 ) : array {
+        global $wpdb;
+
+        $table_name = $wpdb->prefix . 'statichtmloutput_deploy_queue';
+
+        $rows = $wpdb->get_results(
+            "SELECT url, remote_path FROM $table_name ORDER by url ASC LIMIT $limit"
+        );
+
+        return $rows;
+    }
+
+    /**
+     * Get total deployable URLs
+     *
+     * `$wpdb->get_var()` hands the count back as a string, or null when the
+     * query fails, so the value is cast to satisfy the int return type
+     * (a failed query is reported as 0 instead of raising a TypeError).
+     *
+     * @return int Total deployable URLs
+     */
+    public static function getTotalDeployableURLs() : int {
+        global $wpdb;
+
+        $table_name = $wpdb->prefix . 'statichtmloutput_deploy_queue';
+
+        $total_deploy_queue = $wpdb->get_var( "SELECT COUNT(*) FROM $table_name" );
+
+        return (int) $total_deploy_queue;
+    }
+
+    /**
+     * Clear DeployQueue via truncate or deletion
+     *
+     * TRUNCATE is attempted first. If rows are still present afterwards,
+     * the failure is logged and the rows are removed with a plain DELETE
+     * instead; a second failure is logged as well.
+     */
+    public static function truncate() : void {
+        global $wpdb;
+
+        $table_name = $wpdb->prefix . 'statichtmloutput_deploy_queue';
+
+        $wpdb->query( "TRUNCATE TABLE $table_name" );
+
+        if ( self::getTotalDeployableURLs() <= 0 ) {
+            return;
+        }
+
+        Logger::l( 'failed to truncate DeployQueue: try deleting instead' );
+
+        // fallback promised by the docblock: row-wise deletion
+        $wpdb->query( "DELETE FROM $table_name" );
+
+        if ( self::getTotalDeployableURLs() > 0 ) {
+            Logger::l( 'failed to delete rows from DeployQueue' );
+        }
+    }
+
+    /**
+     * Count URLs in Deploy Queue
+     *
+     * The database returns the count as a string (or null when the query
+     * fails); it is cast so the declared int return type always holds.
+     *
+     * @return int Number of queued URLs, 0 when the query fails
+     */
+    public static function getTotal() : int {
+        global $wpdb;
+
+        $table_name = $wpdb->prefix . 'statichtmloutput_deploy_queue';
+
+        $total = $wpdb->get_var( "SELECT count(*) FROM $table_name" );
+
+        return (int) $total;
+    }
+
+    /**
+     * Remove single URL from DeployQueue
+     *
+     * Deletes the rows whose `url` column matches $url.
+     *
+     * @param string $url URL to remove, in the form it is stored in the queue
+     */
+    public static function removeURL( string $url ) : void {
+        global $wpdb;
+
+        $table_name = $wpdb->prefix . 'statichtmloutput_deploy_queue';
+
+        // the delete() result was never inspected, so it is not captured
+        $wpdb->delete(
+            $table_name,
+            [ 'url' => $url ]
+        );
+    }
+}
diff --git a/src/Deployer.php b/src/Deployer.php
index ec8c2265..c6ec0c09 100755
--- a/src/Deployer.php
+++ b/src/Deployer.php
@@ -23,8 +23,6 @@ public function deploy( bool $test = false ) : string {
$start_time = microtime( true );
switch ( $this->settings['selected_deployment_option'] ) {
- case 'folder':
- break;
case 'zip':
break;
case 's3':
@@ -125,7 +123,6 @@ public function finalizeDeployment( string $deploy_result = '' ) : string {
public function triggerPostDeployHooks() : void {
$this->archive = new Archive();
- $this->archive->setToCurrentArchive();
do_action( 'statichtmloutput_post_deploy_trigger', $this->archive );
}
diff --git a/src/Exclusions.php b/src/Exclusions.php
new file mode 100644
index 00000000..77059440
--- /dev/null
+++ b/src/Exclusions.php
@@ -0,0 +1,73 @@
+prefix . 'statichtmloutput_exclusions';
+
+ $charset_collate = $wpdb->get_charset_collate();
+
+ $sql = "CREATE TABLE $table_name (
+ id mediumint(9) NOT NULL AUTO_INCREMENT,
+ pattern VARCHAR(2083) NOT NULL,
+ PRIMARY KEY (id)
+ ) $charset_collate;";
+
+ require_once ABSPATH . 'wp-admin/includes/upgrade.php';
+ dbDelta( $sql );
+ }
+
+    /**
+     * Add exclusion patterns
+     *
+     * Each pattern is trimmed and blank entries are skipped. When no usable
+     * pattern remains, no query is run: an INSERT with an empty VALUES list
+     * would be invalid SQL.
+     *
+     * @param string[] $patterns List of exclusion patterns
+     */
+    public static function addPatterns( array $patterns ) : void {
+        global $wpdb;
+
+        $table_name = $wpdb->prefix . 'statichtmloutput_exclusions';
+
+        $placeholders = [];
+        $values = [];
+
+        foreach ( $patterns as $pattern ) {
+            // patterns come from user-entered lines, so drop stray whitespace
+            $pattern = trim( (string) $pattern );
+
+            if ( ! $pattern ) {
+                continue;
+            }
+
+            $placeholders[] = '(%s)';
+            $values[] = $pattern;
+        }
+
+        if ( ! $placeholders ) {
+            return;
+        }
+
+        $query_string =
+            'INSERT INTO ' . $table_name . ' (pattern) VALUES ' .
+            implode( ', ', $placeholders );
+        $query = $wpdb->prepare( $query_string, $values );
+
+        $wpdb->query( $query );
+    }
+
+    /**
+     * Get all Exclusions patterns
+     *
+     * @return string[] All Exclusions patterns
+     */
+    public static function getAll() : array {
+        global $wpdb;
+        $patterns = [];
+
+        $table_name = $wpdb->prefix . 'statichtmloutput_exclusions';
+
+        $rows = $wpdb->get_results( "SELECT pattern FROM $table_name" );
+
+        foreach ( $rows as $row ) {
+            // the query selects the `pattern` column; rows carry no `url` property
+            $patterns[] = $row->pattern;
+        }
+
+        return $patterns;
+    }
+}
diff --git a/src/Exporter.php b/src/Exporter.php
index 59d0ac8a..65355dcf 100755
--- a/src/Exporter.php
+++ b/src/Exporter.php
@@ -14,114 +14,18 @@ public function __construct() {
);
}
- public function pre_export_cleanup() : void {
- $files_to_clean = [
- 'WP-STATIC-2ND-CRAWL-LIST.txt',
- 'WP-STATIC-404-LOG.txt',
- 'WP-STATIC-CRAWLED-LINKS.txt',
- 'WP-STATIC-DISCOVERED-URLS-LOG.txt',
- 'WP-STATIC-DISCOVERED-URLS.txt',
- 'WP2STATIC-FILES-TO-DEPLOY.txt',
- 'WP-STATIC-EXPORT-LOG.txt',
- 'WP-STATIC-FINAL-2ND-CRAWL-LIST.txt',
- 'WP-STATIC-FINAL-CRAWL-LIST.txt',
- 'WP2STATIC-GITLAB-FILES-IN-REPO.txt',
- ];
-
- foreach ( $files_to_clean as $file_to_clean ) {
- if ( file_exists(
- $this->settings['wp_uploads_path'] . '/' . $file_to_clean
- ) ) {
- unlink(
- $this->settings['wp_uploads_path'] . '/' .
- $file_to_clean
- );
- }
- }
- }
-
- public function cleanup_working_files() : void {
- // keep log files here for debugging
- // skip first export state
- if ( is_file(
- $this->settings['wp_uploads_path'] .
- '/WP2STATIC-CURRENT-ARCHIVE.txt'
- ) ) {
-
- $handle = fopen(
- $this->settings['wp_uploads_path'] .
- '/WP2STATIC-CURRENT-ARCHIVE.txt',
- 'r'
- );
-
- if ( is_resource( $handle ) ) {
- // TODO: looks like a random place for this...
- $this->settings['archive_dir'] = stream_get_line( $handle, 0 );
- }
- }
-
- $files_to_clean = [
- '/WP-STATIC-2ND-CRAWL-LIST.txt',
- '/WP-STATIC-CRAWLED-LINKS.txt',
- '/WP-STATIC-DISCOVERED-URLS.txt',
- '/WP2STATIC-FILES-TO-DEPLOY.txt',
- '/WP-STATIC-FINAL-2ND-CRAWL-LIST.txt',
- '/WP-STATIC-FINAL-CRAWL-LIST.txt',
- '/WP2STATIC-GITLAB-FILES-IN-REPO.txt',
- ];
-
- foreach ( $files_to_clean as $file_to_clean ) {
- if ( file_exists(
- $this->settings['wp_uploads_path'] . '/' . $file_to_clean
- ) ) {
- unlink(
- $this->settings['wp_uploads_path'] . '/' . $file_to_clean
- );
- }
- }
- }
-
- public function initialize_cache_files() : void {
- // TODO: is this still necessary?
- $crawled_links_file =
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-CRAWLED-LINKS.txt';
-
- $resource = fopen( $crawled_links_file, 'w' );
-
- if ( ! is_resource( $resource ) ) {
- return;
- }
-
- fwrite( $resource, '' );
- fclose( $resource );
- }
-
public function cleanup_leftover_archives() : void {
- $upload_dir_paths = scandir( $this->settings['wp_uploads_path'] );
+ $archive_path = $this->settings['wp_uploads_path'] . '/static-html-output/';
+ $zip_path = rtrim( $archive_path, '/' ) . '.zip';
- if ( ! $upload_dir_paths ) {
- return;
- }
-
- $leftover_files =
- preg_grep(
- '/^([^.])/',
- $upload_dir_paths
+ if ( is_dir( $archive_path ) ) {
+ FilesHelper::delete_dir_with_files(
+ $archive_path
);
+ }
- foreach ( $leftover_files as $filename ) {
- if ( strpos( $filename, 'wp-static-html-output-' ) !== false ) {
- $deletion_target = $this->settings['wp_uploads_path'] .
- '/' . $filename;
- if ( is_dir( $deletion_target ) ) {
- FilesHelper::delete_dir_with_files(
- $deletion_target
- );
- } else {
- unlink( $deletion_target );
- }
- }
+ if ( is_file( $zip_path ) ) {
+ unlink( $zip_path );
}
if ( ! defined( 'WP_CLI' ) ) {
@@ -130,44 +34,13 @@ public function cleanup_leftover_archives() : void {
}
public function generateModifiedFileList() : void {
- // preserve the initial crawl list, to be used in debugging + more
- copy(
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-INITIAL-CRAWL-LIST.txt',
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-MODIFIED-CRAWL-LIST.txt'
- );
-
- chmod(
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-MODIFIED-CRAWL-LIST.txt',
- 0664
- );
-
- // if no excludes or includes, just copy to new target
+ // if no excludes or includes, no changes to CrawlLog
if ( ! isset( $this->settings['excludeURLs'] ) &&
! isset( $this->settings['additionalUrls'] ) ) {
- copy(
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-INITIAL-CRAWL-LIST.txt',
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-FINAL-CRAWL-LIST.txt'
- );
-
return;
}
- $modified_crawl_list = [];
-
- // load crawl list into array
- $crawl_list = file(
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-MODIFIED-CRAWL-LIST.txt'
- );
-
- if ( ! $crawl_list ) {
- return;
- }
+ // TODO: inclusions get added to CrawlQueue if not in CrawlLog
// applying exclusions before inclusions
if ( isset( $this->settings['excludeURLs'] ) ) {
@@ -176,64 +49,29 @@ public function generateModifiedFileList() : void {
str_replace( "\r", '', $this->settings['excludeURLs'] )
);
- // iterate through crawl list and add any that aren't excluded
- foreach ( $crawl_list as $url_to_crawl ) {
- $url_to_crawl = trim( $url_to_crawl );
- $match = false;
-
- foreach ( $exclusions as $exclusion ) {
- $exclusion = trim( $exclusion );
-
- if ( $exclusion != '' ) {
- if ( strpos( $url_to_crawl, $exclusion ) !== false ) {
- WsLog::l( "Excluding $url_to_crawl because of rule $exclusion" );
-
- $match = true;
- }
- }
-
- if ( ! $match ) {
- $modified_crawl_list[] = $url_to_crawl;
- }
- }
- }
- } else {
- $modified_crawl_list = $crawl_list;
+ Exclusions::addPatterns( $exclusions );
}
if ( isset( $this->settings['additionalUrls'] ) ) {
- $inclusions = explode(
+ $inclusion_cadidates = explode(
"\n",
str_replace( "\r", '', $this->settings['additionalUrls'] )
);
+ // check inclusion isn't already in CrawlLog, else insert unique into CrawlQueue
+ $inclusions = Exclusions::getAll();
+
foreach ( $inclusions as $inclusion ) {
$inclusion = trim( $inclusion );
- $inclusion = $inclusion;
- $modified_crawl_list[] = $inclusion;
+ if ( ! CrawlLog::hasUrl( $inclusion ) ) {
+ $inclusions[] = $inclusion;
+ }
}
- }
- if ( ! is_array( $modified_crawl_list ) ) {
- return;
+ CrawlLog::addUrls( $inclusions, 'Included by user' );
+ CrawlQueue::addUrls( $inclusions );
}
-
- $modified_crawl_list = array_unique( $modified_crawl_list );
-
- $str = implode( PHP_EOL, $modified_crawl_list );
-
- file_put_contents(
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-FINAL-CRAWL-LIST.txt',
- $str
- );
-
- chmod(
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-FINAL-CRAWL-LIST.txt',
- 0664
- );
}
}
diff --git a/src/FileCopier.php b/src/FileCopier.php
index cd6b903d..a6535a31 100755
--- a/src/FileCopier.php
+++ b/src/FileCopier.php
@@ -44,7 +44,7 @@ public function getLocalFileForURL() : string {
if ( is_file( $local_file ) ) {
return $local_file;
} else {
- WsLog::l(
+ Logger::l(
'ERROR: trying to copy local file: ' . $local_file .
' for URL: ' . $this->url .
' (FILE NOT FOUND/UNREADABLE)'
@@ -98,7 +98,7 @@ public function copyFile( string $archive_dir ) : void {
if ( is_file( $local_file ) ) {
copy( $local_file, $filename );
} else {
- WsLog::l(
+ Logger::l(
'ERROR: trying to copy local file: ' . $local_file .
' to: ' . $filename .
' in archive dir: ' . $archive_dir .
diff --git a/src/FileWriter.php b/src/FileWriter.php
index 7a165ec3..1c71d0e8 100755
--- a/src/FileWriter.php
+++ b/src/FileWriter.php
@@ -112,17 +112,17 @@ public function saveFile( string $archive_dir ) : void {
$write_result = file_put_contents( $filename, $file_contents );
if ( ! $write_result ) {
- WsLog::l( "Failed saving $this->url to $filename" );
+ Logger::l( "Failed saving $this->url to $filename" );
return;
}
$modified = chmod( $filename, 0664 );
if ( ! $modified ) {
- WsLog::l( "Failed chmod'ing $filename" );
+ Logger::l( "Failed chmod'ing $filename" );
}
} else {
- WsLog::l( "Not saving empty file $this->url" );
+ Logger::l( "Not saving empty file $this->url" );
}
}
}
diff --git a/src/FilesHelper.php b/src/FilesHelper.php
index a5c12016..0c04b75b 100755
--- a/src/FilesHelper.php
+++ b/src/FilesHelper.php
@@ -34,161 +34,6 @@ public static function delete_dir_with_files( string $dir ) : bool {
return $successfully_removed;
}
- /**
- * Detect Active Plugin CSS URLs
- *
- * @return string[] list of URLs
- */
- public static function getPluginCSSURLs() : array {
- $files = [];
-
- $plugins_path = trailingslashit( WP_PLUGIN_DIR );
- $plugins_url = trailingslashit( plugins_url() );
- $active_plugins = get_option( 'active_plugins' );
-
- $active_plugin_dirs = array_map(
- function ( $active_plugin ) use ( $plugins_path ) {
- $plugin_base = dirname( $active_plugin );
-
- // exclude SSG plugin dirs and known uploads dir excludables
- $exclude_plugins = [
- 'simplerstatic',
- 'static-html-output-plugin',
- 'wp2static',
- ];
-
- foreach ( $exclude_plugins as $exclude_plugin ) {
- if ( strpos( $plugin_base, $exclude_plugin ) !== false ) {
- return;
- }
- }
-
- return $plugins_path . $plugin_base;
- },
- $active_plugins
- );
-
- foreach ( $active_plugin_dirs as $active_plugin_dir ) {
-
- if ( is_dir( $active_plugin_dir ) ) {
- $iterator = new RecursiveIteratorIterator(
- new RecursiveDirectoryIterator(
- $active_plugin_dir,
- RecursiveDirectoryIterator::SKIP_DOTS
- )
- );
-
- foreach ( $iterator as $filename => $file_object ) {
- // exclude vendor dirs
- if ( strpos( strtolower( $filename ), 'vendor' ) !== false ) {
- continue;
- }
-
- // exclude likely admin area assets
- if ( strpos( strtolower( $filename ), 'admin' ) !== false ) {
- continue;
- }
-
- $extension = pathinfo( $filename, PATHINFO_EXTENSION );
-
- if ( $extension !== 'css' ) {
- continue;
- }
-
- // Standardise all paths to use / (Windows support)
- // TODO: should come earlier in chain
- $filename = wp_normalize_path( $filename );
-
- $detected_filename =
- str_replace(
- $plugins_path,
- $plugins_url,
- $filename
- );
-
- $detected_filename =
- str_replace(
- get_home_url(),
- '',
- $detected_filename
- );
-
- if ( is_string( $detected_filename ) ) {
- array_push(
- $files,
- $detected_filename
- );
- }
- }
- }
- }
-
- return $files;
- }
-
- /**
- * @return string[] list of URLs
- */
- public static function getThemeFiles( string $theme_type ) : array {
- $wp_site = new WPSite();
-
- $files = [];
- $template_path = '';
- $template_url = '';
-
- if ( $theme_type === 'parent' ) {
- $template_path = $wp_site->parent_theme_path;
- $template_url = get_template_directory_uri();
- } else {
- $template_path = $wp_site->child_theme_path;
- $template_url = get_stylesheet_directory_uri();
- }
-
- $directory = $template_path;
-
- if ( is_dir( $directory ) ) {
- $iterator = new RecursiveIteratorIterator(
- new RecursiveDirectoryIterator(
- $directory,
- RecursiveDirectoryIterator::SKIP_DOTS
- )
- );
-
- foreach ( $iterator as $filename => $file_object ) {
- // $path_crawlable = self::filePathLooksCrawlable( $filename );
- // for theme files, let's just grab CSS files, as these will yield other link
- $extension = pathinfo( $filename, PATHINFO_EXTENSION );
-
- if ( $extension !== 'css' ) {
- continue;
- }
-
- $detected_filename =
- str_replace(
- $template_path,
- $template_url,
- $filename
- );
-
- $detected_filename =
- str_replace(
- get_home_url(),
- '',
- $detected_filename
- );
-
- if ( is_string( $detected_filename ) ) {
- array_push(
- $files,
- $detected_filename
- );
- }
- }
- }
-
- return $files;
- }
-
/**
* @return string[] list of URLs
*/
@@ -197,17 +42,6 @@ public static function detectVendorFiles( string $wp_site_url ) : array {
$vendor_files = [];
- if ( class_exists( '\\Elementor\Api' ) ) {
- $elementor_font_dir = WP_PLUGIN_DIR .
- '/elementor/assets/lib/font-awesome';
-
- $elementor_urls = self::getListOfLocalFilesByUrl(
- $elementor_font_dir
- );
-
- $vendor_files = array_merge( $vendor_files, $elementor_urls );
- }
-
if ( defined( 'WPSEO_VERSION' ) ) {
$yoast_sitemaps = [
'/sitemap_index.xml',
@@ -220,26 +54,6 @@ public static function detectVendorFiles( string $wp_site_url ) : array {
$vendor_files = array_merge( $vendor_files, $yoast_sitemaps );
}
- if ( class_exists( 'autoptimizeMain' ) ) {
- $autoptimize_cache_dir =
- $wp_site->wp_content_path . '/cache/autoptimize';
-
- // get difference between home and wp-contents URL
- $prefix = str_replace(
- $wp_site->site_url,
- '/',
- $wp_site->wp_content_url
- );
-
- $autoptimize_urls = self::getAutoptimizeCacheFiles(
- $autoptimize_cache_dir,
- $wp_site->wp_content_path,
- $prefix
- );
-
- $vendor_files = array_merge( $vendor_files, $autoptimize_urls );
- }
-
if ( class_exists( 'Custom_Permalinks' ) ) {
global $wpdb;
@@ -271,16 +85,6 @@ public static function detectVendorFiles( string $wp_site_url ) : array {
}
}
- if ( class_exists( 'molongui_authorship' ) ) {
- $molongui_path = WP_PLUGIN_DIR . '/molongui-authorship';
-
- $molongui_urls = self::getListOfLocalFilesByUrl(
- $molongui_path
- );
-
- $vendor_files = array_merge( $vendor_files, $molongui_urls );
- }
-
return $vendor_files;
}
@@ -439,7 +243,8 @@ public static function filePathLooksCrawlable( string $file_name ) : bool {
'wp2static-crawled-site',
'thumbs.db',
'vendor',
- 'wp-static-html-output', // exclude earlier version exports
+ 'wp-static-html-output',
+ 'static-html-output',
];
foreach ( $filenames_to_ignore as $ignorable ) {
@@ -506,9 +311,6 @@ public static function buildInitialFileList(
$url_queue = array_merge(
$url_queue,
- self::getThemeFiles( 'parent' ),
- self::getThemeFiles( 'child' ),
- self::getPluginCSSURLs(),
self::detectVendorFiles( $wp_site->site_url ),
self::getAllWPPostURLs( $base_url ),
self::getDateArchiveURLs()
@@ -522,21 +324,14 @@ public static function buildInitialFileList(
);
$unique_urls = array_unique( $url_queue );
+ array_filter( $unique_urls );
sort( $unique_urls );
$initial_crawl_list_total = count( $unique_urls );
- $str = implode( "\n", $unique_urls );
-
- file_put_contents(
- $uploads_path . '/WP-STATIC-INITIAL-CRAWL-LIST.txt',
- $str
- );
-
- file_put_contents(
- $uploads_path . '/WP-STATIC-INITIAL-CRAWL-TOTAL.txt',
- $initial_crawl_list_total
- );
+ // TODO: also add to CrawlLog
+ CrawlQueue::addUrls( $unique_urls );
+ CrawlLog::addUrls( $unique_urls, 'initial_crawl_list', 0 );
return count( $url_queue );
}
@@ -594,14 +389,11 @@ public static function getAllWPPostURLs( string $wp_site_url ) : array {
case 'page':
$permalink = get_page_link( $post->ID );
break;
- case 'post':
- $permalink = get_permalink( $post->ID );
- break;
case 'attachment':
$permalink = get_attachment_link( $post->ID );
break;
default:
- $permalink = get_post_permalink( $post->ID );
+ $permalink = get_permalink( $post->ID );
break;
}
}
@@ -705,27 +497,6 @@ public static function getAllWPPostURLs( string $wp_site_url ) : array {
}
}
- // get all pagination links for each category
- $category_pagination_urls =
- self::getPaginationURLsForCategories( $category_links );
-
- // get all pagination links for each post_type
- $post_pagination_urls =
- self::getPaginationURLsForPosts(
- array_unique( $unique_post_types )
- );
-
- // get all comment links
- $comment_pagination_urls =
- self::getPaginationURLsForComments( $wp_site_url );
-
- $post_urls = array_merge(
- $post_urls,
- $post_pagination_urls,
- $category_pagination_urls,
- $comment_pagination_urls
- );
-
return $post_urls;
}
@@ -762,127 +533,5 @@ function ( string $url ) {
return $cleaned_urls;
}
-
- /**
- * @param string[] $post_types to get pagination URLs from
- * @return string[] list of URLs
- */
- public static function getPaginationURLsForPosts( array $post_types ) : array {
- global $wpdb, $wp_rewrite;
- $wp_site = new WPSite();
- $pagination_base = $wp_rewrite->pagination_base;
- $default_posts_per_page = get_option( 'posts_per_page' );
- $urls_to_include = [];
-
- foreach ( $post_types as $post_type ) {
- $query = "
- SELECT ID,post_type
- FROM %s
- WHERE post_status = '%s'
- AND post_type = '%s'";
-
- $count = $wpdb->get_results(
- sprintf(
- $query,
- $wpdb->posts,
- 'publish',
- $post_type
- )
- );
-
- $post_type_obj = get_post_type_object( $post_type );
-
- if ( ! $post_type_obj || ! isset( $post_type_obj->labels->name ) ) {
- continue;
- }
-
- $plural_form = strtolower( $post_type_obj->labels->name );
- $count = $wpdb->num_rows;
- $total_pages = ceil( $count / $default_posts_per_page );
- $archive_link = get_post_type_archive_link( $post_type );
-
- // only use pagination base when post type is page
- for ( $page = 2; $page <= $total_pages; $page++ ) {
- if ( $post_type === 'page' ) {
- $pagination_url =
- // TODO: check this against custom post types
- // "/{$plural_form}/{$pagination_base}/{$page}";
- "/{$pagination_base}/{$page}";
- } else {
- $pagination_url =
- "/{$archive_link}/{$pagination_base}/{$page}";
- }
-
- $urls_to_include[] = str_replace(
- $wp_site->site_url,
- '/',
- $pagination_url
- );
- }
- }
-
- return $urls_to_include;
- }
-
- /**
- * @param mixed[] $categories with total counts
- * @return string[] list of URLs
- */
- public static function getPaginationURLsForCategories( array $categories ) : array {
- if ( ! $categories ) {
- return [];
- }
-
- global $wp_rewrite;
-
- $urls_to_include = [];
- $pagination_base = $wp_rewrite->pagination_base;
- $default_posts_per_page = get_option( 'posts_per_page' );
-
- foreach ( $categories as $term => $total_posts ) {
- $total_pages = ceil( $total_posts / $default_posts_per_page );
-
- for ( $page = 2; $page <= $total_pages; $page++ ) {
- $urls_to_include[] =
- "{$term}/{$pagination_base}/{$page}";
- }
- }
-
- return $urls_to_include;
- }
-
- /**
- * @return string[] list of URLs
- */
- public static function getPaginationURLsForComments( string $wp_site_url ) : array {
- global $wp_rewrite;
-
- $comments_pagination_base = $wp_rewrite->comments_pagination_base;
-
- $comments = get_comments();
-
- if ( ! is_array( $comments ) ) {
- return [];
- }
-
- $urls_to_include = [];
-
- foreach ( $comments as $comment ) {
- $comment_url = get_comment_link( $comment->comment_ID );
- $comment_url = strtok( $comment_url, '#' );
-
- if ( ! is_string( $comment_url ) ) {
- continue;
- }
-
- $urls_to_include[] = str_replace(
- $wp_site_url,
- '',
- $comment_url
- );
- }
-
- return array_unique( $urls_to_include );
- }
}
diff --git a/src/GitHub.php b/src/GitHub.php
index 6ca6cb26..77a63cb2 100755
--- a/src/GitHub.php
+++ b/src/GitHub.php
@@ -19,6 +19,10 @@ class GitHub extends SitePublisher {
* @var string
*/
public $local_file_contents;
+ /**
+ * @var string
+ */
+ public $local_file;
/**
* @var string
*/
@@ -50,10 +54,6 @@ public function __construct() {
$this->api_base = 'https://api.github.com/repos/';
- $this->previous_hashes_path =
- $this->settings['wp_uploads_path'] .
- '/WP2STATIC-GITHUB-PREVIOUS-HASHES.txt';
-
if ( defined( 'WP_CLI' ) ) {
return; }
}
@@ -63,8 +63,7 @@ public function upload_files() : void {
if ( $this->files_remaining < 0 ) {
echo 'ERROR';
- die();
- }
+ die(); }
$this->initiateProgressIndicator();
@@ -76,56 +75,55 @@ public function upload_files() : void {
$lines = $this->getItemsToDeploy( $batch_size );
- $this->openPreviousHashesFile();
-
foreach ( $lines as $line ) {
- list($local_file, $this->target_path) = explode( ',', $line );
+ $this->local_file = $line->url;
+ $this->target_path = $line->remote_path;
- $local_file = $this->archive->path . $local_file;
+ $this->local_file = $this->archive->path . $this->local_file;
- if ( ! is_file( $local_file ) ) {
- continue; }
+ $deploy_queue_path = str_replace( $this->archive->path, '', $this->local_file );
- $this->local_file_contents = (string) file_get_contents( $local_file );
+ if ( ! is_file( $this->local_file ) ) {
+ DeployQueue::removeURL( $deploy_queue_path );
+ continue;
+ }
+
+ $this->local_file_contents = (string) file_get_contents( $this->local_file );
if ( ! $this->local_file_contents ) {
+ DeployQueue::removeURL( $deploy_queue_path );
continue;
}
- if ( isset( $this->file_paths_and_hashes[ $this->target_path ] ) ) {
- $prev = $this->file_paths_and_hashes[ $this->target_path ];
- $current = crc32( $this->local_file_contents );
+ $cached_hash = DeployCache::fileIsCached( $deploy_queue_path );
+
+ if ( $cached_hash ) {
+ $current_hash = md5( $this->local_file_contents );
- if ( $prev != $current ) {
+ if ( $current_hash != $cached_hash ) {
if ( $this->fileExistsInGitHub() ) {
$this->updateFileInGitHub();
+ DeployCache::addFile( $deploy_queue_path );
} else {
$this->createFileInGitHub();
+ DeployCache::addFile( $deploy_queue_path );
}
-
- $this->recordFilePathAndHashInMemory(
- $this->target_path,
- $this->local_file_contents
- );
}
} else {
if ( $this->fileExistsInGitHub() ) {
$this->updateFileInGitHub();
+ DeployCache::addFile( $deploy_queue_path );
} else {
$this->createFileInGitHub();
+ DeployCache::addFile( $deploy_queue_path );
}
-
- $this->recordFilePathAndHashInMemory(
- $this->target_path,
- $this->local_file_contents
- );
}
+ DeployQueue::removeURL( $deploy_queue_path );
+
$this->updateProgress();
}
- $this->writeFilePathAndHashesToFile();
-
$this->pauseBetweenAPICalls();
if ( $this->uploadsCompleted() ) {
@@ -178,13 +176,13 @@ public function test_upload() : void {
$good_response_codes = [ 200, 201, 301, 302, 304 ];
if ( ! in_array( $status_code, $good_response_codes ) ) {
- WsLog::l( "BAD RESPONSE STATUS ($status_code)" );
+ Logger::l( "BAD RESPONSE STATUS ($status_code)" );
throw new StaticHTMLOutputException( 'GitHub API bad response status' );
}
} catch ( StaticHTMLOutputException $e ) {
- WsLog::l( 'GITHUB EXPORT: error encountered' );
- WsLog::l( $e );
+ Logger::l( 'GITHUB EXPORT: error encountered' );
+ Logger::l( $e );
throw new StaticHTMLOutputException( $e );
}
@@ -244,7 +242,7 @@ public function fileExistsInGitHub() : bool {
$commit_message = '';
if ( ! empty( $this->existing_file_object ) ) {
- WsLog::l( "{$this->target_path} path exists in GitHub" );
+ Logger::l( "{$this->target_path} path exists in GitHub" );
return true;
}
diff --git a/src/GitLab.php b/src/GitLab.php
index 81527428..e6d17442 100755
--- a/src/GitLab.php
+++ b/src/GitLab.php
@@ -4,22 +4,26 @@
class GitLab extends SitePublisher {
+ /**
+ * @var string[]
+ */
+ public $files_in_repo;
+ /**
+ * @var string
+ */
+ public $local_file;
+ /**
+ * @var string
+ */
+ public $local_file_contents;
/**
* @var string
*/
- public $files_in_repo_list_path;
+ public $target_path;
public function __construct() {
$this->loadSettings( 'gitlab' );
- $this->files_in_repo_list_path =
- $this->settings['wp_uploads_path'] .
- '/WP2STATIC-GITLAB-FILES-IN-REPO.txt';
-
- $this->previous_hashes_path =
- $this->settings['wp_uploads_path'] .
- '/WP2STATIC-GITLAB-PREVIOUS-HASHES.txt';
-
if ( defined( 'WP_CLI' ) ) {
return; }
}
@@ -41,76 +45,70 @@ public function upload_files() : void {
$lines = $this->getItemsToDeploy( $batch_size );
- $files_in_tree = file(
- $this->files_in_repo_list_path,
- FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES
- );
+ $this->getListOfFilesInRepo();
- if ( is_array( $files_in_tree ) ) {
- $files_in_tree = array_filter( $files_in_tree );
- $files_in_tree = array_unique( $files_in_tree );
- } else {
- $files_in_tree = [];
- }
+ $files_in_tree = $this->files_in_repo;
+ $files_in_tree = array_filter( $files_in_tree );
+ $files_in_tree = array_unique( $files_in_tree );
$files_data = [];
- $this->openPreviousHashesFile();
-
foreach ( $lines as $line ) {
- list($local_file, $target_path) = explode( ',', $line );
+ $this->local_file = $line->url;
+ $this->target_path = $line->remote_path;
+
+ $this->local_file = $this->archive->path . $this->local_file;
- $local_file = $this->archive->path . $local_file;
+ $deploy_queue_path = str_replace( $this->archive->path, '', $this->local_file );
- if ( ! is_file( $local_file ) ) {
- continue; }
+ if ( ! is_file( $this->local_file ) ) {
+ DeployQueue::removeURL( $deploy_queue_path );
+ continue;
+ }
- $local_file_contents = file_get_contents( $local_file );
+ $this->local_file_contents = (string) file_get_contents( $this->local_file );
- if ( ! $local_file_contents ) {
+ if ( ! $this->local_file_contents ) {
+ DeployQueue::removeURL( $deploy_queue_path );
continue;
}
- if ( in_array( $target_path, $files_in_tree ) ) {
- if ( isset( $this->file_paths_and_hashes[ $target_path ] ) ) {
- $prev = $this->file_paths_and_hashes[ $target_path ];
- $current = crc32( $local_file_contents );
+ // does file exist in GitLab?
+ if ( in_array( $this->target_path, $files_in_tree ) ) {
+ $cached_hash = DeployCache::fileIsCached( $deploy_queue_path );
- if ( $prev != $current ) {
+ // does plugin have cache of file?
+ if ( $cached_hash ) {
+ $current_hash = md5( $this->local_file_contents );
+ // plugin cache doesn't match current file hash
+ if ( $current_hash != $cached_hash ) {
$files_data[] = [
'action' => 'update',
- 'file_path' => $target_path,
- 'content' => base64_encode(
- $local_file_contents
- ),
+ 'file_path' => $this->target_path,
+ 'content' => base64_encode( $this->local_file_contents ),
'encoding' => 'base64',
];
}
+ // plugin has no cache for file that exists in GitLab
} else {
$files_data[] = [
'action' => 'update',
- 'file_path' => $target_path,
- 'content' => base64_encode(
- $local_file_contents
- ),
+ 'file_path' => $this->target_path,
+ 'content' => base64_encode( $this->local_file_contents ),
'encoding' => 'base64',
];
}
+ // file doesn't exist in GitLab
} else {
$files_data[] = [
'action' => 'create',
- 'file_path' => $target_path,
- 'content' => base64_encode(
- $local_file_contents
- ),
+ 'file_path' => $this->target_path,
+ 'content' => base64_encode( $this->local_file_contents ),
'encoding' => 'base64',
];
}
- $this->recordFilePathAndHashInMemory(
- $target_path,
- $local_file_contents
- );
+ DeployQueue::removeURL( $deploy_queue_path );
// NOTE: delay and progress askew in GitLab as we may
// upload all in one request. Progress indicates building
@@ -123,34 +121,41 @@ public function upload_files() : void {
$commits_endpoint = 'https://gitlab.com/api/v4/projects/' .
$this->settings['glProject'] . '/repository/commits';
- try {
- $client = new Request();
+ if ( $files_data ) {
+ try {
+ $client = new Request();
- $post_options = [
- 'branch' => 'master',
- 'commit_message' => 'StaticHTMLOutput Deployment',
- 'actions' => $files_data,
- ];
+ $post_options = [
+ 'branch' => 'master',
+ 'commit_message' => 'StaticHTMLOutput Deployment',
+ 'actions' => $files_data,
+ ];
- $headers = [
- 'PRIVATE-TOKEN: ' . $this->settings['glToken'],
- 'Content-Type: application/json',
- ];
+ $headers = [
+ 'PRIVATE-TOKEN: ' . $this->settings['glToken'],
+ 'Content-Type: application/json',
+ ];
- $client->postWithJSONPayloadCustomHeaders(
- $commits_endpoint,
- $post_options,
- $headers
- );
+ $client->postWithJSONPayloadCustomHeaders(
+ $commits_endpoint,
+ $post_options,
+ $headers
+ );
- $this->checkForValidResponses(
- $client->status_code,
- [ 200, 201, 301, 302, 304 ]
- );
+ $this->checkForValidResponses(
+ $client->status_code,
+ [ 200, 201, 301, 302, 304 ]
+ );
- $this->writeFilePathAndHashesToFile();
- } catch ( StaticHTMLOutputException $e ) {
- $this->handleException( $e );
+ foreach ( $files_data as $file ) {
+ $deploy_queue_path =
+ str_replace( $this->archive->path, '', $file['file_path'] );
+
+ DeployCache::addFile( $deploy_queue_path );
+ }
+ } catch ( StaticHTMLOutputException $e ) {
+ $this->handleException( $e );
+ }
}
if ( $this->uploadsCompleted() ) {
@@ -162,11 +167,9 @@ public function upload_files() : void {
* @param mixed[] $items file objects
*/
public function addToListOfFilesInRepos( array $items ) : void {
- file_put_contents(
- $this->files_in_repo_list_path,
- implode( PHP_EOL, $items ) . PHP_EOL,
- FILE_APPEND | LOCK_EX
- );
+ $this->files_in_repo = $this->files_in_repo ? $this->files_in_repo : [];
+
+ $this->files_in_repo = array_merge( $this->files_in_repo, $items );
}
/**
@@ -206,7 +209,7 @@ public function getRepositoryTree( int $page ) : void {
$good_response_codes = [ '200', '201', '301', '302', '304' ];
if ( ! in_array( $client->status_code, $good_response_codes ) ) {
- WsLog::l( 'BAD RESPONSE STATUS (' . $client->status_code . '): ' );
+ Logger::l( 'BAD RESPONSE STATUS (' . $client->status_code . '): ' );
throw new StaticHTMLOutputException( 'GitLab API bad response status' );
}
@@ -296,15 +299,8 @@ public function createGitLabPagesConfig() : void {
$target_path = $this->archive->path . '.gitlab-ci.yml';
file_put_contents( $target_path, $config_file );
chmod( $target_path, 0664 );
- $export_line = '.gitlab-ci.yml,.gitlab-ci.yml';
-
- file_put_contents(
- $this->export_file_list,
- $export_line . PHP_EOL,
- FILE_APPEND | LOCK_EX
- );
- chmod( $this->export_file_list, 0664 );
+ DeployQueue::addUrl( '.gitlab-ci.yml', '.gitlab-ci.yml' );
}
}
diff --git a/src/HTMLProcessor.php b/src/HTMLProcessor.php
index 672ca3c6..042aa825 100755
--- a/src/HTMLProcessor.php
+++ b/src/HTMLProcessor.php
@@ -50,10 +50,6 @@ class HTMLProcessor extends StaticHTMLOutput {
* @var string
*/
public $raw_html;
- /**
- * @var bool
- */
- public $harvest_new_urls;
/**
* @var string
*/
@@ -94,7 +90,7 @@ public function __construct(
bool $remove_wp_meta = false,
string $rewrite_rules = '',
string $base_url,
- string $selected_deployment_option = 'folder',
+ string $selected_deployment_option = 'zip',
string $wp_site_url,
string $wp_uploads_path
) {
@@ -425,8 +421,6 @@ public function processHTML( string $html_document, string $page_url ) : bool {
$this->page_url = new Net_URL2( $page_url );
- $this->detectIfURLsShouldBeHarvested();
-
$this->discovered_urls = [];
// PERF: 70% of function time
@@ -513,26 +507,10 @@ public function processHTML( string $html_document, string $page_url ) : bool {
}
$this->stripHTMLComments();
- $this->writeDiscoveredURLs();
return true;
}
- public function detectIfURLsShouldBeHarvested() : void {
- if ( ! defined( 'WP_CLI' ) ) {
- $ajax_method = filter_input( INPUT_POST, 'ajax_action' );
-
- $this->harvest_new_urls = $ajax_method === 'crawl_site';
- } else {
- // we shouldn't harvest any while we're in the second crawl
- if ( defined( 'CRAWLING_DISCOVERED' ) ) {
- return;
- } else {
- $this->harvest_new_urls = true;
- }
- }
- }
-
public function processLink( DOMElement $element ) : void {
$this->normalizeURL( $element, 'href' );
$this->forceHTTPS( $element, 'href' );
@@ -612,20 +590,22 @@ public function addDiscoveredURL( string $url ) : void {
$this->processed_urls[] = (string) $url;
- if ( $this->harvest_new_urls ) {
- if ( ! $this->isValidURL( (string) $url ) ) {
- return;
- }
+ if ( ! $this->isValidURL( (string) $url ) ) {
+ return;
+ }
- if ( $this->isInternalLink( (string) $url ) ) {
- $path = (string) parse_url( (string) $url, PHP_URL_PATH );
+ if ( $this->isInternalLink( (string) $url ) ) {
+ $path = (string) parse_url( (string) $url, PHP_URL_PATH );
- if ( $path[0] !== '/' ) {
- return;
- }
+ if ( empty( $path ) || $path[0] !== '/' ) {
+ return;
+ }
- $this->discovered_urls[] = $path;
+ if ( trim( $path ) === '/' ) {
+ return;
}
+
+ $this->discovered_urls[] = $path;
}
}
@@ -846,33 +826,6 @@ public function processMeta( DOMElement $element ) : void {
$this->rewriteBaseURL( $element );
}
- public function writeDiscoveredURLs() : void {
- $ajax_method = filter_input( INPUT_POST, 'ajax_action' );
-
- if ( $ajax_method === 'crawl_again' ) {
- return;
- }
-
- if ( defined( 'WP_CLI' ) ) {
- if ( defined( 'CRAWLING_DISCOVERED' ) ) {
- return;
- }
- }
-
- file_put_contents(
- $this->wp_uploads_path .
- '/WP-STATIC-DISCOVERED-URLS.txt',
- PHP_EOL .
- implode( PHP_EOL, array_unique( $this->discovered_urls ) ),
- FILE_APPEND | LOCK_EX
- );
-
- chmod(
- $this->wp_uploads_path .
- '/WP-STATIC-DISCOVERED-URLS.txt',
- 0664
- );
- }
// make link absolute, using current page to determine full path
public function normalizeURL( DOMElement $element, string $attribute ) : void {
@@ -1163,6 +1116,21 @@ public function rewriteWPPaths( DOMElement $element ) : void {
}
}
+ /**
+  * Get the unique, non-empty discovered URLs in sorted order
+  *
+  * @return string[] Discovered URLs
+  */
+ public function getDiscoveredURLs() : array {
+     // array_filter() returns a filtered copy, so its result must be kept
+     $discovered_urls = array_filter( array_unique( $this->discovered_urls ) );
+
+     // sort() also reindexes the keys that array_filter() preserved
+     sort( $discovered_urls );
+
+     return $discovered_urls;
+ }
+
public function getHTML() : string {
$processed_html = (string) $this->xml_doc->saveHtml();
diff --git a/src/Logger.php b/src/Logger.php
new file mode 100755
index 00000000..06faf19e
--- /dev/null
+++ b/src/Logger.php
@@ -0,0 +1,110 @@
+prefix . 'statichtmloutput_log';
+
+ $wpdb->insert(
+ $table_name,
+ [
+ 'log' => $text,
+ ]
+ );
+ }
+
+ /**
+  * Create the log table (id, time, log columns) through dbDelta()
+  */
+ public static function createTable() : void {
+     global $wpdb;
+     $charset_collate = $wpdb->get_charset_collate();
+     $table_name = $wpdb->prefix . 'statichtmloutput_log';
+
+     $schema = "CREATE TABLE $table_name (
+ id mediumint(9) NOT NULL AUTO_INCREMENT,
+ time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ log TEXT NOT NULL,
+ PRIMARY KEY (id)
+ ) $charset_collate;";
+
+     require_once ABSPATH . 'wp-admin/includes/upgrade.php';
+     dbDelta( $schema );
+ }
+
+ /**
+  * Log multiple lines at once
+  *
+  * @param string[] $lines List of lines to log
+  */
+ public static function lines( array $lines ) : void {
+     global $wpdb;
+
+     // nothing to insert; an empty VALUES list would be invalid SQL
+     if ( ! $lines ) {
+         return;
+     }
+
+     $table_name = $wpdb->prefix . 'statichtmloutput_log';
+
+     // bind each line via prepare() so quotes cannot break the statement
+     $placeholders = implode( ',', array_fill( 0, count( $lines ), '(%s)' ) );
+
+     $sql = "INSERT INTO $table_name (log) VALUES $placeholders";
+
+     $wpdb->query( $wpdb->prepare( $sql, array_values( $lines ) ) );
+ }
+
+ /**
+  * Fetch every stored log entry, ordered by ascending id
+  *
+  * @return mixed[] rows holding the `time` and `log` columns
+  */
+ public static function getAll() : array {
+     global $wpdb;
+
+     $table_name = $wpdb->prefix . 'statichtmloutput_log';
+     $entries = [];
+
+     $results = $wpdb->get_results( "SELECT time, log FROM $table_name ORDER BY id ASC" );
+
+     foreach ( $results as $entry ) {
+         array_push( $entries, $entry );
+     }
+
+     return $entries;
+ }
+
+ /**
+  * Render all log rows as text, highest id first, one "time: log" row per line
+  */
+ public static function poll() : string {
+     global $wpdb;
+
+     $table_name = $wpdb->prefix . 'statichtmloutput_log';
+     $output = '';
+
+     $results = $wpdb->get_results( "SELECT time, log FROM $table_name ORDER BY id DESC" );
+
+     foreach ( $results as $entry ) {
+         $output .= "{$entry->time}: {$entry->log}" . PHP_EOL;
+     }
+
+     return $output;
+ }
+
+ /**
+  * Empty the log table by issuing TRUNCATE TABLE against it
+  */
+ public static function truncate() : void {
+     global $wpdb;
+
+     $log_table = $wpdb->prefix . 'statichtmloutput_log';
+
+     $wpdb->query( 'TRUNCATE TABLE ' . $log_table );
+ }
+}
+
diff --git a/src/Netlify.php b/src/Netlify.php
index dd36d2ee..ae2f39f7 100755
--- a/src/Netlify.php
+++ b/src/Netlify.php
@@ -106,7 +106,7 @@ public function test_netlify() : void {
}
} else {
$err = 'BAD RESPONSE STATUS FROM NETLIFY API';
- WsLog::l( $err );
+ Logger::l( $err );
throw new StaticHTMLOutputException( $err );
}
} catch ( StaticHTMLOutputException $e ) {
diff --git a/src/Options.php b/src/Options.php
index a8600975..f21f2f5a 100755
--- a/src/Options.php
+++ b/src/Options.php
@@ -25,7 +25,6 @@ class Options {
'baseUrl',
'baseUrl-bitbucket',
'baseUrl-bunnycdn',
- 'baseUrl-folder',
'baseUrl-github',
'baseUrl-gitlab',
'baseUrl-netlify',
@@ -84,7 +83,6 @@ class Options {
'baseUrl',
'baseUrl-bitbucket',
'baseUrl-bunnycdn',
- 'baseUrl-folder',
'baseUrl-github',
'baseUrl-gitlab',
'baseUrl-netlify',
diff --git a/src/PostSettings.php b/src/PostSettings.php
index 69e7450e..37343745 100755
--- a/src/PostSettings.php
+++ b/src/PostSettings.php
@@ -47,11 +47,6 @@ public static function get( array $sets = [] ) : array {
'deployBatchSize',
];
- $key_sets['folder'] = [
- 'baseUrl-folder',
- 'targetFolder',
- ];
-
$key_sets['zip'] = [
'baseUrl-zip',
];
diff --git a/src/S3.php b/src/S3.php
index dbe99656..f2d889a3 100755
--- a/src/S3.php
+++ b/src/S3.php
@@ -20,14 +20,14 @@ class S3 extends SitePublisher {
* @var string
*/
public $hash_key;
+ /**
+ * @var string
+ */
+ public $local_file;
public function __construct() {
$this->loadSettings( 's3' );
- $this->previous_hashes_path =
- $this->settings['wp_uploads_path'] .
- '/WP2STATIC-S3-PREVIOUS-HASHES.txt';
-
if ( defined( 'WP_CLI' ) ) {
return; }
}
@@ -49,37 +49,42 @@ public function upload_files() : void {
$lines = $this->getItemsToDeploy( $batch_size );
- $this->openPreviousHashesFile();
-
foreach ( $lines as $line ) {
- list($local_file, $this->target_path) = explode( ',', $line );
+ $this->local_file = $line->url;
+ $this->target_path = $line->remote_path;
+
+ $this->local_file = $this->archive->path . $this->local_file;
- $local_file = $this->archive->path . $local_file;
+ $deploy_queue_path = str_replace( $this->archive->path, '', $this->local_file );
- if ( ! is_file( $local_file ) ) {
- continue; }
+ if ( ! is_file( $this->local_file ) ) {
+ DeployQueue::removeURL( $deploy_queue_path );
+ continue;
+ }
- $this->local_file_contents = (string) file_get_contents( $local_file );
+ $this->local_file_contents = (string) file_get_contents( $this->local_file );
if ( ! $this->local_file_contents ) {
+ DeployQueue::removeURL( $deploy_queue_path );
continue;
}
- $this->hash_key = $this->target_path . basename( $local_file );
+ $cached_hash = DeployCache::fileIsCached( $deploy_queue_path );
- if ( isset( $this->file_paths_and_hashes[ $this->hash_key ] ) ) {
- $prev = $this->file_paths_and_hashes[ $this->hash_key ];
- $current = crc32( $this->local_file_contents );
+ if ( $cached_hash ) {
+ $current_hash = md5( $this->local_file_contents );
- if ( $prev != $current ) {
+ if ( $current_hash != $cached_hash ) {
try {
$this->put_s3_object(
$this->target_path .
- basename( $local_file ),
+ basename( $this->local_file ),
$this->local_file_contents,
- MimeTypes::guess_type( $local_file )
+ MimeTypes::guess_type( $this->local_file )
);
+ DeployCache::addFile( $deploy_queue_path );
+
} catch ( StaticHTMLOutputException $e ) {
$this->handleException( $e );
}
@@ -88,29 +93,26 @@ public function upload_files() : void {
try {
$this->put_s3_object(
$this->target_path .
- basename( $local_file ),
+ basename( $this->local_file ),
$this->local_file_contents,
- MimeTypes::guess_type( $local_file )
+ MimeTypes::guess_type( $this->local_file )
);
+ DeployCache::addFile( $deploy_queue_path );
+
} catch ( StaticHTMLOutputException $e ) {
- $mime_type = MimeTypes::guess_type( $local_file );
- $error = $local_file . PHP_EOL . $e;
+ $mime_type = MimeTypes::guess_type( $this->local_file );
+ $error = $this->local_file . PHP_EOL . $e;
$this->handleException( $error );
}
}
- $this->recordFilePathAndHashInMemory(
- $this->hash_key,
- $this->local_file_contents
- );
+ DeployQueue::removeURL( $deploy_queue_path );
$this->updateProgress();
}
- $this->writeFilePathAndHashesToFile();
-
$this->pauseBetweenAPICalls();
if ( $this->uploadsCompleted() ) {
@@ -130,7 +132,7 @@ public function test_s3() : void {
echo 'SUCCESS';
}
} catch ( StaticHTMLOutputException $e ) {
- WsLog::l( 'S3 TEST ERROR RETURNED: ' . $e );
+ Logger::l( 'S3 TEST ERROR RETURNED: ' . $e );
throw new StaticHTMLOutputException( $e );
}
}
@@ -260,10 +262,10 @@ public function put_s3_object(
}
public function cloudfront_invalidate_all_items() : void {
- WsLog::l( 'Invalidating all CloudFront items' );
+ Logger::l( 'Invalidating all CloudFront items' );
if ( ! isset( $this->settings['cfDistributionId'] ) ) {
- WsLog::l( 'no Cloudfront ID found' );
+ Logger::l( 'no Cloudfront ID found' );
if ( ! defined( 'WP_CLI' ) ) {
echo 'SUCCESS'; }
@@ -306,7 +308,7 @@ public function cloudfront_invalidate_all_items() : void {
);
if ( ! $fp ) {
- WsLog::l( "CLOUDFRONT CONNECTION ERROR: {$errno} {$errstr}" );
+ Logger::l( "CLOUDFRONT CONNECTION ERROR: {$errno} {$errstr}" );
die( "Connection failed: {$errno} {$errstr}\n" );
}
@@ -317,7 +319,7 @@ public function cloudfront_invalidate_all_items() : void {
$resp .= fgets( $fp, 1024 );
}
- WsLog::l( "CloudFront response body: {$resp}" );
+ Logger::l( "CloudFront response body: {$resp}" );
fclose( $fp );
diff --git a/src/SiteCrawler.php b/src/SiteCrawler.php
index 6beab1ba..0dd95b2b 100755
--- a/src/SiteCrawler.php
+++ b/src/SiteCrawler.php
@@ -62,10 +62,6 @@ class SiteCrawler extends StaticHTMLOutput {
* @var string
*/
public $crawled_links_file;
- /**
- * @var int
- */
- public $remaining_urls_to_crawl;
public function __construct() {
$this->loadSettings(
@@ -90,330 +86,129 @@ public function __construct() {
$this->archive_dir = '';
$this->list_of_urls_to_crawl_path = '';
$this->urls_to_crawl = [];
-
- if ( ! defined( 'WP_CLI' ) ) {
- // @codingStandardsIgnoreStart
- if ( $_POST['ajax_action'] === 'crawl_again' ) {
- $this->crawl_discovered_links();
- } elseif ( $_POST['ajax_action'] === 'crawl_site' ) {
- $this->crawl_site();
- }
- // @codingStandardsIgnoreEnd
- }
- }
-
- public function generate_discovered_links_list() : void {
- $second_crawl_file_path = $this->settings['wp_uploads_path'] .
- '/WP-STATIC-2ND-CRAWL-LIST.txt';
-
- $already_crawled = file(
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-INITIAL-CRAWL-LIST.txt',
- FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES
- );
-
- if ( ! $already_crawled ) {
- $already_crawled = [];
- }
-
- $unique_discovered_links = [];
-
- $discovered_links_file = $this->settings['wp_uploads_path'] .
- '/WP-STATIC-DISCOVERED-URLS.txt';
-
- if ( is_file( $discovered_links_file ) ) {
- $discovered_links = file(
- $discovered_links_file,
- FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES
- );
-
- if ( ! $discovered_links ) {
- $discovered_links = [];
- }
-
- $unique_discovered_links = array_unique( $discovered_links );
- sort( $unique_discovered_links );
- }
-
- file_put_contents(
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-DISCOVERED-URLS-LOG.txt',
- implode( PHP_EOL, $unique_discovered_links )
- );
-
- chmod(
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-DISCOVERED-URLS-LOG.txt',
- 0664
- );
-
- file_put_contents(
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-DISCOVERED-URLS-TOTAL.txt',
- count( $unique_discovered_links )
- );
-
- chmod(
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-DISCOVERED-URLS-TOTAL.txt',
- 0664
- );
-
- $discovered_links = array_diff(
- $unique_discovered_links,
- $already_crawled
- );
-
- if ( ! empty( $this->progress_bar ) ) {
- $this->progress_bar->finish();
- $this->progress_bar = \WP_ClI\Utils\make_progress_bar(
- 'Crawling discovered links',
- count( $discovered_links )
- );
- }
-
- file_put_contents(
- $second_crawl_file_path,
- implode( PHP_EOL, $discovered_links )
- );
-
- chmod( $second_crawl_file_path, 0664 );
-
- copy(
- $second_crawl_file_path,
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-FINAL-2ND-CRAWL-LIST.txt'
- );
-
- chmod(
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-FINAL-2ND-CRAWL-LIST.txt',
- 0664
- );
}
- public function crawl_discovered_links() : void {
- if ( defined( 'WP_CLI' ) && ! defined( 'CRAWLING_DISCOVERED' ) ) {
- define( 'CRAWLING_DISCOVERED', true );
- }
-
- $second_crawl_file_path = $this->settings['wp_uploads_path'] .
- '/WP-STATIC-2ND-CRAWL-LIST.txt';
-
- // NOTE: the first iteration of the 2nd crawl phase,
- // the list of URLs for 2nd crawl is prepared
- if ( ! is_file( $second_crawl_file_path ) ) {
- $this->generate_discovered_links_list();
- }
-
- $this->list_of_urls_to_crawl_path =
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-FINAL-2ND-CRAWL-LIST.txt';
-
- if ( ! is_file( $this->list_of_urls_to_crawl_path ) ) {
- WsLog::l(
- 'ERROR: LIST OF URLS TO CRAWL NOT FOUND AT: ' .
- $this->list_of_urls_to_crawl_path
- );
- die();
+ public function crawl_site() : void {
+ if ( CrawlQueue::getTotal() > 0 ) {
+ $this->crawlABitMore();
} else {
- if ( filesize( $this->list_of_urls_to_crawl_path ) ) {
- $this->crawlABitMore();
- } else {
- if ( ! defined( 'WP_CLI' ) ) {
- echo 'SUCCESS';
- }
+ if ( ! defined( 'WP_CLI' ) ) {
+ echo 'SUCCESS';
+ } elseif ( ! empty( $this->progress_bar ) ) {
+ $this->progress_bar->finish();
}
}
}
- public function crawl_site() : void {
- // crude detection for CLI export to use 2nd crawl phase
- $this->list_of_urls_to_crawl_path =
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-FINAL-2ND-CRAWL-LIST.txt';
-
- if ( is_file( $this->list_of_urls_to_crawl_path ) ) {
- $this->crawl_discovered_links();
-
+ public function progressBarTick() : void {
+ if ( empty( $this->progress_bar ) ) {
return;
}
- $this->list_of_urls_to_crawl_path =
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-FINAL-CRAWL-LIST.txt';
-
- if ( ! is_file( $this->list_of_urls_to_crawl_path ) ) {
- WsLog::l(
- 'ERROR: LIST OF URLS TO CRAWL NOT FOUND AT: ' .
- $this->list_of_urls_to_crawl_path
- );
- die();
- } else {
- if ( filesize( $this->list_of_urls_to_crawl_path ) ) {
- $this->crawlABitMore();
- } else {
- if ( ! defined( 'WP_CLI' ) ) {
- echo 'SUCCESS';
- }
- }
- }
+ $this->progress_bar->tick(
+ 1,
+ sprintf(
+ 'Processing URLs %d / %d',
+ (int) filter_var( $this->progress_bar->current(), FILTER_SANITIZE_NUMBER_INT ) + 1,
+ CrawlLog::getTotalCrawlableURLs()
+ )
+ );
}
public function crawlABitMore() : void {
$batch_of_links_to_crawl = [];
- $crawl_list = file(
- $this->list_of_urls_to_crawl_path,
- FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES
- );
+ $crawlable_urls = CrawlQueue::getTotalCrawlableURLs();
- if ( ! $crawl_list ) {
+ if ( ! $crawlable_urls ) {
return;
}
- $this->urls_to_crawl = $crawl_list;
+ // get total CrawlQueue
+ $total_urls = CrawlQueue::getTotal();
- if ( is_array( $this->urls_to_crawl ) ) {
- $total_links = count( $this->urls_to_crawl );
+ // get batch size (smaller of total urls or crawl_increment)
+ $batch_size = min( $total_urls, $this->settings['crawl_increment'] );
- if ( $total_links < 1 ) {
- WsLog::l(
- 'ERROR: LIST OF URLS TO CRAWL NOT FOUND AT: ' .
- $this->list_of_urls_to_crawl_path
- );
- die();
- }
+ // fetch just amount of URLs needed (limit to crawl_increment)
+ $this->urls_to_crawl = CrawlQueue::getCrawlablePaths( $batch_size );
- if ( $this->settings['crawl_increment'] > $total_links ) {
- $this->settings['crawl_increment'] = $total_links;
- }
-
- for ( $i = 0; $i < $this->settings['crawl_increment']; $i++ ) {
- $link_from_crawl_list = array_shift( $this->urls_to_crawl );
-
- if ( $link_from_crawl_list ) {
- $batch_of_links_to_crawl[] = $link_from_crawl_list;
- }
- }
+ $this->archive_dir = $this->settings['wp_uploads_path'] . '/static-html-output/';
- $this->remaining_urls_to_crawl = count( $this->urls_to_crawl );
+ if ( defined( 'WP_CLI' ) && empty( $this->progress_bar ) ) {
+ $this->progress_bar =
+ \WP_CLI\Utils\make_progress_bar(
+ sprintf(
+ 'Processing URLs %d / %d',
+ 0,
+ CrawlLog::getTotalCrawlableURLs()
+ ),
+ CrawlLog::getTotalCrawlableURLs()
+ );
+ }
- // resave crawl list file, minus those from this batch
- file_put_contents(
- $this->list_of_urls_to_crawl_path,
- implode( "\r\n", $this->urls_to_crawl )
- );
+ if ( ! empty( $this->progress_bar ) ) {
+ $this->progress_bar->setTotal( CrawlLog::getTotalCrawlableURLs() );
+ }
- chmod( $this->list_of_urls_to_crawl_path, 0664 );
+ // TODO: add these to Exclusions table
+ $exclusions = [ 'wp-json' ];
- // TODO: required in saving/copying, but not here? optimize...
- $handle = fopen(
- $this->settings['wp_uploads_path'] .
- '/WP2STATIC-CURRENT-ARCHIVE.txt',
- 'r'
+ if ( isset( $this->settings['excludeURLs'] ) ) {
+ $user_exclusions = explode(
+ "\n",
+ str_replace( "\r", '', $this->settings['excludeURLs'] )
);
- if ( ! is_resource( $handle ) ) {
- return;
- }
-
- $line = stream_get_line( $handle, 0 );
-
- if ( ! is_string( $line ) ) {
- return;
- }
-
- $this->archive_dir = $line;
-
- $total_urls_path = $this->settings['wp_uploads_path'] .
- '/WP-STATIC-INITIAL-CRAWL-TOTAL.txt';
-
- // TODO: avoid mutation
- // @codingStandardsIgnoreStart
- if (
- defined( 'CRAWLING_DISCOVERED' ) ||
- ( isset( $_POST['ajax_action'] ) &&
- $_POST['ajax_action'] == 'crawl_again'
- )
- ) {
- $total_urls_path = $this->settings['wp_uploads_path'] .
- '/WP-STATIC-DISCOVERED-URLS-TOTAL.txt';
- }
- // @codingStandardsIgnoreEnd
-
- $total_urls_to_crawl = (int) file_get_contents( $total_urls_path );
-
- if ( defined( 'WP_CLI' ) && empty( $this->progress_bar ) ) {
- $this->progress_bar =
- \WP_CLI\Utils\make_progress_bar( 'Crawling site', $total_urls_to_crawl );
- }
-
- $batch_index = 0;
-
- $exclusions = [ 'wp-json' ];
-
- if ( isset( $this->settings['excludeURLs'] ) ) {
- $user_exclusions = explode(
- "\n",
- str_replace( "\r", '', $this->settings['excludeURLs'] )
- );
-
- $exclusions = array_merge(
- $exclusions,
- $user_exclusions
- );
- }
-
- WsLog::l(
- 'Exclusion rules ' . implode( PHP_EOL, $exclusions )
+ $exclusions = array_merge(
+ $exclusions,
+ $user_exclusions
);
+ }
- foreach ( $batch_of_links_to_crawl as $link_to_crawl ) {
- $this->url = $link_to_crawl;
+ foreach ( $this->urls_to_crawl as $link_to_crawl ) {
+ $this->url = $link_to_crawl;
- $this->full_url = $this->settings['wp_site_url'] .
- ltrim( $this->url, '/' );
+ $this->full_url = $this->settings['wp_site_url'] .
+ ltrim( $this->url, '/' );
- foreach ( $exclusions as $exclusion ) {
- $exclusion = trim( $exclusion );
- if ( $exclusion != '' ) {
- if ( false !== strpos( $this->url, $exclusion ) ) {
- WsLog::l(
- 'Excluding ' . $this->url .
- ' because of rule ' . $exclusion
- );
+ foreach ( $exclusions as $exclusion ) {
+ $exclusion = trim( $exclusion );
+ if ( $exclusion != '' ) {
+ if ( false !== strpos( $this->url, $exclusion ) ) {
+ Logger::l(
+ 'Excluding ' . $this->url .
+ ' because of rule ' . $exclusion
+ );
- if ( ! empty( $this->progress_bar ) ) {
- $this->progress_bar->tick();
- }
+ $url_path = (string) parse_url( $this->url, PHP_URL_PATH );
- // skip the outer foreach loop
+ if ( ! $url_path ) {
+ $this->progressBarTick();
continue 2;
}
- }
- }
- $this->file_extension = $this->getExtensionFromURL();
+ // TODO: dummy status to denote skipped due to exclusion rule
+ CrawlLog::updateStatus( $url_path, 777 );
+ CrawlQueue::removeURL( $url_path );
- if ( $this->loadFileForProcessing() ) {
- $this->saveFile();
+ $this->progressBarTick();
+ continue 2;
+ }
}
+ }
- $batch_index++;
+ $this->file_extension = $this->getExtensionFromURL();
- $completed_urls =
- $total_urls_to_crawl -
- $this->remaining_urls_to_crawl -
- count( $batch_of_links_to_crawl ) +
- $batch_index;
+ if ( $this->loadFileForProcessing() ) {
+ $this->saveFile();
+ }
- ProgressLog::l( $completed_urls, $total_urls_to_crawl );
+ // TODO: get crawl status and remove URL from CrawlQueue
- if ( ! empty( $this->progress_bar ) ) {
- $this->progress_bar->tick();
- }
- }
+ // ProgressLog::l( $completed_urls, $total_urls_to_crawl );
+
+ $this->progressBarTick();
}
$this->checkIfMoreCrawlingNeeded();
@@ -468,33 +263,19 @@ public function loadFileForProcessing() : bool {
$good_response_codes = [ 200, 201, 301, 302, 304 ];
- if ( ! in_array( $status_code, $good_response_codes ) ) {
- WsLog::l(
- 'BAD RESPONSE STATUS (' . $status_code . '): ' . $this->url
- );
+ $url_path = (string) parse_url( $this->url, PHP_URL_PATH );
- file_put_contents(
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-404-LOG.txt',
- $status_code . ':' . $this->url . PHP_EOL,
- FILE_APPEND | LOCK_EX
- );
+ if ( ! $url_path ) {
+ return false;
+ }
- chmod(
- $this->settings['wp_uploads_path'] .
- '/WP-STATIC-404-LOG.txt',
- 0664
- );
+ CrawlLog::updateStatus( $url_path, $status_code );
+ CrawlQueue::removeURL( $url_path );
- return false;
- } else {
- file_put_contents(
- $this->crawled_links_file,
- $this->url . PHP_EOL,
- FILE_APPEND | LOCK_EX
- );
+ if ( ! in_array( $status_code, $good_response_codes ) ) {
+ Logger::l( "BAD RESPONSE STATUS ($status_code): $this->url" );
- chmod( $this->crawled_links_file, 0664 );
+ return false;
}
$base_url = $this->settings['baseUrl'];
@@ -552,6 +333,7 @@ public function loadFileForProcessing() : bool {
if ( $processed ) {
$this->processed_file = $processor->getHTML();
+ $this->saveDiscoveredURLs( $processor->getDiscoveredURLs(), $this->full_url );
}
break;
@@ -605,6 +387,7 @@ public function loadFileForProcessing() : bool {
if ( $processed ) {
$this->processed_file = $processor->getCSS();
+ $this->saveDiscoveredURLs( $processor->getDiscoveredURLs(), $this->full_url );
}
break;
@@ -632,19 +415,27 @@ public function loadFileForProcessing() : bool {
break;
}
+ if ( defined( 'WP_CLI' ) ) {
+ \WP_CLI::debug( sprintf( 'Processing %s', $this->url ) );
+ }
+
return true;
}
public function checkIfMoreCrawlingNeeded() : void {
- if ( $this->remaining_urls_to_crawl > 0 ) {
+ $remaining_urls_to_crawl = CrawlQueue::getTotal();
+
+ if ( $remaining_urls_to_crawl > 0 ) {
if ( ! defined( 'WP_CLI' ) ) {
- echo $this->remaining_urls_to_crawl;
+ echo $remaining_urls_to_crawl;
} else {
$this->crawl_site();
}
} else {
if ( ! defined( 'WP_CLI' ) ) {
echo 'SUCCESS';
+ } elseif ( ! empty( $this->progress_bar ) ) {
+ $this->progress_bar->finish();
}
}
}
@@ -658,6 +449,10 @@ public function saveFile() : void {
);
$file_writer->saveFile( $this->archive_dir );
+
+ if ( defined( 'WP_CLI' ) ) {
+ \WP_CLI::debug( sprintf( 'Saved %s', $this->url ) );
+ }
}
public function getExtensionFromURL() : string {
@@ -694,7 +489,7 @@ public function detectFileType() : void {
} elseif ( stripos( $type, 'application/json' ) !== false ) {
$this->file_type = 'json';
} else {
- WsLog::l(
+ Logger::l(
'no filetype inferred from content-type: ' .
$this->curl_content_type .
' url: ' . $this->url
@@ -709,10 +504,35 @@ public function detectFileType() : void {
public function checkForCurlErrors( string $response, $curl_handle ) : void {
if ( ! $response ) {
$response = curl_error( $curl_handle );
- WsLog::l(
+ Logger::l(
'cURL error:' .
stripslashes( $response )
);
}
}
+
+ /**
+  * Add URLs not already in CrawlLog to both CrawlLog and CrawlQueue
+  *
+  * Does nothing when no URLs are given or when none of them are new.
+  *
+  * @param string[] $discovered_urls from a processed page
+  * @param string $parent_page page URL; its path is passed to CrawlLog::addUrls()
+  */
+ public function saveDiscoveredURLs( array $discovered_urls, string $parent_page ) : void {
+     if ( empty( $discovered_urls ) ) {
+         return;
+     }
+
+     $unseen_urls = array_diff( $discovered_urls, CrawlLog::getCrawlablePaths() );
+
+     if ( empty( $unseen_urls ) ) {
+         return;
+     }
+
+     $note = 'discovered on: ' . (string) parse_url( $parent_page, PHP_URL_PATH );
+
+     CrawlLog::addUrls( $unseen_urls, $note, 0 );
+     CrawlQueue::addUrls( $unseen_urls );
+ }
}
diff --git a/src/SitePublisher.php b/src/SitePublisher.php
index ca2b4104..009a8122 100755
--- a/src/SitePublisher.php
+++ b/src/SitePublisher.php
@@ -8,18 +8,10 @@ abstract class SitePublisher {
* @var mixed[]
*/
public $settings;
- /**
- * @var string
- */
- public $export_file_list;
/**
* @var string
*/
public $archive_dir;
- /**
- * @var int
- */
- public $batch_index;
/**
* @var int
*/
@@ -28,18 +20,10 @@ abstract class SitePublisher {
* @var int
*/
public $files_remaining;
- /**
- * @var string
- */
- public $deploy_count_path;
/**
* @var mixed[]
*/
public $file_paths_and_hashes;
- /**
- * @var string
- */
- public $previous_hashes_path;
/**
* @var Archive
*/
@@ -65,18 +49,10 @@ public function loadSettings( string $deploy_method ) : void {
public function loadArchive() : void {
$this->archive = new Archive();
- $this->archive->setToCurrentArchive();
}
public function bootstrap() : void {
- $this->export_file_list =
- $this->settings['wp_uploads_path'] .
- '/WP2STATIC-FILES-TO-DEPLOY.txt';
-
- $this->archive_dir = (string) file_get_contents(
- $this->settings['wp_uploads_path'] .
- '/WP2STATIC-CURRENT-ARCHIVE.txt'
- );
+ $this->archive_dir = $this->settings['wp_uploads_path'] . '/static-html-output/';
}
public function pauseBetweenAPICalls() : void {
@@ -86,41 +62,21 @@ public function pauseBetweenAPICalls() : void {
}
}
+ // TODO: remove?
public function updateProgress() : void {
- $this->batch_index++;
-
- $completed_urls =
- $this->total_urls_to_crawl -
- $this->files_remaining +
- $this->batch_index;
- ProgressLog::l( $completed_urls, $this->total_urls_to_crawl );
}
+ // TODO: remove?
public function initiateProgressIndicator() : void {
- $this->deploy_count_path = $this->settings['wp_uploads_path'] .
- '/WP-STATIC-TOTAL-FILES-TO-DEPLOY.txt';
- $this->total_urls_to_crawl =
- (int) file_get_contents( $this->deploy_count_path );
- $this->batch_index = 0;
}
public function clearFileList() : void {
- if ( is_file( $this->export_file_list ) ) {
- $f = fopen( $this->export_file_list, 'r+' );
-
- if ( ! is_resource( $f ) ) {
- return;
- }
-
- if ( $f !== false ) {
- ftruncate( $f, 0 );
- fclose( $f );
- }
- }
+ DeployQueue::truncate();
+ // TODO: add case for GitLab
if ( isset( $this->glob_hash_path_list ) ) {
if ( is_file( $this->glob_hash_path_list ) ) {
$f = fopen( $this->glob_hash_path_list, 'r+' );
@@ -244,13 +200,7 @@ public function createDeploymentList( string $dir, bool $basename_in_target ) :
$basename_in_target
);
- file_put_contents(
- $this->export_file_list,
- $local_file_path . ',' . $remote_deployment_path . PHP_EOL,
- FILE_APPEND | LOCK_EX
- );
-
- chmod( $this->export_file_list, 0664 );
+ DeployQueue::addUrl( $local_file_path, $remote_deployment_path );
}
}
}
@@ -263,91 +213,41 @@ public function prepareDeploy( bool $basename_in_target = false ) : void {
$basename_in_target
);
- // TODO: detect and use `cat | wc -l` if available
- $linecount = 0;
- $handle = fopen( $this->export_file_list, 'r' );
-
- if ( ! is_resource( $handle ) ) {
- return;
- }
-
- while ( ! feof( $handle ) ) {
- $line = fgets( $handle );
- $linecount++;
- }
-
- fclose( $handle );
-
- $deploy_count_path = $this->settings['wp_uploads_path'] .
- '/WP-STATIC-TOTAL-FILES-TO-DEPLOY.txt';
-
- file_put_contents(
- $deploy_count_path,
- $linecount,
- LOCK_EX
- );
-
- chmod( $deploy_count_path, 0664 );
-
if ( ! defined( 'WP_CLI' ) ) {
echo 'SUCCESS';
}
}
/**
- * @return string[] list of files to deploy
+ * @return mixed[] pairs of local files and remote deploy paths
*/
public function getItemsToDeploy( int $batch_size = 1 ) : array {
- $lines = [];
+ // $lines = [];
+ $batch_of_links_to_deploy = [];
- $f = fopen( $this->export_file_list, 'r' );
+ $deployable_urls = DeployQueue::getTotalDeployableURLs();
- if ( ! is_resource( $f ) ) {
+ if ( ! $deployable_urls ) {
return [];
}
- for ( $i = 0; $i < $batch_size; $i++ ) {
- $file_list = fgets( $f );
+ // get total DeployQueue
+ // TODO: have duplicate total fetching fns in Crawl, Deploy queues
+ $total_urls = DeployQueue::getTotal();
- if ( ! $file_list ) {
- return [];
- }
+ // batch size is the smaller of the queue total and the deployBatchSize setting; NOTE: this overwrites the $batch_size argument
+ $batch_size = min( $total_urls, $this->settings['deployBatchSize'] );
- $lines[] = rtrim( $file_list );
- }
-
- fclose( $f );
-
- // TODO: optimize this for just one read, one write within func
- $contents = file( $this->export_file_list, FILE_IGNORE_NEW_LINES );
+ // fetch only as many deployable paths as the batch size allows
+ $urls_to_deploy = DeployQueue::getDeployablePaths( $batch_size );
- if ( ! $contents ) {
- return [];
- }
-
- for ( $i = 0; $i < $batch_size; $i++ ) {
- // rewrite file minus the lines we took
- array_shift( $contents );
- }
-
- file_put_contents(
- $this->export_file_list,
- implode( PHP_EOL, $contents )
- );
-
- chmod( $this->export_file_list, 0664 );
-
- return $lines;
+ return $urls_to_deploy;
}
public function getRemainingItemsCount() : int {
- $contents = file( $this->export_file_list, FILE_IGNORE_NEW_LINES );
-
- if ( ! is_array( $contents ) ) {
- return 0;
- }
+ $deployable_urls = DeployQueue::getTotalDeployableURLs();
- return count( $contents );
+ return $deployable_urls;
}
// TODO: rename to signalSuccessfulAction or such
@@ -377,8 +277,8 @@ public function uploadsCompleted() : bool {
* @throws StaticHTMLOutputException
*/
public function handleException( string $e ) : void {
- WsLog::l( 'Deployment: error encountered' );
- WsLog::l( $e );
+ Logger::l( 'Deployment: error encountered' );
+ Logger::l( $e );
throw new StaticHTMLOutputException( $e );
}
@@ -388,7 +288,7 @@ public function handleException( string $e ) : void {
*/
public function checkForValidResponses( int $code, array $good_codes ) : void {
if ( ! in_array( $code, $good_codes ) ) {
- WsLog::l(
+ Logger::l(
'BAD RESPONSE STATUS FROM API (' . $code . ')'
);
@@ -399,47 +299,5 @@ public function checkForValidResponses( int $code, array $good_codes ) : void {
);
}
}
-
- public function openPreviousHashesFile() : void {
- $this->file_paths_and_hashes = [];
-
- if ( is_file( $this->previous_hashes_path ) ) {
- $file = fopen( $this->previous_hashes_path, 'r' );
-
- if ( ! is_resource( $file ) ) {
- return;
- }
-
- while ( ( $line = fgetcsv( $file ) ) !== false ) {
- if ( isset( $line[0] ) && isset( $line[1] ) ) {
- $this->file_paths_and_hashes[ $line[0] ] = $line[1];
- }
- }
-
- fclose( $file );
- }
- }
-
- public function recordFilePathAndHashInMemory(
- string $target_path,
- string $local_file_contents
- ) : void {
- $this->file_paths_and_hashes[ $target_path ] =
- crc32( $local_file_contents );
- }
-
- public function writeFilePathAndHashesToFile() : void {
- $fp = fopen( $this->previous_hashes_path, 'w' );
-
- if ( ! is_resource( $fp ) ) {
- return;
- }
-
- foreach ( $this->file_paths_and_hashes as $key => $value ) {
- fwrite( $fp, $key . ',' . $value . PHP_EOL );
- }
-
- fclose( $fp );
- }
}
diff --git a/src/WsLog.php b/src/WsLog.php
deleted file mode 100755
index 7ba400c2..00000000
--- a/src/WsLog.php
+++ /dev/null
@@ -1,35 +0,0 @@
- 'Not permitted' ], 403 );
+ }
+
+ header( 'Content-Type: text/plain' );
+ status_header( 200 );
+
+ $log_rows = StaticHTMLOutput\Logger::getAll();
+
+ foreach ( $log_rows as $log_row ) {
+ echo "$log_row->time \t $log_row->log \t" . PHP_EOL;
+ }
+
+ die();
+ return null;
+}
+
+if ( $crawl_log ) {
+ if ( ! is_admin() ) {
+ wp_send_json( [ 'message' => 'Not permitted' ], 403 );
+ }
+
+ header( 'Content-Type: text/plain' );
+ status_header( 200 );
+
+ $log_rows = StaticHTMLOutput\CrawlLog::getAll();
+
+ foreach ( $log_rows as $log_row ) {
+ $crawl_status = '';
+
+ if ( ! $log_row->status ) {
+ $crawl_status = 'Pending';
+ } elseif ( $log_row->status === '777' ) {
+ $crawl_status = 'Skipped';
+ } else {
+ $crawl_status = $log_row->status;
+ }
+
+ echo str_pad( $crawl_status, 9 ) . " $log_row->url " .
+ " Note: $log_row->note \t" . PHP_EOL;
+ }
+
+ die();
+ return null;
+}
+
+// Plain-text dump of the crawl queue: one crawlable path per line.
+// NOTE(review): is_admin() reports whether an admin screen was requested, not
+// whether the current user has any capability - confirm this guard is sufficient.
+if ( $crawl_queue ) {
+ if ( ! is_admin() ) {
+ wp_send_json( [ 'message' => 'Not permitted' ], 403 );
+ }
+
+ header( 'Content-Type: text/plain' );
+ status_header( 200 );
+
+ $detected_urls = StaticHTMLOutput\CrawlQueue::getCrawlablePaths();
+
+ // separator goes first: the legacy ( array, separator ) order was
+ // deprecated in PHP 7.4 and throws a TypeError in PHP 8.0
+ echo implode( PHP_EOL, $detected_urls );
+
+ die();
+ return null;
+}
+
+if ( $crawl_progress ) {
+ if ( ! is_admin() ) {
+ wp_send_json( [ 'message' => 'Not permitted' ], 403 );
+ }
+
+ $detected_urls = StaticHTMLOutput\CrawlLog::getTotalCrawlableURLs();
+ $crawled_urls = StaticHTMLOutput\CrawlLog::getTotalCrawledURLs();
+
+ $json_response = [
+ 'detected' => $detected_urls,
+ 'crawled' => $crawled_urls,
+ ];
+
+ wp_send_json( $json_response, 200 );
+}
+
+if ( $deploy_progress ) {
+ if ( ! is_admin() ) {
+ wp_send_json( [ 'message' => 'Not permitted' ], 403 );
+ }
+
+ $remaining_urls = StaticHTMLOutput\DeployQueue::getTotal();
+
+ $json_response = [
+ 'remaining' => $remaining_urls,
+ ];
+
+ wp_send_json( $json_response, 200 );
+}
+
function static_html_output_action_links( $links ) {
$settings_link = 'Settings';
array_unshift( $links, $settings_link );
@@ -46,6 +145,8 @@ function wp_static_html_output_server_side_export() {
0
);
+
+
add_filter(
'plugin_action_links_' . plugin_basename( __FILE__ ),
'static_html_output_action_links'
@@ -63,7 +164,7 @@ function static_html_output_ajax() {
$ajax_method = filter_input( INPUT_POST, 'ajax_action' );
$controller_methods = [
- 'generate_filelist_preview',
+ 'detect_urls',
'prepare_for_export',
'post_process_archive_dir',
'finalize_deployment',
@@ -80,10 +181,6 @@ function static_html_output_ajax() {
return null;
} elseif ( strpos( $ajax_method, 'crawl' ) !== false ) {
$class = new StaticHTMLOutput\SiteCrawler();
- // crawl_again is used to detemine 2nd run of crawling
- if ( $ajax_method === 'crawl_again' ) {
- $ajax_method = 'crawl_discovered_links';
- }
} elseif ( strpos( $ajax_method, 'bitbucket' ) !== false ) {
$class = new StaticHTMLOutput\BitBucket();
@@ -112,7 +209,6 @@ function static_html_output_ajax() {
case 'gitlab_prepare_export':
$class->bootstrap();
$class->loadArchive();
- $class->getListOfFilesInRepo();
$class->prepareDeploy( true );
$class->createGitLabPagesConfig();
break;
diff --git a/statichtmloutput.css b/statichtmloutput.css
index fe34f432..ec4a83a8 100755
--- a/statichtmloutput.css
+++ b/statichtmloutput.css
@@ -47,12 +47,12 @@ div.postbox div.inside {
width: 95%;
}
-#exportStatus {
- width: 100%;
+#export_log_textarea {
+ font-family: monospace;
}
-#progress {
- display: none;
+#exportStatus {
+ width: 100%;
}
#progress-container {
@@ -90,7 +90,7 @@ div.postbox div.inside {
height: 18px;
width: 18px;
position: relative;
- display: inline-block;
+ display: none;
margin-top: 0px;
margin-right: 10px;
text-align: center;
diff --git a/tests/CSSProcessorTest.php b/tests/CSSProcessorTest.php
index fd6adfbd..84adbd6e 100755
--- a/tests/CSSProcessorTest.php
+++ b/tests/CSSProcessorTest.php
@@ -9,16 +9,15 @@ final class CSSProcessorTest extends TestCase {
/**
* @covers StaticHTMLOutput\CSSProcessor::__construct
- * @covers StaticHTMLOutput\CSSProcessor::processCSS
* @covers StaticHTMLOutput\CSSProcessor::addDiscoveredURL
- * @covers StaticHTMLOutput\CSSProcessor::detectIfURLsShouldBeHarvested
* @covers StaticHTMLOutput\CSSProcessor::getCSS
* @covers StaticHTMLOutput\CSSProcessor::getProtocolRelativeURL
* @covers StaticHTMLOutput\CSSProcessor::getTargetSiteProtocol
* @covers StaticHTMLOutput\CSSProcessor::isInternalLink
- * @covers StaticHTMLOutput\CSSProcessor::rewriteSiteURLsToPlaceholder
- * @covers StaticHTMLOutput\CSSProcessor::writeDiscoveredURLs
+ * @covers StaticHTMLOutput\CSSProcessor::isValidURL
+ * @covers StaticHTMLOutput\CSSProcessor::processCSS
* @covers StaticHTMLOutput\CSSProcessor::rewritePlaceholderURLsToDestination
+ * @covers StaticHTMLOutput\CSSProcessor::rewriteSiteURLsToPlaceholder
* @dataProvider cssSampleContents
*/
@@ -42,7 +41,7 @@ public function testParsingStylesheets( string $raw_css, string $parsed_css ) {
false, // $remove_wp_meta = false
'', // $rewrite_rules = false
'https://deploysite.com/', // $base_url
- '', // $selected_deployment_option = 'folder'
+ '', // $selected_deployment_option = 'zip'
'http://localsite.com/', // $wp_site_url
'/tmp/' // $wp_uploads_path
);
diff --git a/tests/HTMLProcessorTest.php b/tests/HTMLProcessorTest.php
index 1c6e4602..4a66f691 100755
--- a/tests/HTMLProcessorTest.php
+++ b/tests/HTMLProcessorTest.php
@@ -42,7 +42,7 @@ public function testDetectsInternalLink( $link, $expectation ) {
false, // $remove_wp_meta = false
'', // $rewrite_rules = false
'', // $base_url
- '', // $selected_deployment_option = 'folder'
+ '', // $selected_deployment_option = 'zip'
'', // $wp_site_url
'' // $wp_uploads_path
);
@@ -140,7 +140,7 @@ public function testNormalizePartialURLInAnchor(
false, // $remove_wp_meta = false
'', // $rewrite_rules = false
'https://mynewdomain.com', // $base_url
- '', // $selected_deployment_option = 'folder'
+ '', // $selected_deployment_option = 'zip'
'http://mywpsite.com', // $wp_site_url
'' // $wp_uploads_path
);
@@ -213,7 +213,7 @@ public function testRewritingSiteURLsToPlaceholder(
false, // $remove_wp_meta = false
'', // $rewrite_rules = false
'', // $base_url
- '', // $selected_deployment_option = 'folder'
+ '', // $selected_deployment_option = 'zip'
$site_url, // $wp_site_url
'' // $wp_uploads_path
);
@@ -289,45 +289,43 @@ public function rewritePlaceholdersProvider() {
/**
* @covers StaticHTMLOutput\HTMLProcessor::__construct
+ * @covers StaticHTMLOutput\CSSProcessor::__construct
+ * @covers StaticHTMLOutput\CSSProcessor::addDiscoveredURL
+ * @covers StaticHTMLOutput\CSSProcessor::getCSS
+ * @covers StaticHTMLOutput\CSSProcessor::getProtocolRelativeURL
+ * @covers StaticHTMLOutput\CSSProcessor::getTargetSiteProtocol
+ * @covers StaticHTMLOutput\CSSProcessor::isInternalLink
+ * @covers StaticHTMLOutput\CSSProcessor::isValidURL
+ * @covers StaticHTMLOutput\CSSProcessor::processCSS
+ * @covers StaticHTMLOutput\CSSProcessor::rewritePlaceholderURLsToDestination
+ * @covers StaticHTMLOutput\CSSProcessor::rewriteSiteURLsToPlaceholder
+ * @covers StaticHTMLOutput\HTMLProcessor::addDiscoveredURL
* @covers StaticHTMLOutput\HTMLProcessor::detectEscapedSiteURLs
- * @covers StaticHTMLOutput\HTMLProcessor::detectIfURLsShouldBeHarvested
* @covers StaticHTMLOutput\HTMLProcessor::detectUnchangedPlaceholderURLs
+ * @covers StaticHTMLOutput\HTMLProcessor::forceHTTPS
+ * @covers StaticHTMLOutput\HTMLProcessor::getBaseURLRewritePatterns
* @covers StaticHTMLOutput\HTMLProcessor::getHTML
* @covers StaticHTMLOutput\HTMLProcessor::getProtocolRelativeURL
* @covers StaticHTMLOutput\HTMLProcessor::getTargetSiteProtocol
- * @covers StaticHTMLOutput\HTMLProcessor::processHTML
- * @covers StaticHTMLOutput\HTMLProcessor::rewriteSiteURLsToPlaceholder
- * @covers StaticHTMLOutput\HTMLProcessor::stripHTMLComments
- * @covers StaticHTMLOutput\HTMLProcessor::writeDiscoveredURLs
- * @covers StaticHTMLOutput\HTMLProcessor::rewriteBaseURL
- * @covers StaticHTMLOutput\HTMLProcessor::addDiscoveredURL
- * @covers StaticHTMLOutput\HTMLProcessor::getBaseURLRewritePatterns
* @covers StaticHTMLOutput\HTMLProcessor::isInternalLink
+ * @covers StaticHTMLOutput\HTMLProcessor::isValidURL
* @covers StaticHTMLOutput\HTMLProcessor::normalizeURL
- * @covers StaticHTMLOutput\HTMLProcessor::processHead
- * @covers StaticHTMLOutput\HTMLProcessor::processLink
- * @covers StaticHTMLOutput\HTMLProcessor::processMeta
- * @covers StaticHTMLOutput\HTMLProcessor::rewriteWPPaths
- * @covers StaticHTMLOutput\HTMLProcessor::forceHTTPS
- * @covers StaticHTMLOutput\HTMLProcessor::processImage
- * @covers StaticHTMLOutput\HTMLProcessor::processImageSrcSet
* @covers StaticHTMLOutput\HTMLProcessor::processAnchor
* @covers StaticHTMLOutput\HTMLProcessor::processGenericHref
* @covers StaticHTMLOutput\HTMLProcessor::processGenericSrc
- * @covers StaticHTMLOutput\HTMLProcessor::rewriteEncodedSiteURLAndHostName
+ * @covers StaticHTMLOutput\HTMLProcessor::processHTML
+ * @covers StaticHTMLOutput\HTMLProcessor::processHead
+ * @covers StaticHTMLOutput\HTMLProcessor::processImage
+ * @covers StaticHTMLOutput\HTMLProcessor::processImageSrcSet
+ * @covers StaticHTMLOutput\HTMLProcessor::processLink
+ * @covers StaticHTMLOutput\HTMLProcessor::processMeta
* @covers StaticHTMLOutput\HTMLProcessor::processStyle
* @covers StaticHTMLOutput\HTMLProcessor::processStyleAttribute
- * @covers StaticHTMLOutput\CSSProcessor::__construct
- * @covers StaticHTMLOutput\CSSProcessor::addDiscoveredURL
- * @covers StaticHTMLOutput\CSSProcessor::detectIfURLsShouldBeHarvested
- * @covers StaticHTMLOutput\CSSProcessor::getCSS
- * @covers StaticHTMLOutput\CSSProcessor::getProtocolRelativeURL
- * @covers StaticHTMLOutput\CSSProcessor::getTargetSiteProtocol
- * @covers StaticHTMLOutput\CSSProcessor::isInternalLink
- * @covers StaticHTMLOutput\CSSProcessor::processCSS
- * @covers StaticHTMLOutput\CSSProcessor::rewritePlaceholderURLsToDestination
- * @covers StaticHTMLOutput\CSSProcessor::rewriteSiteURLsToPlaceholder
- * @covers StaticHTMLOutput\CSSProcessor::writeDiscoveredURLs
+ * @covers StaticHTMLOutput\HTMLProcessor::rewriteBaseURL
+ * @covers StaticHTMLOutput\HTMLProcessor::rewriteEncodedSiteURLAndHostName
+ * @covers StaticHTMLOutput\HTMLProcessor::rewriteSiteURLsToPlaceholder
+ * @covers StaticHTMLOutput\HTMLProcessor::rewriteWPPaths
+ * @covers StaticHTMLOutput\HTMLProcessor::stripHTMLComments
* @dataProvider processHTMLProvider
*/
public function testProcessHTML(
@@ -370,7 +368,7 @@ public function processHTMLProvider() {
false, // $remove_wp_meta = false
'', // $rewrite_rules = ''
'https://mynewdomain.com', // $base_url
- '', // $selected_deployment_option = 'folder'
+ '', // $selected_deployment_option = 'zip'
'http://localhost:4040', // $wp_site_url
'/tmp/', // $wp_uploads_path - temp write file during test while refactoring
'http://mywpsite.com/a-page/',
@@ -384,7 +382,7 @@ public function processHTMLProvider() {
false, // $remove_wp_meta = false
'', // $rewrite_rules = ''
'https://mynewdomain.com', // $base_url
- '', // $selected_deployment_option = 'folder'
+ '', // $selected_deployment_option = 'zip'
'http://localhost:4040', // $wp_site_url
'/tmp/', // $wp_uploads_path - temp write file during test while refactoring
'http://mywpsite.com/a-page/',
@@ -398,7 +396,7 @@ public function processHTMLProvider() {
false, // $remove_wp_meta = false
'', // $rewrite_rules = ''
'https://mynewdomain.com', // $base_url
- '', // $selected_deployment_option = 'folder'
+ '', // $selected_deployment_option = 'zip'
'http://localhost:4040', // $wp_site_url
'/tmp/', // $wp_uploads_path - temp write file during test while refactoring
'http://mywpsite.com/a-page/',
@@ -412,7 +410,7 @@ public function processHTMLProvider() {
false, // $remove_wp_meta = false
'', // $rewrite_rules = ''
'https://mynewdomain.com', // $base_url
- '', // $selected_deployment_option = 'folder'
+ '', // $selected_deployment_option = 'zip'
'http://mydomain.com', // $wp_site_url
'/tmp/', // $wp_uploads_path - temp write file during test while refactoring
'http://mydomain.com/',
@@ -426,7 +424,7 @@ public function processHTMLProvider() {
false, // $remove_wp_meta = false
'', // $rewrite_rules = ''
'https://mynewdomain.com', // $base_url
- '', // $selected_deployment_option = 'folder'
+ '', // $selected_deployment_option = 'zip'
'http://mydomain.com', // $wp_site_url
'/tmp/', // $wp_uploads_path - temp write file during test while refactoring
'http://mydomain.com/',
@@ -440,7 +438,7 @@ public function processHTMLProvider() {
false, // $remove_wp_meta = false
'', // $rewrite_rules = ''
'https://mynewdomain.com', // $base_url
- '', // $selected_deployment_option = 'folder'
+ '', // $selected_deployment_option = 'zip'
'http://localhost', // $wp_site_url
'/tmp/', // $wp_uploads_path - temp write file during test while refactoring
'http://localhost/',
@@ -454,7 +452,7 @@ public function processHTMLProvider() {
false, // $remove_wp_meta = false
'', // $rewrite_rules = ''
'http://mynewdomain.com', // $base_url
- '', // $selected_deployment_option = 'folder'
+ '', // $selected_deployment_option = 'zip'
'http://localhost', // $wp_site_url
'/tmp/', // $wp_uploads_path - temp write file during test while refactoring
'http://localhost/',
@@ -468,7 +466,7 @@ public function processHTMLProvider() {
false, // $remove_wp_meta = false
'', // $rewrite_rules = ''
'https://mynewdomain.com', // $base_url
- '', // $selected_deployment_option = 'folder'
+ '', // $selected_deployment_option = 'zip'
'http://localhost:4444', // $wp_site_url
'/tmp/', // $wp_uploads_path - temp write file during test while refactoring
'http://localhost:4444/',
@@ -482,7 +480,7 @@ public function processHTMLProvider() {
false, // $remove_wp_meta = false
'', // $rewrite_rules = ''
'https://mynewdomain.com', // $base_url
- '', // $selected_deployment_option = 'folder'
+ '', // $selected_deployment_option = 'zip'
'http://localhost:4444', // $wp_site_url
'/tmp/', // $wp_uploads_path - temp write file during test while refactoring
'http://localhost:4444/',
@@ -496,7 +494,7 @@ public function processHTMLProvider() {
false, // $remove_wp_meta = false
'', // $rewrite_rules = ''
'https://mynewdomain.com', // $base_url
- '', // $selected_deployment_option = 'folder'
+ '', // $selected_deployment_option = 'zip'
'http://localhost:4444', // $wp_site_url
'/tmp/', // $wp_uploads_path - temp write file during test while refactoring
'http://localhost:4444/',
diff --git a/tools/V6Cleanup.php b/tools/V6Cleanup.php
index 72717e8b..25a8c444 100755
--- a/tools/V6Cleanup.php
+++ b/tools/V6Cleanup.php
@@ -9,7 +9,7 @@ public static function cleanup() : void {
$deleted_v6_options = delete_option( 'wp2static-options' );
if ( $deleted_v6_options ) {
- WsLog::l( 'Deleted Version 6 options from DB' );
+ Logger::l( 'Deleted Version 6 options from DB' );
}
$v6_txt_files = [
@@ -32,7 +32,7 @@ public static function cleanup() : void {
$deleted_file = unlink( SiteInfo::getPath( 'uploads' ) . $txt_file );
if ( $deleted_file ) {
- WsLog::l( 'Deleted Version 6 text file: ' . $txt_file );
+ Logger::l( 'Deleted Version 6 text file: ' . $txt_file );
}
}
}
@@ -44,7 +44,7 @@ public static function cleanup() : void {
$deleted_zip = unlink( $v6_zip_file );
if ( $deleted_zip ) {
- WsLog::l( 'Deleted Version 6 zip file: ' . $v6_zip_file );
+ Logger::l( 'Deleted Version 6 zip file: ' . $v6_zip_file );
}
}
}
@@ -54,7 +54,7 @@ public static function cleanup() : void {
if ( is_array( $v6_archives ) ) {
foreach ( $v6_archives as $v6_archive ) {
if ( is_dir( $v6_archive ) ) {
- WsLog::l( 'Deleting Version 6 archive: ' . $v6_archive );
+ Logger::l( 'Deleting Version 6 archive: ' . $v6_archive );
FilesHelper::delete_dir_with_files( $v6_archive );
}
}
diff --git a/views/crawl-queue-page.php b/views/crawl-queue-page.php
new file mode 100755
index 00000000..02573385
--- /dev/null
+++ b/views/crawl-queue-page.php
@@ -0,0 +1,23 @@
+
+
+
| URLs in Crawl Queue | +
|---|
| Crawl queue is empty. | +
| + |
| - - | -
- displayTextfield($this, 'baseUrl-folder', 'http://mystaticsite.com', '', ''); ?> - - Set this to the URL your visitors will use to access your site. - - |
-
|---|---|
| - - | -
- - - By exporting to a directory on your current server, you can check how it will look when published and make any adjustments needed. If you put this in a publicly accessible path and the links have been rewritten to support it, you may use this method to easily preview your static site without needing to leave your browser. - -As a safeguard, this plugin will only allow you to export to a new directory, an empty directory, or one that contains a file named |
-
This is set to 1, by default, in order to allow exporting on low-resource environments, such as shared hosting servers. Each increment is the amount of files the server will try to process on each request that the browser sends it. Incrementing this will speed up your exports, by processing more are a time. If your export is failing, due to resource (memory, CPU) limits being reached, try setting this to a lower number.
+This is set to 5, by default, in order to allow exporting on low-resource environments, such as shared hosting servers. Each increment is the number of files the server will try to process on each request that the browser sends it. Incrementing this will speed up your exports, by processing more at a time. If your export is failing, due to resource (memory, CPU) limits being reached, try setting this to a lower number.
total_detected_urls; ?> + + URLs were detected on your site that will be used to initiate the crawl. Other URLs will be discovered while crawling.
+Your ZIP file will be available for download at URL or on your filesystem at PATH
+ +Your uncompressed static site files will also be available on your filesystem at PATH
+