diff --git a/src/CLI.php b/src/CLI.php
index 28d7efc3..4ed7a360 100755
--- a/src/CLI.php
+++ b/src/CLI.php
@@ -100,14 +100,13 @@ public function generate() : void {
         $plugin = Controller::getInstance();
 
         WP_CLI::log( 'Generating file list' );
-        $plugin->generate_filelist_preview();
+        $plugin->detect_urls();
 
         WP_CLI::log( 'Preparing for export' );
         $plugin->prepare_for_export();
 
         $site_crawler = new SiteCrawler();
         $site_crawler->crawl_site();
-        $site_crawler->crawl_discovered_links();
 
         WP_CLI::log( 'Performing post process actions' );
         $plugin->post_process_archive_dir();
diff --git a/src/FilesHelper.php b/src/FilesHelper.php
index a20c5c60..0c04b75b 100755
--- a/src/FilesHelper.php
+++ b/src/FilesHelper.php
@@ -389,14 +389,11 @@ public static function getAllWPPostURLs( string $wp_site_url ) : array {
                     case 'page':
                         $permalink = get_page_link( $post->ID );
                         break;
-                    case 'post':
-                        $permalink = get_permalink( $post->ID );
-                        break;
                     case 'attachment':
                         $permalink = get_attachment_link( $post->ID );
                         break;
                     default:
-                        $permalink = get_post_permalink( $post->ID );
+                        $permalink = get_permalink( $post->ID );
                         break;
                 }
             }
diff --git a/src/HTMLProcessor.php b/src/HTMLProcessor.php
index 869f0cfa..7d97263f 100755
--- a/src/HTMLProcessor.php
+++ b/src/HTMLProcessor.php
@@ -620,7 +620,7 @@ public function addDiscoveredURL( string $url ) : void {
         if ( $this->isInternalLink( (string) $url ) ) {
             $path = (string) parse_url( (string) $url, PHP_URL_PATH );
 
-            if ( $path[0] !== '/' ) {
+            if ( empty( $path ) || $path[0] !== '/' ) {
                 return;
             }
 
diff --git a/src/SiteCrawler.php b/src/SiteCrawler.php
index 9cda4a35..57dc015a 100755
--- a/src/SiteCrawler.php
+++ b/src/SiteCrawler.php
@@ -94,9 +94,33 @@ public function crawl_site() : void {
         } else {
             if ( ! defined( 'WP_CLI' ) ) {
                 echo 'SUCCESS';
+            } elseif ( ! empty( $this->progress_bar ) ) {
+                $this->progress_bar->finish();
             }
         }
     }
+
+    /**
+     * Tick the progress bar by one, passing an updated
+     * 'Processing URLs x / y' message along with the tick.
+     *
+     * Returns immediately when $this->progress_bar is empty.
+     */
+    public function progressBarTick() : void {
+        if ( empty( $this->progress_bar ) ) {
+            return;
+        }
+
+        // filter_var() yields a string (or false), so cast before adding
+        $processed = (int) filter_var(
+            $this->progress_bar->current(),
+            FILTER_SANITIZE_NUMBER_INT
+        ) + 1;
+
+        $this->progress_bar->tick(
+            1,
+            sprintf( 'Processing URLs %d / %d', $processed, CrawlLog::getTotalCrawlableURLs() )
+        );
+    }
 
     public function crawlABitMore() : void {
         $batch_of_links_to_crawl = [];
@@ -118,11 +142,14 @@ public function crawlABitMore() : void {
         $this->archive_dir = $this->settings['wp_uploads_path'] .
             '/static-html-output/';
 
-        // TODO: modify this to show Detected / Crawled URL progress
-        // if ( defined( 'WP_CLI' ) && empty( $this->progress_bar ) ) {
-        // $this->progress_bar =
-        // \WP_CLI\Utils\make_progress_bar( 'Crawling site', $total_urls_to_crawl );
-        // }
+        if ( defined( 'WP_CLI' ) && empty( $this->progress_bar ) ) {
+            $this->progress_bar =
+                \WP_CLI\Utils\make_progress_bar( sprintf( 'Processing URLs %d / %d', 0, CrawlLog::getTotalCrawlableURLs() ), CrawlLog::getTotalCrawlableURLs() );
+        }
+
+        if ( ! empty( $this->progress_bar ) ) {
+            $this->progress_bar->setTotal( CrawlLog::getTotalCrawlableURLs() );
+        }
 
         // TODO: add these to Exclusions table
         $exclusions = [ 'wp-json' ];
@@ -157,6 +184,7 @@ public function crawlABitMore() : void {
                 $url_path = (string) parse_url( $this->url, PHP_URL_PATH );
 
                 if ( ! $url_path ) {
+                    $this->progressBarTick();
                     continue 2;
                 }
 
@@ -164,12 +192,7 @@ public function crawlABitMore() : void {
                     CrawlLog::updateStatus( $url_path, 777 );
                     CrawlQueue::removeURL( $url_path );
 
-                    // TODO: reimplement progress bar
-                    // if ( ! empty( $this->progress_bar ) ) {
-                    // $this->progress_bar->tick();
-                    // }
-
-                    // skip the outer foreach loop
+                    $this->progressBarTick();
                     continue 2;
                 }
             }
@@ -185,10 +208,7 @@ public function crawlABitMore() : void {
 
             // ProgressLog::l( $completed_urls, $total_urls_to_crawl );
 
-            // TODO: reimplement progress bar
-            // if ( ! empty( $this->progress_bar ) ) {
-            // $this->progress_bar->tick();
-            // }
+            $this->progressBarTick();
         }
 
         $this->checkIfMoreCrawlingNeeded();
@@ -393,6 +413,10 @@ public function loadFileForProcessing() : bool {
                 break;
         }
 
+        if ( defined( 'WP_CLI' ) ) {
+            \WP_CLI::debug( sprintf( 'Processing %s', $this->url ) );
+        }
+
         return true;
     }
 
@@ -408,6 +432,8 @@ public function checkIfMoreCrawlingNeeded() : void {
         } else {
             if ( ! defined( 'WP_CLI' ) ) {
                 echo 'SUCCESS';
+            } elseif ( ! empty( $this->progress_bar ) ) {
+                $this->progress_bar->finish();
             }
         }
     }
@@ -421,6 +447,10 @@ public function saveFile() : void {
         );
 
         $file_writer->saveFile( $this->archive_dir );
+
+        if ( defined( 'WP_CLI' ) ) {
+            \WP_CLI::debug( sprintf( 'Saved %s', $this->url ) );
+        }
     }
 
     public function getExtensionFromURL() : string {