Skip to content

Commit d84d77c

Browse files
committed
wip CrawlQueue;CrawlLog
1 parent f2dca0e commit d84d77c

File tree

8 files changed

+334
-108
lines changed

8 files changed

+334
-108
lines changed

src/CSSProcessor.php

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -431,19 +431,13 @@ public function writeDiscoveredURLs() : void {
431431
}
432432
}
433433

434-
file_put_contents(
435-
$this->wp_uploads_path .
436-
'/WP-STATIC-DISCOVERED-URLS.txt',
437-
PHP_EOL .
438-
implode( PHP_EOL, array_unique( $this->discovered_urls ) ),
439-
FILE_APPEND | LOCK_EX
440-
);
434+
// TODO: check for existing URLs in CrawlLog and only add non-processed to CrawlQueue
435+
$unique_urls = array_unique( $this->discovered_urls );
436+
array_filter( $unique_urls );
437+
sort( $unique_urls );
441438

442-
chmod(
443-
$this->wp_uploads_path .
444-
'/WP-STATIC-DISCOVERED-URLS.txt',
445-
0664
446-
);
439+
// TODO: also add new URLs to CrawlLog
440+
CrawlQueue::addUrls( $unique_urls );
447441
}
448442

449443
public function isValidURL( string $url ) : bool {

src/Controller.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,9 @@ public function setDefaultOptions() : void {
110110

111111
/**
 * Activation routine for one site: seeds the default options, then
 * asks each storage class to create its database table.
 */
public function activate_for_single_site() : void {
    $this->setDefaultOptions();

    // Same classes, same order as before: queue, log, exclusions.
    $table_owners = [
        CrawlQueue::class,
        CrawlLog::class,
        Exclusions::class,
    ];

    foreach ( $table_owners as $table_owner ) {
        $table_owner::createTable();
    }
}
114117

115118
/**

src/CrawlLog.php

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
<?php
2+
3+
namespace StaticHTMLOutput;
4+
5+
/**
 * Log of detected/discovered URLs, stored in the
 * `{prefix}statichtmloutput_crawl_log` table.
 *
 * Each row holds a URL, a free-text note and a status. Rows are added
 * with an initial status of 0, to be updated with the response code
 * after crawling.
 */
class CrawlLog {

    /**
     * Un-prefixed name of the table backing the log.
     */
    private const TABLE = 'statichtmloutput_crawl_log';

    /**
     * Fully prefixed table name for the current site.
     */
    private static function tableName() : string {
        global $wpdb;

        return $wpdb->prefix . self::TABLE;
    }

    /**
     * Create (or upgrade) the crawl log table via dbDelta.
     */
    public static function createTable() : void {
        global $wpdb;

        $table_name = self::tableName();

        $charset_collate = $wpdb->get_charset_collate();

        /**
         * Detected/discovered URLs added with initial status of 0
         * and will be updated with response code after crawling
         *
         * dbDelta is picky about formatting: one column per line and
         * two spaces between PRIMARY KEY and its column list.
         */
        $sql = "CREATE TABLE $table_name (
            id mediumint(9) NOT NULL AUTO_INCREMENT,
            url VARCHAR(2083) NOT NULL,
            note TEXT NOT NULL,
            status SMALLINT DEFAULT 0 NOT NULL,
            PRIMARY KEY  (id)
        ) $charset_collate;";

        require_once ABSPATH . 'wp-admin/includes/upgrade.php';
        dbDelta( $sql );
    }

    /**
     * Add all Urls to log
     *
     * Every non-empty URL becomes one row carrying the same note and
     * status. Nothing is written when no usable URL is supplied.
     *
     * @param string[] $urls List of URLs to log info for
     * @param string $note Note stored alongside each URL
     * @param int $status Status stored alongside each URL (0 = not yet crawled)
     */
    public static function addUrls( array $urls, string $note, int $status = 0 ) : void {
        global $wpdb;

        $placeholders = [];
        $values = [];

        foreach ( $urls as $url ) {
            if ( ! $url ) {
                continue;
            }

            // one tuple per row, matching the (url, note, status) column list
            $placeholders[] = '(%s, %s, %d)';
            $values[] = rawurldecode( $url );
            $values[] = $note;
            $values[] = $status;
        }

        // an INSERT without any VALUES tuples is invalid SQL
        if ( ! $placeholders ) {
            return;
        }

        $query_string =
            'INSERT INTO ' . self::tableName() . ' (url, note, status) VALUES ' .
            implode( ', ', $placeholders );
        $query = $wpdb->prepare( $query_string, $values );

        $wpdb->query( $query );
    }

    /**
     * Get all logged URLs, sorted ascending
     *
     * @return string[] All logged URLs
     */
    public static function getCrawlablePaths() : array {
        global $wpdb;

        $table_name = self::tableName();

        return $wpdb->get_col( "SELECT url FROM $table_name ORDER by url ASC" );
    }

    /**
     * Get total number of logged URLs
     *
     * @return int Total logged URLs
     */
    public static function getTotalCrawlableURLs() : int {
        global $wpdb;

        $table_name = self::tableName();

        // get_var() yields a numeric string, or null on failure
        return (int) $wpdb->get_var( "SELECT COUNT(*) FROM $table_name" );
    }

    /**
     * Clear CrawlLog via truncate
     *
     * Logs a message if rows are still present afterwards.
     */
    public static function truncate() : void {
        WsLog::l( 'Deleting CrawlLog (Detected URLs)' );

        global $wpdb;

        $table_name = self::tableName();

        $wpdb->query( "TRUNCATE TABLE $table_name" );

        if ( self::getTotalCrawlableURLs() > 0 ) {
            WsLog::l( 'failed to truncate CrawlLog: try deleting instead' );
        }
    }

    /**
     * Count URLs in Crawl Log
     */
    public static function getTotal() : int {
        return self::getTotalCrawlableURLs();
    }
}

src/CrawlQueue.php

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
<?php
2+
3+
namespace StaticHTMLOutput;
4+
5+
/**
 * Queue of URLs to crawl, stored in the `{prefix}statichtmloutput_urls`
 * table (one URL per row).
 */
class CrawlQueue {

    /**
     * Un-prefixed name of the table backing the queue.
     */
    private const TABLE = 'statichtmloutput_urls';

    /**
     * Fully prefixed table name for the current site.
     */
    private static function tableName() : string {
        global $wpdb;

        return $wpdb->prefix . self::TABLE;
    }

    /**
     * Create (or upgrade) the queue table via dbDelta.
     */
    public static function createTable() : void {
        global $wpdb;

        $table_name = self::tableName();

        $charset_collate = $wpdb->get_charset_collate();

        // dbDelta is picky about formatting: one column per line and
        // two spaces between PRIMARY KEY and its column list.
        $sql = "CREATE TABLE $table_name (
            id mediumint(9) NOT NULL AUTO_INCREMENT,
            url VARCHAR(2083) NOT NULL,
            PRIMARY KEY  (id)
        ) $charset_collate;";

        require_once ABSPATH . 'wp-admin/includes/upgrade.php';
        dbDelta( $sql );
    }

    /**
     * Add all Urls to queue
     *
     * Empty entries are skipped and each remaining URL is stored
     * raw-URL-decoded. Nothing is written when no usable URL is supplied.
     *
     * @param string[] $urls List of URLs to crawl
     */
    public static function addUrls( array $urls ) : void {
        global $wpdb;

        $placeholders = [];
        $values = [];

        foreach ( $urls as $url ) {
            if ( ! $url ) {
                continue;
            }

            $placeholders[] = '(%s)';
            $values[] = rawurldecode( $url );
        }

        // an INSERT without any VALUES tuples is invalid SQL
        if ( ! $placeholders ) {
            return;
        }

        $query_string =
            'INSERT INTO ' . self::tableName() . ' (url) VALUES ' .
            implode( ', ', $placeholders );
        $query = $wpdb->prepare( $query_string, $values );

        $wpdb->query( $query );
    }

    /**
     * Get all crawlable URLs, sorted ascending
     *
     * @return string[] All crawlable URLs
     */
    public static function getCrawlablePaths() : array {
        global $wpdb;

        $table_name = self::tableName();

        return $wpdb->get_col( "SELECT url FROM $table_name ORDER by url ASC" );
    }

    /**
     * Get total crawlable URLs
     *
     * @return int Total crawlable URLs
     */
    public static function getTotalCrawlableURLs() : int {
        global $wpdb;

        $table_name = self::tableName();

        // get_var() yields a numeric string, or null on failure
        return (int) $wpdb->get_var( "SELECT COUNT(*) FROM $table_name" );
    }

    /**
     * Clear CrawlQueue via truncate
     *
     * Logs a message if rows are still present afterwards.
     */
    public static function truncate() : void {
        WsLog::l( 'Deleting CrawlQueue (Detected URLs)' );

        global $wpdb;

        $table_name = self::tableName();

        $wpdb->query( "TRUNCATE TABLE $table_name" );

        if ( self::getTotalCrawlableURLs() > 0 ) {
            WsLog::l( 'failed to truncate CrawlQueue: try deleting instead' );
        }
    }

    /**
     * Count URLs in Crawl Queue
     */
    public static function getTotal() : int {
        return self::getTotalCrawlableURLs();
    }
}

src/Exclusions.php

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
<?php
2+
3+
namespace StaticHTMLOutput;
4+
5+
/**
 * Exclusion patterns, stored in the `{prefix}statichtmloutput_exclusions`
 * table (one pattern per row).
 */
class Exclusions {

    /**
     * Create (or upgrade) the exclusions table via dbDelta.
     */
    public static function createTable() : void {
        global $wpdb;

        $table_name = $wpdb->prefix . 'statichtmloutput_exclusions';

        $charset_collate = $wpdb->get_charset_collate();

        // dbDelta is picky about formatting: one column per line and
        // two spaces between PRIMARY KEY and its column list.
        $sql = "CREATE TABLE $table_name (
            id mediumint(9) NOT NULL AUTO_INCREMENT,
            pattern VARCHAR(2083) NOT NULL,
            PRIMARY KEY  (id)
        ) $charset_collate;";

        require_once ABSPATH . 'wp-admin/includes/upgrade.php';
        dbDelta( $sql );
    }

    /**
     * Add all patterns to the exclusions table
     *
     * Empty entries are skipped. Nothing is written when no usable
     * pattern is supplied.
     *
     * @param string[] $patterns List of exclusion patterns to store
     */
    public static function addPatterns( array $patterns ) : void {
        global $wpdb;

        $table_name = $wpdb->prefix . 'statichtmloutput_exclusions';

        $placeholders = [];
        $values = [];

        foreach ( $patterns as $pattern ) {
            if ( ! $pattern ) {
                continue;
            }

            $placeholders[] = '(%s)';
            $values[] = $pattern;
        }

        // an INSERT without any VALUES tuples is invalid SQL
        if ( ! $placeholders ) {
            return;
        }

        $query_string =
            'INSERT INTO ' . $table_name . ' (pattern) VALUES ' .
            implode( ', ', $placeholders );
        $query = $wpdb->prepare( $query_string, $values );

        $wpdb->query( $query );
    }

}

0 commit comments

Comments
 (0)