diff --git a/tests/bootstrap.php b/tests/bootstrap.php index ec051bc1..2bf7f86b 100644 --- a/tests/bootstrap.php +++ b/tests/bootstrap.php @@ -9,6 +9,7 @@ require_once __DIR__ . '/../wp-includes/mysql/class-wp-mysql-token.php'; require_once __DIR__ . '/../wp-includes/mysql/class-wp-mysql-lexer.php'; require_once __DIR__ . '/../wp-includes/mysql/class-wp-mysql-parser.php'; +require_once __DIR__ . '/../wp-includes/mysql/class-wp-mysql-naive-query-stream.php'; require_once __DIR__ . '/../wp-includes/sqlite/class-wp-sqlite-query-rewriter.php'; require_once __DIR__ . '/../wp-includes/sqlite/class-wp-sqlite-lexer.php'; require_once __DIR__ . '/../wp-includes/sqlite/class-wp-sqlite-token.php'; diff --git a/tests/mysql/WP_MySQL_Naive_Query_Stream_Tests.php b/tests/mysql/WP_MySQL_Naive_Query_Stream_Tests.php new file mode 100644 index 00000000..ac2aedaa --- /dev/null +++ b/tests/mysql/WP_MySQL_Naive_Query_Stream_Tests.php @@ -0,0 +1,110 @@ +append_sql( 'SELECT id FROM users;' ); + $this->assertTrue( $stream->next_query() ); + $this->assertSame( 'SELECT id FROM users;', $stream->get_query() ); + } + + public function test_next_query_returns_false_if_the_input_is_incomplete(): void { + $stream = new WP_MySQL_Naive_Query_Stream(); + $stream->append_sql( 'SELECT id FROM users' ); + $this->assertFalse( $stream->next_query() ); + } + + public function test_next_query_returns_true_if_the_input_is_complete_but_undelimited(): void { + $stream = new WP_MySQL_Naive_Query_Stream(); + $stream->append_sql( 'SELECT id FROM users' ); + $stream->mark_input_complete(); + $this->assertTrue( $stream->next_query() ); + $this->assertSame( 'SELECT id FROM users', $stream->get_query() ); + } + + public function test_next_query_parses_multiple_queries_with_even_appends(): void { + $stream = new WP_MySQL_Naive_Query_Stream(); + $stream->append_sql( 'SELECT id FROM users; SELECT name FROM users2;' ); + + $this->assertTrue( $stream->next_query() ); + $this->assertSame( 'SELECT id FROM users;', 
$stream->get_query() ); + + $this->assertTrue( $stream->next_query() ); + $this->assertSame( ' SELECT name FROM users2;', $stream->get_query() ); + + $this->assertFalse( $stream->next_query() ); + + $stream->append_sql( 'SELECT name FROM users3;' ); + $this->assertTrue( $stream->next_query() ); + $this->assertSame( 'SELECT name FROM users3;', $stream->get_query() ); + + $this->assertFalse( $stream->next_query() ); + } + + public function test_next_query_parses_multiple_queries_with_uneven_appends(): void { + $stream = new WP_MySQL_Naive_Query_Stream(); + $stream->append_sql( 'SELECT id FROM ' ); + + $this->assertFalse( $stream->next_query() ); + + $stream->append_sql( 'users; SELECT name ' ); + $this->assertTrue( $stream->next_query() ); + $this->assertSame( 'SELECT id FROM users;', $stream->get_query() ); + + $this->assertFalse( $stream->next_query() ); + $stream->append_sql( ', id FROM users2; INSERT' ); + $this->assertTrue( $stream->next_query() ); + $this->assertSame( ' SELECT name , id FROM users2;', $stream->get_query() ); + + $this->assertFalse( $stream->next_query() ); + + $stream->append_sql( ' INTO users3 VALUES (1, 2)' ); + $stream->mark_input_complete(); + $this->assertTrue( $stream->next_query() ); + $this->assertSame( ' INSERT INTO users3 VALUES (1, 2)', $stream->get_query() ); + } + + public function test_next_query_parses_queries_with_trailing_block_comments_included(): void { + $stream = new WP_MySQL_Naive_Query_Stream(); + $stream->append_sql( 'SELECT id FROM users /* foo */' ); + $stream->mark_input_complete(); + + $this->assertTrue( $stream->next_query() ); + $this->assertSame( 'SELECT id FROM users /* foo */', $stream->get_query() ); + + $this->assertFalse( $stream->next_query() ); + } + + public function test_next_query_parses_queries_with_trailing_block_comments_excluded(): void { + $stream = new WP_MySQL_Naive_Query_Stream(); + $stream->append_sql( 'SELECT id FROM users; /* foo */' ); + $stream->mark_input_complete(); + + $this->assertTrue( 
$stream->next_query() ); + $this->assertSame( 'SELECT id FROM users;', $stream->get_query() ); + + $this->assertFalse( $stream->next_query() ); + $this->assertEquals(WP_MySQL_Naive_Query_Stream::STATE_FINISHED, $stream->get_state()); + } + + public function test_treats_too_large_input_as_a_syntax_error(): void { + $five_megabytes = str_repeat( 'lorem ', 1024 * 1024 ); + + $stream = new WP_MySQL_Naive_Query_Stream(); + $stream->append_sql( $five_megabytes ); + $this->assertFalse( $stream->next_query() ); + $this->assertEquals(WP_MySQL_Naive_Query_Stream::STATE_SYNTAX_ERROR, $stream->get_state()); + } + + public function test_next_query_returns_false_if_the_input_has_a_syntax_error(): void { + $this->markTestSkipped('This test is expected to fail because the naive query stream doesn\'t understand what a valid query is. It\'s just a heuristic that works for most cases.'); + + $stream = new WP_MySQL_Naive_Query_Stream(); + $stream->append_sql( 'SELECT id FROM users WHERE id = ihj' ); + $stream->mark_input_complete(); + $this->assertFalse( $stream->next_query() ); + } +} diff --git a/wp-includes/mysql/class-wp-mysql-naive-query-stream.php b/wp-includes/mysql/class-wp-mysql-naive-query-stream.php new file mode 100644 index 00000000..0349e5df --- /dev/null +++ b/wp-includes/mysql/class-wp-mysql-naive-query-stream.php @@ -0,0 +1,165 @@ +append_sql( 'SELECT id FROM users; SELECT * FROM posts;' ); + * while ( $stream->next_query() ) { + * $sql_string = $stream->get_query(); + * // Process the query. + * } + * $stream->append_sql( 'CREATE TABLE users (id INT, name VARCHAR(255));' ); + * while ( $stream->next_query() ) { + * $sql_string = $stream->get_query(); + * // Process the query. 
/**
 * Splits a stream of MySQL input into individual queries using the lexer.
 *
 * SQL text is appended in arbitrary chunks with append_sql(). Each call to
 * next_query() tokenizes the buffered input and extracts everything up to and
 * including the first semicolon token. Once the input has been marked as
 * complete, a trailing query without a semicolon is returned as well. Input
 * consisting only of whitespace and comments is never returned as a query.
 *
 * The stream is "naive": it does not check whether the extracted text is a
 * valid query. The only error it reports is a buffer that grew past
 * MAX_SQL_BUFFER_SIZE without yielding a query.
 *
 * Usage:
 *
 *     $stream = new WP_MySQL_Naive_Query_Stream();
 *     $stream->append_sql( 'SELECT id FROM users; SELECT * FROM posts;' );
 *     while ( $stream->next_query() ) {
 *         $sql_string = $stream->get_query();
 *         // Process the query.
 *     }
 *     $stream->append_sql( 'CREATE TABLE users (id INT, name VARCHAR(255));' );
 *     while ( $stream->next_query() ) {
 *         $sql_string = $stream->get_query();
 *         // Process the query.
 *     }
 *     $stream->mark_input_complete();
 *     $stream->next_query(); // returns false
 */
class WP_MySQL_Naive_Query_Stream {

	/**
	 * The stream is ready to be scanned, or the last scan produced a query.
	 */
	const STATE_QUERY = 'valid';

	/**
	 * No query was found and the buffer exceeded MAX_SQL_BUFFER_SIZE.
	 */
	const STATE_SYNTAX_ERROR = 'syntax_error';

	/**
	 * No complete query is buffered and more input may still arrive.
	 */
	const STATE_PAUSED_ON_INCOMPLETE_INPUT = 'paused_on_incomplete_input';

	/**
	 * The input is complete and no further queries are left in the buffer.
	 */
	const STATE_FINISHED = 'finished';

	/**
	 * The maximum size of the buffer to store the SQL input. We don't
	 * have enough information from the lexer to distinguish between
	 * an incomplete input and a syntax error so we use a heuristic –
	 * if we've accumulated more than this amount of SQL input, we assume
	 * it's a syntax error. That's why this class is called a "naive" query
	 * stream.
	 */
	const MAX_SQL_BUFFER_SIZE = 1024 * 1024 * 2;

	/**
	 * SQL bytes appended so far that have not been returned as a query yet.
	 *
	 * @var string
	 */
	private $sql_buffer = '';

	/**
	 * Whether mark_input_complete() has been called.
	 *
	 * @var bool
	 */
	private $input_complete = false;

	/**
	 * One of the STATE_* constants.
	 *
	 * Starts as STATE_QUERY, the same value append_sql() assigns, so that
	 * get_state() always returns a STATE_* constant.
	 *
	 * @var string
	 */
	private $state = self::STATE_QUERY;

	/**
	 * The query found by the most recent next_query() call, or false.
	 *
	 * @var string|false
	 */
	private $last_query = false;

	public function __construct() {}

	/**
	 * Appends a chunk of SQL to the internal buffer.
	 *
	 * @param string $sql The SQL chunk. It may end in the middle of a query.
	 * @return bool False when the input was already marked as complete (the
	 *              chunk is discarded), true otherwise.
	 */
	public function append_sql( string $sql ) {
		if ( $this->input_complete ) {
			return false;
		}
		$this->sql_buffer .= $sql;
		// New bytes may complete a query, so leave any paused/error state.
		$this->state = self::STATE_QUERY;
		return true;
	}

	/**
	 * Tells whether the stream is waiting for more input.
	 *
	 * @return bool True when the state is STATE_PAUSED_ON_INCOMPLETE_INPUT.
	 */
	public function is_paused_on_incomplete_input(): bool {
		return self::STATE_PAUSED_ON_INCOMPLETE_INPUT === $this->state;
	}

	/**
	 * Declares that no more SQL will be appended.
	 *
	 * After this call, a trailing query without a semicolon is treated as
	 * complete by next_query().
	 *
	 * @return void
	 */
	public function mark_input_complete() {
		$this->input_complete = true;

		// A paused stream was waiting for bytes that will never arrive.
		// Without leaving the paused state, next_query() would keep returning
		// false and the buffered trailing query would never be returned.
		if ( self::STATE_PAUSED_ON_INCOMPLETE_INPUT === $this->state ) {
			$this->state = self::STATE_QUERY;
		}
	}

	/**
	 * Advances to the next query in the buffer.
	 *
	 * @return bool True when a query was found; read it with get_query().
	 *              False otherwise; get_state() tells why.
	 */
	public function next_query() {
		$this->last_query = false;

		// Nothing can change until more SQL is appended (paused) or ever
		// again (finished), so don't tokenize the same buffer repeatedly.
		if (
			self::STATE_PAUSED_ON_INCOMPLETE_INPUT === $this->state ||
			self::STATE_FINISHED === $this->state
		) {
			return false;
		}

		$result = $this->do_next_query();
		if ( ! $result && strlen( $this->sql_buffer ) > self::MAX_SQL_BUFFER_SIZE ) {
			// Too much input without a single query – assume a syntax error.
			$this->state = self::STATE_SYNTAX_ERROR;
			return false;
		}
		return $result;
	}

	/**
	 * Tokenizes the buffer and extracts the first query, if there is one.
	 *
	 * @return bool True when a query was moved from the buffer to $last_query.
	 */
	private function do_next_query() {
		$query = array();
		$lexer = new WP_MySQL_Lexer( $this->sql_buffer );
		while ( $lexer->next_token() ) {
			$token   = $lexer->get_token();
			$query[] = $token;
			if ( WP_MySQL_Lexer::SEMICOLON_SYMBOL === $token->id ) {
				// Got a complete query!
				break;
			}
		}

		// @TODO: Once the lexer exposes its state, return false here when
		//        the lexer itself reports a syntax error.

		if ( empty( $query ) ) {
			return $this->pause_or_finish();
		}

		// The last token either needs to be a semicolon, or be the
		// last token in the input.
		$last_token = $query[ count( $query ) - 1 ];
		if (
			WP_MySQL_Lexer::SEMICOLON_SYMBOL !== $last_token->id &&
			! $this->input_complete
		) {
			$this->state = self::STATE_PAUSED_ON_INCOMPLETE_INPUT;
			return false;
		}

		// We don't want to give the caller a comment disguised as a query.
		if ( ! $this->has_meaningful_tokens( $query ) ) {
			return $this->pause_or_finish();
		}

		// Remove the query from the input buffer and return it.
		$last_byte        = $last_token->start + $last_token->length;
		$this->last_query = substr( $this->sql_buffer, 0, $last_byte );
		$this->sql_buffer = (string) substr( $this->sql_buffer, $last_byte );
		$this->state      = self::STATE_QUERY;
		return true;
	}

	/**
	 * Records that the buffer holds no query right now.
	 *
	 * Sets STATE_FINISHED when the input is complete and
	 * STATE_PAUSED_ON_INCOMPLETE_INPUT otherwise.
	 *
	 * @return bool Always false, so callers can return the result directly.
	 */
	private function pause_or_finish(): bool {
		$this->state = $this->input_complete
			? self::STATE_FINISHED
			: self::STATE_PAUSED_ON_INCOMPLETE_INPUT;
		return false;
	}

	/**
	 * Tells whether the tokens contain anything besides whitespace, comments,
	 * comment delimiters, and the end-of-input marker.
	 *
	 * @param array $tokens Tokens produced by WP_MySQL_Lexer.
	 * @return bool
	 */
	private function has_meaningful_tokens( array $tokens ): bool {
		$ignorable_ids = array(
			WP_MySQL_Lexer::WHITESPACE,
			WP_MySQL_Lexer::COMMENT,
			WP_MySQL_Lexer::MYSQL_COMMENT_START,
			WP_MySQL_Lexer::MYSQL_COMMENT_END,
			WP_MySQL_Lexer::EOF,
		);
		foreach ( $tokens as $token ) {
			if ( ! in_array( $token->id, $ignorable_ids, true ) ) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Returns the query found by the most recent next_query() call.
	 *
	 * @return string|false The query text exactly as it appeared in the input,
	 *                      or false when the last next_query() call found none.
	 */
	public function get_query() {
		return $this->last_query;
	}

	/**
	 * Returns the current state of the stream.
	 *
	 * @return string One of the STATE_* constants.
	 */
	public function get_state() {
		return $this->state;
	}

}