diff --git a/.github/workflows/php.yml b/.github/workflows/php.yml index 7a68055..b098f1f 100644 --- a/.github/workflows/php.yml +++ b/.github/workflows/php.yml @@ -37,7 +37,7 @@ jobs: phpunit-versions: '7.5.20' coverage: true - php-versions: '8.1' - phpunit-versions: '9.5.4' + phpunit-versions: '9.5.16' steps: - name: Setup PHP diff --git a/README.md b/README.md index d61b12a..1e2e73f 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,10 @@ The `tokens` property contains an array of tokens. SQL keywords are returned as with multi-word terms (e.g. `DEFAULT CHARACTER SET`) as a single token. Strings and escaped identifiers are not further processed; they are returned exactly as expressed in the input SQL. +By default, the tokenizer will ignore unterminated comments, strings and backtick-quoted identifiers, and stop parsing at +that point, producing no further tokens. You can set `$parser->throw_on_bad_syntax = true;` to +throw an exception of type `iamcal\SQLParserSyntaxException` instead. + ## Performance diff --git a/src/SQLParser.php b/src/SQLParser.php index ed4eb15..b1e52af 100644 --- a/src/SQLParser.php +++ b/src/SQLParser.php @@ -2,6 +2,8 @@ namespace iamcal; +class SQLParserSyntaxException extends \Exception { } + class SQLParser{ # @@ -13,6 +15,7 @@ class SQLParser{ public $source_map = array(); public $find_single_table = false; + public $throw_on_bad_syntax = false; public function parse($sql){ @@ -59,6 +62,7 @@ private function _lex($sql){ if (preg_match('!--!A', $sql, $m, 0, $pos)){ $p2 = strpos($sql, "\n", $pos); if ($p2 === false){ + if ($this->throw_on_bad_syntax) throw new SQLParserSyntaxException("Unterminated comment at position $pos"); $pos = $len; }else{ $pos = $p2+1; @@ -68,6 +72,7 @@ private function _lex($sql){ if (preg_match('!/\\*!A', $sql, $m, 0, $pos)){ $p2 = strpos($sql, "*/", $pos); if ($p2 === false){ + if ($this->throw_on_bad_syntax) throw new SQLParserSyntaxException("Unterminated comment at position $pos"); $pos = $len; }else{ $pos = $p2+2; @@ -88,6 +93,7 @@ 
private function _lex($sql){ if (substr($sql, $pos, 1) == '`'){ $p2 = strpos($sql, "`", $pos+1); if ($p2 === false){ + if ($this->throw_on_bad_syntax) throw new SQLParserSyntaxException("Unterminated backtick at position $pos"); $pos = $len; }else{ $source_map[] = array($pos, 1+$p2-$pos); @@ -113,6 +119,7 @@ private function _lex($sql){ # if ($sql[$pos] == "'" || $sql[$pos] == '"'){ + $str_start_pos = $pos; $c = $pos+1; $q = $sql[$pos]; while ($c < strlen($sql)){ @@ -128,6 +135,10 @@ private function _lex($sql){ } $c++; } + if ($c >= strlen($sql)){ + if ($this->throw_on_bad_syntax) throw new SQLParserSyntaxException("Unterminated string at position $str_start_pos"); + $pos = $len; + } continue; } diff --git a/tests/InvalidTest.php b/tests/InvalidTest.php new file mode 100644 index 0000000..20e8dce --- /dev/null +++ b/tests/InvalidTest.php @@ -0,0 +1,43 @@ +lex("CREATE TABLE `users ( id int(10) )"); + $this->assertEquals(count($tokens), 1); + + $tokens = $obj->lex("CREATE TABLE `users` ' ( `id` int(10) )"); + $this->assertEquals(count($tokens), 2); + } + + function testBrokenSyntaxException(){ + + // in exception mode, it throws an exception... + + $obj = new iamcal\SQLParser(); + $obj->throw_on_bad_syntax = true; + + try { + $obj->lex("CREATE TABLE `users ( id int(10) )"); + $this->fail("Expected Exception has not been raised"); + } catch (Exception $ex) { + $this->assertInstanceOf('iamcal\SQLParserSyntaxException', $ex); + } + + try { + $obj->lex("CREATE TABLE `users` ' ( `id` int(10) )"); + $this->fail("Expected Exception has not been raised"); + } catch (Exception $ex) { + $this->assertInstanceOf('iamcal\SQLParserSyntaxException', $ex); + } + + } + }