Skip to content

Commit

Permalink
Merge 62e5ae2 into 4080544
Browse files Browse the repository at this point in the history
  • Loading branch information
iamcal committed Mar 5, 2022
2 parents 4080544 + 62e5ae2 commit b7f79e7
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 0 deletions.
4 changes: 4 additions & 0 deletions README.md
Expand Up @@ -98,6 +98,10 @@ The `tokens` property contains an array of tokens. SQL keywords are returned as
with multi-word terms (e.g. `DEFAULT CHARACTER SET`) as a single token. Strings and escaped
identifiers are not further processed; they are returned exactly as expressed in the input SQL.

By default, the tokenizer silently stops at an unterminated comment, string, or backtick-quoted
identifier and produces no further tokens. Set `$parser->throw_on_bad_syntax = true;` to have it
throw an exception of type `iamcal\SQLParserSyntaxException` instead.


## Performance

Expand Down
11 changes: 11 additions & 0 deletions src/SQLParser.php
Expand Up @@ -2,6 +2,8 @@

namespace iamcal;

#
# Raised by the lexer when `throw_on_bad_syntax` is enabled and the input
# contains an unterminated comment, backtick-quoted identifier or string.
# Carries no extra state beyond the standard exception message.
#
class SQLParserSyntaxException extends \Exception
{
}

class SQLParser{

#
Expand All @@ -13,6 +15,7 @@ class SQLParser{
public $source_map = array();

public $find_single_table = false;
public $throw_on_bad_syntax = false;

public function parse($sql){

Expand Down Expand Up @@ -59,6 +62,7 @@ private function _lex($sql){
if (preg_match('!--!A', $sql, $m, 0, $pos)){
$p2 = strpos($sql, "\n", $pos);
if ($p2 === false){
if ($this->throw_on_bad_syntax) throw new SQLParserSyntaxException("Unterminated comment at position $pos");
$pos = $len;
}else{
$pos = $p2+1;
Expand All @@ -68,6 +72,7 @@ private function _lex($sql){
if (preg_match('!/\\*!A', $sql, $m, 0, $pos)){
$p2 = strpos($sql, "*/", $pos);
if ($p2 === false){
if ($this->throw_on_bad_syntax) throw new SQLParserSyntaxException("Unterminated comment at position $pos");
$pos = $len;
}else{
$pos = $p2+2;
Expand All @@ -88,6 +93,7 @@ private function _lex($sql){
if (substr($sql, $pos, 1) == '`'){
$p2 = strpos($sql, "`", $pos+1);
if ($p2 === false){
if ($this->throw_on_bad_syntax) throw new SQLParserSyntaxException("Unterminated backtick at position $pos");
$pos = $len;
}else{
$source_map[] = array($pos, 1+$p2-$pos);
Expand All @@ -113,6 +119,7 @@ private function _lex($sql){

# <character string literal>
if ($sql[$pos] == "'" || $sql[$pos] == '"'){
$str_start_pos = $pos;
$c = $pos+1;
$q = $sql[$pos];
while ($c < strlen($sql)){
Expand All @@ -128,6 +135,10 @@ private function _lex($sql){
}
$c++;
}
if ($c >= strlen($sql)){
if ($this->throw_on_bad_syntax) throw new SQLParserSyntaxException("Unterminated string at position $str_start_pos");
$pos = $len;
}
continue;
}

Expand Down
40 changes: 40 additions & 0 deletions tests/InvalidTest.php
@@ -0,0 +1,40 @@
<?php
use PHPUnit\Framework\TestCase;

final class InvalidTest extends TestCase{

	# Tests for invalid inputs: SQL containing an unterminated backtick-quoted
	# identifier or an unterminated string literal.

	# Default mode: the lexer stops silently at the unterminated construct, so
	# only the tokens that precede it are returned.
	public function testBrokenSyntaxRegular(){

		// by default, bad syntax (unterminated strings, comments, etc) will just not produce a token

		$obj = new iamcal\SQLParser();

		// the backtick opened before `users` is never closed: one token is expected before it
		$tokens = $obj->lex("CREATE TABLE `users ( id int(10) )");
		$this->assertCount(1, $tokens);

		// the lone single quote opens a string that never ends: two tokens are expected before it
		$tokens = $obj->lex("CREATE TABLE `users` ' ( `id` int(10) )");
		$this->assertCount(2, $tokens);
	}

	# Exception mode: an unterminated backtick-quoted identifier must throw.
	public function testBrokenSyntaxException1(){

		// in exception mode, it throws an exception...

		$obj = new iamcal\SQLParser();
		$obj->throw_on_bad_syntax = true;

		// the expectation has to be registered before the call that throws
		$this->expectException(iamcal\SQLParserSyntaxException::class);
		$obj->lex("CREATE TABLE `users ( id int(10) )");
	}

	# Exception mode: an unterminated string literal must throw.
	public function testBrokenSyntaxException2(){

		$obj = new iamcal\SQLParser();
		$obj->throw_on_bad_syntax = true;

		// the expectation has to be registered before the call that throws
		$this->expectException(iamcal\SQLParserSyntaxException::class);
		$obj->lex("CREATE TABLE `users` ' ( `id` int(10) )");
	}
}

0 comments on commit b7f79e7

Please sign in to comment.