Skip to content

Commit 5e10970

Browse files
committed
added parsing for PostgreSQL operations
1 parent 118a345 commit 5e10970

File tree

6 files changed

+257
-7
lines changed

6 files changed

+257
-7
lines changed

src/ast/mod.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,11 @@ pub enum Expr {
191191
right: Box<Expr>,
192192
},
193193
/// Unary operation e.g. `NOT foo`
194-
UnaryOp { op: UnaryOperator, expr: Box<Expr> },
194+
UnaryOp {
195+
op: UnaryOperator,
196+
expr: Box<Expr>,
197+
infix: bool,
198+
},
195199
/// CAST an expression to a different data type e.g. `CAST(foo AS VARCHAR(123))`
196200
Cast {
197201
expr: Box<Expr>,
@@ -282,7 +286,13 @@ impl fmt::Display for Expr {
282286
high
283287
),
284288
Expr::BinaryOp { left, op, right } => write!(f, "{} {} {}", left, op, right),
285-
Expr::UnaryOp { op, expr } => write!(f, "{} {}", op, expr),
289+
Expr::UnaryOp { op, expr, infix } => {
290+
if *infix {
291+
write!(f, "{}{}", expr, op)
292+
} else {
293+
write!(f, "{} {}", op, expr)
294+
}
295+
}
286296
Expr::Cast { expr, data_type } => write!(f, "CAST({} AS {})", expr, data_type),
287297
Expr::Extract { field, expr } => write!(f, "EXTRACT({} FROM {})", field, expr),
288298
Expr::Collate { expr, collation } => write!(f, "{} COLLATE {}", expr, collation),

src/ast/operator.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ pub enum UnaryOperator {
2121
Plus,
2222
Minus,
2323
Not,
24+
PGBitwiseNot,
25+
PGSqrt,
26+
PGCbrt,
27+
PGFactorial,
28+
PGInfixFactorial,
29+
PGAbs,
2430
}
2531

2632
impl fmt::Display for UnaryOperator {
@@ -29,6 +35,12 @@ impl fmt::Display for UnaryOperator {
2935
UnaryOperator::Plus => "+",
3036
UnaryOperator::Minus => "-",
3137
UnaryOperator::Not => "NOT",
38+
UnaryOperator::PGBitwiseNot => "~",
39+
UnaryOperator::PGSqrt => "|/",
40+
UnaryOperator::PGCbrt => "||/",
41+
UnaryOperator::PGFactorial => "!",
42+
UnaryOperator::PGInfixFactorial => "!!",
43+
UnaryOperator::PGAbs => "@",
3244
})
3345
}
3446
}
@@ -56,6 +68,9 @@ pub enum BinaryOperator {
5668
BitwiseOr,
5769
BitwiseAnd,
5870
BitwiseXor,
71+
PGBitwiseXor,
72+
PGBitwiseShiftLeft,
73+
PGBitwiseShiftRight,
5974
}
6075

6176
impl fmt::Display for BinaryOperator {
@@ -80,6 +95,9 @@ impl fmt::Display for BinaryOperator {
8095
BinaryOperator::BitwiseOr => "|",
8196
BinaryOperator::BitwiseAnd => "&",
8297
BinaryOperator::BitwiseXor => "^",
98+
BinaryOperator::PGBitwiseXor => "#",
99+
BinaryOperator::PGBitwiseShiftLeft => "<<",
100+
BinaryOperator::PGBitwiseShiftRight => ">>",
83101
})
84102
}
85103
}

src/parser.rs

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ impl<'a> Parser<'a> {
250250
Keyword::NOT => Ok(Expr::UnaryOp {
251251
op: UnaryOperator::Not,
252252
expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
253+
infix: false,
253254
}),
254255
// Here `w` is a word, check if it's a part of a multi-part
255256
// identifier, a function call, or a simple identifier:
@@ -283,6 +284,31 @@ impl<'a> Parser<'a> {
283284
},
284285
}, // End of Token::Word
285286
Token::Mult => Ok(Expr::Wildcard),
287+
Token::Tilde => Ok(Expr::UnaryOp {
288+
op: UnaryOperator::PGBitwiseNot,
289+
expr: Box::new(self.parse_subexpr(0)?),
290+
infix: false,
291+
}),
292+
Token::DoubleExclamationMark => Ok(Expr::UnaryOp {
293+
op: UnaryOperator::PGInfixFactorial,
294+
expr: Box::new(self.parse_subexpr(0)?),
295+
infix: false,
296+
}),
297+
Token::SquareRoot => Ok(Expr::UnaryOp {
298+
op: UnaryOperator::PGSqrt,
299+
expr: Box::new(self.parse_subexpr(0)?),
300+
infix: false,
301+
}),
302+
Token::CubeRoot => Ok(Expr::UnaryOp {
303+
op: UnaryOperator::PGCbrt,
304+
expr: Box::new(self.parse_subexpr(0)?),
305+
infix: false,
306+
}),
307+
Token::Ampersat => Ok(Expr::UnaryOp {
308+
op: UnaryOperator::PGAbs,
309+
expr: Box::new(self.parse_subexpr(0)?),
310+
infix: false,
311+
}),
286312
tok @ Token::Minus | tok @ Token::Plus => {
287313
let op = if tok == Token::Plus {
288314
UnaryOperator::Plus
@@ -292,6 +318,7 @@ impl<'a> Parser<'a> {
292318
Ok(Expr::UnaryOp {
293319
op,
294320
expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?),
321+
infix: false,
295322
})
296323
}
297324
Token::Number(_)
@@ -658,6 +685,9 @@ impl<'a> Parser<'a> {
658685
Token::Caret => Some(BinaryOperator::BitwiseXor),
659686
Token::Ampersand => Some(BinaryOperator::BitwiseAnd),
660687
Token::Div => Some(BinaryOperator::Divide),
688+
Token::ShiftLeft => Some(BinaryOperator::PGBitwiseShiftLeft),
689+
Token::ShiftRight => Some(BinaryOperator::PGBitwiseShiftRight),
690+
Token::Sharp => Some(BinaryOperator::PGBitwiseXor),
661691
Token::Word(w) => match w.keyword {
662692
Keyword::AND => Some(BinaryOperator::And),
663693
Keyword::OR => Some(BinaryOperator::Or),
@@ -707,6 +737,13 @@ impl<'a> Parser<'a> {
707737
}
708738
} else if Token::DoubleColon == tok {
709739
self.parse_pg_cast(expr)
740+
} else if Token::ExclamationMark == tok {
741+
// PostgreSQL postfix factorial operator, e.g. `5!`
742+
Ok(Expr::UnaryOp {
743+
op: UnaryOperator::PGFactorial,
744+
expr: Box::new(expr),
745+
infix: true,
746+
})
710747
} else {
711748
// Can only happen if `get_next_precedence` got out of sync with this function
712749
panic!("No infix parser for token {:?}", tok)
@@ -785,11 +822,12 @@ impl<'a> Parser<'a> {
785822
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC),
786823
Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq => Ok(20),
787824
Token::Pipe => Ok(21),
788-
Token::Caret => Ok(22),
825+
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22),
789826
Token::Ampersand => Ok(23),
790827
Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC),
791828
Token::Mult | Token::Div | Token::Mod | Token::StringConcat => Ok(40),
792829
Token::DoubleColon => Ok(50),
830+
Token::ExclamationMark => Ok(50),
793831
_ => Ok(0),
794832
}
795833
}

src/tokenizer.rs

Lines changed: 103 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ pub enum Token {
5353
Neq,
5454
/// Less Than operator `<`
5555
Lt,
56-
/// Greater han operator `>`
56+
/// Greater Than operator `>`
5757
Gt,
5858
/// Less Than Or Equals operator `<=`
5959
LtEq,
@@ -101,6 +101,24 @@ pub enum Token {
101101
RBrace,
102102
/// Right Arrow `=>`
103103
RArrow,
104+
/// Sharp `#`, used for the PostgreSQL bitwise XOR operator
105+
Sharp,
106+
/// Tilde `~`, used for the PostgreSQL bitwise NOT operator
107+
Tilde,
108+
/// Bitwise shift-left operator `<<`, used for PostgreSQL
109+
ShiftLeft,
110+
/// Bitwise shift-right operator `>>`, used for PostgreSQL
111+
ShiftRight,
112+
/// Exclamation mark `!`, used for the PostgreSQL postfix factorial operator
113+
ExclamationMark,
114+
/// Double exclamation mark `!!`, used for the PostgreSQL prefix factorial operator
115+
DoubleExclamationMark,
116+
/// Ampersat `@`, used for the PostgreSQL absolute-value operator
117+
Ampersat,
118+
/// PostgreSQL square root math operator `|/`
119+
SquareRoot,
120+
/// PostgreSQL cube root math operator `||/`
121+
CubeRoot,
104122
}
105123

106124
impl fmt::Display for Token {
@@ -142,6 +160,15 @@ impl fmt::Display for Token {
142160
Token::LBrace => f.write_str("{"),
143161
Token::RBrace => f.write_str("}"),
144162
Token::RArrow => f.write_str("=>"),
163+
Token::Sharp => f.write_str("#"),
164+
Token::ExclamationMark => f.write_str("!"),
165+
Token::DoubleExclamationMark => f.write_str("!!"),
166+
Token::Tilde => f.write_str("~"),
167+
Token::Ampersat => f.write_str("@"),
168+
Token::ShiftLeft => f.write_str("<<"),
169+
Token::ShiftRight => f.write_str(">>"),
170+
Token::SquareRoot => f.write_str("|/"),
171+
Token::CubeRoot => f.write_str("||/"),
145172
}
146173
}
147174
}
@@ -398,7 +425,14 @@ impl<'a> Tokenizer<'a> {
398425
'|' => {
399426
chars.next(); // consume the '|'
400427
match chars.peek() {
401-
Some('|') => self.consume_and_return(chars, Token::StringConcat),
428+
Some('/') => self.consume_and_return(chars, Token::SquareRoot),
429+
Some('|') => {
430+
chars.next(); // consume the second '|'
431+
match chars.peek() {
432+
Some('/') => self.consume_and_return(chars, Token::CubeRoot),
433+
_ => Ok(Some(Token::StringConcat)),
434+
}
435+
}
402436
// Bitwise OR '|' operator
403437
_ => Ok(Some(Token::Pipe)),
404438
}
@@ -415,21 +449,24 @@ impl<'a> Tokenizer<'a> {
415449
chars.next(); // consume
416450
match chars.peek() {
417451
Some('=') => self.consume_and_return(chars, Token::Neq),
418-
_ => self.tokenizer_error("Expected to see '=' after '!' character"),
452+
Some('!') => self.consume_and_return(chars, Token::DoubleExclamationMark),
453+
_ => Ok(Some(Token::ExclamationMark)),
419454
}
420455
}
421456
'<' => {
422457
chars.next(); // consume
423458
match chars.peek() {
424459
Some('=') => self.consume_and_return(chars, Token::LtEq),
425460
Some('>') => self.consume_and_return(chars, Token::Neq),
461+
Some('<') => self.consume_and_return(chars, Token::ShiftLeft),
426462
_ => Ok(Some(Token::Lt)),
427463
}
428464
}
429465
'>' => {
430466
chars.next(); // consume
431467
match chars.peek() {
432468
Some('=') => self.consume_and_return(chars, Token::GtEq),
469+
Some('>') => self.consume_and_return(chars, Token::ShiftRight),
433470
_ => Ok(Some(Token::Gt)),
434471
}
435472
}
@@ -448,6 +485,9 @@ impl<'a> Tokenizer<'a> {
448485
'^' => self.consume_and_return(chars, Token::Caret),
449486
'{' => self.consume_and_return(chars, Token::LBrace),
450487
'}' => self.consume_and_return(chars, Token::RBrace),
488+
'~' => self.consume_and_return(chars, Token::Tilde),
489+
'#' => self.consume_and_return(chars, Token::Sharp),
490+
'@' => self.consume_and_return(chars, Token::Ampersat),
451491
other => self.consume_and_return(chars, Token::Char(other)),
452492
},
453493
None => Ok(None),
@@ -560,6 +600,7 @@ mod tests {
560600
use super::super::dialect::GenericDialect;
561601
use super::super::dialect::MsSqlDialect;
562602
use super::*;
603+
use crate::dialect::PostgreSqlDialect;
563604

564605
#[test]
565606
fn tokenize_select_1() {
@@ -930,6 +971,65 @@ mod tests {
930971
compare(expected, tokens);
931972
}
932973

974+
#[test]
975+
fn tokenize_postgresql_bitwise_operations() {
976+
let sql = String::from("SELECT ~one << two # three >> four");
977+
let dialect = PostgreSqlDialect {};
978+
let mut tokenizer = Tokenizer::new(&dialect, &sql);
979+
let tokens = tokenizer.tokenize().unwrap();
980+
981+
let expected = vec![
982+
Token::make_keyword("SELECT"),
983+
Token::Whitespace(Whitespace::Space),
984+
Token::Tilde,
985+
Token::make_word("one", None),
986+
Token::Whitespace(Whitespace::Space),
987+
Token::ShiftLeft,
988+
Token::Whitespace(Whitespace::Space),
989+
Token::make_word("two", None),
990+
Token::Whitespace(Whitespace::Space),
991+
Token::Sharp,
992+
Token::Whitespace(Whitespace::Space),
993+
Token::make_word("three", None),
994+
Token::Whitespace(Whitespace::Space),
995+
Token::ShiftRight,
996+
Token::Whitespace(Whitespace::Space),
997+
Token::make_word("four", None),
998+
];
999+
1000+
compare(expected, tokens);
1001+
}
1002+
1003+
#[test]
1004+
fn tokenize_postgresql_math_operations() {
1005+
let sql = String::from("SELECT !!5 5! @-6 |/4 ||/8");
1006+
let dialect = PostgreSqlDialect {};
1007+
let mut tokenizer = Tokenizer::new(&dialect, &sql);
1008+
let tokens = tokenizer.tokenize().unwrap();
1009+
1010+
let expected = vec![
1011+
Token::make_keyword("SELECT"),
1012+
Token::Whitespace(Whitespace::Space),
1013+
Token::DoubleExclamationMark,
1014+
Token::Number("5".to_string()),
1015+
Token::Whitespace(Whitespace::Space),
1016+
Token::Number("5".to_string()),
1017+
Token::ExclamationMark,
1018+
Token::Whitespace(Whitespace::Space),
1019+
Token::Ampersat,
1020+
Token::Minus,
1021+
Token::Number("6".to_string()),
1022+
Token::Whitespace(Whitespace::Space),
1023+
Token::SquareRoot,
1024+
Token::Number("4".to_string()),
1025+
Token::Whitespace(Whitespace::Space),
1026+
Token::CubeRoot,
1027+
Token::Number("8".to_string()),
1028+
];
1029+
1030+
compare(expected, tokens);
1031+
}
1032+
9331033
fn compare(expected: Vec<Token>, actual: Vec<Token>) {
9341034
//println!("------------------------------");
9351035
//println!("tokens = {:?}", actual);

tests/sqlparser_common.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,8 @@ fn parse_select_count_distinct() {
343343
name: ObjectName(vec![Ident::new("COUNT")]),
344344
args: vec![FunctionArg::Unnamed(Expr::UnaryOp {
345345
op: UnaryOperator::Plus,
346-
expr: Box::new(Expr::Identifier(Ident::new("x")))
346+
expr: Box::new(Expr::Identifier(Ident::new("x"))),
347+
infix: false,
347348
})],
348349
over: None,
349350
distinct: true,
@@ -506,11 +507,13 @@ fn parse_unary_math() {
506507
left: Box::new(UnaryOp {
507508
op: UnaryOperator::Minus,
508509
expr: Box::new(Identifier(Ident::new("a"))),
510+
infix: false,
509511
}),
510512
op: BinaryOperator::Plus,
511513
right: Box::new(UnaryOp {
512514
op: UnaryOperator::Minus,
513515
expr: Box::new(Identifier(Ident::new("b"))),
516+
infix: false,
514517
}),
515518
},
516519
verified_expr(sql)
@@ -565,6 +568,7 @@ fn parse_not_precedence() {
565568
high: Box::new(Expr::Value(number("2"))),
566569
negated: true,
567570
}),
571+
infix: false,
568572
},
569573
);
570574

@@ -579,6 +583,7 @@ fn parse_not_precedence() {
579583
op: BinaryOperator::NotLike,
580584
right: Box::new(Expr::Value(Value::SingleQuotedString("b".into()))),
581585
}),
586+
infix: false,
582587
},
583588
);
584589

@@ -593,6 +598,7 @@ fn parse_not_precedence() {
593598
list: vec![Expr::Value(Value::SingleQuotedString("a".into()))],
594599
negated: true,
595600
}),
601+
infix: false,
596602
},
597603
);
598604
}
@@ -2606,6 +2612,7 @@ fn parse_exists_subquery() {
26062612
Expr::UnaryOp {
26072613
op: UnaryOperator::Not,
26082614
expr: Box::new(Expr::Exists(Box::new(expected_inner))),
2615+
infix: false,
26092616
},
26102617
select.selection.unwrap(),
26112618
);

0 commit comments

Comments
 (0)