Skip to content

Commit

Permalink
Merge pull request #251 from andialbrecht/filters_sql
Browse files Browse the repository at this point in the history
Update Filters sql
  • Loading branch information
vmuriart committed Jun 6, 2016
2 parents c6a5e7a + 5747015 commit b9d81ac
Show file tree
Hide file tree
Showing 14 changed files with 249 additions and 1,024 deletions.
7 changes: 0 additions & 7 deletions sqlparse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,4 @@ def split(sql, encoding=None):
:returns: A list of strings.
"""
stack = engine.FilterStack()
stack.split_statements = True
return [u(stmt).strip() for stmt in stack.run(sql, encoding)]


def split2(stream):
from sqlparse.engine.filter import StatementFilter
splitter = StatementFilter()
return list(splitter.process(None, stream))
10 changes: 10 additions & 0 deletions sqlparse/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ def u(s, encoding=None):
return str(s)


def unicode_compatible(cls):
    """Identity class decorator.

    On this branch (where ``text_type`` is ``str``) no wrapping is
    needed, so the class is returned untouched.
    """
    return cls


text_type = str
string_types = (str,)
from io import StringIO
Expand All @@ -39,6 +43,12 @@ def u(s, encoding=None):
return unicode(s, encoding)


def unicode_compatible(cls):
    """Class decorator for text-returning ``__str__`` implementations.

    The class's original ``__str__`` (expected to return text) is kept
    as ``__unicode__``, and ``__str__`` is replaced by a wrapper that
    UTF-8-encodes that text.  The decorated class is returned.
    """
    cls.__unicode__ = cls.__str__

    def _encoded_str(self):
        return self.__unicode__().encode('utf-8')

    cls.__str__ = _encoded_str
    return cls


text_type = unicode
string_types = (basestring,)
from StringIO import StringIO
48 changes: 12 additions & 36 deletions sqlparse/engine/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,10 @@


class FilterStack(object):

def __init__(self):
self.preprocess = []
self.stmtprocess = []
self.postprocess = []
self.split_statements = False
self._grouping = False

def enable_grouping(self):
Expand All @@ -27,42 +25,20 @@ def enable_grouping(self):
def run(self, sql, encoding=None):
stream = lexer.tokenize(sql, encoding)
# Process token stream
if self.preprocess:
for filter_ in self.preprocess:
stream = filter_.process(self, stream)

if (self.stmtprocess or self.postprocess or
self.split_statements or self._grouping):
splitter = StatementFilter()
stream = splitter.process(self, stream)

if self._grouping:

def _group(stream):
for stmt in stream:
grouping.group(stmt)
yield stmt
stream = _group(stream)
for filter_ in self.preprocess:
stream = filter_.process(stream)

if self.stmtprocess:
stream = StatementFilter().process(stream)

def _run1(stream):
ret = []
for stmt in stream:
for filter_ in self.stmtprocess:
filter_.process(self, stmt)
ret.append(stmt)
return ret
stream = _run1(stream)
# Output: Stream processed Statements
for stmt in stream:
if self._grouping:
stmt = grouping.group(stmt)

if self.postprocess:
for filter_ in self.stmtprocess:
filter_.process(stmt)

def _run2(stream):
for stmt in stream:
stmt.tokens = list(stmt.flatten())
for filter_ in self.postprocess:
stmt = filter_.process(self, stmt)
yield stmt
stream = _run2(stream)
for filter_ in self.postprocess:
stmt = filter_.process(stmt)

return stream
yield stmt
100 changes: 53 additions & 47 deletions sqlparse/engine/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,113 +5,119 @@
# This module is part of python-sqlparse and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php

from sqlparse.sql import Statement, Token
from sqlparse import tokens as T
from sqlparse import sql, tokens as T


class StatementFilter(object):
"Filter that split stream at individual statements"
"""Filter that split stream at individual statements"""

def __init__(self):
self._in_declare = False
self._in_dbldollar = False
self._is_create = False
self._begin_depth = 0
self._reset()

def _reset(self):
"Set the filter attributes to its default values"
"""Set the filter attributes to its default values"""
self._in_declare = False
self._in_dbldollar = False
self._is_create = False
self._begin_depth = 0

self.consume_ws = False
self.tokens = []
self.level = 0

def _change_splitlevel(self, ttype, value):
"Get the new split level (increase, decrease or remain equal)"
"""Get the new split level (increase, decrease or remain equal)"""
# PostgreSQL
if ttype == T.Name.Builtin \
and value.startswith('$') and value.endswith('$'):
if ttype == T.Name.Builtin and value[0] == '$' and value[-1] == '$':

# 2nd dbldollar found. $quote$ completed
# decrease level
if self._in_dbldollar:
self._in_dbldollar = False
return -1
else:
self._in_dbldollar = True
return 1

# if inside $$ everything inside is defining function character.
# Nothing inside can create a new statement
elif self._in_dbldollar:
return 0

# ANSI
# if normal token return
# wouldn't parenthesis increase/decrease a level?
# no, inside a parenthesis a new statement can't start
if ttype not in T.Keyword:
return 0

# Everything after here is ttype = T.Keyword
# Note: once one of the branches below matches, we are done and the
# function returns
unified = value.upper()

# three keywords begin with CREATE, but only one of them is DDL
# DDL Create though can contain more words such as "or replace"
if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
self._is_create = True
return 0

# can have nested declare inside of being...
if unified == 'DECLARE' and self._is_create and self._begin_depth == 0:
self._in_declare = True
return 1

if unified == 'BEGIN':
self._begin_depth += 1
if self._in_declare or self._is_create:
if self._is_create:
# FIXME(andi): This makes no sense.
return 1
return 0

if unified in ('END IF', 'END FOR', 'END WHILE'):
return -1

# Should this respect a preceding BEGIN?
# In CASE ... WHEN ... END this results in a split level -1.
# Would having multiple CASE WHEN END and an Assignment Operator
# cause the statement to cut off prematurely?
if unified == 'END':
# Should this respect a preceding BEGIN?
# In CASE ... WHEN ... END this results in a split level -1.
self._begin_depth = max(0, self._begin_depth - 1)
return -1

if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
self._is_create = True
return 0

if unified in ('IF', 'FOR', 'WHILE') \
and self._is_create and self._begin_depth > 0:
if (unified in ('IF', 'FOR', 'WHILE') and
self._is_create and self._begin_depth > 0):
return 1

if unified in ('END IF', 'END FOR', 'END WHILE'):
return -1

# Default
return 0

def process(self, stack, stream):
"Process the stream"
consume_ws = False
splitlevel = 0
stmt = None
stmt_tokens = []
def process(self, stream):
"""Process the stream"""
EOS_TTYPE = T.Whitespace, T.Comment.Single

# Run over all stream tokens
for ttype, value in stream:
# Yield token if we finished a statement and there's no whitespaces
if consume_ws and ttype not in (T.Whitespace, T.Comment.Single):
stmt.tokens = stmt_tokens
yield stmt
# It will count newline token as a non whitespace. In this context
# whitespace ignores newlines.
# why don't multi line comments also count?
if self.consume_ws and ttype not in EOS_TTYPE:
yield sql.Statement(self.tokens)

# Reset filter and prepare to process next statement
self._reset()
consume_ws = False
splitlevel = 0
stmt = None

# Create a new statement if we are not currently in one of them
if stmt is None:
stmt = Statement()
stmt_tokens = []

# Change current split level (increase, decrease or remain equal)
splitlevel += self._change_splitlevel(ttype, value)
self.level += self._change_splitlevel(ttype, value)

# Append the token to the current statement
stmt_tokens.append(Token(ttype, value))
self.tokens.append(sql.Token(ttype, value))

# Check if we get the end of a statement
if splitlevel <= 0 and ttype is T.Punctuation and value == ';':
consume_ws = True
if self.level <= 0 and ttype is T.Punctuation and value == ';':
self.consume_ws = True

# Yield pending statement (if any)
if stmt is not None:
stmt.tokens = stmt_tokens
yield stmt
if self.tokens:
yield sql.Statement(self.tokens)
5 changes: 3 additions & 2 deletions sqlparse/engine/grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ def align_comments(tlist):
token = tlist.token_next_by(i=sql.Comment, idx=token)


def group(tlist):
def group(stmt):
for func in [
group_comments,
group_brackets,
Expand All @@ -291,4 +291,5 @@ def group(tlist):
group_foreach,
group_begin,
]:
func(tlist)
func(stmt)
return stmt

0 comments on commit b9d81ac

Please sign in to comment.