Skip to content

Commit

Permalink
Merge pull request #2 from erik/erik/dont-do-this
Browse files Browse the repository at this point in the history
Add rules from Postgres' "Don't Do This" list
  • Loading branch information
erik committed May 7, 2019
2 parents 00aab44 + 7b0b6dd commit 67d1f89
Show file tree
Hide file tree
Showing 15 changed files with 468 additions and 23 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,19 @@ New
~~~

- Added `-x, --expanded` to show explanations of message codes after linting.
- Added `DisallowNotIn` rule.
- Added `DisallowPaddedCharType` rule.
- Added `DisallowTimetzType` rule.
- Added `DisallowTimestampPrecision` rule.

Changes
~~~~~~~

Fixes
~~~~~

- Small documentation fixes for rules.

v1.2.0 (2019-01-14)
-------------------

Expand Down
29 changes: 29 additions & 0 deletions docs/rules.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,42 @@ DisallowFloatTypes
:show-inheritance:
:exclude-members: enable

DisallowNotIn
-------------
.. autoclass:: squabble.rules.disallow_not_in.DisallowNotIn
:members:
:show-inheritance:
:exclude-members: enable

DisallowPaddedCharType
----------------------
.. autoclass:: squabble.rules.disallow_padded_char_type.DisallowPaddedCharType
:members:
:show-inheritance:
:exclude-members: enable

DisallowRenameEnumValue
-----------------------
.. autoclass:: squabble.rules.disallow_rename_enum_value.DisallowRenameEnumValue
:members:
:show-inheritance:
:exclude-members: enable

DisallowTimestampPrecision
--------------------------
.. autoclass:: squabble.rules.disallow_timestamp_precision.DisallowTimestampPrecision
:members:
:show-inheritance:
:exclude-members: enable


DisallowTimetzType
------------------
.. autoclass:: squabble.rules.disallow_timetz_type.DisallowTimetzType
:members:
:show-inheritance:
:exclude-members: enable

RequireColumns
--------------
.. autoclass:: squabble.rules.require_columns.RequireColumns
Expand Down
12 changes: 10 additions & 2 deletions squabble/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,16 @@
'postgres': {
'description': (
'A sane set of defaults that checks for obviously '
'dangerous Postgres migrations.'
'dangerous Postgres migrations and query antipatterns.'
),
'config': {
'rules': {
'DisallowRenameEnumValue': {},
'DisallowChangeColumnType': {},
'DisallowNotIn': {},
'DisallowTimetzType': {},
'DisallowPaddedCharType': {},
'DisallowTimestampPrecision': {},
}
}
},
Expand All @@ -50,9 +54,9 @@
'AddColumnDisallowConstraints': {
'disallowed': ['DEFAULT', 'NOT NULL', 'UNIQUE']
},
'RequireConcurrentIndex': {},
'DisallowRenameEnumValue': {},
'DisallowChangeColumnType': {},
'RequireConcurrentIndex': {},
}
}
},
Expand All @@ -70,6 +74,10 @@
'DisallowRenameEnumValue': {},
'DisallowChangeColumnType': {},
'DisallowFloatTypes': {},
'DisallowNotIn': {},
'DisallowTimetzType': {},
'DisallowPaddedCharType': {},
'DisallowTimestampPrecision': {},
'RequirePrimaryKey': {},
# Yes, these are incompatible.
'DisallowForeignKey': {},
Expand Down
19 changes: 3 additions & 16 deletions squabble/rules/disallow_float_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from squabble.lint import Severity
from squabble.message import Message
from squabble.rules import BaseRule
from squabble.util import format_type_name


def _parse_column_type(typ):
Expand All @@ -21,21 +22,7 @@ def _parse_column_type(typ):
create_table = pglast.Node(pglast.parse_sql(sql))[0].stmt
type_name = create_table.tableElts[0].typeName

return _type_name_to_string(type_name)


def _type_name_to_string(node):
"""
Return a simple stringified version of a ``pglast.node.TypeName`` node.
Note that this won't be suitable for printing, and ignores type
modifiers (e.g. ``NUMERIC(3,4)`` => ``NUMERIC``).
"""

return '.'.join([
part['String']['str']
for part in node.names._items
])
return format_type_name(type_name)


class DisallowFloatTypes(BaseRule):
Expand Down Expand Up @@ -84,7 +71,7 @@ def enable(self, root_ctx, _config):

@squabble.rule.node_visitor
def _check_column_def(self, ctx, node):
col_type = _type_name_to_string(node.typeName)
col_type = format_type_name(node.typeName)

if col_type in self._INEXACT_TYPES:
ctx.report(self.LossyFloatType(), node=node, severity=Severity.LOW)
80 changes: 80 additions & 0 deletions squabble/rules/disallow_not_in.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from pglast.enums import A_Expr_Kind, BoolExprType, SubLinkType

import squabble.rule
from squabble.lint import Severity
from squabble.message import Message
from squabble.rules import BaseRule


class DisallowNotIn(BaseRule):
"""
Prevent ``NOT IN`` as part of queries, due to the unexpected behavior
around ``NULL`` values.
Configuration: ::
{ "DisallowNotIn": {} }
"""

class NotInNotAllowed(Message):
"""
``NOT IN`` (along with any expression containing ``NOT ... IN``) should
generally not be used as it behaves in unexpected ways if there is a
null present.
.. code-block:: sql
-- Always returns 0 rows
SELECT * FROM foo WHERE col NOT IN (1, null);
-- Returns 0 rows if any value of bar.x is null
SELECT * FROM foo WHERE col NOT IN (SELECT x FROM bar);
``col IN (1, null)`` returns TRUE if ``col=1``, and NULL
otherwise (i.e. it can never return FALSE).
Since ``NOT (TRUE) = FALSE``, but ``NOT (NULL) = NULL``, it is not
possible for this expression to return ``TRUE``.
If you can guarantee that there will never be a null in the list of
values, ``NOT IN`` is safe to use, but will not be optimized nicely.
"""
CODE = 1010
TEMPLATE = 'using `NOT IN` has nonintuitive behavior with null values'

def enable(self, ctx, _config):
ctx.register('A_Expr', self._check_not_in_list())

def check_bool_expr(child_ctx, child_node):
if child_node.boolop == BoolExprType.NOT_EXPR:
ctx.register('SubLink', self._check_not_in_subquery())

ctx.register('BoolExpr', check_bool_expr)

@squabble.rule.node_visitor
def _check_not_in_list(self, ctx, node):
"""Handles cases like ``WHERE NOT IN (1, 2, 3)``."""
# We're only interested in `IN` expressions
if node.kind != A_Expr_Kind.AEXPR_IN:
return

# Specifically only ``NOT IN``
elif node.name.string_value != "<>":
return

return ctx.report(
self.NotInNotAllowed(),
node=node.rexpr[0],
severity=Severity.LOW)

@squabble.rule.node_visitor
def _check_not_in_subquery(self, ctx, node):
"""Handles cases like ``WHERE NOT IN (SELECT * FROM foo)``."""

if node.subLinkType != SubLinkType.ANY_SUBLINK:
return

return ctx.report(
self.NotInNotAllowed(),
node=node,
severity=Severity.LOW)
61 changes: 61 additions & 0 deletions squabble/rules/disallow_padded_char_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import squabble.rule
from squabble.lint import Severity
from squabble.message import Message
from squabble.rules import BaseRule
from squabble.util import format_type_name


class DisallowPaddedCharType(BaseRule):
"""
Prevent using ``CHAR(n)`` data type.
Postgres recommends never using ``CHAR(n)``, as any value stored in this
type will be padded with spaces to the declared width. This padding wastes
space, but doesn't make operations on any faster; in fact the reverse,
thanks to the need to strip spaces in many contexts.
In most cases, the variable length types ``TEXT`` or ``VARCHAR`` will be
more appropriate.
Configuration ::
{ "DisallowPaddedCharType": {} }
"""

_DISALLOWED_TYPES = {
# note: ``bpchar`` for "bounded, padded char"
'pg_catalog.bpchar'
}

class WastefulCharType(Message):
"""
Any value stored in this type will be padded with spaces to the
declared width. This padding wastes space, but doesn't make operations
on any faster; in fact the reverse, thanks to the need to strip spaces
in many contexts.
From a storage point of view, ``CHAR(n)`` is not a fixed-width type.
The actual number of bytes varies since characters (e.g. unicode) may
take more than one byte, and the stored values are therefore treated as
variable-length anyway (even though the space padding is included in
the storage).
If a maximum length must be enforced in the database, use
``VARCHAR(n)``, otherwise, consider using ``TEXT`` as a replacement.
"""
TEMPLATE = '`CHAR(n)` has unnecessary space and time overhead,' + \
' consider using `TEXT` or `VARCHAR`'
CODE = 1013

def enable(self, root_ctx, _config):
root_ctx.register('ColumnDef', self._check_column_def())

@squabble.rule.node_visitor
def _check_column_def(self, ctx, node):
col_type = format_type_name(node.typeName)

if col_type in self._DISALLOWED_TYPES:
ctx.report(
self.WastefulCharType(),
node=node,
severity=Severity.LOW)
83 changes: 83 additions & 0 deletions squabble/rules/disallow_timestamp_precision.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import pglast

import squabble.rule
from squabble.lint import Severity
from squabble.message import Message
from squabble.rules import BaseRule
from squabble.util import format_type_name


class DisallowTimestampPrecision(BaseRule):
"""
Prevent using ``TIMESTAMP(p)`` due to rounding behavior.
For both ``TIMESTAMP(p)`` and ``TIMESTAMP WITH TIME ZONE(p)``, (as well as
the corresponding ``TIME`` types) the optional precision parameter ``p``
rounds the value instead of truncating.
This means that it is possible to store values that are half a second in
the future for ``p == 0``.
To only enforce this rule for certain values of ``p``, set the
configuration option ``allow_precision_greater_than``.
Configuration ::
{ "DisallowTimetzType": {
"allow_precision_greater_than": 0
}
}
"""

_CHECKED_TYPES = {
'pg_catalog.time',
'pg_catalog.timetz',
'pg_catalog.timestamp',
'pg_catalog.timestamptz',
}

_DEFAULT_MIN_PRECISION = 9999

class NoTimestampPrecision(Message):
"""
Specifying a fixed precision for ``TIMESTAMP`` and ``TIME`` types will
cause the database to round inserted values (instead of truncating, as
one would expect).
This rounding behavior means that some values that get inserted may be
in the future, up to half a second with a precision of ``0``.
Instead, explicitly using ``date_trunc('granularity', time)`` may be a
better option.
"""
TEMPLATE = "use `date_trunc` instead of fixed precision timestamps"
CODE = 1014

def enable(self, root_ctx, config):
min_precision = int(
config.get(
'allow_precision_greater_than',
self._DEFAULT_MIN_PRECISION))

root_ctx.register('ColumnDef', self._check_column_def(min_precision))

@squabble.rule.node_visitor
def _check_column_def(self, ctx, node, min_precision):
col_type = format_type_name(node.typeName)

print('looking at', col_type)

if col_type not in self._CHECKED_TYPES:
return

modifiers = node.typeName.typmods
if modifiers == pglast.Missing or \
len(modifiers) != 1 or \
modifiers[0].val.node_tag != 'Integer':
return

if modifiers[0].val.ival.value <= min_precision:
ctx.report(
self.NoTimestampPrecision(),
node=node.typeName,
severity=Severity.LOW)

0 comments on commit 67d1f89

Please sign in to comment.