Skip to content

Commit

Permalink
pythongh-113796: Add more validation checks in the csv.Dialect constr…
Browse files Browse the repository at this point in the history
…uctor (pythonGH-113797)

ValueError is now raised if the same character is used in different roles.
  • Loading branch information
serhiy-storchaka authored and aisk committed Feb 11, 2024
1 parent f9cbe14 commit 6ff8bed
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 13 deletions.
67 changes: 54 additions & 13 deletions Lib/test/test_csv.py
Expand Up @@ -28,14 +28,20 @@ class Test_Csv(unittest.TestCase):
in TestDialectRegistry.
"""
def _test_arg_valid(self, ctor, arg):
ctor(arg)
self.assertRaises(TypeError, ctor)
self.assertRaises(TypeError, ctor, None)
self.assertRaises(TypeError, ctor, arg, bad_attr = 0)
self.assertRaises(TypeError, ctor, arg, delimiter = 0)
self.assertRaises(TypeError, ctor, arg, delimiter = 'XX')
self.assertRaises(TypeError, ctor, arg, bad_attr=0)
self.assertRaises(TypeError, ctor, arg, delimiter='')
self.assertRaises(TypeError, ctor, arg, escapechar='')
self.assertRaises(TypeError, ctor, arg, quotechar='')
self.assertRaises(TypeError, ctor, arg, delimiter='^^')
self.assertRaises(TypeError, ctor, arg, escapechar='^^')
self.assertRaises(TypeError, ctor, arg, quotechar='^^')
self.assertRaises(csv.Error, ctor, arg, 'foo')
self.assertRaises(TypeError, ctor, arg, delimiter=None)
self.assertRaises(TypeError, ctor, arg, delimiter=1)
self.assertRaises(TypeError, ctor, arg, escapechar=1)
self.assertRaises(TypeError, ctor, arg, quotechar=1)
self.assertRaises(TypeError, ctor, arg, lineterminator=None)
self.assertRaises(TypeError, ctor, arg, lineterminator=1)
Expand All @@ -46,6 +52,40 @@ def _test_arg_valid(self, ctor, arg):
quoting=csv.QUOTE_ALL, quotechar=None)
self.assertRaises(TypeError, ctor, arg,
quoting=csv.QUOTE_NONE, quotechar='')
self.assertRaises(ValueError, ctor, arg, delimiter='\n')
self.assertRaises(ValueError, ctor, arg, escapechar='\n')
self.assertRaises(ValueError, ctor, arg, quotechar='\n')
self.assertRaises(ValueError, ctor, arg, delimiter='\r')
self.assertRaises(ValueError, ctor, arg, escapechar='\r')
self.assertRaises(ValueError, ctor, arg, quotechar='\r')
ctor(arg, delimiter=' ')
ctor(arg, escapechar=' ')
ctor(arg, quotechar=' ')
ctor(arg, delimiter='\t', skipinitialspace=True)
ctor(arg, escapechar='\t', skipinitialspace=True)
ctor(arg, quotechar='\t', skipinitialspace=True)
self.assertRaises(ValueError, ctor, arg,
delimiter=' ', skipinitialspace=True)
self.assertRaises(ValueError, ctor, arg,
escapechar=' ', skipinitialspace=True)
self.assertRaises(ValueError, ctor, arg,
quotechar=' ', skipinitialspace=True)
ctor(arg, delimiter='^')
ctor(arg, escapechar='^')
ctor(arg, quotechar='^')
self.assertRaises(ValueError, ctor, arg, delimiter='^', escapechar='^')
self.assertRaises(ValueError, ctor, arg, delimiter='^', quotechar='^')
self.assertRaises(ValueError, ctor, arg, escapechar='^', quotechar='^')
ctor(arg, delimiter='\x85')
ctor(arg, escapechar='\x85')
ctor(arg, quotechar='\x85')
ctor(arg, lineterminator='\x85')
self.assertRaises(ValueError, ctor, arg,
delimiter='\x85', lineterminator='\x85')
self.assertRaises(ValueError, ctor, arg,
escapechar='\x85', lineterminator='\x85')
self.assertRaises(ValueError, ctor, arg,
quotechar='\x85', lineterminator='\x85')

def test_reader_arg_valid(self):
self._test_arg_valid(csv.reader, [])
Expand Down Expand Up @@ -535,14 +575,6 @@ class unspecified():
finally:
csv.unregister_dialect('testC')

def test_bad_dialect(self):
# Unknown parameter
self.assertRaises(TypeError, csv.reader, [], bad_attr = 0)
# Bad values
self.assertRaises(TypeError, csv.reader, [], delimiter = None)
self.assertRaises(TypeError, csv.reader, [], quoting = -1)
self.assertRaises(TypeError, csv.reader, [], quoting = 100)

def test_copy(self):
for name in csv.list_dialects():
dialect = csv.get_dialect(name)
Expand Down Expand Up @@ -1088,10 +1120,15 @@ class mydialect(csv.Dialect):
'"lineterminator" must be a string')

def test_invalid_chars(self):
def create_invalid(field_name, value):
def create_invalid(field_name, value, **kwargs):
class mydialect(csv.Dialect):
pass
delimiter = ','
quoting = csv.QUOTE_ALL
quotechar = '"'
lineterminator = '\r\n'
setattr(mydialect, field_name, value)
for field_name, value in kwargs.items():
setattr(mydialect, field_name, value)
d = mydialect()

for field_name in ("delimiter", "escapechar", "quotechar"):
Expand All @@ -1100,6 +1137,10 @@ class mydialect(csv.Dialect):
self.assertRaises(csv.Error, create_invalid, field_name, "abc")
self.assertRaises(csv.Error, create_invalid, field_name, b'x')
self.assertRaises(csv.Error, create_invalid, field_name, 5)
self.assertRaises(ValueError, create_invalid, field_name, "\n")
self.assertRaises(ValueError, create_invalid, field_name, "\r")
self.assertRaises(ValueError, create_invalid, field_name, " ",
skipinitialspace=True)


class TestSniffer(unittest.TestCase):
Expand Down
@@ -0,0 +1,3 @@
Add more validation checks in the :class:`csv.Dialect` constructor.
:exc:`ValueError` is now raised if the same character is used in different
roles.
39 changes: 39 additions & 0 deletions Modules/_csv.c
Expand Up @@ -331,6 +331,33 @@ dialect_check_quoting(int quoting)
return -1;
}

static int
dialect_check_char(const char *name, Py_UCS4 c, DialectObj *dialect)
{
if (c == '\r' || c == '\n' || (dialect->skipinitialspace && c == ' ')) {
PyErr_Format(PyExc_ValueError, "bad %s value", name);
return -1;
}
if (PyUnicode_FindChar(
dialect->lineterminator, c, 0,
PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0)
{
PyErr_Format(PyExc_ValueError, "bad %s or lineterminator value", name);
return -1;
}
return 0;
}

static int
dialect_check_chars(const char *name1, const char *name2, Py_UCS4 c1, Py_UCS4 c2)
{
if (c1 == c2 && c1 != NOT_SET) {
PyErr_Format(PyExc_ValueError, "bad %s or %s value", name1, name2);
return -1;
}
return 0;
}

#define D_OFF(x) offsetof(DialectObj, x)

static struct PyMemberDef Dialect_memberlist[] = {
Expand Down Expand Up @@ -508,6 +535,18 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
goto err;
}
if (dialect_check_char("delimiter", self->delimiter, self) ||
dialect_check_char("escapechar", self->escapechar, self) ||
dialect_check_char("quotechar", self->quotechar, self) ||
dialect_check_chars("delimiter", "escapechar",
self->delimiter, self->escapechar) ||
dialect_check_chars("delimiter", "quotechar",
self->delimiter, self->quotechar) ||
dialect_check_chars("escapechar", "quotechar",
self->escapechar, self->quotechar))
{
goto err;
}

ret = Py_NewRef(self);
err:
Expand Down

0 comments on commit 6ff8bed

Please sign in to comment.