Skip to content
This repository has been archived by the owner on Mar 24, 2021. It is now read-only.

Commit

Permalink
Merge pull request #109 from alphagov/cross-platform-newlines-for-csv…
Browse files Browse the repository at this point in the history
…-upload

Cross platform newlines for csv upload
  • Loading branch information
maxfliri committed Jun 24, 2013
2 parents 7986721 + 0c23d92 commit 77fee6c
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 13 deletions.
8 changes: 7 additions & 1 deletion backdrop/core/parse_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,18 @@ def parse_csv(incoming_data):
return list(
parse_rows(
ignore_comment_column(unicode_csv_dict_reader(
ignore_comment_lines(incoming_data), 'utf-8')
ignore_comment_lines(lines(incoming_data)), 'utf-8')
)
)
)


def lines(stream):
    """Yield the lines of every item produced by iterating *stream*.

    Each item is split with ``splitlines(True)``, so the line terminators
    are kept on the yielded pieces. An item that itself contains several
    line breaks is therefore yielded as several separate lines.
    """
    # Lazy: an item is only split when the consumer asks for its lines.
    split_items = (item.splitlines(True) for item in stream)
    for pieces in split_items:
        for piece in pieces:
            yield piece


def is_empty_row(row):
    """Return True when none of the values in *row* is truthy.

    *row* is a mapping; only its values are inspected. A mapping with no
    entries counts as empty.
    """
    return not any(row.values())

Expand Down
85 changes: 73 additions & 12 deletions tests/core/test_csv.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,33 @@
# -*- coding: utf-8 -*-

from StringIO import StringIO
from cStringIO import StringIO
import unittest
import cStringIO
from hamcrest import assert_that, only_contains, is_

from backdrop.core.parse_csv import parse_csv
from backdrop.core.parse_csv import parse_csv, lines
from backdrop.core.errors import ParseError


class ParseCsvTestCase(unittest.TestCase):
def test_parse_csv(self):
csv_stream = StringIO("a,b\nx,y\nq,w")
csv_stream = _string_io("a,b\nx,y\nq,w")

data = parse_csv(csv_stream)

assert_that(data,
only_contains({"a": "x", "b": "y"}, {"a": "q", "b": "w"}))

def test_parse_empty_csv(self):
csv_stream = StringIO("")
csv_stream = _string_io("")

data = parse_csv(csv_stream)

assert_that(data, is_([]))

def test_parse_utf8_data(self):
csv = u"a,b\nà,ù"
csv_stream = StringIO(csv.encode("utf-8"))
csv_stream = _string_io(csv, "utf-8")

data = parse_csv(csv_stream)

Expand All @@ -35,25 +36,25 @@ def test_parse_utf8_data(self):
))

def test_error_when_values_for_columns_are_missing(self):
incoming_data = StringIO("a,b\nx,y\nq")
incoming_data = _string_io("a,b\nx,y\nq")

self.assertRaises(ParseError, parse_csv, incoming_data)

def test_error_when_there_are_more_values_than_columns(self):
incoming_data = StringIO("a,b\nx,y,s,d\nq,w")
incoming_data = _string_io("a,b\nx,y,s,d\nq,w")

self.assertRaises(ParseError, parse_csv, incoming_data)

def test_error_when_input_is_not_utf8(self):
csv = u"a,b\nà,ù"

csv_stream = StringIO(csv.encode("iso-8859-1"))
csv_stream = _string_io(csv, "iso-8859-1")

self.assertRaises(ParseError, parse_csv, csv_stream)

def test_ignore_when_empty_row(self):
csv = u"a,b\n,\nc,d"
csv_stream = StringIO(csv.encode("utf-8"))
csv_stream = _string_io(csv, "utf-8")

data = parse_csv(csv_stream)

Expand All @@ -63,7 +64,7 @@ def test_ignore_when_empty_row(self):

def test_accept_when_some_values_empty(self):
csv = u"a,b\n,\nc,d\nc,"
csv_stream = StringIO(csv.encode("utf-8"))
csv_stream = _string_io(csv, "utf-8")

data = parse_csv(csv_stream)

Expand All @@ -74,7 +75,7 @@ def test_accept_when_some_values_empty(self):

def test_ignore_comments(self):
csv = u"# top comment\na,b\n# any random comment\nc,d"
csv_stream = StringIO(csv.encode("utf-8"))
csv_stream = _string_io(csv, "utf-8")

data = parse_csv(csv_stream)

Expand All @@ -84,10 +85,70 @@ def test_ignore_comments(self):

def test_ignore_values_in_comments_column(self):
csv = u"a,comment,b\nc,d,e"
csv_stream = StringIO(csv.encode("utf-8"))
csv_stream = _string_io(csv, "utf-8")

data = parse_csv(csv_stream)

assert_that(data, only_contains(
{"a": u"c", "b": u"e"}
))

def test_accept_csv_with_CR_as_line_separator(self):
    """Rows separated by a bare carriage return are parsed as rows."""
    source = _string_io(u"prop1,prop2\rvalue 1,value 2", "utf-8")

    parsed = parse_csv(source)

    expected_row = {"prop1": "value 1", "prop2": "value 2"}
    assert_that(parsed, only_contains(expected_row))

def test_accept_csv_with_CRLF_as_line_separator(self):
    """Rows separated by CR followed by LF are parsed as rows."""
    source = _string_io(u"prop1,prop2\r\nvalue 1,value 2", "utf-8")

    parsed = parse_csv(source)

    expected_row = {"prop1": "value 1", "prop2": "value 2"}
    assert_that(parsed, only_contains(expected_row))

def test_preserve_newlines_in_quoted_values(self):
    """A newline inside a quoted field stays part of that field's value."""
    raw = u"prop1,prop2\nvalue,\"value\nwith newline\""
    source = _string_io(raw, "utf-8")

    parsed = parse_csv(source)

    expected_row = {"prop1": "value", "prop2": "value\nwith newline"}
    assert_that(parsed, only_contains(expected_row))


class LinesGeneratorTest(unittest.TestCase):
    """Checks how the ``lines`` generator breaks a stream into lines."""

    def test_handles_CR_LF_and_CRLF(self):
        # One line per terminator flavour, then a final unterminated line.
        stream = _string_io("1\n2\r3\r\n4")

        produced = list(lines(stream))

        assert_that(produced, is_(["1\n", "2\r", "3\r\n", "4"]))

    def test_handles_emptylines(self):
        # Consecutive terminators must come out as separate empty lines.
        stream = _string_io("q\n\rw\r\r\ne")

        produced = list(lines(stream))

        assert_that(produced, is_(["q\n", "\r", "w\r", "\r\n", "e"]))

    def test_ignores_trailing_empty_line(self):
        # A terminator at the very end must not add an extra empty line.
        stream = _string_io("asd\n")

        produced = list(lines(stream))

        assert_that(produced, is_(["asd\n"]))


def _string_io(content, encoding=None):
    """Wrap *content* in a ``cStringIO.StringIO`` stream.

    When *encoding* is given, *content* is first encoded with it; otherwise
    it is passed to the stream unchanged.
    """
    payload = content if encoding is None else content.encode(encoding)
    return cStringIO.StringIO(payload)

0 comments on commit 77fee6c

Please sign in to comment.