diff --git a/.travis.yml b/.travis.yml index 3f285f6..773dde1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,8 @@ language: python python: - - "2.6" - - "2.7" + - 2.6 + - 2.7 + - 3.5 env: - DJANGO=1.5 - DJANGO=1.6 @@ -10,12 +11,22 @@ env: - DJANGO=1.9 matrix: exclude: - - python: "2.6" + - python: 2.6 + env: DJANGO=1.6 + - python: 2.6 env: DJANGO=1.7 - - python: "2.6" + - python: 2.6 env: DJANGO=1.8 - - python: "2.6" + - python: 2.6 env: DJANGO=1.9 + - python: 3.5 + env: DJANGO=1.5 + - python: 3.5 + env: DJANGO=1.6 + - python: 3.5 + env: DJANGO=1.7 + + install: - pip install -q Django==$DJANGO - pip install -r dev_requirements.txt diff --git a/dev_requirements.txt b/dev_requirements.txt index 5ba9722..b1657b6 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -2,3 +2,4 @@ coveralls==0.3 coverage==3.6 flake8==2.5.1 django>=1.5 +unicodecsv>=0.14.1 diff --git a/djqscsv/djqscsv.py b/djqscsv/djqscsv.py index 034f689..3fb62af 100644 --- a/djqscsv/djqscsv.py +++ b/djqscsv/djqscsv.py @@ -1,6 +1,7 @@ -import csv import datetime +import unicodecsv as csv + from django.core.exceptions import ValidationError from django.utils.text import slugify from django.http import HttpResponse @@ -56,14 +57,14 @@ def write_csv(queryset, file_obj, **kwargs): use_verbose_names = kwargs.get('use_verbose_names', True) field_order = kwargs.get('field_order', None) - csv_kwargs = {} + csv_kwargs = {'encoding': 'utf-8'} for key, val in six.iteritems(kwargs): if key not in DJQSCSV_KWARGS: csv_kwargs[key] = val # add BOM to support CSVs in MS Excel (for Windows only) - file_obj.write(_safe_utf8_stringify(u'\ufeff')) + file_obj.write(b'\xef\xbb\xbf') # the CSV must always be built from a values queryset # in order to introspect the necessary fields. @@ -133,12 +134,10 @@ def write_csv(queryset, file_obj, **kwargs): merged_header_map.update(dict((k, k) for k in extra_columns)) merged_header_map.update(field_header_map) - merged_header_map = dict((k, _safe_utf8_stringify(v)) - for (k, v) in merged_header_map.items()) writer.writerow(merged_header_map) for record in values_qs: - record = _sanitize_unicode_record(field_serializer_map, record) + record = _sanitize_record(field_serializer_map, record) writer.writerow(record) @@ -147,7 +146,8 @@ def generate_filename(queryset, append_datestamp=False): Takes a queryset and returns a default base filename based on the underlying model """ - base_filename = slugify(unicode(queryset.model.__name__)) + '_export.csv' + base_filename = slugify(six.text_type(queryset.model.__name__)) \ + + '_export.csv' if append_datestamp: base_filename = _append_datestamp(base_filename) @@ -167,20 +167,11 @@ def _validate_and_clean_filename(filename): else: filename = filename[:-4] - filename = slugify(unicode(filename)) + '.csv' + filename = slugify(six.text_type(filename)) + '.csv' return filename -def _safe_utf8_stringify(value): - if isinstance(value, str): - return value - elif isinstance(value, unicode): - return value.encode('utf-8') - else: - return unicode(value).encode('utf-8') - - -def _sanitize_unicode_record(field_serializer_map, record): +def _sanitize_record(field_serializer_map, record): def _serialize_value(value): # provide default serializer for the case when @@ -188,14 +179,18 @@ def _serialize_value(value): if isinstance(value, datetime.datetime): return value.isoformat() else: - return unicode(value) + return six.text_type(value) obj = {} for key, val in six.iteritems(record): if val is not None: serializer = field_serializer_map.get(key, _serialize_value) newval = serializer(val) - obj[_safe_utf8_stringify(key)] = _safe_utf8_stringify(newval) + # If the user provided serializer did not produce a string, + # coerce it to a string + if not isinstance(newval, six.text_type): + newval = six.text_type(newval) + obj[key] = newval return obj diff --git a/setup.py b/setup.py index b93dee3..82c96ba 100644 --- a/setup.py +++ b/setup.py @@ -23,5 +23,5 @@ "Framework :: Django", "License :: OSI Approved :: GNU General Public License (GPL)" ], - install_requires=['django>=1.5'], + install_requires=['django>=1.5', 'unicodecsv>=0.14.1'], ) diff --git a/test_app/djqscsv_tests/tests/test_csv_creation.py b/test_app/djqscsv_tests/tests/test_csv_creation.py index 843c98c..c2ae0b1 100644 --- a/test_app/djqscsv_tests/tests/test_csv_creation.py +++ b/test_app/djqscsv_tests/tests/test_csv_creation.py @@ -3,8 +3,8 @@ from django import VERSION as DJANGO_VERSION -import csv -import itertools +import unicodecsv as csv +from io import BytesIO from djqscsv_tests.context import djqscsv @@ -12,12 +12,10 @@ from djqscsv_tests.util import create_people_and_get_queryset -from django.utils import six - -if six.PY3: - from io import StringIO -else: - from StringIO import StringIO +try: + from django.utils.six.moves import zip_longest +except ImportError: + from itertools import izip_longest as zip_longest class CSVTestCase(TestCase): @@ -27,15 +25,14 @@ def setUp(self): def csv_match(self, csv_file, expected_data, **csv_kwargs): assertion_results = [] - csv_data = csv.reader(csv_file, **csv_kwargs) + csv_data = csv.reader(csv_file, encoding='utf-8', **csv_kwargs) iteration_happened = False is_first = True - test_pairs = itertools.izip_longest(csv_data, expected_data, - fillvalue=[]) + test_pairs = list(zip_longest(csv_data, expected_data, fillvalue=[])) for csv_row, expected_row in test_pairs: if is_first: # add the BOM to the data - expected_row = (['\xef\xbb\xbf' + expected_row[0]] + + expected_row = ([u'\ufeff' + expected_row[0]] + expected_row[1:]) is_first = False iteration_happened = True @@ -54,14 +51,14 @@ def assertNotMatchesCsv(self, *args, **kwargs): self.assertFalse(all(assertion_results)) def assertQuerySetBecomesCsv(self, qs, expected_data, **kwargs): - obj = StringIO() + obj = BytesIO() djqscsv.write_csv(qs, obj, **kwargs) - csv_file = filter(None, obj.getvalue().split('\n')) + csv_file = filter(None, obj.getvalue().splitlines()) self.assertMatchesCsv(csv_file, expected_data) def assertEmptyQuerySetMatches(self, expected_data, **kwargs): qs = self.qs.none() - obj = StringIO() + obj = BytesIO() if DJANGO_VERSION[:2] == (1, 5): with self.assertRaises(djqscsv.CSVException): djqscsv.write_csv(qs, obj) @@ -119,7 +116,7 @@ def test_write_csv_limited_no_verbose(self): def test_empty_queryset_no_verbose(self): self.assertEmptyQuerySetMatches( - '\xef\xbb\xbfid,name,address,info,hobby_id,born\r\n', + b'\xef\xbb\xbfid,name,address,info,hobby_id,born\r\n', use_verbose_names=False) @@ -134,8 +131,8 @@ def test_write_csv_limited(self): def test_empty_queryset(self): self.assertEmptyQuerySetMatches( - '\xef\xbb\xbfID,Person\'s name,address,' - 'Info on Person,hobby_id,born\r\n') + b'\xef\xbb\xbfID,Person\'s name,address,' + b'Info on Person,hobby_id,born\r\n') class FieldHeaderMapTests(CSVTestCase): @@ -174,8 +171,8 @@ def test_write_csv_with_related_custom_headers(self): def test_empty_queryset_custom_headers(self): self.assertEmptyQuerySetMatches( - '\xef\xbb\xbfID,Person\'s name,' - 'address,INFORMATION,hobby_id,born\r\n', + b'\xef\xbb\xbfID,Person\'s name,' + b'address,INFORMATION,hobby_id,born\r\n', field_header_map={'info': 'INFORMATION'}) @@ -301,7 +298,7 @@ def test_render_to_csv_response_no_filename(self): response = djqscsv.render_to_csv_response(self.qs, use_verbose_names=False) self.assertEqual(response['Content-Type'], 'text/csv') - self.assertMatchesCsv(response.content.split('\n'), + self.assertMatchesCsv(response.content.splitlines(), self.FULL_PERSON_CSV_NO_VERBOSE) self.assertRegexpMatches(response['Content-Disposition'], @@ -312,7 +309,7 @@ def test_render_to_csv_response(self): filename="test_csv", use_verbose_names=False) self.assertEqual(response['Content-Type'], 'text/csv') - self.assertMatchesCsv(response.content.split('\n'), + self.assertMatchesCsv(response.content.splitlines(), self.FULL_PERSON_CSV_NO_VERBOSE) def test_render_to_csv_response_other_delimiter(self): @@ -322,7 +319,7 @@ def test_render_to_csv_response_other_delimiter(self): delimiter='|') self.assertEqual(response['Content-Type'], 'text/csv') - self.assertMatchesCsv(response.content.split('\n'), + self.assertMatchesCsv(response.content.splitlines(), self.FULL_PERSON_CSV_NO_VERBOSE, delimiter="|") @@ -333,5 +330,5 @@ def test_render_to_csv_fails_on_delimiter_mismatch(self): delimiter='|') self.assertEqual(response['Content-Type'], 'text/csv') - self.assertNotMatchesCsv(response.content.split('\n'), + self.assertNotMatchesCsv(response.content.splitlines(), self.FULL_PERSON_CSV_NO_VERBOSE) diff --git a/test_app/djqscsv_tests/tests/test_utilities.py b/test_app/djqscsv_tests/tests/test_utilities.py index cebd8a2..671e9af 100644 --- a/test_app/djqscsv_tests/tests/test_utilities.py +++ b/test_app/djqscsv_tests/tests/test_utilities.py @@ -6,8 +6,6 @@ from django.test import TestCase from django.core.exceptions import ValidationError -from django.utils.encoding import python_2_unicode_compatible - from djqscsv_tests.context import djqscsv from djqscsv_tests.util import create_people_and_get_queryset @@ -49,15 +47,15 @@ class SanitizeUnicodeRecordTests(TestCase): def test_sanitize(self): record = {'name': 'Tenar', 'nickname': u'\ufeffThe White Lady of Gont'} - sanitized = djqscsv._sanitize_unicode_record({}, record) + sanitized = djqscsv._sanitize_record({}, record) self.assertEqual(sanitized, {'name': 'Tenar', - 'nickname': '\xef\xbb\xbfThe White Lady of Gont'}) + 'nickname': u'\ufeffThe White Lady of Gont'}) def test_sanitize_date(self): record = {'name': 'Tenar', 'created': datetime.datetime(1, 1, 1)} - sanitized = djqscsv._sanitize_unicode_record({}, record) + sanitized = djqscsv._sanitize_record({}, record) self.assertEqual(sanitized, {'name': 'Tenar', 'created': '0001-01-01T00:00:00'}) @@ -70,14 +68,14 @@ def test_sanitize_date_with_non_string_formatter(self): """ record = {'name': 'Tenar'} serializer = {'name': lambda d: len(d)} - sanitized = djqscsv._sanitize_unicode_record(serializer, record) + sanitized = djqscsv._sanitize_record(serializer, record) self.assertEqual(sanitized, {'name': '5'}) def test_sanitize_date_with_formatter(self): record = {'name': 'Tenar', 'created': datetime.datetime(1973, 5, 13)} serializer = {'created': lambda d: d.strftime('%Y-%m-%d')} - sanitized = djqscsv._sanitize_unicode_record(serializer, record) + sanitized = djqscsv._sanitize_record(serializer, record) self.assertEqual(sanitized, {'name': 'Tenar', 'created': '1973-05-13'}) @@ -86,7 +84,7 @@ def test_sanitize_date_with_bad_formatter(self): record = {'name': 'Tenar', 'created': datetime.datetime(1973, 5, 13)} with self.assertRaises(AttributeError): - djqscsv._sanitize_unicode_record(attrgetter('day'), record) + djqscsv._sanitize_record(attrgetter('day'), record) class AppendDatestampTests(TestCase): @@ -118,24 +116,3 @@ def test_generate_filename(self): self.assertRegexpMatches(djqscsv.generate_filename(qs, True), r'person_export_[0-9]{8}.csv') - - -class SafeUtf8EncodeTest(TestCase): - def test_safe_utf8_encode(self): - - @python_2_unicode_compatible - class Foo(object): - def __str__(self): - return u'¯\_(ツ)_/¯' - - for val in (u'¯\_(ツ)_/¯', 'plain', r'raw', - b'123', 11312312312313, False, - datetime.datetime(2001, 1, 1), - 4, None, [], set(), Foo): - - first_pass = djqscsv._safe_utf8_stringify(val) - second_pass = djqscsv._safe_utf8_stringify(first_pass) - third_pass = djqscsv._safe_utf8_stringify(second_pass) - self.assertEqual(first_pass, second_pass) - self.assertEqual(second_pass, third_pass) - self.assertEqual(type(first_pass), type(third_pass))