From cf562863092471f91e468b341a426eead1f14379 Mon Sep 17 00:00:00 2001 From: Adam Kariv Date: Thu, 18 Oct 2018 12:26:44 +0300 Subject: [PATCH] Allow empty/Excel files by handling 'None' encoding (#228) * Allow empty files, handle 'None' encoding * Only set encoding if encoding was detected --- data/empty.csv | 0 datapackage/resource.py | 6 ++++-- tests/test_infer.py | 4 ++++ 3 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 data/empty.csv diff --git a/data/empty.csv b/data/empty.csv new file mode 100644 index 0000000..e69de29 diff --git a/datapackage/resource.py b/datapackage/resource.py index e71e0a2..a5585e7 100644 --- a/datapackage/resource.py +++ b/datapackage/resource.py @@ -253,8 +253,10 @@ def infer(self, **options): for chunk in stream: contents += chunk if len(contents) > 1000: break - encoding = cchardet.detect(contents)['encoding'].lower() - descriptor['encoding'] = 'utf-8' if encoding == 'ascii' else encoding + encoding = cchardet.detect(contents)['encoding'] + if encoding is not None: + encoding = encoding.lower() + descriptor['encoding'] = 'utf-8' if encoding == 'ascii' else encoding # Schema if not descriptor.get('schema'): diff --git a/tests/test_infer.py b/tests/test_infer.py index 3d7ed2b..07aa940 100644 --- a/tests/test_infer.py +++ b/tests/test_infer.py @@ -29,3 +29,7 @@ def test_infer(): def test_infer_non_utf8_file(): descriptor = infer('data/data_with_accents.csv') assert descriptor['resources'][0]['encoding'] == 'iso-8859-1' + +def test_infer_empty_file(): + descriptor = infer('data/empty.csv') + assert descriptor['resources'][0].get('encoding') is None