Skip to content
This repository has been archived by the owner on Jul 11, 2023. It is now read-only.

Commit

Permalink
Merge branch 'master' into patch-1
Browse files Browse the repository at this point in the history
  • Loading branch information
roll committed Sep 26, 2020
2 parents 3b38d5f + 9580d8d commit 43969db
Show file tree
Hide file tree
Showing 9 changed files with 92 additions and 9 deletions.
4 changes: 3 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ language:
python

python:
- 2.7
# NOTE: Recover after a fix:
# https://github.com/tobgu/pyrsistent/issues/208
# - 2.7
- 3.6
- 3.7
- 3.8
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@

A library for reading and writing tabular data (csv/xls/json/etc).

> **[Important Notice]** We have released [Frictionless Framework](https://github.com/frictionlessdata/frictionless-py). This framework is a logical continuation of `tabulator` that was extended to be a complete data solution. The change is not breaking for existing software, so no action is required. Please read the [Migration Guide](https://github.com/frictionlessdata/frictionless-py/blob/master/docs/target/migration-guide/README.md) from `tabulator` to Frictionless Framework.
> - we continue to bug-fix `tabulator@1.x` in this [repository](https://github.com/frictionlessdata/tabulator-py), and it remains available on [PyPi](https://pypi.org/project/tabulator/) as before
> - please note that the `frictionless@3.x` API, which we are working on at the moment, is not stable
> - we will release `frictionless@4.x` by the end of 2020 to be the first SemVer/stable version
## Features

- **Supports most common tabular formats**: CSV, XLS, ODS, JSON, Google Sheets, SQL, and others. See complete list [below](#supported-file-formats).
Expand Down
5 changes: 5 additions & 0 deletions data/matrix.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
f1,f2,f3,f4
11,12,13,14
21,22,23,24
31,32,33,34
41,42,43,44
Binary file added data/special/number_format_multicode.xlsx
Binary file not shown.
2 changes: 1 addition & 1 deletion tabulator/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.52.1
1.52.3
9 changes: 8 additions & 1 deletion tabulator/parsers/xlsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,14 @@ def convert_excel_number_format_string(excel_number, value):
percentage = True
if excel_number == "General":
return value
code = excel_number.split(".")
multi_codes = excel_number.split(";")
if value < 0 and len(multi_codes) > 1:
excel_number = multi_codes[1]
else:
excel_number = multi_codes[0]

code = excel_number.split('.')

if len(code) > 2:
return None
if len(code) < 2:
Expand Down
8 changes: 2 additions & 6 deletions tabulator/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -888,18 +888,14 @@ def builtin_processor(extended_rows):
if headers and self.__headers:
keyed_row = dict(zip(headers, row))
row = [keyed_row.get(header) for header in self.__headers]
elif self.__ignored_headers_indexes:
row = [value for index, value in enumerate(row) if index not in self.__ignored_headers_indexes]
headers = self.__headers

# Skip rows by numbers/comments
if self.__check_if_row_for_skipping(row_number, headers, row):
continue

# Ignore headers
if self.__ignored_headers_indexes:
for index in self.__ignored_headers_indexes:
if index < len(row):
row = row[:index] + row[index+1:]

yield (row_number, headers, row)

        # Skip negative rows processor
Expand Down
10 changes: 10 additions & 0 deletions tests/formats/test_xlsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,16 @@ def test_stream_xlsx_preserve_formatting_percentage():
]



def test_stream_xlsx_preserve_formatting_number_multicode():
    """Multi-section Excel number formats (positive;negative;...) are rendered."""
    expected = [["4.5"], ["-9.032"], ["15.8"]]
    options = {"headers": 1, "ignore_blank_headers": True, "preserve_formatting": True}
    with Stream("data/special/number_format_multicode.xlsx", **options) as stream:
        assert stream.read() == expected



def test_stream_xlsx_workbook_cache():
workbook_cache = {}
source = BASE_URL % "data/special/sheets.xlsx"
Expand Down
58 changes: 58 additions & 0 deletions tests/test_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,22 @@ def test_stream_pick_fields_position_and_prefix():
]


def test_stream_pick_fields_keyed_source():
    """skip_fields accepts header names and 1-based positions on keyed sources."""
    source = [{'id': 1, 'name': 'london'}, {'id': 2, 'name': 'paris'}]
    cases = [
        (['id'], ['name'], [['london'], ['paris']]),
        ([1], ['name'], [['london'], ['paris']]),
        (['name'], ['id'], [[1], [2]]),
        ([2], ['id'], [[1], [2]]),
    ]
    for skip, expected_headers, expected_rows in cases:
        with Stream(source, headers=1, skip_fields=skip) as stream:
            assert stream.headers == expected_headers
            assert stream.read() == expected_rows


def test_stream_limit_fields():
source = 'text://header1,header2,header3\nvalue1,value2,value3'
with Stream(source, format='csv', headers=1, limit_fields=1) as stream:
Expand Down Expand Up @@ -522,6 +538,48 @@ def test_stream_limit_offset_fields():
]


def test_stream_matrix_pick_fields():
    """pick_fields mixes a 1-based position with a header name."""
    expected = [['12', '13'], ['22', '23'], ['32', '33'], ['42', '43']]
    with Stream('data/matrix.csv', headers=1, pick_fields=[2, 'f3']) as stream:
        assert stream.headers == ['f2', 'f3']
        assert stream.read() == expected


def test_stream_matrix_pick_fields_regex():
    """pick_fields supports a regex matcher against header names."""
    matcher = {'type': 'regex', 'value': 'f[23]'}
    with Stream('data/matrix.csv', headers=1, pick_fields=[matcher]) as stream:
        assert stream.headers == ['f2', 'f3']
        assert stream.read() == [['12', '13'], ['22', '23'], ['32', '33'], ['42', '43']]


def test_stream_matrix_skip_fields():
    """skip_fields mixes a 1-based position with a header name."""
    expected = [['12', '13'], ['22', '23'], ['32', '33'], ['42', '43']]
    with Stream('data/matrix.csv', headers=1, skip_fields=[1, 'f4']) as stream:
        assert stream.headers == ['f2', 'f3']
        assert stream.read() == expected


def test_stream_matrix_skip_fields_regex():
    """skip_fields supports a regex matcher against header names."""
    matcher = {'type': 'regex', 'value': 'f[14]'}
    with Stream('data/matrix.csv', headers=1, skip_fields=[matcher]) as stream:
        assert stream.headers == ['f2', 'f3']
        assert stream.read() == [['12', '13'], ['22', '23'], ['32', '33'], ['42', '43']]


def test_stream_matrix_limit_fields():
    """limit_fields keeps only the first N columns."""
    with Stream('data/matrix.csv', headers=1, limit_fields=1) as stream:
        headers, rows = stream.headers, stream.read()
    assert headers == ['f1']
    assert rows == [['11'], ['21'], ['31'], ['41']]


def test_stream_matrix_offset_fields():
    """offset_fields drops the first N columns."""
    with Stream('data/matrix.csv', headers=1, offset_fields=3) as stream:
        headers, rows = stream.headers, stream.read()
    assert headers == ['f4']
    assert rows == [['14'], ['24'], ['34'], ['44']]


def test_stream_matrix_limit_and_offset_fields():
    """offset_fields and limit_fields combine: skip one column, then take two."""
    expected = [['12', '13'], ['22', '23'], ['32', '33'], ['42', '43']]
    with Stream('data/matrix.csv', headers=1, limit_fields=2, offset_fields=1) as stream:
        assert stream.headers == ['f2', 'f3']
        assert stream.read() == expected


# Pick/skip/limit/offset rows

def test_stream_pick_rows():
Expand Down

0 comments on commit 43969db

Please sign in to comment.