Merge branch 'master' into readme-review-and-minor-improvements

frictionlessdata · Jul 19, 2018 · c30f8ea · c30f8ea
2 parents 57635b0 + 273f4ff
commit c30f8ea
Show file tree

Hide file tree

Showing 8 changed files with 32 additions and 3 deletions.
diff --git a/README.md b/README.md
@@ -769,6 +769,13 @@ $ make test
 
 ## Changelog
 
+Only breaking and the most important changes are described here. The full changelog and documentation for all released versions can be found in the nicely formatted [commit history](https://github.com/frictionlessdata/tabulator-py/commits/master).
+
+### v1.14
+
+Updated behaviour:
+- `xls` booleans are now parsed as booleans, not integers
+
 ### v1.13
 
 New API added:

diff --git a/data/special/merged-cells-boolean.xls b/data/special/merged-cells-boolean.xls
diff --git a/data/special/table-with-booleans.xls b/data/special/table-with-booleans.xls
diff --git a/tabulator/VERSION b/tabulator/VERSION
@@ -1,2 +1,2 @@
-1.13.0
+1.14.1
 
diff --git a/tabulator/helpers.py b/tabulator/helpers.py
@@ -99,7 +99,7 @@ def normalize_encoding(sample, encoding):
 def detect_html(text):
     """Detect if text is HTML.
     """
-    pattern = re.compile('\s*<(!doctype|html)', re.IGNORECASE)
+    pattern = re.compile('\\s*<(!doctype|html)', re.IGNORECASE)
     return bool(pattern.match(text))
 
 

diff --git a/tabulator/parsers/xls.py b/tabulator/parsers/xls.py
@@ -87,9 +87,14 @@ def __iter_extended_rows(self):
             row_number = x + 1
             row = []
             for y, value in enumerate(self.__sheet.row_values(x)):
+                # hacky way to parse booleans (cell values are integers (0/1) otherwise)
+                if self.__sheet.cell(x, y).ctype == xlrd.XL_CELL_BOOLEAN:
+                    value = bool(value)
                 if self.__fill_merged_cells:
                     for xlo, xhi, ylo, yhi in self.__sheet.merged_cells:
                         if x in range(xlo, xhi) and y in range(ylo, yhi):
                             value = self.__sheet.cell_value(xlo, ylo)
+                            if self.__sheet.cell(xlo, ylo).ctype == xlrd.XL_CELL_BOOLEAN:
+                                value = bool(value)
                 row.append(value)
             yield (row_number, None, row)
diff --git a/tabulator/parsers/xlsx.py b/tabulator/parsers/xlsx.py
@@ -64,7 +64,7 @@ def open(self, source, encoding=None):
         # Get sheet
         try:
             if isinstance(self.__sheet_pointer, six.string_types):
-                self.__sheet = self.__book.get_sheet_by_name(self.__sheet_pointer)
+                self.__sheet = self.__book[self.__sheet_pointer]
             else:
                 self.__sheet = self.__book.worksheets[self.__sheet_pointer - 1]
         except (KeyError, IndexError):

diff --git a/tests/formats/test_xls.py b/tests/formats/test_xls.py
@@ -65,6 +65,23 @@ def test_stream_xlsx_merged_cells_fill():
         assert stream.read() == [['data', 'data'], ['data', 'data'], ['data', 'data']]
 
 
+def test_stream_xls_with_boolean():
+    with Stream('data/special/table-with-booleans.xls') as stream:
+        assert stream.headers is None
+        assert stream.read() == [['id', 'boolean'], [1.0, True], [2.0, False]]
+
+
+def test_stream_xlsx_merged_cells_boolean():
+    source = 'data/special/merged-cells-boolean.xls'
+    with Stream(source) as stream:
+        assert stream.read() == [[True, ''], ['', ''], ['', '']]
+
+
+def test_stream_xlsx_merged_cells_fill_boolean():
+    source = 'data/special/merged-cells-boolean.xls'
+    with Stream(source, fill_merged_cells=True) as stream:
+        assert stream.read() == [[True, True], [True, True], [True, True]]
+
 # Parser
 
 def test_parser_xls():