Merge 1aa43cc into 7a7cdab

frictionlessdata · Oct 15, 2018 · 5e290e4 · 5e290e4
2 parents 7a7cdab + 1aa43cc
commit 5e290e4
Show file tree

Hide file tree

Showing 4 changed files with 15 additions and 3 deletions.
diff --git a/README.md b/README.md
@@ -374,7 +374,9 @@ with Stream([[1], 'bad', [3]], force_parse=True) as stream:
 
 ##### Skip rows
 
-List of row numbers and/or strings to skip. If it's a string, all rows that begin with it will be skipped (e.g. '#' and '//').
+List of row numbers and/or strings to skip.
+If it's a string, all rows that begin with it will be skipped (e.g. '#' or '//').
+If it's an empty string, all rows whose first column is empty (`''` or `None`) will be skipped.
 
 ```python
 source = [['John', 1], ['Alex', 2], ['#Sam', 3], ['Mike', 4], ['John', 5]]

diff --git a/data/special/skip-rows.xlsx b/data/special/skip-rows.xlsx
diff --git a/tabulator/stream.py b/tabulator/stream.py
@@ -517,8 +517,12 @@ def builtin_processor(extended_rows):
                     continue
 
                 # Skip row by comments
-                match = lambda comment: row[0].startswith(comment)
-                if list(filter(match, self.__skip_rows_by_comments)):
+                match = lambda comment: (
+                    (isinstance(row[0], six.string_types) and
+                     row[0].startswith(comment)) if len(comment) > 0
+                    else row[0] in ('', None)
+                )
+                if any(map(match, self.__skip_rows_by_comments)):
                     continue
 
                 # Ignore blank headers

diff --git a/tests/test_stream.py b/tests/test_stream.py
@@ -328,6 +328,12 @@ def test_stream_skip_rows_no_double_skip():
     with Stream(source, skip_rows=[4, -1]) as stream:
         assert stream.read() == [['id', 'name'], ['1', 'english'], ["# it's a comment!"]]
 
+def test_stream_skip_rows_excel_empty_column():
+    source = 'data/special/skip-rows.xlsx'
+    with Stream(source, headers=1, skip_rows=['']) as stream:
+        assert stream.read() == [['A', 'B'], [8, 9]]
+
+
 
 # Post parse