Skip to content
This repository has been archived by the owner on Jul 11, 2023. It is now read-only.

Added support for regex patterns in skip_rows #290

Merged
merged 6 commits into from
Jan 30, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
35 changes: 23 additions & 12 deletions tabulator/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from __future__ import absolute_import
from __future__ import unicode_literals

import re
import six
import gzip
import zipfile
Expand Down Expand Up @@ -84,9 +85,10 @@ class Stream(object):
to False.

skip_rows (List[Union[int, str]], optional):
List of row numbers and
strings to skip. If a string, it'll skip rows that begin with it
(e.g. '#' and '//').
List of row numbers, strings and regex patterns to skip.
If a string, it'll skip rows that begin with it e.g. '#' and '//'.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'rows that begin with' is a bit ambiguous - perhaps 'rows whose first cells begin with the string or match the regex'

To provide a regex pattern start it with `^` e.g. `^skip_me.*`
For example: `skip_rows=[1, '# comment', '^# (regex|comment)']`

post_parse (List[function], optional):
List of generator functions that
Expand Down Expand Up @@ -152,10 +154,13 @@ def __init__(self,

# Set skip rows
self.__skip_rows_by_numbers = []
self.__skip_rows_by_patterns = []
self.__skip_rows_by_comments = []
for directive in copy(skip_rows):
if isinstance(directive, int):
self.__skip_rows_by_numbers.append(directive)
elif directive.startswith('^'):
self.__skip_rows_by_patterns.append(re.compile(directive))
else:
self.__skip_rows_by_comments.append(str(directive))

Expand Down Expand Up @@ -716,15 +721,21 @@ def __check_if_row_for_skipping(self, row_number, headers, row):
if row_number in self.__skip_rows_by_numbers:
return True

# Get first cell
cell = row[0] if row else None

# Handle empty cell
if cell is None:
return '' in self.__skip_rows_by_comments

# Skip by pattern
for pattern in self.__skip_rows_by_patterns:
if bool(pattern.match(cell)):
return True

# Skip by comment
if not row:
return False
match = lambda comment: (
(isinstance(row[0], six.string_types) and
row[0].startswith(comment)) if len(comment) > 0
else row[0] in ('', None)
)
if any(map(match, self.__skip_rows_by_comments)):
return True
for comment in filter(None, self.__skip_rows_by_comments):
if cell.startswith(comment):
return True

return False
6 changes: 3 additions & 3 deletions tests/schemes/test_aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
# Stream

# https://github.com/frictionlessdata/tabulator-py/issues/271
@pytest.mark.skipif(os.environ.get('TRAVIS') == 'true', reason='See issue #271')
@pytest.mark.skip
def test_stream_s3(s3_client, bucket):

# Upload a file
Expand All @@ -40,7 +40,7 @@ def test_stream_s3(s3_client, bucket):


# https://github.com/frictionlessdata/tabulator-py/issues/271
@pytest.mark.skipif(os.environ.get('TRAVIS') == 'true', reason='See issue #271')
@pytest.mark.skip
def test_stream_s3_endpoint_url(s3_client, bucket):

# Upload a file
Expand All @@ -57,7 +57,7 @@ def test_stream_s3_endpoint_url(s3_client, bucket):


# https://github.com/frictionlessdata/tabulator-py/issues/271
# Skipped until issue #271 is resolved.
@pytest.mark.skip
def test_stream_s3_non_existent_file(s3_client, bucket):
    # Opening a key that was never uploaded must raise tabulator's IOError.
    source = 's3://%s/table.csv' % bucket
    with pytest.raises(exceptions.IOError):
        Stream(source).open()
Expand Down
7 changes: 7 additions & 0 deletions tests/test_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,13 @@ def test_stream_skip_rows_with_headers_example_from_readme():
assert stream.read() == [['John', 1], ['Alex', 2]]


def test_stream_skip_rows_regex():
    # A plain string directive skips by prefix; a directive starting with
    # '^' is treated as a regex matched against the first cell.
    sample = [
        ['# comment'],
        ['name', 'order'],
        ['# cat'],
        ['# dog'],
        ['John', 1],
        ['Alex', 2],
    ]
    with Stream(sample, headers=1, skip_rows=['# comment', r'^# (cat|dog)']) as stream:
        assert stream.headers == ['name', 'order']
        assert stream.read() == [['John', 1], ['Alex', 2]]


# Post parse

def test_stream_post_parse_headers():
Expand Down