
Commit

Rebased from ignore_falsy_headers to ignore_blank_headers
roll committed Oct 17, 2017
1 parent fb3ec66 commit 6181264
Showing 3 changed files with 18 additions and 18 deletions.
10 changes: 5 additions & 5 deletions README.md
@@ -140,7 +140,7 @@ Create stream class instance.
- `allow_html (bool)` - a flag to allow html. See [allow html](https://github.com/frictionlessdata/tabulator-py#allow-html) section.
- `sample_size (int)` - rows count for table.sample. Set to "0" to prevent any parsing activities before actual table.iter call. In this case headers will not be extracted from the source. See [sample size](https://github.com/frictionlessdata/tabulator-py#sample-size) section.
- `bytes_sample_size (int)` - sample size in bytes for operations like encoding detection. See [bytes sample size](https://github.com/frictionlessdata/tabulator-py#bytes-sample-size) section.
-- `ignore_false_headers (bool)` - a flag to ignore any column having a falsy header. See [ignore falsy headers](https://github.com/frictionlessdata/tabulator-py#ignore-falsy-headers) section.
+- `ignore_blank_headers (bool)` - a flag to ignore any column having a blank header. See [ignore blank headers](https://github.com/frictionlessdata/tabulator-py#ignore-blank-headers) section.
- `force_strings (bool)` - if `True` all output will be converted to strings. See [force strings](https://github.com/frictionlessdata/tabulator-py#force-strings) section.
- `force_parse (bool)` - if `True` on row parsing error a stream will return an empty row instead of raising an exception. See [force parse](https://github.com/frictionlessdata/tabulator-py#force-parse) section.
- `skip_rows (int/str[])` - list of rows to skip by row number or row comment. Example: `skip_rows=[1, 2, '#', '//']` - rows 1, 2 and all rows started with `#` and `//` will be skipped. See [skip rows](https://github.com/frictionlessdata/tabulator-py#skip-rows) section.
@@ -480,9 +480,9 @@ with Stream(source, sample_size=0, bytes_sample_size=10) as stream:

In this example our data file doesn't include any `iso8859-2` characters in its first 10 bytes, so we can see the difference in encoding detection. Note the `sample_size` usage here - the two parameters are independent. Here we use `sample_size=0` to prevent creation of the row sample (it would fail with a bad encoding).
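
For reference, a minimal sketch of the comparison this paragraph describes; the file name `data.csv`, its `iso8859-2` encoding and the exact detected values are assumptions for illustration and are not part of this commit:

```
from tabulator import Stream

# Assumed: 'data.csv' is saved as iso8859-2 and its first 10 bytes are plain ASCII,
# so a tiny byte sample is not enough for correct encoding detection.
with Stream('data.csv', sample_size=0, bytes_sample_size=10) as stream:
    print(stream.encoding)  # likely detected as 'utf-8' or 'ascii'

# With the default bytes_sample_size the detector sees the non-ASCII characters.
with Stream('data.csv', sample_size=0) as stream:
    print(stream.encoding)  # expected 'iso8859-2'
```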

-### Ignore falsy headers
+### Ignore blank headers

-Some data tables could have falsy headers. For example it could be an empty strings in `csv` or `None` values for inline data. By default `tabulator` processes it as an ordinary header:
+Some data tables could have blank headers. For example, these could be empty strings in `csv` or `None` values in inline data. By default `tabulator` processes them as ordinary headers:

```
source = 'text://header1,,header3\nvalue1,value2,value3'
@@ -491,11 +491,11 @@ with Stream(source, format='csv', headers=1) as stream:
    stream.read(keyed=True) # {'header1': 'value1', '': 'value2', 'header3': 'value3'}
```

-But sometimes it's not a desired behavior. You could ignore columns with a falsy header completely using an `ignore_falsy_headers` flag:
+But sometimes that's not the desired behavior. You can ignore columns with a blank header completely using the `ignore_blank_headers` flag:

```
source = 'text://header1,,header3\nvalue1,value2,value3'
-with Stream(source, format='csv', headers=1, ignore_false_headers=True) as stream:
+with Stream(source, format='csv', headers=1, ignore_blank_headers=True) as stream:
    stream.headers # ['header1', 'header3']
    stream.read(keyed=True) # {'header1': 'value1', 'header3': 'value3'}
```
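
Given the row-filtering logic in `stream.py` (see the diff below), a non-keyed read should drop the same column from the data rows too. This extra example is an illustration, not part of the commit, and the expected output is an assumption:

```
source = 'text://header1,,header3\nvalue1,value2,value3'
with Stream(source, format='csv', headers=1, ignore_blank_headers=True) as stream:
    stream.read()  # expected: [['value1', 'value3']]
```
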
18 changes: 9 additions & 9 deletions tabulator/stream.py
@@ -27,7 +27,7 @@ def __init__(self,
allow_html=False,
sample_size=config.DEFAULT_SAMPLE_SIZE,
bytes_sample_size=config.DEFAULT_BYTES_SAMPLE_SIZE,
-ignore_falsy_headers=False,
+ignore_blank_headers=False,
force_strings=False,
force_parse=False,
skip_rows=[],
@@ -68,8 +68,8 @@ def __init__(self,
self.__allow_html = allow_html
self.__sample_size = sample_size
self.__bytes_sample_size = bytes_sample_size
-self.__ignore_falsy_headers = ignore_falsy_headers
-self.__falsy_header_indexes = []
+self.__ignore_blank_headers = ignore_blank_headers
+self.__blank_header_indexes = []
self.__force_strings = force_strings
self.__force_parse = force_parse
self.__post_parse = copy(post_parse)
@@ -308,11 +308,11 @@ def __extract_headers(self):
keyed_source = True
else:
self.__headers = row
-if self.__ignore_falsy_headers:
+if self.__ignore_blank_headers:
for index, header in list(enumerate(self.__headers)):
-if not header:
+if header in ['', None]:
del self.__headers[index]
-self.__falsy_header_indexes.append(index)
+self.__blank_header_indexes.append(index)
if not keyed_source:
del self.__sample_extended_rows[:self.__headers_row]
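
The check here changes from `if not header:` to `if header in ['', None]:`, which narrows "falsy" to "blank": headers such as `0` or `False` (possible with inline data sources) are falsy but are no longer dropped. A standalone illustration of the difference, not tabulator code:

```
headers = ['header1', '', 0, None, 'header5']

# Old behaviour: any falsy header counts as missing
dropped_old = [h for h in headers if not h]            # ['', 0, None]

# New behaviour: only genuinely blank headers count as missing
dropped_new = [h for h in headers if h in ['', None]]  # ['', None]
```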

@@ -343,9 +343,9 @@ def builtin_processor(extended_rows):
match = lambda comment: row[0].startswith(comment)
if list(filter(match, self.__skip_rows_by_comments)):
continue
-# Ignore falsy headers
-if self.__falsy_header_indexes:
-for index in self.__falsy_header_indexes:
+# Ignore blank headers
+if self.__blank_header_indexes:
+for index in self.__blank_header_indexes:
if index < len(row):
del row[index]
yield (row_number, headers, row)
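
Taken together, these changes record the positions of blank headers once, remove them from the header list, and strip the same positions from every data row in the built-in post-parse processor. The following is a simplified, self-contained sketch of that mechanism, not the library's actual implementation:

```
def drop_blank_columns(headers, rows):
    """Drop columns whose header is '' or None from the headers and from every row."""
    blank_indexes = [i for i, h in enumerate(headers) if h in ['', None]]
    kept_headers = [h for i, h in enumerate(headers) if i not in blank_indexes]
    kept_rows = [[v for i, v in enumerate(row) if i not in blank_indexes] for row in rows]
    return kept_headers, kept_rows

headers, rows = drop_blank_columns(
    ['header1', '', 'header3'],
    [['value1', 'value2', 'value3']],
)
print(headers)  # ['header1', 'header3']
print(rows)     # [['value1', 'value3']]
```
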
8 changes: 4 additions & 4 deletions tests/test_stream.py
@@ -202,9 +202,9 @@ def test_stream_bytes_sample_size():
assert stream.encoding == 'utf-8'


-# Ignore falsy headers
+# Ignore blank headers

-def test_stream_ignore_falsy_headers_false():
+def test_stream_ignore_blank_headers_false():
source = 'text://header1,,header3\nvalue1,value2,value3'
with Stream(source, format='csv', headers=1) as stream:
assert stream.headers == ['header1', '', 'header3']
@@ -213,9 +213,9 @@ def test_stream_ignore_falsy_headers_false():
]


-def test_stream_ignore_falsy_headers_true():
+def test_stream_ignore_blank_headers_true():
source = 'text://header1,,header3\nvalue1,value2,value3'
-with Stream(source, format='csv', headers=1, ignore_falsy_headers=True) as stream:
+with Stream(source, format='csv', headers=1, ignore_blank_headers=True) as stream:
assert stream.headers == ['header1', 'header3']
assert stream.read(keyed=True) == [
{'header1': 'value1', 'header3': 'value3'},
