
Commit

Rebased from ignore_falsy_headers to ignore_blank_headers
roll committed Oct 17, 2017
1 parent fb3ec66 commit 6181264
Showing 3 changed files with 18 additions and 18 deletions.
10 changes: 5 additions & 5 deletions README.md
@@ -140,7 +140,7 @@ Create stream class instance.
- `allow_html (bool)` - a flag to allow html. See [allow html](https://github.com/frictionlessdata/tabulator-py#allow-html) section.
- `sample_size (int)` - rows count for table.sample. Set to "0" to prevent any parsing activities before actual table.iter call. In this case headers will not be extracted from the source. See [sample size](https://github.com/frictionlessdata/tabulator-py#sample-size) section.
- `bytes_sample_size (int)` - sample size in bytes for operations like encoding detection. See [bytes sample size](https://github.com/frictionlessdata/tabulator-py#bytes-sample-size) section.
-- `ignore_false_headers (bool)` - a flag to ignore any column having a falsy header. See [ignore falsy headers](https://github.com/frictionlessdata/tabulator-py#ignore-falsy-headers) section.
+- `ignore_blank_headers (bool)` - a flag to ignore any column having a blank header. See [ignore blank headers](https://github.com/frictionlessdata/tabulator-py#ignore-blank-headers) section.
- `force_strings (bool)` - if `True` all output will be converted to strings. See [force strings](https://github.com/frictionlessdata/tabulator-py#force-strings) section.
- `force_parse (bool)` - if `True` on row parsing error a stream will return an empty row instead of raising an exception. See [force parse](https://github.com/frictionlessdata/tabulator-py#force-parse) section.
- `skip_rows (int/str[])` - list of rows to skip by row number or row comment. Example: `skip_rows=[1, 2, '#', '//']` - rows 1, 2 and all rows started with `#` and `//` will be skipped. See [skip rows](https://github.com/frictionlessdata/tabulator-py#skip-rows) section.
@@ -480,9 +480,9 @@ with Stream(source, sample_size=0, bytes_sample_size=10) as stream:

In this example our data file doesn't include any `iso8859-2` characters in its first 10 bytes, so we can see the difference in encoding detection. Note the `sample_size` usage here - the two parameters are independent. Here we use `sample_size=0` to prevent creation of the row sample (it would fail with a bad encoding).
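
For reference, a minimal sketch of the comparison this paragraph describes; the file name `data.csv`, its `iso8859-2` encoding and the exact detected values are assumptions for illustration and are not part of this commit:

```
from tabulator import Stream

# Assumed: 'data.csv' is saved as iso8859-2 and its first 10 bytes are plain ASCII,
# so a tiny byte sample is not enough for correct encoding detection.
with Stream('data.csv', sample_size=0, bytes_sample_size=10) as stream:
    print(stream.encoding)  # likely detected as 'utf-8' or 'ascii'

# With the default bytes_sample_size the detector sees the non-ASCII characters.
with Stream('data.csv', sample_size=0) as stream:
    print(stream.encoding)  # expected 'iso8859-2'
```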

-### Ignore falsy headers
+### Ignore blank headers

-Some data tables could have falsy headers. For example it could be an empty strings in `csv` or `None` values for inline data. By default `tabulator` processes it as an ordinary header:
+Some data tables could have blank headers. For example, these could be empty strings in `csv` or `None` values in inline data. By default `tabulator` processes them as ordinary headers:

```
source = 'text://header1,,header3\nvalue1,value2,value3'
@@ -491,11 +491,11 @@ with Stream(source, format='csv', headers=1) as stream:
    stream.read(keyed=True) # {'header1': 'value1', '': 'value2', 'header3': 'value3'}
```

-But sometimes it's not a desired behavior. You could ignore columns with a falsy header completely using an `ignore_falsy_headers` flag:
+But sometimes that's not the desired behavior. You can ignore columns with a blank header completely using the `ignore_blank_headers` flag:

```
source = 'text://header1,,header3\nvalue1,value2,value3'
-with Stream(source, format='csv', headers=1, ignore_false_headers=True) as stream:
+with Stream(source, format='csv', headers=1, ignore_blank_headers=True) as stream:
    stream.headers # ['header1', 'header3']
    stream.read(keyed=True) # {'header1': 'value1', 'header3': 'value3'}
```
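
Given the row-filtering logic in `stream.py` (see the diff below), a non-keyed read should drop the same column from the data rows too. This extra example is an illustration, not part of the commit, and the expected output is an assumption:

```
source = 'text://header1,,header3\nvalue1,value2,value3'
with Stream(source, format='csv', headers=1, ignore_blank_headers=True) as stream:
    stream.read()  # expected: [['value1', 'value3']]
```
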
18 changes: 9 additions & 9 deletions tabulator/stream.py
@@ -27,7 +27,7 @@ def __init__(self,
allow_html=False,
sample_size=config.DEFAULT_SAMPLE_SIZE,
bytes_sample_size=config.DEFAULT_BYTES_SAMPLE_SIZE,
-ignore_falsy_headers=False,
+ignore_blank_headers=False,
force_strings=False,
force_parse=False,
skip_rows=[],
@@ -68,8 +68,8 @@ def __init__(self,
self.__allow_html = allow_html
self.__sample_size = sample_size
self.__bytes_sample_size = bytes_sample_size
-self.__ignore_falsy_headers = ignore_falsy_headers
-self.__falsy_header_indexes = []
+self.__ignore_blank_headers = ignore_blank_headers
+self.__blank_header_indexes = []
self.__force_strings = force_strings
self.__force_parse = force_parse
self.__post_parse = copy(post_parse)
@@ -308,11 +308,11 @@ def __extract_headers(self):
keyed_source = True
else:
self.__headers = row
-if self.__ignore_falsy_headers:
+if self.__ignore_blank_headers:
for index, header in list(enumerate(self.__headers)):
-if not header:
+if header in ['', None]:
del self.__headers[index]
-self.__falsy_header_indexes.append(index)
+self.__blank_header_indexes.append(index)
if not keyed_source:
del self.__sample_extended_rows[:self.__headers_row]
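
The check here changes from `if not header:` to `if header in ['', None]:`, which narrows "falsy" to "blank": headers such as `0` or `False` (possible with inline data sources) are falsy but are no longer dropped. A standalone illustration of the difference, not tabulator code:

```
headers = ['header1', '', 0, None, 'header5']

# Old behaviour: any falsy header counts as missing
dropped_old = [h for h in headers if not h]            # ['', 0, None]

# New behaviour: only genuinely blank headers count as missing
dropped_new = [h for h in headers if h in ['', None]]  # ['', None]
```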

@@ -343,9 +343,9 @@ def builtin_processor(extended_rows):
match = lambda comment: row[0].startswith(comment)
if list(filter(match, self.__skip_rows_by_comments)):
continue
-# Ignore falsy headers
-if self.__falsy_header_indexes:
-for index in self.__falsy_header_indexes:
+# Ignore blank headers
+if self.__blank_header_indexes:
+for index in self.__blank_header_indexes:
if index < len(row):
del row[index]
yield (row_number, headers, row)
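
Taken together, these changes record the positions of blank headers once, remove them from the header list, and strip the same positions from every data row in the built-in post-parse processor. The following is a simplified, self-contained sketch of that mechanism, not the library's actual implementation:

```
def drop_blank_columns(headers, rows):
    """Drop columns whose header is '' or None from the headers and from every row."""
    blank_indexes = [i for i, h in enumerate(headers) if h in ['', None]]
    kept_headers = [h for i, h in enumerate(headers) if i not in blank_indexes]
    kept_rows = [[v for i, v in enumerate(row) if i not in blank_indexes] for row in rows]
    return kept_headers, kept_rows

headers, rows = drop_blank_columns(
    ['header1', '', 'header3'],
    [['value1', 'value2', 'value3']],
)
print(headers)  # ['header1', 'header3']
print(rows)     # [['value1', 'value3']]
```
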
8 changes: 4 additions & 4 deletions tests/test_stream.py
@@ -202,9 +202,9 @@ def test_stream_bytes_sample_size():
assert stream.encoding == 'utf-8'


-# Ignore falsy headers
+# Ignore blank headers

-def test_stream_ignore_falsy_headers_false():
+def test_stream_ignore_blank_headers_false():
source = 'text://header1,,header3\nvalue1,value2,value3'
with Stream(source, format='csv', headers=1) as stream:
assert stream.headers == ['header1', '', 'header3']
@@ -213,9 +213,9 @@ def test_stream_ignore_falsy_headers_false():
]


-def test_stream_ignore_falsy_headers_true():
+def test_stream_ignore_blank_headers_true():
source = 'text://header1,,header3\nvalue1,value2,value3'
-with Stream(source, format='csv', headers=1, ignore_falsy_headers=True) as stream:
+with Stream(source, format='csv', headers=1, ignore_blank_headers=True) as stream:
assert stream.headers == ['header1', 'header3']
assert stream.read(keyed=True) == [
{'header1': 'value1', 'header3': 'value3'},
