Skip to content
This repository has been archived by the owner on Jul 11, 2023. It is now read-only.

Ability to skip last rows. #225

Merged
merged 4 commits into from
Dec 27, 2017
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ language:

python:
- 2.7
- 3.3
# - 3.3 # pytest does not support 3.3 any more
# - 3.4
- 3.5
- 3.6
Expand Down
53 changes: 32 additions & 21 deletions tabulator/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,10 +416,17 @@ def __detect_html(self):
raise exceptions.FormatError(message)

def __apply_processors(self, iterator):
# last row counter will be incremented in builtin_processor()
# and used in skip_negative_rows() to count rows from the end
last_row_number = 0
rows_to_skip_from_end = [n for n in self.__skip_rows_by_numbers if n < 0]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why create a list here if you're not using it?


# Builtin processor
def builtin_processor(extended_rows):
global last_row_number

for row_number, headers, row in extended_rows:
last_row_number = row_number

# Sync headers/row
if headers != self.__headers:
Expand Down Expand Up @@ -452,30 +459,34 @@ def skip_negative_rows(extended_rows):
Rows to skip are taken from Stream.__skip_rows_by_numbers
"""
rows_to_skip = [n for n in self.__skip_rows_by_numbers if n < 0]
if not rows_to_skip:
for row in extended_rows:
buffer_size = abs(min(rows_to_skip))
# collections.deque takes O(1) time to push/pop values at either end.
buffer = deque()

# use buffer to save last rows
for row in extended_rows:
buffer.append(row)
if len(buffer) > buffer_size:
yield buffer.popleft()

# Now squeeze out the buffer
global last_row_number
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why use the global? It's usually a sign of bad design.
My conclusion is that you should run the 'remove rows from end' step before the original 'remove_rows' processor - then this entire thing is not needed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh my stupid head :) that's so obvious )))

# with last_row_number, we could transform negative row numbers to positive
rows_to_skip_positive = [last_row_number + 1 + n for n in rows_to_skip]
for row in buffer:
if row[0] not in rows_to_skip_positive:
yield row
else:
buffer_size = abs(min(rows_to_skip)) + 1
# collections.deque takes O(1) time to push/pop values at either end.
buffer = deque()

# use buffer to save last rows
for row in extended_rows:
buffer.append(row)
if len(buffer) == buffer_size:
yield buffer.popleft()

# Now squeeze out the buffer
last_row_number = buffer[len(buffer)-1][0]
# with last_row_number, we could transform negative row numbers to positive
rows_to_skip_positive = [last_row_number + 1 + n for n in rows_to_skip]
for row in buffer:
if row[0] not in rows_to_skip_positive:
yield row

# form a processors list
processors = [builtin_processor]

if rows_to_skip_from_end:
processors.append(skip_negative_rows)

if self.__post_parse:
processors += self.__post_parse

# Apply processors to iterator
processors = [builtin_processor, skip_negative_rows] + self.__post_parse
for processor in processors:
iterator = processor(iterator)

Expand Down
4 changes: 4 additions & 0 deletions tests/test_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,10 @@ def test_stream_skip_rows_no_double_skip():
with Stream(source, skip_rows=[3, -2]) as stream:
assert stream.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]

# no double skip at the very last row
with Stream(source, skip_rows=[4, -1]) as stream:
assert stream.read() == [['id', 'name'], ['1', 'english'], ["# it's a comment!"]]


# Post parse

Expand Down
1 change: 0 additions & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package=tabulator
skip_missing_interpreters=true
envlist=
py27
py33
py34
py35
py36
Expand Down