This repository has been archived by the owner on Jul 11, 2023. It is now read-only.

Ability to skip last rows. #225

Merged 4 commits on Dec 27, 2017
10 changes: 5 additions & 5 deletions README.md
@@ -143,7 +143,7 @@ Create stream class instance.
- `ignore_blank_headers (bool)` - a flag to ignore any column having a blank header. See [ignore blank headers](https://github.com/frictionlessdata/tabulator-py#ignore-blank-headers) section.
- `force_strings (bool)` - if `True` all output will be converted to strings. See [force strings](https://github.com/frictionlessdata/tabulator-py#force-strings) section.
- `force_parse (bool)` - if `True` on row parsing error a stream will return an empty row instead of raising an exception. See [force parse](https://github.com/frictionlessdata/tabulator-py#force-parse) section.
- `skip_rows (int/str[])` - list of rows to skip by row number or row comment. Example: `skip_rows=[1, 2, '#', '//']` - rows 1 and 2 and all rows starting with `#` or `//` will be skipped. See the [skip rows](https://github.com/frictionlessdata/tabulator-py#skip-rows) section.
- `skip_rows (int/str[])` - list of rows to skip by row number or row comment. Example: `skip_rows=[1, 2, -1, -3, '#', '//']` - rows 1 and 2, the last and third-from-last rows, and all rows starting with `#` or `//` will be skipped. See the [skip rows](https://github.com/frictionlessdata/tabulator-py#skip-rows) section.
- `post_parse (generator[])` - post parse processors (hooks). Each processor has the signature `processor(extended_rows)` and should yield one extended row `(row_number, headers, row)` per yield instruction (a minimal sketch follows this list). See the [post parse](https://github.com/frictionlessdata/tabulator-py#post-parse) section.
- `custom_loaders (dict)` - loaders keyed by scheme. See the [custom loaders](https://github.com/frictionlessdata/tabulator-py#custom-loaders) section.
- `custom_parsers (dict)` - custom parsers keyed by format. See the [custom parsers](https://github.com/frictionlessdata/tabulator-py#custom-parsers) section.
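
As an aside, here is a minimal sketch of a post-parse processor matching the signature documented above; the header-uppercasing transformation is an illustrative assumption, not code from this repository:

```python
def uppercase_headers(extended_rows):
    # each extended row is a (row_number, headers, row) tuple
    for row_number, headers, row in extended_rows:
        if headers:
            headers = [header.upper() for header in headers]
        yield (row_number, headers, row)

# usage sketch: Stream(source, headers=1, post_parse=[uppercase_headers])
```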
@@ -570,14 +570,14 @@ with Stream([[1], 'bad', [3]]) as stream:
### Skip rows

It's very common for tabular data to contain rows you want to skip, such as blank or commented rows. The `Stream` constructor accepts a `skip_rows` argument to make this possible. Its value should be a list of integers and strings, where:
- an integer is a row number starting from 1
- an integer is a row number (1 is the first row, -1 is the last)
- a string gives the leading characters that mark a row as a comment

Let's skip the first, second, and '#'-commented rows:
Let's skip the first, second, last, and '#'-commented rows:

```diff
- source = [['John', 1], ['Alex', 2], ['#Sam', 3], ['Mike', 4]]
- with Stream(source, skip_rows=[1, 2, '#']) as stream:
+ source = [['John', 1], ['Alex', 2], ['#Sam', 3], ['Mike', 4], ['John', 5]]
+ with Stream(source, skip_rows=[1, 2, -1, '#']) as stream:
      stream.read()  # [['Mike', 4]]
```
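
The same argument works for file sources. A minimal sketch, assuming a hypothetical `data/report.csv` whose last line is a totals footer:

```python
from tabulator import Stream

# drop the trailing totals row (-1) and any '#'-commented lines
with Stream('data/report.csv', headers=1, skip_rows=[-1, '#']) as stream:
    rows = stream.read()
```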

32 changes: 31 additions & 1 deletion tabulator/stream.py
@@ -11,6 +11,7 @@
import warnings
from copy import copy
from itertools import chain
from collections import deque
from .loaders.stream import StreamLoader
from . import exceptions
from . import helpers
@@ -444,8 +445,37 @@ def builtin_processor(extended_rows):

        yield (row_number, headers, row)

def skip_negative_rows(extended_rows):
    """
    This processor skips rows counted from the end, e.g.
    -1: skip the last row, -2: skip the second-to-last row, etc.
    Rows to skip are taken from Stream.__skip_rows_by_numbers.
    """
    rows_to_skip = [n for n in self.__skip_rows_by_numbers if n < 0]
    if not rows_to_skip:
        for row in extended_rows:
            yield row
    else:
        buffer_size = abs(min(rows_to_skip)) + 1
**Member:** This is a bit confusing - the buffer size is actually abs(min(rows_to_skip)) (without the +1).

**Contributor Author:** Agree, will fix.

        # collections.deque takes O(1) time to push/pop values from either side
        buffer = deque()

        # use the buffer to hold back the last rows
        for row in extended_rows:
            buffer.append(row)
            if len(buffer) == buffer_size:
**Member:** `if len(buffer) > buffer_size`

**Contributor Author:** Agree.

                yield buffer.popleft()

        # Now squeeze out the buffer
        last_row_number = buffer[len(buffer)-1][0]
        # with last_row_number, we can transform negative row numbers to positive ones
        rows_to_skip_positive = [last_row_number + 1 + n for n in rows_to_skip]
**Member:** a bit convoluted...
why not something like:

    n = len(buffer)
    for i, row in enumerate(buffer):
        if i - n not in rows_to_skip:
            yield row

**Contributor Author:** I used `last_row_number` from `extended_row[0]` because the rows in `extended_rows` were numbered before any deletions, so I avoid double deletions when two arguments point at the same row (what you were worried about in the next comment).

        for row in buffer:
            if row[0] not in rows_to_skip_positive:
                yield row

# Apply processors to iterator
- processors = [builtin_processor] + self.__post_parse
+ processors = [builtin_processor, skip_negative_rows] + self.__post_parse
**Member (@roll, Dec 20, 2017):** I think we should add this processor only if there is an actual need to skip rows at the end of the file. So could you please use a simple condition here? It will save some CPU ticks, and fewer processors are easier to debug. In 99% of cases there are no negative skip_rows values.

**Contributor Author:** fixed

for processor in processors:
    iterator = processor(iterator)
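
For reference, a sketch of the conditional registration the reviewer asked for above, as it might appear inside the method; this is an assumption about the follow-up commit, not necessarily the merged code:

```python
processors = [builtin_processor]
if any(number < 0 for number in self.__skip_rows_by_numbers):
    # register the extra processor only when negative skips are requested
    processors.append(skip_negative_rows)
processors += self.__post_parse
```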

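The buffering technique is easy to demonstrate in isolation. A self-contained sketch of the same idea, using the reviewer's enumerate suggestion; the function name and signature are illustrative, not the library's API:

```python
from collections import deque

def drop_from_end(rows, negative_indexes):
    """Yield rows, skipping positions counted from the end (-1 = last)."""
    hold = abs(min(negative_indexes))  # how many trailing rows to hold back
    buffer = deque()
    for row in rows:
        buffer.append(row)
        if len(buffer) > hold:
            # the front of the buffer can no longer be one of the last `hold` rows
            yield buffer.popleft()
    # the buffer now holds the last `hold` rows (fewer if the input was short)
    n = len(buffer)
    for i, row in enumerate(buffer):
        if i - n not in negative_indexes:
            yield row

print(list(drop_from_end([1, 2, 3, 4, 5], [-1, -3])))  # prints [1, 2, 4]
```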
15 changes: 15 additions & 0 deletions tests/test_stream.py
@@ -302,6 +302,21 @@ def test_stream_skip_rows_with_headers():
    assert stream.read() == [['2', '中国人']]


def test_stream_skip_rows_from_the_end():
    source = 'data/special/skip-rows.csv'
    with Stream(source, skip_rows=[-2, 1]) as stream:
        assert stream.read() == [['1', 'english'], ['2', '中国人']]

    with Stream(source, skip_rows=[-1, -2]) as stream:
        assert stream.read() == [['id', 'name'], ['1', 'english']]


def test_stream_skip_rows_no_double_skip():
**Member:** Since skip rows runs before skip negative numbers, it's possible that 'skip_rows' will remove the last line, and then 'skip_negative_numbers' wouldn't know which line is the last line...

**Contributor Author:** Yes, you are right, it happens :( fixing...

    source = 'data/special/skip-rows.csv'
    with Stream(source, skip_rows=[3, -2]) as stream:
        assert stream.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]
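
A worked trace of why `skip_rows=[3, -2]` doesn't double-skip, assuming the fixture has four rows (consistent with the expected outputs above): the built-in processor drops row 3 first; `skip_negative_rows` then buffers the surviving rows, reads the last original row number (4) from the buffer, and maps `-2` to row `4 + 1 - 2 = 3`, which is already gone, so nothing further is removed.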


# Post parse

def test_stream_post_parse_headers():