From 523c5e73f1bb24f68cd661029ba02b41ce671f57 Mon Sep 17 00:00:00 2001 From: Andrew Shulgin Date: Wed, 27 Mar 2019 20:30:07 +0200 Subject: [PATCH 1/3] Added CR to CRLF conversion support for ASCII downloads --- pyftpdlib/handlers.py | 31 ++++++++++++++------ pyftpdlib/test/test_functional.py | 47 ++++++++++++++++++++++++++++--- 2 files changed, 65 insertions(+), 13 deletions(-) diff --git a/pyftpdlib/handlers.py b/pyftpdlib/handlers.py index dcb101b7..41596e34 100644 --- a/pyftpdlib/handlers.py +++ b/pyftpdlib/handlers.py @@ -55,6 +55,7 @@ from .log import logger CR_BYTE = ord('\r') +LF_BYTE = ord('\n') def _import_sendfile(): @@ -1031,7 +1032,7 @@ def __init__(self, file, type): self.file = file self.type = type self._prev_chunk_endswith_cr = False - if type == 'a' and os.linesep != '\r\n': + if type == 'a': self._data_wrapper = self._posix_ascii_data_wrapper else: self._data_wrapper = None @@ -1043,17 +1044,29 @@ def _posix_ascii_data_wrapper(self, chunk): """ chunk = bytearray(chunk) pos = 0 - if self._prev_chunk_endswith_cr and chunk.startswith(b'\n'): + if self._prev_chunk_endswith_cr: + if len(chunk) == 0 or chunk[pos] != LF_BYTE: + chunk.insert(pos, LF_BYTE) pos += 1 + self._prev_chunk_endswith_cr = False while True: - pos = chunk.find(b'\n', pos) - if pos == -1: - break - if chunk[pos - 1] != CR_BYTE: - chunk.insert(pos, CR_BYTE) + cr_pos = chunk.find(b'\r', pos) + lf_pos = chunk.find(b'\n', pos) + if cr_pos != -1 and (lf_pos == -1 or cr_pos < lf_pos): + if cr_pos == len(chunk) - 1: + self._prev_chunk_endswith_cr = True + break + if chunk[cr_pos + 1] != LF_BYTE: + chunk.insert(cr_pos + 1, LF_BYTE) + pos = cr_pos + 1 pos += 1 - pos += 1 - self._prev_chunk_endswith_cr = chunk.endswith(b'\r') + elif lf_pos != -1 and (cr_pos == -1 or lf_pos < cr_pos): + if chunk[lf_pos - 1] != CR_BYTE: + chunk.insert(lf_pos, CR_BYTE) + pos = lf_pos + 1 + pos += 1 + else: + break return chunk def more(self): diff --git a/pyftpdlib/test/test_functional.py 
b/pyftpdlib/test/test_functional.py index d50bc3cf..6b0e9513 100644 --- a/pyftpdlib/test/test_functional.py +++ b/pyftpdlib/test/test_functional.py @@ -1018,19 +1018,44 @@ def test_retr(self): "retr " + bogus, lambda x: x) def test_retr_ascii(self): - """Test RETR in ASCII mode.""" + """Test ASCII mode RETR for data without line endings.""" - data = (b'abcde12345' + b(os.linesep)) * 100000 + data = b'abcde12345' * 100000 self.file.write(data) self.file.close() self.retrieve_ascii("retr " + TESTFN, self.dummyfile.write) - expected = data.replace(b(os.linesep), b'\r\n') + self.dummyfile.seek(0) + datafile = self.dummyfile.read() + self.assertEqual(len(data), len(datafile)) + self.assertEqual(hash(data), hash(datafile)) + + def test_retr_ascii_cr(self): + """Test ASCII mode RETR for data with CR line endings.""" + + data = b'abcde12345\r' * 100000 + self.file.write(data) + self.file.close() + self.retrieve_ascii("retr " + TESTFN, self.dummyfile.write) + expected = data.replace(b'\r', b'\r\n') self.dummyfile.seek(0) datafile = self.dummyfile.read() self.assertEqual(len(expected), len(datafile)) self.assertEqual(hash(expected), hash(datafile)) - def test_retr_ascii_already_crlf(self): + def test_retr_ascii_lf(self): + """Test ASCII mode RETR for data with LF line endings.""" + + data = b'abcde12345\n' * 100000 + self.file.write(data) + self.file.close() + self.retrieve_ascii("retr " + TESTFN, self.dummyfile.write) + expected = data.replace(b'\n', b'\r\n') + self.dummyfile.seek(0) + datafile = self.dummyfile.read() + self.assertEqual(len(expected), len(datafile)) + self.assertEqual(hash(expected), hash(datafile)) + + def test_retr_ascii_crlf(self): """Test ASCII mode RETR for data with CRLF line endings.""" data = b'abcde12345\r\n' * 100000 @@ -1039,9 +1064,23 @@ def test_retr_ascii_already_crlf(self): self.retrieve_ascii("retr " + TESTFN, self.dummyfile.write) self.dummyfile.seek(0) datafile = self.dummyfile.read() + self.assertEqual(len(data), len(datafile)) 
self.assertEqual(hash(data), hash(datafile)) + def test_retr_ascii_mixed_eol(self): + """Test ASCII mode RETR for data with mixed line endings.""" + + data = b'abcde12345\r\nabcde12345\nabcde12345\r' * 100000 + self.file.write(data) + self.file.close() + self.retrieve_ascii("retr " + TESTFN, self.dummyfile.write) + expected = b'abcde12345\r\nabcde12345\r\nabcde12345\r\n' * 100000 + self.dummyfile.seek(0) + datafile = self.dummyfile.read() + self.assertEqual(len(expected), len(datafile)) + self.assertEqual(hash(expected), hash(datafile)) + @retry_on_failure() def test_restore_on_retr(self): data = b'abcde12345' * 1000000 From 73756ad45db3ef4ac706a53c52c0a4abb4a2e8b5 Mon Sep 17 00:00:00 2001 From: Andrew Shulgin Date: Wed, 27 Mar 2019 20:50:18 +0200 Subject: [PATCH 2/3] Code style: removed extra empty line --- pyftpdlib/test/test_functional.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyftpdlib/test/test_functional.py b/pyftpdlib/test/test_functional.py index 6b0e9513..e16546c8 100644 --- a/pyftpdlib/test/test_functional.py +++ b/pyftpdlib/test/test_functional.py @@ -1064,7 +1064,6 @@ def test_retr_ascii_crlf(self): self.retrieve_ascii("retr " + TESTFN, self.dummyfile.write) self.dummyfile.seek(0) datafile = self.dummyfile.read() - self.assertEqual(len(data), len(datafile)) self.assertEqual(hash(data), hash(datafile)) From e164a2cf56455fff490c68d587cd316886440c19 Mon Sep 17 00:00:00 2001 From: Andrew Shulgin Date: Wed, 27 Mar 2019 22:31:30 +0200 Subject: [PATCH 3/3] Updated HISTORY.rst and improved FileProducer._posix_ascii_data_wrapper docstring --- HISTORY.rst | 4 ++++ pyftpdlib/handlers.py | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/HISTORY.rst b/HISTORY.rst index f983874c..eef2747f 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,10 @@ Bug tracker at https://github.com/giampaolo/pyftpdlib/issues Version: 1.5.5 - 2019-03-27 =========================== +**Enhancements** + +- #494: Implemented CR to CRLF line ending conversion 
for ASCII downloads. + **Bug fixes** - #492: CRLF line endings are replaced with CRCRLF in ASCII mode downloads. diff --git a/pyftpdlib/handlers.py b/pyftpdlib/handlers.py index 41596e34..45da3861 100644 --- a/pyftpdlib/handlers.py +++ b/pyftpdlib/handlers.py @@ -1040,7 +1040,8 @@ def __init__(self, file, type): def _posix_ascii_data_wrapper(self, chunk): """The data wrapper used for sending data in ASCII mode on systems using a single line terminator, handling those cases - where CRLF ('\r\n') gets delivered in two chunks. + where CRLF ('\r\n') gets delivered in two chunks as well as + cases where a file has mixed line endings. """ chunk = bytearray(chunk) pos = 0