From 11089f1e021f2e074d04c33fc7ffc4b7b52e7045 Mon Sep 17 00:00:00 2001
From: Chris Down
Date: Sat, 29 Aug 2020 12:39:57 +0100
Subject: [PATCH] parsing: Accept input with no newline after timestamps on final sub

---
 srt.py            |  2 +-
 tests/test_srt.py | 16 +++++++++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/srt.py b/srt.py
index 794ea9a..f781f7f 100755
--- a/srt.py
+++ b/srt.py
@@ -30,7 +30,7 @@
 TS_REGEX = re.compile(RGX_TIMESTAMP_PARSEABLE)
 MULTI_WS_REGEX = re.compile(r"\n\n+")
 SRT_REGEX = re.compile(
-    r"\s*({idx})\s*{eof}({ts}) *-[ -] *> *({ts}) ?({proprietary}){eof}({content})"
+    r"\s*({idx})\s*{eof}({ts}) *-[ -] *> *({ts}) ?({proprietary})(?:{eof}|\Z)({content})"
     # Many sub editors don't add a blank line to the end, and many editors and
     # players accept that. We allow it to be missing in input.
     #
diff --git a/tests/test_srt.py b/tests/test_srt.py
index 4bba7b7..4b5a063 100644
--- a/tests/test_srt.py
+++ b/tests/test_srt.py
@@ -570,8 +570,22 @@ def test_repr_doesnt_crash(sub):
     assert str(sub.index) in repr(sub)
 
 
+@given(subtitles(), subtitles())
+def test_parser_accepts_final_no_newline_no_content(sub1, sub2):
+    # Limit size so we know how much to remove
+    sub2.content = ""
+    subs = [sub1, sub2]
+
+    # Remove the last newlines so that there are none. Cannot use rstrip since
+    # there might be other stuff that gets matched in proprietary
+    stripped_srt_blocks = srt.compose(subs, reindex=False)[:-2]
+
+    reparsed_subs = srt.parse(stripped_srt_blocks)
+    subs_eq(reparsed_subs, subs)
+
+
 @given(st.lists(subtitles()))
-def test_parser_accepts_no_newline_no_content(subs):
+def test_parser_accepts_newline_no_content(subs):
     for sub in subs:
         # Limit size so we know how many lines to remove
         sub.content = ""