Accept subtitle indexes with a fractional part (e.g. "123.4"), truncating
them to the integer part before the dot. The index pattern requires at least
one digit before the optional dot, so inputs such as "." or ".5" are never
matched as an index and cannot reach the int() fallback with an empty string.
Note: the blob hashes on the "index" line are those of the originally
submitted patch.

diff --git a/srt.py b/srt.py
index a1f7639..556e84e 100755
--- a/srt.py
+++ b/srt.py
@@ -22,7 +22,7 @@
 RGX_TIMESTAMP_PARSEABLE = r"^{}$".format(
     RGX_TIMESTAMP_MAGNITUDE_DELIM.join(["(" + RGX_TIMESTAMP_FIELD + ")"] * 4)
 )
-RGX_INDEX = r"-?[0-9]+"
+RGX_INDEX = r"-?[0-9]+\.?[0-9]*"
 RGX_PROPRIETARY = r"[^\r\n]*"
 RGX_CONTENT = r".*?"
 RGX_POSSIBLE_CRLF = r"\r?\n"
@@ -349,6 +349,13 @@ def parse(srt):
         # finditer and all match groups are mandatory in the regex.
         content = content.replace("\r\n", "\n")  # pytype: disable=attribute-error
 
+        try:
+            raw_index = int(raw_index)
+        except ValueError:
+            # Index 123.4. Handled separately, since it's a rare case and we
+            # don't want to affect general performance.
+            raw_index = int(raw_index.split(".")[0])
+
         yield Subtitle(
-            index=int(raw_index),
+            index=raw_index,
             start=srt_timestamp_to_timedelta(raw_start),
diff --git a/tests/test_srt.py b/tests/test_srt.py
index e14703b..6a76a42 100644
--- a/tests/test_srt.py
+++ b/tests/test_srt.py
@@ -588,6 +588,20 @@ def test_can_parse_index_trailing_ws(input_subs, whitespace):
     subs_eq(reparsed_subs, input_subs)
 
 
+@given(st.lists(subtitles()))
+def test_can_parse_index_with_dot(input_subs):
+    # Seen in Battlestar Galactica subs
+    out = ""
+
+    for sub in input_subs:
+        lines = sub.to_srt().split("\n")
+        lines[0] = lines[0] + "." + lines[0]
+        out += "\n".join(lines)
+
+    reparsed_subs = srt.parse(out)
+    subs_eq(reparsed_subs, input_subs)
+
+
 @given(st.lists(subtitles()), st.lists(st.just("0")))
 def test_can_parse_index_leading_zeroes(input_subs, zeroes):
     out = ""