Skip to content

Commit

Permalink
parsing: Support dots in index
Browse files Browse the repository at this point in the history
  • Loading branch information
cdown committed Jul 25, 2020
1 parent 97a4ee5 commit 554f76e
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 1 deletion.
9 changes: 8 additions & 1 deletion srt.py
Expand Up @@ -22,7 +22,7 @@
# Anchored pattern: four captured RGX_TIMESTAMP_FIELD groups separated by
# RGX_TIMESTAMP_MAGNITUDE_DELIM (both are defined outside this excerpt).
RGX_TIMESTAMP_PARSEABLE = r"^{}$".format(
RGX_TIMESTAMP_MAGNITUDE_DELIM.join(["(" + RGX_TIMESTAMP_FIELD + ")"] * 4)
)
# Earlier pattern shown by the diff: optional minus sign, then digits only.
RGX_INDEX = r"-?[0-9]+"
# Current pattern: dots are accepted too, so an index like "123.4" matches.
# NOTE(review): this also matches dot-only or dot-leading strings such as "."
# or ".5" -- confirm the code consuming the index copes with those.
RGX_INDEX = r"-?[0-9.]+"
# Any run (possibly empty) of characters other than CR and LF.
RGX_PROPRIETARY = r"[^\r\n]*"
# Non-greedy "anything"; whether "." spans newlines depends on the flags used
# where the pattern is compiled (not visible in this excerpt).
RGX_CONTENT = r".*?"
# A line ending: "\n", optionally preceded by "\r".
RGX_POSSIBLE_CRLF = r"\r?\n"
Expand Down Expand Up @@ -349,6 +349,13 @@ def parse(srt):
# finditer and all match groups are mandatory in the regex.
content = content.replace("\r\n", "\n") # pytype: disable=attribute-error

try:
raw_index = int(raw_index)
except ValueError:
# Index 123.4. Handled separately, since it's a rare case and we
# don't want to affect general performance.
raw_index = int(raw_index.split(".")[0])

yield Subtitle(
index=int(raw_index),
start=srt_timestamp_to_timedelta(raw_start),
Expand Down
14 changes: 14 additions & 0 deletions tests/test_srt.py
Expand Up @@ -588,6 +588,20 @@ def test_can_parse_index_trailing_ws(input_subs, whitespace):
subs_eq(reparsed_subs, input_subs)


@given(st.lists(subtitles()))
def test_can_parse_index_with_dot(input_subs):
    """Check that subtitles whose first line contains a dot still round-trip.

    The first line of each serialised subtitle (``X``) is rewritten as
    ``X.X`` before the blocks are concatenated and parsed again; the parsed
    result must compare equal to the original subtitles.
    """
    # Seen in Battlestar Galactica subs
    blocks = []

    for sub in input_subs:
        # Split off only the first line; the separator and remainder are
        # re-attached untouched.
        first_line, newline, remainder = sub.to_srt().partition("\n")
        dotted_first_line = first_line + "." + first_line
        blocks.append(dotted_first_line + newline + remainder)

    reparsed_subs = srt.parse("".join(blocks))
    subs_eq(reparsed_subs, input_subs)


@given(st.lists(subtitles()), st.lists(st.just("0")))
def test_can_parse_index_leading_zeroes(input_subs, zeroes):
out = ""
Expand Down

0 comments on commit 554f76e

Please sign in to comment.