Skip to content

Commit

Permalink
Support SRT data with leading whitespace
Browse files Browse the repository at this point in the history
Closes #50.
  • Loading branch information
cdown committed Jul 25, 2019
1 parent 92b2565 commit 0d7e5fd
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 2 deletions.
8 changes: 7 additions & 1 deletion srt.py
Expand Up @@ -24,7 +24,7 @@
RGX_POSSIBLE_CRLF = r"\r?\n"

SRT_REGEX = re.compile(
r"({idx})\s*{eof}({ts}) +-[ -]> +({ts}) ?({proprietary}){eof}({content})"
r"\s*({idx})\s*{eof}({ts}) +-[ -]> +({ts}) ?({proprietary}){eof}({content})"
# Many sub editors don't add a blank line to the end, and many editors and
# players accept that. We allow it to be missing in input.
#
Expand Down Expand Up @@ -374,6 +374,12 @@ def _raise_if_not_contiguous(srt, expected_start, actual_start):
"""
if expected_start != actual_start:
unmatched_content = srt[expected_start:actual_start]

if expected_start == 0 and unmatched_content.isspace():
# #50: Unlike whitespace in an intermediate subtitle, leading
# whitespace has nowhere to be captured, so it is tolerated here
return

raise SRTParseError(expected_start, actual_start, unmatched_content)


Expand Down
26 changes: 25 additions & 1 deletion tests/test_srt.py
Expand Up @@ -7,7 +7,7 @@
import string
from io import StringIO

from hypothesis import given, settings, HealthCheck
from hypothesis import given, settings, HealthCheck, assume
import hypothesis.strategies as st
from nose.tools import (
eq_ as eq,
Expand Down Expand Up @@ -215,6 +215,13 @@ def test_parsing_spaced_arrow(subs):
subs_eq(reparsed_subtitles, subs)


@given(st.text(string.whitespace), st.lists(subtitles()))
def test_parsing_leading_whitespace(ws, subs):
    # Arbitrary whitespace placed before the composed SRT data must not
    # change what the parser returns.
    composed = srt.compose(subs, reindex=False, strict=False)
    parsed_back = srt.parse(ws + composed)
    subs_eq(parsed_back, subs)


@given(st.lists(subtitles()))
def test_parsing_content_with_blank_lines(subs):
for subtitle in subs:
Expand Down Expand Up @@ -372,6 +379,23 @@ def test_parser_noncontiguous(subs, fake_idx, garbage, fake_timedelta):
list(srt.parse(composed))


@given(st.lists(subtitles()), st.text(min_size=1))
def test_parser_noncontiguous_leading(subs, garbage):
    # Two kinds of prefix are rejected as test input:
    #   * whitespace-only prefixes, which issue #50 permits (see
    #     test_parsing_leading_whitespace);
    #   * prefixes ending in a digit, because we could then confuse the
    #     garbage with the subtitle index that follows it.
    assume(not (garbage.isspace() or garbage[-1].isdigit()))

    # Garbage at the very beginning of the input should trigger the
    # noncontiguity checks.
    srt_with_prefix = garbage + srt.compose(subs)

    with assert_raises(srt.SRTParseError):
        list(srt.parse(srt_with_prefix))


@given(
st.lists(subtitles(), min_size=1),
st.integers(min_value=0),
Expand Down

0 comments on commit 0d7e5fd

Please sign in to comment.