Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'srt-tools/master' into develop
- Loading branch information
Showing
10 changed files
with
413 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
========= | ||
srt-tools | ||
========= | ||
|
||
srt-tools is a repo containing utilities written to process SRT files. All | ||
utilities use the Python srt_ library internally. | ||
|
||
.. _srt: https://github.com/cdown/srt | ||
|
||
Utilities | ||
--------- | ||
|
||
- *chinese-lines-only* removes subtitle lines that don't appear to be | ||
Chinese. Useful for turning joint English/Chinese subtitles into Chinese | ||
subtitles only. | ||
- *fix-subtitle-indexing* fixes subtitle indexing. Some badly formed SRT files | ||
will have indexes that occur in a different order than the starting | ||
timestamps for the subtitles they are associated with. This makes some media | ||
players unable to display those subtitles, and they are subsequently lost | ||
into the ether. | ||
- *linear-timeshift* does linear time correction. If you have a movie that | ||
runs slower or faster than the subtitle that you have, it will repeatedly | ||
lose sync. This tool can apply linear time corrections to all subtitles in | ||
the SRT, resyncing it with the video. | ||
- *mux-subs* can mux_ multiple subtitles together into one. For example, if you | ||
have a Chinese subtitle and an English subtitle, and you want to have one | ||
subtitle file that contains both, this tool can do that for you. It also | ||
supports clamping subtitles starting or ending at similar times to the same | ||
time to avoid subtitles jumping around the screen. | ||
- *strip-html* strips HTML formatting from subtitle content. This is especially | ||
prevalent in `SSA/ASS`_ subtitles that have been directly converted to SRT. | ||
|
||
.. _mux: https://en.wikipedia.org/wiki/Multiplexing | ||
.. _`SSA/ASS`: https://en.wikipedia.org/wiki/SubStation_Alpha |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#!/usr/bin/env python | ||
|
||
from hanzidentifier import has_chinese | ||
import srt | ||
import utils | ||
|
||
|
||
def strip_to_chinese_lines_only(subtitles):
    '''
    Yield every subtitle with its content reduced to the lines for which
    has_chinese() is true.

    Each subtitle object is modified in place before being yielded; the
    surviving lines are rejoined with a single newline.
    '''
    for sub in subtitles:
        kept_lines = [
            text_line
            for text_line in sub.content.splitlines()
            if has_chinese(text_line)
        ]
        sub.content = '\n'.join(kept_lines)
        yield sub
|
||
|
||
def main():
    '''
    Parse the command line, read SRT data from the input, keep only the
    Chinese lines and write the recomposed SRT to the output.
    '''
    options = utils.basic_parser().parse_args()
    parsed_subs = srt.parse(options.input.read())
    options.output.write(
        srt.compose(
            strip_to_chinese_lines_only(parsed_subs),
            strict=options.strict,
        )
    )


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#!/usr/bin/env python | ||
|
||
import utils | ||
import srt | ||
|
||
|
||
def main():
    '''
    Parse the input SRT and write it back out through srt.compose.

    The subtitles themselves are not touched here; any index fixing is left
    entirely to the parse/compose round trip.
    '''
    options = utils.basic_parser().parse_args()
    parsed_subs = srt.parse(options.input.read())
    options.output.write(srt.compose(parsed_subs, strict=options.strict))


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!/usr/bin/env python | ||
|
||
import srt | ||
import datetime | ||
import utils | ||
|
||
|
||
def parse_args():
    '''
    Extend the shared basic parser with a required --seconds float option
    and return the parsed command line arguments.
    '''
    seconds_option = {
        'type': float,
        'required': True,
        'help': 'how many seconds to shift',
    }
    cli = utils.basic_parser()
    cli.add_argument('--seconds', **seconds_option)
    return cli.parse_args()
|
||
|
||
def scalar_correct_subs(subtitles, seconds_to_shift):
    '''
    Yield each subtitle with both its start and end moved by
    seconds_to_shift seconds (negative values move them earlier).

    The subtitle objects are updated in place before being yielded.
    '''
    offset = datetime.timedelta(seconds=seconds_to_shift)
    for sub in subtitles:
        sub.start, sub.end = sub.start + offset, sub.end + offset
        yield sub
|
||
|
||
def main():
    '''
    Shift every subtitle read from the input by --seconds and write the
    recomposed SRT to the output.
    '''
    options = parse_args()
    parsed_subs = srt.parse(options.input.read())
    shifted_subs = scalar_correct_subs(parsed_subs, options.seconds)
    options.output.write(srt.compose(shifted_subs, strict=options.strict))


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
#!/usr/bin/env python | ||
|
||
import srt | ||
import datetime | ||
import utils | ||
|
||
|
||
def timedelta_to_milliseconds(delta):
    '''
    Convert a timedelta to a number of milliseconds by summing its days,
    seconds and microseconds components.
    '''
    from_days = delta.days * 86400000
    from_seconds = delta.seconds * 1000
    from_microseconds = delta.microseconds / 1000
    return from_days + from_seconds + from_microseconds
|
||
def parse_args():
    '''
    Parse the command line: the shared basic parser plus four required SRT
    timestamp options, each converted to milliseconds while parsing.
    '''
    def as_milliseconds(parser, raw_timestamp):
        # A timestamp that cannot be parsed is reported through the
        # parser's own error handling.
        try:
            parsed = srt.srt_timestamp_to_timedelta(raw_timestamp)
        except ValueError:
            parser.error('not a valid SRT timestamp: %s' % raw_timestamp)
        else:
            return timedelta_to_milliseconds(parsed)

    cli = utils.basic_parser()

    # (long flag, short alias, help text), in the order they are registered.
    timestamp_options = (
        ('--from-start', '--f1', 'the first desynchronised timestamp'),
        ('--to-start', '--t1', 'the first synchronised timestamp'),
        ('--from-end', '--f2', 'the second desynchronised timestamp'),
        ('--to-end', '--t2', 'the second synchronised timestamp'),
    )
    for long_flag, short_flag, description in timestamp_options:
        cli.add_argument(
            long_flag,
            short_flag,
            type=lambda arg: as_milliseconds(cli, arg),
            required=True,
            help=description,
        )

    return cli.parse_args()
|
||
|
||
def calc_correction(to_start, to_end, from_start, from_end):
    '''
    Work out the straight-line mapping ``good = angular * bad + linear``
    that sends from_start to to_start and from_end to to_end.

    Returns the tuple (angular, linear): the slope and the offset of that
    line.

    Raises ZeroDivisionError if from_start and from_end are equal, since
    two distinct reference points are needed to define the line.
    '''
    from_span = from_end - from_start
    if from_span == 0:
        # Same exception type as the bare division would raise, but with a
        # message that tells the user what to change.
        raise ZeroDivisionError(
            'from_start and from_end must differ to compute a correction'
        )

    angular = (to_end - to_start) / from_span
    linear = to_end - angular * from_end
    return angular, linear
|
||
|
||
def correct_time(current_msecs, angular, linear):
    '''
    Apply the linear correction to a millisecond value and round the
    result.
    '''
    corrected = current_msecs * angular + linear
    return round(corrected)
|
||
|
||
def correct_timedelta(bad_delta, angular, linear):
    '''
    Return a new timedelta holding bad_delta after the linear correction:
    converted to milliseconds, corrected, then turned back into a
    timedelta.
    '''
    return datetime.timedelta(
        milliseconds=correct_time(
            timedelta_to_milliseconds(bad_delta), angular, linear,
        ),
    )
|
||
|
||
def linear_correct_subs(subtitles, angular, linear):
    '''
    Yield each subtitle with its start and end replaced by their linearly
    corrected values. Subtitles are updated in place.
    '''
    for sub in subtitles:
        for field in ('start', 'end'):
            corrected = correct_timedelta(getattr(sub, field), angular, linear)
            setattr(sub, field, corrected)
        yield sub
|
||
|
||
def main():
    '''
    Derive the linear correction from the four reference timestamps given
    on the command line, apply it to every subtitle read from the input and
    write the recomposed SRT to the output.
    '''
    options = parse_args()
    angular, linear = calc_correction(
        options.to_start,
        options.to_end,
        options.from_start,
        options.from_end,
    )
    parsed_subs = srt.parse(options.input.read())
    options.output.write(
        srt.compose(
            linear_correct_subs(parsed_subs, angular, linear),
            strict=options.strict,
        )
    )


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
#!/usr/bin/env python | ||
|
||
import importlib | ||
import srt | ||
import utils | ||
|
||
|
||
def strip_to_matching_lines_only(subtitles, imports, func_str):
    '''
    Yield each subtitle with its content reduced to the lines for which the
    user supplied function returns a true value.

    subtitles is an iterable of objects with a ``content`` string; each is
    modified in place before being yielded. imports is a list of module
    names to make available to the function; for a dotted name such as
    ``os.path`` the top-level package (``os``) is made available as well,
    so the function can refer to it the usual way. func_str is a Python
    expression evaluating to a callable that takes one line.

    Because this is a generator, the modules are imported and func_str is
    evaluated when the first subtitle is requested, not at call time.
    '''
    # Evaluate in a copy of this module's globals so that the requested
    # modules are visible to the expression without being written into the
    # script's own namespace.
    namespace = dict(globals())
    for import_name in imports:
        namespace[import_name] = importlib.import_module(import_name)
        top_level_name = import_name.partition('.')[0]
        namespace[top_level_name] = importlib.import_module(top_level_name)

    # SECURITY: func_str is executed as arbitrary Python. That is the
    # purpose of this tool, but it must only ever come from a trusted
    # command line, never from subtitle data or other external input.
    func = eval(func_str, namespace)  # pylint: disable=eval-used

    for subtitle in subtitles:
        matching_lines = [
            line for line in subtitle.content.splitlines()
            if func(line)
        ]
        subtitle.content = '\n'.join(matching_lines)
        yield subtitle
|
||
|
||
def parse_args():
    '''
    Extend the shared basic parser with the required -f/--func option and
    the repeatable -m/--module option, then return the parsed arguments.
    '''
    func_option = {
        'required': True,
        'help': 'a function to use to match lines',
    }
    module_option = {
        'action': 'append',
        'default': [],
        'help': 'modules to import in the function context',
    }

    cli = utils.basic_parser()
    cli.add_argument('-f', '--func', **func_option)
    cli.add_argument('-m', '--module', **module_option)
    return cli.parse_args()
|
||
|
||
def main():
    '''
    Read SRT data from the input, keep only the lines accepted by the
    --func expression and write the recomposed SRT to the output.
    '''
    options = parse_args()
    parsed_subs = srt.parse(options.input.read())
    filtered_subs = strip_to_matching_lines_only(
        parsed_subs,
        options.module,
        options.func,
    )
    options.output.write(srt.compose(filtered_subs, strict=options.strict))


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
#!/usr/bin/env python | ||
|
||
import datetime | ||
import srt | ||
import utils | ||
import logging | ||
import operator | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
def parse_args():
    '''
    Build the multi-input parser with the --ms merge threshold (parsed
    into a timedelta) and the --width option, then return the parsed
    arguments.
    '''
    cli = utils.basic_parser(multi_input=True)

    ms_help = (
        'if subs being muxed are within this number of milliseconds '
        'of each other, they will get merged (default: 600)'
    )
    cli.add_argument(
        '--ms',
        metavar='MILLISECONDS',
        type=lambda ms: datetime.timedelta(milliseconds=int(ms)),
        default=datetime.timedelta(milliseconds=600),
        help=ms_help,
    )
    cli.add_argument(
        '--width',
        type=int,
        default=5,
        help='the number of subs to consider merging (default: %(default)s)',
    )
    return cli.parse_args()
|
||
|
||
def merge_subs(subs, acceptable_diff, attr, width):
    '''
    Snap nearly simultaneous start/end times onto the same value so that
    muxed subtitles do not jump around the screen.

    attr names the attribute to compare ('start' or 'end' in this script).
    Within each window of up to ``width`` subtitles sorted by that
    attribute, every later subtitle whose value is less than
    acceptable_diff after the first one's is given the first one's value.
    The subtitle objects are modified in place; nothing is returned.
    '''
    ordered = sorted(subs, key=operator.attrgetter(attr))

    # NOTE(review): utils.sliding_window is assumed to yield indexable,
    # sliceable windows over ``ordered`` - confirm against utils.
    for window in utils.sliding_window(ordered, width=width):
        anchor, followers = window[0], window[1:]
        anchor_time = getattr(anchor, attr)
        merge_limit = anchor_time + acceptable_diff

        for follower in followers:
            if not merge_limit > getattr(follower, attr):
                # The window is sorted, so once one subtitle is too far
                # away every later one is too.
                break

            log.debug(
                "Merging %d's %s time into %d",
                follower.index, attr, anchor.index,
            )
            setattr(follower, attr, anchor_time)
|
||
|
||
def main():
    '''
    Combine the subtitles from every input file, merge near-identical
    start times and then end times, and write the composed SRT to the
    output.
    '''
    options = parse_args()
    logging.basicConfig(level=options.log_level)

    muxed_subs = [
        sub
        for file_input in options.input
        for sub in srt.parse(file_input.read())
    ]

    # Starts are merged first, then ends, each over the full muxed list.
    for attr in ('start', 'end'):
        merge_subs(muxed_subs, options.ms, attr, options.width)

    options.output.write(srt.compose(muxed_subs, strict=options.strict))


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
srt | ||
hanzidentifier |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#!/usr/bin/env python | ||
|
||
import re | ||
import srt | ||
import utils | ||
import logging | ||
|
||
|
||
def strip_html_from_subs(subtitles):
    '''
    Yield each subtitle with anything that looks like an HTML tag removed
    from every line of its content.

    Subtitles are modified in place; lines are rejoined with a single
    newline.
    '''
    tag_pattern = re.compile('<[^<]+?>')
    for sub in subtitles:
        cleaned_lines = [
            tag_pattern.sub('', text_line)
            for text_line in sub.content.splitlines()
        ]
        sub.content = '\n'.join(cleaned_lines)
        yield sub
|
||
|
||
def main():
    '''
    Read SRT data from the input, strip HTML-like tags from every subtitle
    and write the recomposed SRT to the output.
    '''
    args = utils.basic_parser().parse_args()
    logging.basicConfig(level=args.log_level)
    subtitles_in = srt.parse(args.input.read())
    stripped_subs = strip_html_from_subs(subtitles_in)
    # Pass strict by keyword, as the other scripts do. Given positionally
    # it binds to compose()'s second parameter instead.
    # NOTE(review): in the srt library that parameter appears to be
    # ``reindex`` - confirm against the installed version.
    output = srt.compose(stripped_subs, strict=args.strict)
    args.output.write(output)


if __name__ == '__main__':
    main()
Oops, something went wrong.