import webvtt
# we can iterate over the captions
for caption in webvtt.read('captions.vtt'):
print(caption.start) # start timestamp in text format
print(caption.end) # end timestamp in text format
print(caption.text) # caption text
print(caption.voice) # voice span if present
# you can also iterate over the lines of a particular caption
vtt = webvtt.read('captions.vtt')
for line in vtt[0].lines:
print(line)
# caption text is returned clean without class tags
# we can access the raw text of a caption with raw_text
>>> vtt[0].text
'This is a caption text'
>>> vtt[0].raw_text
'This is a <c.colorE5E5E5>caption</c> text'
# caption identifiers
>>> vtt[0].identifier
'chapter 1'
import webvtt
import requests
from io import StringIO
payload = requests.get('http://subtitles.com/1234.vtt').text
buffer = StringIO(payload)
for caption in webvtt.from_buffer(buffer):
print(caption.start)
print(caption.end)
print(caption.text)
import webvtt
from io import BytesIO
with open('captions.vtt', 'rb') as f:
buffer = BytesIO(f.read())
for caption in webvtt.from_buffer(buffer):
print(caption.start)
print(caption.end)
print(caption.text)
import webvtt
from io import BytesIO
with open('captions.srt', 'rb') as f:
buffer = BytesIO(f.read())
# formats supported: vtt, srt, sbv
for caption in webvtt.from_buffer(buffer, format='srt'):
print(caption.start)
print(caption.end)
print(caption.text)
import webvtt
import textwrap
vtt = webvtt.from_string(textwrap.dedent("""
WEBVTT
00:00:00.500 --> 00:00:07.000
Caption #1
00:00:07.000 --> 00:00:11.890
Caption #2 line 1
Caption #2 line 2
00:00:11.890 --> 00:00:16.320
Caption #3
""").strip()
)
for caption in vtt:
print(caption.start)
print(caption.end)
print(caption.text)
import webvtt
vtt = webvtt.read('captions.vtt')
for caption in vtt.iter_slice(start='00:00:11.000',
end='00:00:27.000'
):
print(caption.start)
print(caption.end)
print(caption.text)
from webvtt import WebVTT, Caption
vtt = WebVTT()
# creating a caption with a list of lines
caption = Caption(
'00:00:00.500',
'00:00:07.000',
['Caption line 1', 'Caption line 2']
)
# an identifier can be assigned
caption.identifier = 'chapter 1'
# adding a caption
vtt.captions.append(caption)
# creating another caption with a text
caption = Caption(
'00:00:07.000',
'00:00:11.890',
'Caption line 1\nCaption line 2'
)
vtt.captions.append(caption)
import webvtt
vtt = webvtt.read('captions.vtt')
# update start timestamp
vtt[0].start = '00:00:01.250'
# update end timestamp
vtt[0].end = '00:00:03.890'
# update caption text
vtt[0].text = 'New caption text'
# delete a caption
del vtt.captions[2]
import webvtt
vtt = webvtt.read('captions.vtt')
# save to the same file
vtt.save()
# save to a different file
vtt.save('new_captions.vtt')
# you can save to a file path
vtt.save('other/folder/new_captions')
# if there is a filename present in the object we can target a folder
vtt.save('other/folder')
# write to an opened file
with open('other_captions.vtt', 'w') as f:
vtt.write(f)
WebVTT content can be retrieved without an output file:
import webvtt
vtt = webvtt.read('captions.vtt')
# print the content in WebVTT format
print(vtt.content)
You can read captions from the following formats:
- SubRip (.srt)
- YouTube SBV (.sbv)
import webvtt
# read captions from SRT format
vtt = webvtt.from_srt('captions.srt')
# save the captions in WebVTT format
vtt.save()
# the conversion can be done by chaining the method calls
webvtt.from_srt('captions.srt').save()
# the same for SBV format
vtt = webvtt.from_sbv('captions.sbv')
Convert WebVTT captions to other formats:
- SubRip (.srt)
import webvtt
# save in SRT format
vtt = webvtt.read('captions.vtt')
vtt.save_as_srt()
# write to an opened file in SRT format
with open('captions.srt', 'w') as f:
vtt.write(f, format='srt')
When the WebVTT file has a BOM, saving it will keep the BOM:
import webvtt
vtt = webvtt.read('captions_with_bom.vtt')
# saved file keeps the BOM
vtt.save()
Add a BOM to a file without it:
import webvtt
vtt = webvtt.read('captions_without_bom.vtt',
add_bom=True
)
# saved file has BOM
vtt.save()
Remove the BOM from a file:
import webvtt
vtt = webvtt.read('captions_with_bom.vtt')
# saved file does not have BOM
vtt.save(add_bom=False)
Save file with a different encoding:
import webvtt
vtt = webvtt.read('captions.vtt')
vtt.save(encoding='utf-32-le')
# save in different encoding with BOM
vtt.save(encoding='utf-32-le',
add_bom=True
)
import webvtt
vtt = webvtt.read('captions.vtt')
for style in vtt.styles:
print(style.text)
# retrieve list of lines
print(style.lines)
Adding styles:
import webvtt
vtt = webvtt.read('captions.vtt')
vtt.styles.append(
webvtt.Style('::cue(b) {\n color: peachpuff;\n}')
)
# list of lines is supported
vtt.styles.append(
webvtt.Style(['::cue(b) {',
' color: peachpuff;',
'}'
])
)
Updating styles:
import webvtt
vtt = webvtt.read('captions.vtt')
vtt.styles[0].lines[1] = ' color: papayawhip;'
Comments can be added or retrieved from different items:
import webvtt
vtt = webvtt.read('captions.vtt')
# comments from the top of the file
print(vtt.header_comments)
# comments from the bottom of the file
print(vtt.footer_comments)
# comments in a style
print(vtt.styles[0].comments)
# comments in a caption
print(vtt.captions[0].comments)
# comments are just a list of strings
vtt.captions[5].comments.append('caption for review')