Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100755 273 lines (242 sloc) 8.866 kB
7aaf9f6 @yanncoupin First release !
yanncoupin authored
1 #!/usr/bin/env python
2 # -*- encoding=utf8 -*-
3 #
4 # stl2srt A program to convert EBU STL subtitle files to the more common SRT format
5 #
6 # Copyright (C) 2011 Yann Coupin
7 #
8 # This program is free software; you can redistribute it and/or
9 # modify it under the terms of the GNU General Public License
10 # as published by the Free Software Foundation; either version 2
11 # of the License, or (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 #
22
23 import struct
24 import codecs
25 import logging
26 import unicodedata
27
class SRT:
    '''A class that behaves like a file object and writes an SRT file.

    pathOrFile is either a path, which is opened for binary writing, or an
    already opened binary file-like object (anything with a write() method).
    A UTF-8 byte order mark is written immediately and every subtitle is
    written UTF-8 encoded.
    '''

    def __init__(self, pathOrFile):
        # Duck-type rather than isinstance(pathOrFile, file): the `file`
        # builtin only exists on Python 2 and rejects in-memory buffers.
        if hasattr(pathOrFile, 'write'):
            self.file = pathOrFile
            self._ownsFile = False
        else:
            self.file = open(pathOrFile, 'wb')
            self._ownsFile = True
        # SRT entries are numbered from 1
        self.counter = 1
        self.file.write(codecs.BOM_UTF8)

    def __enter__(self):
        return self

    def __exit__(self, excType, excValue, traceback):
        self.close()
        return False

    def close(self):
        '''Close the output file if this object opened it, otherwise flush it.'''
        if self._ownsFile:
            self.file.close()
        elif hasattr(self.file, 'flush'):
            self.file.flush()

    def _formatTime(self, timestamp):
        '''Format a number of seconds as an SRT timestamp "HH:MM:SS,mmm".

        The value is rounded to the nearest millisecond before being split
        into fields; truncating each field separately turns 1.001 into
        "00:00:01,000" because of floating point representation.
        Negative values are clamped to zero.
        '''
        totalMs = max(0, int(round(timestamp * 1000)))
        seconds, millis = divmod(totalMs, 1000)
        minutes, seconds = divmod(seconds, 60)
        hours, minutes = divmod(minutes, 60)
        return "%02d:%02d:%02d,%03d" % (hours, minutes, seconds, millis)

    def write(self, start, end, text):
        '''Append one subtitle to the file.

        start and end are times in seconds, text is the (unicode) subtitle
        text. Empty lines are removed from text because a blank line
        terminates an entry in the output format written here.
        '''
        lines = [line for line in text.split("\n") if line]
        entry = u"%d\n%s --> %s\n%s\n\n" % (
            self.counter,
            self._formatTime(start),
            self._formatTime(end),
            u"\n".join(lines),
        )
        # Encode the whole entry once: the underlying file is binary
        self.file.write(entry.encode('utf8'))
        self.counter += 1
50
class iso6937(codecs.Codec):
    '''A class to implement the somewhat exotic iso-6937 encoding which STL files often use.

    Only decoding is implemented. Bytes fall in one of these groups:
    - `identical`: bytes whose code point equals the byte value
    - `direct_mapping`: bytes that map to one specific code point
    - `diacritic`: non-spacing accents that modify the byte that follows
    Any other byte is silently dropped.
    '''

    # NOTE(review): the upper bound excludes 0x7e, as in the original code;
    # confirm against the character table whether it should be decoded too.
    identical = set(range(0x20, 0x7e))
    identical |= set((0xa0, 0xa1, 0xa2, 0xa3, 0xa5, 0xa7, 0xab, 0xb0, 0xb1, 0xb2, 0xb3, 0xb5, 0xb6, 0xb7, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf))
    direct_mapping = {
        0x8a: 0x000a, # line break

        0xa8: 0x00a4, # ¤
        0xa9: 0x2018, # ‘
        0xaa: 0x201C, # “
        0xab: 0x00AB, # «
        0xac: 0x2190, # ←
        0xad: 0x2191, # ↑
        0xae: 0x2192, # →
        0xaf: 0x2193, # ↓

        0xb4: 0x00D7, # ×
        0xb8: 0x00F7, # ÷
        0xb9: 0x2019, # ’
        0xba: 0x201D, # ”
        0xbc: 0x00BC, # ¼
        0xbd: 0x00BD, # ½
        0xbe: 0x00BE, # ¾
        0xbf: 0x00BF, # ¿

        0xd0: 0x2015, # ―
        0xd1: 0x00B9, # ¹
        0xd2: 0x00AE, # ®
        0xd3: 0x00A9, # ©
        0xd4: 0x2122, # ™
        0xd5: 0x266A, # ♪
        0xd6: 0x00AC, # ¬
        0xd7: 0x00A6, # ¦
        0xdc: 0x215B, # ⅛
        0xdd: 0x215C, # ⅜
        0xde: 0x215D, # ⅝
        0xdf: 0x215E, # ⅞

        # Each entry of the 0xe0 and 0xf0 rows needs its own key: repeating
        # the same key only keeps the last value of the row.
        0xe0: 0x2126, # Ohm Ω
        0xe1: 0x00C6, # Æ
        0xe2: 0x0110, # Đ
        0xe3: 0x00AA, # ª
        0xe4: 0x0126, # Ħ
        # 0xe5 is not assigned
        0xe6: 0x0132, # Ĳ
        0xe7: 0x013F, # Ŀ
        0xe8: 0x0141, # Ł
        0xe9: 0x00D8, # Ø
        0xea: 0x0152, # Œ
        0xeb: 0x00BA, # º
        0xec: 0x00DE, # Þ
        0xed: 0x0166, # Ŧ
        0xee: 0x014A, # Ŋ
        0xef: 0x0149, # ŉ

        0xf0: 0x0138, # ĸ
        0xf1: 0x00E6, # æ
        0xf2: 0x0111, # đ
        0xf3: 0x00F0, # ð
        0xf4: 0x0127, # ħ
        0xf5: 0x0131, # ı
        0xf6: 0x0133, # ĳ
        0xf7: 0x0140, # ŀ
        0xf8: 0x0142, # ł
        0xf9: 0x00F8, # ø
        0xfa: 0x0153, # œ
        0xfb: 0x00DF, # ß
        0xfc: 0x00FE, # þ
        0xfd: 0x0167, # ŧ
        0xfe: 0x014B, # ŋ
        0xff: 0x00AD, # Soft hyphen
    }
    diacritic = {
        0xc1: 0x0300, # grave accent
        0xc2: 0x0301, # acute accent
        0xc3: 0x0302, # circumflex
        0xc4: 0x0303, # tilde
        0xc5: 0x0304, # macron
        0xc6: 0x0306, # breve
        0xc7: 0x0307, # dot
        0xc8: 0x0308, # umlaut
        0xca: 0x030A, # ring
        0xcb: 0x0327, # cedilla
        0xcd: 0x030B, # double acute accent
        0xce: 0x0328, # ogonek
        0xcf: 0x030C, # caron
    }

    def decode(self, input, errors='strict'):
        '''Decode a byte string and return a (text, consumed length) tuple.

        Decoding stops at the first 0x8f byte (end of a subtitle text) but
        the whole input is always reported as consumed. `errors` is
        accepted for compatibility with the codec protocol and is ignored.
        '''
        try:
            toChar = unichr  # Python 2
        except NameError:
            toChar = chr  # Python 3
        output = []
        # Combining code point of a pending diacritic, None when there is none
        state = None
        # bytearray yields integers on both Python 2 and Python 3
        for char in bytearray(input):
            # End of a subtitle text
            if char == 0x8f:
                break
            if state is not None:
                # The accent byte comes first in the input, but Unicode
                # wants the base character followed by the combining mark.
                combined = unicodedata.normalize('NFC', toChar(char) + toChar(state))
                # Pairs that have no precomposed form are dropped
                if len(combined) == 1:
                    output.append(combined)
                state = None
            elif char in self.identical:
                output.append(toChar(char))
            elif char in self.direct_mapping:
                output.append(toChar(self.direct_mapping[char]))
            elif char in self.diacritic:
                state = self.diacritic[char]
        return (u''.join(output), len(input))

    def search(self, name):
        '''Codec search function, return a CodecInfo for the names of this codec or None.'''
        # 'iso_6937_2' is what newer Python versions look up, since they
        # convert hyphens to underscores before calling search functions.
        if name in ('iso6937', 'iso_6937-2', 'iso_6937_2'):
            return codecs.CodecInfo(self.encode, self.decode, name='iso_6937-2')
        return None

    def encode(self, input, errors='strict'):
        '''Encoding is not implemented.

        Raising is clearer than returning None, which the codec machinery
        rejects with an unrelated error.
        '''
        raise NotImplementedError('iso6937 encoding is not implemented')


codecs.register(iso6937().search)
171
class STL:
    '''A class that behaves like a file object and reads an STL file.

    The 1024 byte GSI header is parsed on construction. Iterating over the
    object then yields one (start, end, text) tuple per subtitle, with the
    times in seconds relative to `startTime`.
    '''

    GSIfields = 'CPN DFC DSC CCT LC OPT OET TPT TET TN TCD SLR CD RD RN TNB TNS TNG MNC MNR TCS TCP TCF TND DSN CO PUB EN ECD UDA'.split(' ')
    TTIfields = 'SGN SN EBN CS TCIh TCIm TCIs TCIf TCOh TCOm TCOs TCOf VP JC CF TF'.split(' ')

    def __init__(self, pathOrFile, offset=0):
        '''Open an STL file and read its header.

        pathOrFile is a path or an already opened binary file-like object
        (anything with a read() method). offset is a start offset in
        milliseconds that is added to every subtitle time.
        '''
        self.offset = float(offset) / 1000
        # Duck-type rather than isinstance(pathOrFile, file): the `file`
        # builtin only exists on Python 2 and rejects in-memory buffers.
        if hasattr(pathOrFile, 'read'):
            self.file = pathOrFile
            self._ownsFile = False
        else:
            self.file = open(pathOrFile, 'rb')
            self._ownsFile = True
        self._readGSI()

    def close(self):
        '''Close the input file if this object opened it.'''
        if self._ownsFile:
            self.file.close()

    def _readGSI(self):
        '''Read the GSI header and derive fps, codePage, numberOfTTI and startTime.

        Raises struct.error when fewer than 1024 bytes are available,
        ValueError for an unknown DFC and KeyError for an unknown CCT.
        '''
        self.GSI = dict(zip(
            self.GSIfields,
            struct.unpack('3s8sc2s2s32s32s32s32s32s32s16s6s6s2s5s5s3s2s2s1s8s8s1s1s3s32s32s32s75x576s', self.file.read(1024))
        ))
        GSI = self.GSI
        logging.debug(GSI)
        # The header fields are byte strings, so compare against bytes
        # literals: str literals never match on Python 3.
        if GSI['DFC'] == b'STL25.01':
            self.fps = 25
        elif GSI['DFC'] == b'STL30.01':
            self.fps = 30
        else:
            # The field checked here is DFC, not CPN
            raise ValueError('Invalid DFC: %r' % (GSI['DFC'],))
        self.codePage = {
            b'00': 'iso_6937-2',
            b'01': 'iso-8859-5',
            b'02': 'iso-8859-6',
            b'03': 'iso-8859-7',
            b'04': 'iso-8859-8',
        }[GSI['CCT']]
        self.numberOfTTI = int(GSI['TNB'])
        # TCF holds 8 digits: hours, minutes, seconds and frames
        tcf = GSI['TCF']
        self.startTime = self.__timecodeDecode(
            int(tcf[0:2]),
            int(tcf[2:4]),
            int(tcf[4:6]),
            int(tcf[6:8])
        ) - self.offset
        logging.debug(self.__dict__)

    def __timecodeDecode(self, h, m, s, f):
        '''Convert hours, minutes, seconds and frames to a number of seconds.'''
        return 3600 * h + 60 * m + s + float(f) / self.fps

    def _readTTI(self):
        '''Read 128 byte TTI blocks until a subtitle is complete and return it.

        Returns a (start, end, text) tuple. Comment blocks, empty subtitles
        and subtitles that start before `startTime` are skipped. Raises
        StopIteration at the end of the file.
        '''
        while True:
            tci = None
            tco = None
            txt = []

            while True:
                data = self.file.read(128)
                if len(data) < 128:
                    if data:
                        # A partial trailing block cannot be unpacked
                        logging.warning('Ignoring %d trailing bytes after the last TTI block', len(data))
                    raise StopIteration()
                TTI = dict(zip(
                    self.TTIfields,
                    struct.unpack('<BHBBBBBBBBBBBBB112s', data)
                ))
                logging.debug(TTI)
                # if comment skip
                if TTI['CF']:
                    continue
                # Compare with None: an in-cue of exactly 0.0 is a valid
                # value and must not be replaced by a later extension block.
                if tci is None:
                    tci = self.__timecodeDecode(TTI['TCIh'], TTI['TCIm'], TTI['TCIs'], TTI['TCIf']) - self.startTime
                    tco = self.__timecodeDecode(TTI['TCOh'], TTI['TCOm'], TTI['TCOs'], TTI['TCOf']) - self.startTime
                # 0x8f ends the text; cut there so that the filler bytes are
                # not decoded as characters by the ISO-8859 code pages.
                raw = TTI['TF'].split(b'\x8f', 1)[0]
                if self.codePage != 'iso_6937-2':
                    # 0x8a is the line break code; the iso_6937-2 codec maps
                    # it itself, the standard codecs need it translated.
                    raw = raw.replace(b'\x8a', b'\n')
                txt.append(raw.decode(self.codePage).strip())
                if TTI['EBN'] == 255:
                    text = ''.join(txt)
                    # skip empty subtitles and those before the start of the show
                    if text and tci >= 0:
                        return (tci, tco, text)
                    break

    def __iter__(self):
        return self

    def next(self):
        '''Return the next subtitle, see _readTTI.'''
        return self._readTTI()

    # Python 3 spelling of the iterator protocol
    __next__ = next
253
# Command line entry point: stl2srt [options] input output
if __name__ == '__main__':
    from optparse import OptionParser
    import sys

    parser = OptionParser(usage = 'usage: %prog [options] input output')
    parser.add_option('-d', '--debug', dest='debug_level', action='store_const', const=logging.DEBUG, default=logging.ERROR, help='log debug information')
    # type='int' makes the parser reject a non numeric offset with a usage
    # error instead of a traceback from int() later on
    parser.add_option('-o', '--offset', dest='offset', type='int', help='start offset in milliseconds', default=0)
    (options, args) = parser.parse_args()
    if len(args) != 2:
        parser.print_help()
        sys.exit(1)

    logging.basicConfig(level=options.debug_level)

    stl = STL(args[0], options.offset)
    try:
        c = SRT(args[1])
        try:
            for (tci, tco, txt) in stl:
                c.write(tci, tco, txt)
        finally:
            # Close explicitly so the output is flushed even on error
            c.file.close()
    finally:
        stl.file.close()
Something went wrong with that request. Please try again.