From 4e3e6726ea74ba7ad44e4d552c9c853ea31da97a Mon Sep 17 00:00:00 2001 From: Jake Holland Date: Fri, 22 May 2015 17:34:01 -0700 Subject: [PATCH 1/3] Fix for https://github.com/globocom/m3u8/issues/49: Added optional strict=True parameter to avoid silently accepting things like html by raising ParseError. (Defaults to false for backward compatibility.) --- m3u8/__init__.py | 4 ++-- m3u8/model.py | 4 ++-- m3u8/parser.py | 20 +++++++++++++++++++- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/m3u8/__init__.py b/m3u8/__init__.py index 68328327..80a5da00 100644 --- a/m3u8/__init__.py +++ b/m3u8/__init__.py @@ -20,10 +20,10 @@ from m3u8.model import M3U8, Playlist, IFramePlaylist, Media, Segment -from m3u8.parser import parse, is_url +from m3u8.parser import parse, is_url, ParseError __all__ = ('M3U8', 'Playlist', 'IFramePlaylist', 'Media', - 'Segment', 'loads', 'load', 'parse') + 'Segment', 'loads', 'load', 'parse', 'ParseError') def loads(content): ''' diff --git a/m3u8/model.py b/m3u8/model.py index b820fad5..3a677caf 100644 --- a/m3u8/model.py +++ b/m3u8/model.py @@ -120,9 +120,9 @@ class M3U8(object): ('playlist_type', 'playlist_type') ) - def __init__(self, content=None, base_path=None, base_uri=None): + def __init__(self, content=None, base_path=None, base_uri=None, strict=False): if content is not None: - self.data = parser.parse(content) + self.data = parser.parse(content, strict) else: self.data = {} self._base_uri = base_uri diff --git a/m3u8/parser.py b/m3u8/parser.py index c16844e5..0a04d962 100644 --- a/m3u8/parser.py +++ b/m3u8/parser.py @@ -8,6 +8,7 @@ import itertools import re from m3u8 import protocol +import exceptions ''' http://tools.ietf.org/html/draft-pantos-http-live-streaming-08#section-3.2 @@ -21,7 +22,15 @@ def cast_date_time(value): def format_date_time(value): return value.isoformat() -def parse(content): +class ParseError(exceptions.Exception): + def __init__(self, lineno, line): + self.lineno = lineno + self.line = line + 
def __str__(self): + return 'Syntax error in manifest on line %d: %s' % (self.lineno, self.line) + +def parse(content, strict=False): ''' Given a M3U8 playlist content returns a dictionary with all data found ''' @@ -42,7 +51,9 @@ def parse(content): 'expect_playlist': False, } + lineno = 0 for line in string_to_lines(content): + lineno += 1 line = line.strip() if line.startswith(protocol.ext_x_byterange): @@ -100,6 +111,13 @@ def parse(content): elif line.startswith(protocol.ext_x_endlist): data['is_endlist'] = True + elif line.startswith('#'): + # comment + pass + + elif strict: + raise ParseError(lineno, line) + return data def _parse_key(line): From 56a029bbabd644d856e484c24f75cd3c9cd41176 Mon Sep 17 00:00:00 2001 From: Jake Holland Date: Thu, 16 Jul 2015 13:30:53 -0700 Subject: [PATCH 2/3] fixed parsing with EXT-X-BYTERANGE before EXTINF --- m3u8/parser.py | 28 +++++++++++++++++++--------- tests/playlists.py | 19 +++++++++++++++++++ tests/test_model.py | 8 ++++++++ 3 files changed, 46 insertions(+), 9 deletions(-) diff --git a/m3u8/parser.py b/m3u8/parser.py index 0a04d962..9b54602d 100644 --- a/m3u8/parser.py +++ b/m3u8/parser.py @@ -60,27 +60,24 @@ def parse(content, strict=False): _parse_byterange(line, state) state['expect_segment'] = True - elif state['expect_segment']: - _parse_ts_chunk(line, data, state) - state['expect_segment'] = False - - elif state['expect_playlist']: - _parse_variant_playlist(line, data, state) - state['expect_playlist'] = False - elif line.startswith(protocol.ext_x_targetduration): _parse_simple_parameter(line, data, float) + elif line.startswith(protocol.ext_x_media_sequence): _parse_simple_parameter(line, data, int) + elif line.startswith(protocol.ext_x_program_date_time): _, program_date_time = _parse_simple_parameter_raw_value(line, cast_date_time) if not data.get('program_date_time'): data['program_date_time'] = program_date_time state['current_program_date_time'] = program_date_time + elif 
line.startswith(protocol.ext_x_discontinuity): state['discontinuity'] = True + elif line.startswith(protocol.ext_x_version): _parse_simple_parameter(line, data) + elif line.startswith(protocol.ext_x_allow_cache): _parse_simple_parameter(line, data) @@ -115,6 +112,14 @@ def parse(content, strict=False): # comment pass + elif state['expect_segment']: + _parse_ts_chunk(line, data, state) + state['expect_segment'] = False + + elif state['expect_playlist']: + _parse_variant_playlist(line, data, state) + state['expect_playlist'] = False + elif strict: raise ParseError(lineno, line) @@ -130,7 +135,10 @@ def _parse_key(line): def _parse_extinf(line, data, state): duration, title = line.replace(protocol.extinf + ':', '').split(',') - state['segment'] = {'duration': float(duration), 'title': remove_quotes(title)} + if 'segment' not in state: + state['segment'] = {} + state['segment']['duration'] = float(duration) + state['segment']['title'] = remove_quotes(title) def _parse_ts_chunk(line, data, state): segment = state.pop('segment') @@ -188,6 +196,8 @@ def _parse_variant_playlist(line, data, state): data['playlists'].append(playlist) def _parse_byterange(line, state): + if 'segment' not in state: + state['segment'] = {} state['segment']['byterange'] = line.replace(protocol.ext_x_byterange + ':', '') def _parse_simple_parameter_raw_value(line, cast_to=str, normalize=False): diff --git a/tests/playlists.py b/tests/playlists.py index c0966861..a5b88567 100644 --- a/tests/playlists.py +++ b/tests/playlists.py @@ -150,6 +150,25 @@ #EXT-X-ENDLIST ''' +# reversing byterange and extinf from IFRAME. 
+IFRAME_PLAYLIST2 = ''' +#EXTM3U +#EXT-X-VERSION:4 +#EXT-X-TARGETDURATION:10 +#EXT-X-PLAYLIST-TYPE:VOD +#EXT-X-I-FRAMES-ONLY +#EXT-X-BYTERANGE:9400@376 +#EXTINF:4.12, +segment1.ts +#EXT-X-BYTERANGE:7144@47000 +#EXTINF:3.56, +segment1.ts +#EXT-X-BYTERANGE:10340@1880 +#EXTINF:3.82, +segment2.ts +#EXT-X-ENDLIST +''' + PLAYLIST_USING_BYTERANGES = ''' #EXTM3U #EXT-X-VERSION:4 diff --git a/tests/test_model.py b/tests/test_model.py index 99344019..b841d7a2 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -383,6 +383,14 @@ def test_dump_should_work_for_iframe_playlists(): assert expected == obj.dumps().strip() + obj = m3u8.M3U8(playlists.IFRAME_PLAYLIST2) + + expected = playlists.IFRAME_PLAYLIST.strip() + + # expected that dump will reverse EXTINF and EXT-X-BYTERANGE, + # hence IFRAME_PLAYLIST dump from IFRAME_PLAYLIST2 parse. + assert expected == obj.dumps().strip() + def test_dump_should_include_program_date_time(): obj = m3u8.M3U8(playlists.SIMPLE_PLAYLIST_WITH_PROGRAM_DATE_TIME) From 0408f48701b459208e8c99e998daf984e7bf82a1 Mon Sep 17 00:00:00 2001 From: Jake Holland Date: Thu, 16 Jul 2015 15:27:07 -0700 Subject: [PATCH 3/3] fix to accept blank lines in strict mode --- m3u8/parser.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/m3u8/parser.py b/m3u8/parser.py index 9b54602d..80a99fe2 100644 --- a/m3u8/parser.py +++ b/m3u8/parser.py @@ -112,6 +112,10 @@ def parse(content, strict=False): # comment pass + elif line.strip() == '': + # blank lines are legal + pass + elif state['expect_segment']: _parse_ts_chunk(line, data, state) state['expect_segment'] = False