forked from ytdl-org/youtube-dl
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[GlobalPlayer] Add site extractors back-ported from yt-dlp
* from yt-dlp/yt-dlp#6903, thanks garret1317
- Loading branch information
Showing 2 changed files with 296 additions and 4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,285 @@ | ||
# coding: utf-8 | ||
from __future__ import unicode_literals | ||
|
||
from .common import InfoExtractor | ||
from ..utils import ( | ||
clean_html, | ||
join_nonempty, | ||
merge_dicts, | ||
parse_duration, | ||
str_or_none, | ||
T, | ||
traverse_obj, | ||
unified_strdate, | ||
unified_timestamp, | ||
urlhandle_detect_ext, | ||
) | ||
|
||
|
||
class GlobalPlayerBaseIE(InfoExtractor):
    """Shared helpers for the globalplayer.com extractors.

    Provides URL matching, access to the page's Next.js ``pageProps`` data,
    detection of a stream's file extension and a common builder for
    audio-episode info dicts.
    """

    # Imported in the class body so that the module is reachable as the
    # class attribute `cls.re` from `_match_valid_url()` below.
    import re

    @classmethod
    def _match_valid_url(cls, url):
        """Return the result of matching `cls._VALID_URL` against `url`."""
        return cls.re.match(cls._VALID_URL, url)

    def _search_nextjs_data(self, webpage, video_id, **kw):
        """Find and parse the ``__NEXT_DATA__`` JSON embedded in `webpage`.

        Keyword arguments:
        transform_source -- forwarded to `_parse_json()` only
        fatal            -- forwarded to both helpers (default True)
        anything else    -- forwarded to `_search_regex()` only

        Returns the parsed JSON, or None when the search yielded nothing
        (which can only happen for a non-fatal search).

        NOTE(review): the routing assumes `_parse_json()` understands only
        `transform_source`/`fatal` while the remaining options (`default`,
        `flags`, `group`) belong to `_search_regex()` -- confirm against
        the InfoExtractor base class.
        """
        # Passing one shared **kw to both helpers made any option that only
        # one of them accepts raise TypeError, so split the options here.
        transform_source = kw.pop('transform_source', None)
        fatal = kw.pop('fatal', True)

        next_data = self._search_regex(
            r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
            webpage, 'next.js data', fatal=fatal, **kw)
        if next_data is None:
            # nothing found in non-fatal mode: there is nothing to parse
            return None

        return self._parse_json(
            next_data, video_id, transform_source=transform_source, fatal=fatal)

    def _get_page_props(self, url, video_id):
        """Download `url` and return ``props.pageProps`` of its Next.js data.

        Raises KeyError if the data lacks the 'props' or 'pageProps' key.
        """
        webpage = self._download_webpage(url, video_id)
        return self._search_nextjs_data(webpage, video_id)['props']['pageProps']

    def _request_ext(self, url, video_id):
        """Request `url` and return the extension detected from its response."""
        # Server rejects HEAD requests
        urlh = self._request_webpage(
            url, video_id, note='Determining source extension')
        try:
            return urlhandle_detect_ext(urlh)
        finally:
            # The handle is only needed for the detection; release the
            # connection explicitly instead of leaving it to the garbage
            # collector.
            urlh.close()

    def _extract_audio(self, episode, series):
        """Build the info dict for one audio episode.

        episode -- the episode's metadata dict from the page data
        series  -- metadata dict of the podcast/show that the episode
                   belongs to (may be empty)

        The result combines a fixed 'vcodec': 'none' with fields taken from
        `series` and then from `episode`.
        NOTE(review): `rev=True` presumably lets the later (episode) values
        take precedence over the series ones -- confirm against
        `merge_dicts()`.
        """

        def clean_desc(x):
            # strip the HTML and turn non-breaking spaces into plain spaces
            x = clean_html(x)
            if x:
                x = x.replace('\xa0', ' ')
            return x

        return merge_dicts({
            'vcodec': 'none',
        }, traverse_obj(series, {
            'series': 'title',
            'series_id': 'id',
            'thumbnail': 'imageUrl',
            'uploader': 'itunesAuthor',  # podcasts only
        }), traverse_obj(episode, {
            'id': 'id',
            'description': ('description', T(clean_desc)),
            'duration': ('duration', T(parse_duration)),
            'thumbnail': 'imageUrl',
            'url': 'streamUrl',
            # two alternative source keys for the publication time
            'timestamp': (('pubDate', 'startDate'), T(unified_timestamp)),
            'title': 'title',
        }, get_all=False), rev=True)
|
||
|
||
class GlobalPlayerLiveIE(GlobalPlayerBaseIE):
    """Extractor for live station pages (``/live/<brand>/<region>``)."""

    _VALID_URL = r'https?://www\.globalplayer\.com/live/(?P<id>\w+)/\w+'
    _TESTS = [{
        'url': 'https://www.globalplayer.com/live/smoothchill/uk/',
        'info_dict': {
            'id': '2mx1E',
            'ext': 'aac',
            'display_id': 'smoothchill-uk',
            'title': 're:^Smooth Chill.+$',
            'thumbnail': 'https://herald.musicradio.com/media/f296ade8-50c9-4f60-911f-924e96873620.png',
            'description': 'Music To Chill To',
            # 'live_status': 'is_live',
            'is_live': True,
        },
    }, {
        # national station
        'url': 'https://www.globalplayer.com/live/heart/uk/',
        'info_dict': {
            'id': '2mwx4',
            'ext': 'aac',
            'description': 'turn up the feel good!',
            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
            # 'live_status': 'is_live',
            'is_live': True,
            'title': 're:^Heart UK.+$',
            'display_id': 'heart-uk',
        },
    }, {
        # regional variation
        'url': 'https://www.globalplayer.com/live/heart/london/',
        'info_dict': {
            'id': 'AMqg',
            'ext': 'aac',
            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
            'title': 're:^Heart London.+$',
            # 'live_status': 'is_live',
            'is_live': True,
            'display_id': 'heart-london',
            'description': 'turn up the feel good!',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict of the live station stream found at `url`."""
        video_id = self._match_id(url)
        page_props = self._get_page_props(url, video_id)
        station = page_props['station']
        stream_url = station['streamUrl']

        # mandatory fields: a missing station id raises KeyError here,
        # before the stream itself is requested
        station_id = station['id']
        display_id = join_nonempty('brandSlug', 'slug', from_dict=station)
        if not display_id:
            display_id = station.get('legacyStationPrefix')
        stream_info = {
            'id': station_id,
            'display_id': display_id,
            'url': stream_url,
            'ext': self._request_ext(stream_url, video_id),
            'vcodec': 'none',
            'is_live': True,
        }

        # first usable of the station's own name and its brand name
        station_name = traverse_obj(
            station, (('name', 'brandName'), T(str_or_none)),
            get_all=False)
        title_info = {'title': self._live_title(station_name)}

        optional_info = traverse_obj(station, {
            'description': 'tagline',
            'thumbnail': 'brandLogo',
        })

        return merge_dicts(stream_info, title_info, optional_info, rev=True)
|
||
|
||
class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
    """Extractor for "live playlist" pages (``/playlists/<id>``)."""

    _VALID_URL = r'https?://www\.globalplayer\.com/playlists/(?P<id>\w+)'
    _TESTS = [{
        # "live playlist"
        'url': 'https://www.globalplayer.com/playlists/8bLk/',
        'info_dict': {
            'id': '8bLk',
            'ext': 'aac',
            # 'live_status': 'is_live',
            'is_live': True,
            'description': 'md5:e10f5e10b01a7f2c14ba815509fbb38d',
            'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=',
            'title': 're:^Classic FM Hall of Fame.+$'
        },
    }]

    def _real_extract(self, url):
        """Return the info dict of the live playlist stream found at `url`."""
        video_id = self._match_id(url)
        page_props = self._get_page_props(url, video_id)
        playlist = page_props['playlistData']
        stream_url = playlist['streamUrl']

        stream_info = {
            'id': video_id,
            'url': stream_url,
            'ext': self._request_ext(stream_url, video_id),
            'vcodec': 'none',
            'is_live': True,
        }
        # descriptive fields copied from the page data where present
        playlist_meta = traverse_obj(playlist, {
            'title': 'title',
            'description': 'description',
            'thumbnail': 'image',
        })

        return merge_dicts(stream_info, playlist_meta, rev=True)
|
||
|
||
class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
    """Playlist extractor for a whole podcast or radio catch-up show."""

    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)/|catchup/\w+/\w+/)(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        # podcast
        'url': 'https://www.globalplayer.com/podcasts/42KuaM/',
        'playlist_mincount': 5,
        'info_dict': {
            'id': '42KuaM',
            'title': 'Filthy Ritual',
            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
            'categories': ['Society & Culture', 'True Crime'],
            'uploader': 'Global',
            'description': 'md5:da5b918eac9ae319454a10a563afacf9',
        },
    }, {
        # radio catchup
        'url': 'https://www.globalplayer.com/catchup/lbc/uk/46vyD7z/',
        'playlist_mincount': 3,
        'info_dict': {
            'id': '46vyD7z',
            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
            'title': 'Nick Ferrari',
            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
        },
    }]

    def _real_extract(self, url):
        """Return a playlist of the episodes listed on the page at `url`."""
        mobj = self._match_valid_url(url)
        video_id, podcast = mobj.group('id', 'podcast')
        props = self._get_page_props(url, video_id)
        # the 'podcast' group only matches for /podcasts/ URLs
        series = props['podcastInfo' if podcast else 'catchupInfo']

        def has_id_and_stream(_, ep):
            # keep only the episodes that have both an id and a stream URL
            return ep['id'] and ep['streamUrl']

        entries = []
        for ep in traverse_obj(series, ('episodes', has_id_and_stream)):
            entries.append(self._extract_audio(ep, series))

        category_names = traverse_obj(series, ('categories', Ellipsis, 'name'))
        playlist_info = {
            '_type': 'playlist',
            'id': video_id,
            'entries': entries,
            'categories': category_names or None,
        }
        series_meta = traverse_obj(series, {
            'description': 'description',
            'thumbnail': 'imageUrl',
            'title': 'title',
            'uploader': 'itunesAuthor',  # podcasts only
        })

        return merge_dicts(playlist_info, series_meta, rev=True)
|
||
|
||
class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE):
    """Extractor for a single podcast or radio catch-up episode."""

    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)|catchup/\w+/\w+)/episodes/(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        # podcast
        'url': 'https://www.globalplayer.com/podcasts/episodes/7DrfNnE/',
        'info_dict': {
            'id': '7DrfNnE',
            'ext': 'mp3',
            'title': 'Filthy Ritual - Trailer',
            'description': 'md5:1f1562fd0f01b4773b590984f94223e0',
            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
            'duration': 225.0,
            'timestamp': 1681254900,
            'series': 'Filthy Ritual',
            'series_id': '42KuaM',
            'upload_date': '20230411',
            'uploader': 'Global',
        },
    }, {
        # radio catchup
        'url': 'https://www.globalplayer.com/catchup/lbc/uk/episodes/2zGq26Vcv1fCWhddC4JAwETXWe/',
        'only_matching': True,
        # expired: refresh the details with a current show for a full test
        'info_dict': {
            'id': '2zGq26Vcv1fCWhddC4JAwETXWe',
            'ext': 'm4a',
            'timestamp': 1682056800,
            'series': 'Nick Ferrari',
            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
            'upload_date': '20230421',
            'series_id': '46vyD7z',
            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
            'title': 'Nick Ferrari',
            'duration': 10800.0,
        },
    }]

    def _real_extract(self, url):
        """Return the info dict of the episode found at `url`."""
        mobj = self._match_valid_url(url)
        video_id, podcast = mobj.group('id', 'podcast')
        props = self._get_page_props(url, video_id)
        # the 'podcast' group only matches for /podcasts/ URLs
        episode = props['podcastEpisode' if podcast else 'catchupEpisode']

        # the parent series is looked up under 'podcast', then 'show';
        # fall back to an empty dict when neither yields one
        series = traverse_obj(episode, 'podcast', 'show', expected_type=dict)
        if not series:
            series = {}

        return self._extract_audio(episode, series)
|
||
|
||
class GlobalPlayerVideoIE(GlobalPlayerBaseIE):
    """Extractor for video pages (``/videos/<id>``)."""

    _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.globalplayer.com/videos/2JsSZ7Gm2uP/',
        'info_dict': {
            'id': '2JsSZ7Gm2uP',
            'ext': 'mp4',
            'description': 'md5:6a9f063c67c42f218e42eee7d0298bfd',
            'thumbnail': 'md5:d4498af48e15aae4839ce77b97d39550',
            'upload_date': '20230420',
            'title': 'Treble Malakai Bayoh sings a sublime Handel aria at Classic FM Live',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict of the video found at `url`."""
        video_id = self._match_id(url)
        page_props = self._get_page_props(url, video_id)
        video_data = page_props['videoData']

        # info-dict field -> where to find it in the page's video data
        field_paths = {
            'url': 'url',
            'thumbnail': ('image', 'url'),
            'title': 'title',
            'upload_date': ('publish_date', T(unified_strdate)),
            'description': 'description',
        }
        video_meta = traverse_obj(video_data, field_paths)

        return merge_dicts({'id': video_id}, video_meta, rev=True)