Skip to content

Commit

Permalink
[CI] auto update yt_dlp to upstream commit 12d8ea8246fa901de302ff5cc748caddadc82f41
Browse files Browse the repository at this point in the history
  • Loading branch information
github-actions[bot] committed May 17, 2024
1 parent e48cb8e commit 5439057
Show file tree
Hide file tree
Showing 8 changed files with 481 additions and 197 deletions.
6 changes: 5 additions & 1 deletion lib/yt_dlp/cookies.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
from .utils._utils import _YDLLogger
from .utils.networking import normalize_url

CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'whale'}
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}


Expand Down Expand Up @@ -219,6 +219,7 @@ def _get_chromium_based_browser_settings(browser_name):
'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
'whale': os.path.join(appdata_local, R'Naver\Naver Whale\User Data'),
}[browser_name]

elif sys.platform == 'darwin':
Expand All @@ -230,6 +231,7 @@ def _get_chromium_based_browser_settings(browser_name):
'edge': os.path.join(appdata, 'Microsoft Edge'),
'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
'vivaldi': os.path.join(appdata, 'Vivaldi'),
'whale': os.path.join(appdata, 'Naver/Whale'),
}[browser_name]

else:
Expand All @@ -241,6 +243,7 @@ def _get_chromium_based_browser_settings(browser_name):
'edge': os.path.join(config, 'microsoft-edge'),
'opera': os.path.join(config, 'opera'),
'vivaldi': os.path.join(config, 'vivaldi'),
'whale': os.path.join(config, 'naver-whale'),
}[browser_name]

# Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
Expand All @@ -252,6 +255,7 @@ def _get_chromium_based_browser_settings(browser_name):
'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
'whale': 'Whale',
}[browser_name]

browsers_without_profiles = {'opera'}
Expand Down
438 changes: 308 additions & 130 deletions lib/yt_dlp/extractor/bbc.py

Large diffs are not rendered by default.

62 changes: 38 additions & 24 deletions lib/yt_dlp/extractor/cda.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
merge_dicts,
multipart_encode,
parse_duration,
random_birthday,
traverse_obj,
try_call,
try_get,
Expand Down Expand Up @@ -63,38 +62,57 @@ class CDAIE(InfoExtractor):
'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'crash404',
'view_count': int,
'average_rating': float,
'duration': 137,
'age_limit': 0,
'upload_date': '20160220',
'timestamp': 1455968218,
}
}, {
# Age-restricted
'url': 'http://www.cda.pl/video/1273454c4',
# Age-restricted with vfilm redirection
'url': 'https://www.cda.pl/video/8753244c4',
'md5': 'd8eeb83d63611289507010d3df3bb8b3',
'info_dict': {
'id': '1273454c4',
'id': '8753244c4',
'ext': 'mp4',
'title': 'Bronson (2008) napisy HD 1080p',
'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
'title': '[18+] Bez Filtra: Rezerwowe Psy czyli... najwulgarniejsza polska gra?',
'description': 'md5:ae80bac31bd6a9f077a6cce03c7c077e',
'height': 1080,
'uploader': 'boniek61',
'uploader': 'arhn eu',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 5554,
'duration': 991,
'age_limit': 18,
'view_count': int,
'average_rating': float,
},
'timestamp': 1633888264,
'upload_date': '20211010',
}
}, {
# Age-restricted without vfilm redirection
'url': 'https://www.cda.pl/video/17028157b8',
'md5': 'c1fe5ff4582bace95d4f0ce0fbd0f992',
'info_dict': {
'id': '17028157b8',
'ext': 'mp4',
'title': 'STENDUPY MICHAŁ OGIŃSKI',
'description': 'md5:5851f3272bfc31f762d616040a1d609a',
'height': 480,
'uploader': 'oginski',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 18855,
'age_limit': 18,
'average_rating': float,
'timestamp': 1699705901,
'upload_date': '20231111',
}
}, {
'url': 'http://ebd.cda.pl/0x0/5749950c',
'only_matching': True,
}]

def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
form_data = random_birthday('rok', 'miesiac', 'dzien')
form_data.update({'return': url, 'module': 'video', 'module_id': video_id})
data, content_type = multipart_encode(form_data)
data, content_type = multipart_encode({'age_confirm': ''})
return self._download_webpage(
urljoin(url, '/a/validatebirth'), video_id, *args,
url, video_id, *args,
data=data, headers={
'Referer': url,
'Content-Type': content_type,
Expand Down Expand Up @@ -164,7 +182,7 @@ def _real_extract(self, url):
if 'Authorization' in self._API_HEADERS:
return self._api_extract(video_id)
else:
return self._web_extract(video_id, url)
return self._web_extract(video_id)

def _api_extract(self, video_id):
meta = self._download_json(
Expand Down Expand Up @@ -197,9 +215,9 @@ def _api_extract(self, video_id):
'view_count': meta.get('views'),
}

def _web_extract(self, video_id, url):
def _web_extract(self, video_id):
self._set_cookie('cda.pl', 'cda.player', 'html5')
webpage = self._download_webpage(
webpage, urlh = self._download_webpage_handle(
f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)

if 'Ten film jest dostępny dla użytkowników premium' in webpage:
Expand All @@ -209,10 +227,10 @@ def _web_extract(self, video_id, url):
self.raise_geo_restricted()

need_confirm_age = False
if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
if self._html_search_regex(r'(<button[^>]+name="[^"]*age_confirm[^"]*")',
webpage, 'birthday validate form', default=None):
webpage = self._download_age_confirm_page(
url, video_id, note='Confirming age')
urlh.url, video_id, note='Confirming age')
need_confirm_age = True

formats = []
Expand All @@ -222,9 +240,6 @@ def _web_extract(self, video_id, url):
(?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
<(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
''', webpage, 'uploader', default=None, group='uploader')
view_count = self._search_regex(
r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
'view_count', default=None)
average_rating = self._search_regex(
(r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
Expand All @@ -235,7 +250,6 @@ def _web_extract(self, video_id, url):
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'uploader': uploader,
'view_count': int_or_none(view_count),
'average_rating': float_or_none(average_rating),
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
Expand Down
17 changes: 11 additions & 6 deletions lib/yt_dlp/extractor/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -957,7 +957,8 @@ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=
if urlh is False:
assert not fatal
return False
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
encoding=encoding, data=data)
return (content, urlh)

@staticmethod
Expand Down Expand Up @@ -1005,8 +1006,10 @@ def __check_blocked(self, content):
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
expected=True)

def _request_dump_filename(self, url, video_id):
basen = f'{video_id}_{url}'
def _request_dump_filename(self, url, video_id, data=None):
if data is not None:
data = hashlib.md5(data).hexdigest()
basen = join_nonempty(video_id, data, url, delim='_')
trim_length = self.get_param('trim_file_name') or 240
if len(basen) > trim_length:
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
Expand All @@ -1028,16 +1031,18 @@ def __decode_webpage(self, webpage_bytes, encoding, headers):
except LookupError:
return webpage_bytes.decode('utf-8', 'replace')

def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
prefix=None, encoding=None, data=None):
webpage_bytes = urlh.read()
if prefix is not None:
webpage_bytes = prefix + webpage_bytes
url_or_request = self._create_request(url_or_request, data)
if self.get_param('dump_intermediate_pages', False):
self.to_screen('Dumping request to ' + urlh.url)
dump = base64.b64encode(webpage_bytes).decode('ascii')
self._downloader.to_screen(dump)
if self.get_param('write_pages'):
filename = self._request_dump_filename(urlh.url, video_id)
filename = self._request_dump_filename(urlh.url, video_id, url_or_request.data)
self.to_screen(f'Saving request to {filename}')
with open(filename, 'wb') as outf:
outf.write(webpage_bytes)
Expand Down Expand Up @@ -1098,7 +1103,7 @@ def download_content(self, url_or_request, video_id, note=note, errnote=errnote,
impersonate=None, require_impersonation=False):
if self.get_param('load_pages'):
url_or_request = self._create_request(url_or_request, data, headers, query)
filename = self._request_dump_filename(url_or_request.url, video_id)
filename = self._request_dump_filename(url_or_request.url, video_id, url_or_request.data)
self.to_screen(f'Loading request from {filename}')
try:
with open(filename, 'rb') as dumpf:
Expand Down
28 changes: 13 additions & 15 deletions lib/yt_dlp/extractor/tiktok.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,19 +45,18 @@ class TikTokBaseIE(InfoExtractor):
# "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0
'aid': '0',
}
_KNOWN_APP_INFO = [
'7351144126450059040',
'7351149742343391009',
'7351153174894626592',
]
_APP_INFO_POOL = None
_APP_INFO = None
_APP_USER_AGENT = None

@property
def _KNOWN_APP_INFO(self):
return self._configuration_arg('app_info', ie_key=TikTokIE)

@property
def _API_HOSTNAME(self):
return self._configuration_arg(
'api_hostname', ['api22-normal-c-useast2a.tiktokv.com'], ie_key=TikTokIE)[0]
'api_hostname', ['api16-normal-c-useast1a.tiktokv.com'], ie_key=TikTokIE)[0]

def _get_next_app_info(self):
if self._APP_INFO_POOL is None:
Expand All @@ -66,13 +65,10 @@ def _get_next_app_info(self):
for key, default in self._APP_INFO_DEFAULTS.items()
if key != 'iid'
}
app_info_list = (
self._configuration_arg('app_info', ie_key=TikTokIE)
or random.sample(self._KNOWN_APP_INFO, len(self._KNOWN_APP_INFO)))
self._APP_INFO_POOL = [
{**defaults, **dict(
(k, v) for k, v in zip(self._APP_INFO_DEFAULTS, app_info.split('/')) if v
)} for app_info in app_info_list
)} for app_info in self._KNOWN_APP_INFO
]

if not self._APP_INFO_POOL:
Expand Down Expand Up @@ -757,11 +753,13 @@ class TikTokIE(TikTokBaseIE):

def _real_extract(self, url):
video_id, user_id = self._match_valid_url(url).group('id', 'user_id')
try:
return self._extract_aweme_app(video_id)
except ExtractorError as e:
e.expected = True
self.report_warning(f'{e}; trying with webpage')

if self._KNOWN_APP_INFO:
try:
return self._extract_aweme_app(video_id)
except ExtractorError as e:
e.expected = True
self.report_warning(f'{e}; trying with webpage')

url = self._create_url(user_id, video_id)
webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'})
Expand Down
27 changes: 26 additions & 1 deletion lib/yt_dlp/extractor/twitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class TwitterBaseIE(InfoExtractor):
_NETRC_MACHINE = 'twitter'
_API_BASE = 'https://api.twitter.com/1.1/'
_GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
_AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
_LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
_flow_token = None
Expand Down Expand Up @@ -1191,6 +1191,31 @@ class TwitterIE(TwitterBaseIE):
'age_limit': 0,
'_old_archive_ids': ['twitter 1724884212803834154'],
},
}, {
# x.com
'url': 'https://x.com/historyinmemes/status/1790637656616943991',
'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
'info_dict': {
'id': '1790637589910654976',
'ext': 'mp4',
'title': 'Historic Vids - One of the most intense moments in history',
'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
'display_id': '1790637656616943991',
'uploader': 'Historic Vids',
'uploader_id': 'historyinmemes',
'uploader_url': 'https://twitter.com/historyinmemes',
'channel_id': '855481986290524160',
'upload_date': '20240515',
'timestamp': 1715756260.0,
'duration': 15.488,
'tags': [],
'comment_count': int,
'repost_count': int,
'like_count': int,
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
'age_limit': 0,
'_old_archive_ids': ['twitter 1790637656616943991'],
}
}, {
# onion route
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
Expand Down
Loading

0 comments on commit 5439057

Please sign in to comment.