[bot] AutoMerging: merge all upstream's changes:
* https://github.com/ytdl-org/youtube-dl:
  [DashSegmentsFD] Correctly detect errors when `fragment_retries` == 0
  [YouTube] Simplify signature patterns
  [core] Sanitize info dict before dumping JSON (fixes fe7e130)  (ytdl-org#32032)
github-actions[bot] committed Apr 13, 2023
2 parents 6cff09a + 26035bd commit b352b75
Showing 3 changed files with 13 additions and 14 deletions.
youtube_dl/YoutubeDL.py: 5 changes (3 additions & 2 deletions)
@@ -1777,7 +1777,7 @@ def print_optional(field):
self.to_stdout(formatSeconds(info_dict['duration']))
print_mandatory('format')
if self.params.get('forcejson', False):
self.to_stdout(json.dumps(info_dict))
self.to_stdout(json.dumps(self.sanitize_info(info_dict)))

def process_info(self, info_dict):
"""Process a single resolved IE result."""
@@ -2091,7 +2091,7 @@ def download(self, url_list):
raise
else:
if self.params.get('dump_single_json', False):
self.to_stdout(json.dumps(res))
self.to_stdout(json.dumps(self.sanitize_info(res)))

return self._download_retcode

@@ -2100,6 +2100,7 @@ def download_with_info_file(self, info_filename):
[info_filename], mode='r',
openhook=fileinput.hook_encoded('utf-8'))) as f:
# FileInput doesn't have a read method, we can't call json.load
# TODO: let's use io.open(), then
info = self.filter_requested_info(json.loads('\n'.join(f)))
try:
self.process_ie_result(info, download=True)
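
The YoutubeDL.py change routes both JSON dumps (the forcejson and dump_single_json code paths) through sanitize_info() so the info dict is made JSON-serializable before json.dumps() sees it. As a rough standalone illustration of why such a pass is needed (a hypothetical coerce() helper, not youtube-dl's actual sanitize_info):

import json

def dump_info(info):
    # Info dicts can accumulate values json.dumps() rejects (sets, bytes,
    # arbitrary objects); coerce them to JSON-friendly types first.
    def coerce(value):
        if isinstance(value, (str, int, float, bool)) or value is None:
            return value
        if isinstance(value, dict):
            return dict((k, coerce(v)) for k, v in value.items())
        if isinstance(value, (list, tuple, set)):
            return [coerce(v) for v in value]
        return str(value)  # last resort: a string representation
    return json.dumps(coerce(info))

# json.dumps(info) alone would raise TypeError on the set and bytes values:
print(dump_info({'id': 'abc123', 'tags': {'music'}, 'raw': b'\x00\x01'}))
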
youtube_dl/downloader/dash.py: 14 changes (7 additions & 7 deletions)
@@ -38,12 +38,13 @@ def real_download(self, filename, info_dict):
# In DASH, the first segment contains necessary headers to
# generate a valid MP4 file, so always abort for the first segment
fatal = frag_index == 1 or not skip_unavailable_fragments
fragment_url = fragment.get('url')
if not fragment_url:
assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path'])
success = False
for count in itertools.count():
try:
fragment_url = fragment.get('url')
if not fragment_url:
assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path'])
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
if not success:
return False
@@ -63,14 +64,13 @@ def real_download(self, filename, info_dict):
# itself since it has its own retry settings
if fatal:
raise
self.report_skip_fragment(frag_index)
break

if count >= fragment_retries:
if not success:
if not fatal:
self.report_skip_fragment(frag_index)
continue
self.report_error('giving up after %s fragment retries' % fragment_retries)
self.report_error('giving up after %s fragment retries' % count)
return False

self._finish_frag_download(ctx)
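
In the dash.py hunk, fragment URL resolution moves out of the retry loop, a success flag is initialised before the loop, and the post-loop failure test changes from `count >= fragment_retries` to `not success` (reporting the actual attempt count). The counter test is ambiguous at the boundary the commit message names: with `fragment_retries` == 0 it cannot distinguish "gave up" from "finished on the first attempt". A standalone sketch of the flag-based pattern (hypothetical fetch_with_retries/download_once names, not the downloader's real API):

import itertools

def fetch_with_retries(download_once, max_retries):
    # Track the outcome explicitly instead of inferring it from the counter.
    success = False
    for count in itertools.count():
        try:
            success = download_once()
        except IOError:
            if count >= max_retries:
                break      # out of retries: give up
            continue       # transient error: try again
        break              # got a definite result, success or not

    # `success` is correct for any max_retries, including 0; a post-loop
    # `count >= max_retries` test is not.
    return success

# e.g. fetch_with_retries(lambda: True, max_retries=0) -> True
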
youtube_dl/extractor/youtube.py: 8 changes (3 additions & 5 deletions)
@@ -19,6 +19,7 @@
compat_urllib_parse_parse_qs as compat_parse_qs,
compat_urllib_parse_unquote_plus,
compat_urllib_parse_urlparse,
compat_zip as zip,
)
from ..jsinterp import JSInterpreter
from ..utils import (
@@ -1555,17 +1556,14 @@ def _parse_sig_js(self, jscode):
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
# Obsolete patterns
r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
jscode, 'Initial JS player signature function name', group='sig')

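
The youtube.py hunk trims the tuple of regular expressions used to find the signature-decryption function name in the player JavaScript, folding the two adjacent a.split("") variants into a single pattern with an optional trailing transform call. That tuple is handed to _search_regex together with group='sig'; the patterns are tried in order and the named `sig` group of the first match is returned. A standalone sketch of that try-in-order lookup using two of the patterns shown above (a hypothetical find_sig_function_name helper, not the extractor's own code):

import re

def find_sig_function_name(jscode):
    patterns = (
        # merged pattern from the hunk: the trailing transform call is optional
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
    )
    for pattern in patterns:
        mobj = re.search(pattern, jscode)
        if mobj:
            return mobj.group('sig')
    return None

# The first pattern matches and captures "Xy":
print(find_sig_function_name('var Xy=function(a){a=a.split("");Ab.cd(a,3)};'))
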
