[FragmentFD] Respect --no-continue

* discard partial fragment on `--no-continue`
* continue with correct progress display otherwise

Resolves ytdl-org#21467
dirkf · Mar 13, 2023 · cc9ef44 · cc9ef44
1 parent e734fe4
commit cc9ef44
Show file tree

Hide file tree

Showing 4 changed files with 51 additions and 37 deletions.
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
@@ -88,17 +88,21 @@ def format_percent(percent):
             return '---.-%'
         return '%6s' % ('%3.1f%%' % percent)
 
-    @staticmethod
-    def calc_eta(start, now, total, current):
+    @classmethod
+    def calc_eta(cls, start_or_rate, now_or_remaining, *args):
+        if len(args) < 2:
+            rate, remaining = (start_or_rate, now_or_remaining)
+            if None in (rate, remaining):
+                return None
+            return int(float(remaining) / rate)
+        start, now = (start_or_rate, now_or_remaining)
+        total, current = args
         if total is None:
             return None
         if now is None:
             now = time.time()
-        dif = now - start
-        if current == 0 or dif < 0.001:  # One millisecond
-            return None
-        rate = float(current) / dif
-        return int((float(total) - float(current)) / rate)
+        rate = cls.calc_speed(start, now, current)
+        return rate and int((float(total) - float(current)) / rate)
 
     @staticmethod
     def format_eta(eta):
@@ -123,6 +127,12 @@ def format_speed(speed):
     def format_retries(retries):
         return 'inf' if retries == float('inf') else '%.0f' % retries
 
+    @staticmethod
+    def filesize_or_none(unencoded_filename):
+        fn = encodeFilename(unencoded_filename)
+        if os.path.isfile(fn):
+            return os.path.getsize(fn)
+
     @staticmethod
     def best_block_size(elapsed_time, bytes):
         new_min = max(bytes / 2.0, 1.0)

diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
@@ -38,8 +38,7 @@ def real_download(self, filename, info_dict):
             # In DASH, the first segment contains necessary headers to
             # generate a valid MP4 file, so always abort for the first segment
             fatal = i == 0 or not skip_unavailable_fragments
-            count = 0
-            while count <= fragment_retries:
+            for count in range(fragment_retries + 1):
                 try:
                     fragment_url = fragment.get('url')
                     if not fragment_url:
@@ -57,9 +56,8 @@ def real_download(self, filename, info_dict):
                     # is usually enough) thus allowing to download the whole file successfully.
                     # To be future-proof we will retry all fragments that fail with any
                     # HTTP error.
-                    count += 1
-                    if count <= fragment_retries:
-                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
+                    if count < fragment_retries:
+                        self.report_retry_fragment(err, frag_index, count + 1, fragment_retries)
                 except DownloadError:
                     # Don't retry fragment if error occurred during HTTP downloading
                     # itself since it has own retry settings
@@ -68,7 +66,7 @@ def real_download(self, filename, info_dict):
                         break
                     raise
 
-            if count > fragment_retries:
+            if count >= fragment_retries:
                 if not fatal:
                     self.report_skip_fragment(frag_index)
                     continue

diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
@@ -71,7 +71,7 @@ def _prepare_and_start_frag_download(self, ctx):
 
     @staticmethod
     def __do_ytdl_file(ctx):
-        return not ctx['live'] and not ctx['tmpfilename'] == '-'
+        return ctx['live'] is not True and ctx['tmpfilename'] != '-'
 
     def _read_ytdl_file(self, ctx):
         assert 'ytdl_corrupt' not in ctx
@@ -101,8 +101,13 @@ def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
             'url': frag_url,
             'http_headers': headers or info_dict.get('http_headers'),
         }
-        if ctx['fragment_index'] > 0:
-            ctx['dl'].params['continuedl'] = True
+        frag_resume_len = 0
+        if ctx['dl'].params.get('continuedl', True):
+            frag_resume_len = self.filesize_or_none(
+                self.temp_name(fragment_filename))
+        fragment_info_dict['frag_resume_len'] = frag_resume_len
+        ctx['frag_resume_len'] = frag_resume_len or 0
+
         success = ctx['dl'].download(fragment_filename, fragment_info_dict)
         if not success:
             return False, None
@@ -126,9 +131,7 @@ def _append_fragment(self, ctx, frag_content):
             del ctx['fragment_filename_sanitized']
 
     def _prepare_frag_download(self, ctx):
-        if 'live' not in ctx:
-            ctx['live'] = False
-        if not ctx['live']:
+        if not ctx.setdefault('live', False):
             total_frags_str = '%d' % ctx['total_frags']
             ad_frags = ctx.get('ad_frags', 0)
             if ad_frags:
@@ -153,18 +156,18 @@ def _prepare_frag_download(self, ctx):
         )
         tmpfilename = self.temp_name(ctx['filename'])
         open_mode = 'wb'
-        resume_len = 0
 
         # Establish possible resume length
-        if os.path.isfile(encodeFilename(tmpfilename)):
+        resume_len = self.filesize_or_none(tmpfilename) or 0
+        if resume_len > 0:
             open_mode = 'ab'
-            resume_len = os.path.getsize(encodeFilename(tmpfilename))
 
         # Should be initialized before ytdl file check
         ctx.update({
             'tmpfilename': tmpfilename,
             'fragment_index': 0,
         })
+
         if self.__do_ytdl_file(ctx):
             ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename'])))
             if continuedl and ytdl_file_exists:
@@ -181,6 +184,7 @@ def _prepare_frag_download(self, ctx):
                     if 'ytdl_corrupt' in ctx:
                         del ctx['ytdl_corrupt']
                     self._write_ytdl_file(ctx)
+
             else:
                 if not continuedl:
                     if ytdl_file_exists:
@@ -216,6 +220,7 @@ def _start_frag_download(self, ctx):
         start = time.time()
         ctx.update({
             'started': start,
+            'fragment_started': start,
             # Amount of fragment's bytes downloaded by the time of the previous
             # frag progress hook invocation
             'prev_frag_downloaded_bytes': 0,
@@ -225,6 +230,9 @@ def frag_progress_hook(s):
             if s['status'] not in ('downloading', 'finished'):
                 return
 
+            if not total_frags and ctx.get('fragment_count'):
+                state['fragment_count'] = ctx['fragment_count']
+
             time_now = time.time()
             state['elapsed'] = time_now - start
             frag_total_bytes = s.get('total_bytes') or 0
@@ -239,16 +247,17 @@ def frag_progress_hook(s):
                 ctx['fragment_index'] = state['fragment_index']
                 state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
                 ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
+                ctx['speed'] = state['speed'] = self.calc_speed(
+                    ctx['fragment_started'], time_now, frag_total_bytes)
+                ctx['fragment_started'] = time.time()
                 ctx['prev_frag_downloaded_bytes'] = 0
             else:
                 frag_downloaded_bytes = s['downloaded_bytes']
                 state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
+                ctx['speed'] = state['speed'] = self.calc_speed(
+                    ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx['frag_resume_len'])
                 if not ctx['live']:
-                    state['eta'] = self.calc_eta(
-                        start, time_now, estimated_size - resume_len,
-                        state['downloaded_bytes'] - resume_len)
-                state['speed'] = s.get('speed') or ctx.get('speed')
-                ctx['speed'] = state['speed']
+                    state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
                 ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
             self._hook_progress(state)
 
@@ -275,7 +284,7 @@ def _finish_frag_download(self, ctx):
                         os.utime(ctx['filename'], (time.time(), filetime))
                     except Exception:
                         pass
-            downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
+            downloaded_bytes = self.filesize_or_none(ctx['filename']) or 0
 
         self._hook_progress({
             'downloaded_bytes': downloaded_bytes,

diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
@@ -58,9 +58,9 @@ class DownloadContext(dict):
 
         if self.params.get('continuedl', True):
             # Establish possible resume length
-            if os.path.isfile(encodeFilename(ctx.tmpfilename)):
-                ctx.resume_len = os.path.getsize(
-                    encodeFilename(ctx.tmpfilename))
+            ctx.resume_len = info_dict.get('frag_resume_len')
+            if ctx.resume_len is None:
+                ctx.resume_len = self.filesize_or_none(ctx.tmpfilename) or 0
 
         ctx.is_resume = ctx.resume_len > 0
 
@@ -115,9 +115,9 @@ def establish_connection():
                         raise RetryDownload(err)
                     raise err
                 # When trying to resume, Content-Range HTTP header of response has to be checked
-                # to match the value of requested Range HTTP header. This is due to a webservers
+                # to match the value of requested Range HTTP header. This is due to webservers
                 # that don't support resuming and serve a whole file with no Content-Range
-                # set in response despite of requested Range (see
+                # set in response despite requested Range (see
                 # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
                 if has_range:
                     content_range = ctx.data.headers.get('Content-Range')
@@ -293,10 +293,7 @@ def retry(e):
 
                 # Progress message
                 speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
-                if ctx.data_len is None:
-                    eta = None
-                else:
-                    eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
+                eta = self.calc_eta(speed, ctx.data_len and (ctx.data_len - ctx.resume_len))
 
                 self._hook_progress({
                     'status': 'downloading',