# Douyin Live Recorder

Notebook นี้ใช้สำหรับรัน DouyinLiveRecorder เพื่อบันทึกวิดีโอไลฟ์จาก Douyin (TikTok จีน)

In [None]:
# Clone repositories
!git clone https://github.com/ihmily/DouyinLiveRecorder.git
!git clone https://github.com/chaiz64/l777k.git

In [None]:
# Copy configuration files
!cp l777k/config.ini DouyinLiveRecorder/config/
!cp l777k/URL_config.ini DouyinLiveRecorder/config/

In [None]:
# Change into the project folder
%cd DouyinLiveRecorder

# Install the Python dependencies
!pip3 install -r requirements.txt

In [None]:
# Install ffmpeg, then upgrade pandas and pyarrow
!apt update
!apt install ffmpeg -y
!pip install --upgrade --no-cache-dir pandas pyarrow

In [None]:
# Run the recorder
!python main.py

In [None]:
import time
import datetime

def keep_colab_alive(duration_hours=24):
    """
    Hold the Colab session open by printing a heartbeat line once a minute.

    Args:
        duration_hours (int): Number of hours to keep looping. Defaults to 24.
                              A value that is not greater than 0 disables the
                              deadline, so the loop only ends when it is
                              interrupted.
    """
    print(f"Script will attempt to keep Colab alive for {duration_hours} hours. (Set to 0 for indefinite run)")
    deadline = datetime.datetime.now() + datetime.timedelta(hours=duration_hours)

    heartbeat = 0
    try:
        while True:
            now = datetime.datetime.now()
            # The deadline is only enforced for a positive duration.
            if duration_hours > 0 and now >= deadline:
                print(f"Maximum duration of {duration_hours} hours reached. Stopping script.")
                break

            heartbeat += 1
            stamp = now.strftime('%Y-%m-%d %H:%M:%S')
            print(f"[{stamp}] Colab is alive! Iteration: {heartbeat}")
            # One-minute pause between heartbeats keeps CPU usage negligible.
            time.sleep(60)

    except KeyboardInterrupt:
        print("\nScript stopped manually by user (Ctrl+C).")
    except Exception as e:
        print(f"\nAn error occurred: {e}")
    finally:
        print("Colab keep-alive script finished.")

# --- How to use ---
# Call keep_colab_alive() to start the heartbeat loop. The call blocks this
# cell until the loop ends.
# Pass duration_hours to bound the run, for example 12 hours:
# keep_colab_alive(duration_hours=12)

# duration_hours=0 disables the deadline, so this call runs until the cell is
# interrupted manually (KeyboardInterrupt):
keep_colab_alive(duration_hours=0) # Set to 0 for indefinite run


In [None]:
# @title
import os
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
import google.colab.files
import time
import subprocess # For running ffprobe
import shutil # For checking if ffprobe is installed
import json # For ffprobe JSON output (if chosen)

# --- Setup and Initialization ---
# Folder scanned on start-up; the user can point the UI at another folder.
default_download_dir = "/content/DouyinLiveRecorder/downloads/抖音直播/"
# Maximum number of downloads started by a single batch-button click.
download_limit_per_batch = 1
# Filename suffixes that are treated as video files.
video_extensions = (
    '.mp4', '.avi', '.mkv', '.mov', '.wmv',
    '.flv', '.webm', '.ogg', '.3gp', '.ts',
)

# Text file that records the path of every download that has been started.
download_links_file = "downloaded_links.txt"

# Create the history file up front when it is missing; opening in "w" mode
# and closing immediately leaves an empty file behind.
if not os.path.exists(download_links_file):
    with open(download_links_file, "w"):
        pass

# --- Helper Functions ---

def format_duration(seconds):
    """
    Render a duration given in seconds as text such as "1h 2m 3s".

    ``None`` and negative values yield "N/A". Fractional seconds are
    truncated. Zero-valued components are omitted, except that minutes are
    kept as "0m" when both hours and seconds are present, and a duration
    under one minute always shows its seconds.
    """
    if seconds is None or seconds < 0:
        return "N/A"

    hours, remainder = divmod(int(seconds), 3600)
    minutes, secs = divmod(remainder, 60)

    show_minutes = minutes > 0 or (hours > 0 and secs > 0)
    show_seconds = secs > 0 or (hours == 0 and minutes == 0)

    labelled = (
        (hours > 0, f"{hours}h"),
        (show_minutes, f"{minutes}m"),
        (show_seconds, f"{secs}s"),
    )
    pieces = [text for keep, text in labelled if keep]
    return " ".join(pieces) if pieces else "0s"

def format_bytes(bytes_value):
    """
    Render a byte count as text with two decimals and a binary unit.

    Args:
        bytes_value: Size in bytes. ``None`` and negative values are
            reported as "0 B".

    Returns:
        A string such as "1.50 KB". Units step by 1024 from B up to PB;
        anything of 1024 PB or more is still expressed in PB.
    """
    if bytes_value is None or bytes_value < 0:
        return "0 B"

    units = ('B', 'KB', 'MB', 'GB', 'TB', 'PB')
    size = float(bytes_value)

    # Only divide while a larger unit remains. Dividing after the last unit
    # would label a value that is 1024x too small as PB.
    for unit in units[:-1]:
        if size < 1024:
            return f"{size:.2f} {unit}"
        size /= 1024
    return f"{size:.2f} {units[-1]}"


def _simulate_duration_fallback(file_name, file_path_for_size):
    """
    Helper function to simulate duration if ffprobe fails or is not found.
    This is a fallback mechanism.
    """
    duration_seconds = 0.0
    print(f"    [Fallback] Attempting to simulate duration for: {file_name}")
    try:
        # 1. Try from filename (if it has a pattern like video_duration_300s.mp4)
        # Improve regex or parsing logic based on actual filename patterns if needed
        duration_parts = [part for part in file_name.split('_') if 's' in part and part[:-1].isdigit()]
        if duration_parts:
            duration_match_str = duration_parts[-1] # Take the last one in case of multiple _XXs_
            duration_seconds = float(duration_match_str.replace('s', ''))
            print(f"    [Fallback] Simulated duration from filename ('{duration_match_str}'): {duration_seconds:.2f}s")
            return duration_seconds

        # 2. If no pattern in filename, try to simulate from file size
        if os.path.exists(file_path_for_size):
            file_size_bytes = os.path.getsize(file_path_for_size)
            # Simulation formula: Assume at least 10 minutes + remainder from dividing by 600
            # Or use approximate bitrate (e.g., 2MB per minute -> 2*1024*1024 bytes / 60 sec)
            # Assume 1 MBps -> duration = size_in_MB
            # Assume 0.5 MBps -> duration = size_in_MB * 2
            # This is a very rough estimation
            estimated_duration = (file_size_bytes / (0.5 * 1024 * 1024)) # Approximately 0.5MB/sec
            duration_seconds = max(600.0, estimated_duration) # At least 10 minutes (600 seconds) or calculated
            print(f"    [Fallback] Simulated duration from file size ({file_size_bytes} bytes): {duration_seconds:.2f}s")
            return duration_seconds
        else:
            print(f"    [Fallback] Cannot access file for size-based simulation. Defaulting duration.")
            duration_seconds = 600.0 # Default to 10 minutes (600 seconds)
            return duration_seconds
    except Exception as e:
        print(f"    [Fallback] Error during simulation for {file_name}: {e}. Defaulting duration.")
        duration_seconds = 600.0
    return duration_seconds

def _probe_duration_seconds(ffprobe_path, file_path, file_name):
    """Ask ffprobe for the container duration; return seconds, or None when no usable value is obtained."""
    import math

    command = [
        ffprobe_path,
        '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'json',  # JSON output is straightforward to parse
        file_path
    ]
    try:
        # The timeout keeps a damaged file from hanging the scan.
        result = subprocess.run(command, capture_output=True, text=True, check=False, timeout=20)
    except subprocess.TimeoutExpired:
        print(f"    ffprobe timed out processing {file_name}. Using fallback.")
        return None
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        print(f"    Unexpected error using ffprobe for {file_name}: {e}. Using fallback.")
        return None

    # A failed probe stays silent on purpose: the caller's fallback prints
    # its own message, and non-video files would otherwise flood the output.
    if result.returncode != 0 or not result.stdout:
        return None

    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError:
        print(f"    ffprobe output for {file_name} was not valid JSON. Using fallback.")
        return None

    format_section = data.get('format') if isinstance(data, dict) else None
    if not isinstance(format_section, dict) or 'duration' not in format_section:
        print(f"    ffprobe output for {file_name} missing duration field. Using fallback.")
        return None

    try:
        duration_seconds = float(format_section['duration'])
    except (TypeError, ValueError):
        print(f"    ffprobe returned non-numeric duration for {file_name}. Using fallback.")
        return None

    # NaN or infinity cannot be formatted later, so treat them as unusable.
    if not math.isfinite(duration_seconds):
        print(f"    ffprobe returned non-numeric duration for {file_name}. Using fallback.")
        return None
    if duration_seconds < 0:
        print(f"    ffprobe returned negative duration for {file_name}. Using fallback.")
        return None
    return duration_seconds


def get_video_info(file_path):
    """
    Collect the size and duration of one video file.

    The duration comes from ffprobe when it is on PATH and yields a usable
    positive value; otherwise ``_simulate_duration_fallback`` supplies an
    estimate.

    Args:
        file_path: Path of the video file.

    Returns:
        ``None`` when the file does not exist, otherwise a dict with keys
        ``name``, ``path``, ``size_bytes``, ``formatted_size``,
        ``duration_min``, ``formatted_duration`` and ``raw_duration_sec``.
    """
    file_name = os.path.basename(file_path)

    if not os.path.exists(file_path):
        print(f"⚠️ File not found during info retrieval: {file_path}")
        return None

    file_size = 0
    duration_seconds = None
    try:
        file_size = os.path.getsize(file_path)  # size in bytes
        ffprobe_path = shutil.which('ffprobe')
        if ffprobe_path:
            duration_seconds = _probe_duration_seconds(ffprobe_path, file_path, file_name)
    except FileNotFoundError:
        # The file vanished between the existence check and getsize.
        print(f"⚠️ File not found (getsize or other access): {file_path}")
        return None
    except Exception as e:
        print(f"⚠️ General error getting info for {file_path}: {e}. Using fallback.")

    # No value, or a zero duration, is treated as "unknown" and estimated.
    if not duration_seconds:
        duration_seconds = _simulate_duration_fallback(file_name, file_path)

    return {
        "name": file_name,
        "path": file_path,
        "size_bytes": file_size,  # raw bytes; formatted_size is the display form
        "formatted_size": format_bytes(file_size),
        "duration_min": round(duration_seconds / 60, 2),
        "formatted_duration": format_duration(duration_seconds),
        "raw_duration_sec": duration_seconds
    }

def create_dummy_files(base_dir, num_files=10):
    """
    Fill ``base_dir`` with random-content files named like videos, for testing.

    Each name embeds a ``<seconds>s`` token so the filename-based duration
    fallback can be exercised. The content is random bytes, not real video,
    so ffprobe is expected to fail on these files.

    Args:
        base_dir: Folder to create (if needed) and populate.
        num_files: Number of files to write.
    """
    print(f"✨ Creating dummy files in {base_dir} for testing...")
    os.makedirs(base_dir, exist_ok=True)

    one_mib = 1024 * 1024
    for index in range(1, num_files + 1):
        # Every fourth file gets a short duration; the others alternate
        # between two formulas depending on parity.
        if index % 4 == 0:
            fake_seconds = 300
        elif index % 2 == 0:
            fake_seconds = 500 + (index * 50)
        else:
            fake_seconds = 700 + (index * 20)

        suffix = video_extensions[index % len(video_extensions)]
        # About 0.25 MB per simulated second, but never below 1 MB.
        payload_size = max(int(fake_seconds * 0.25 * 1024 * 1024), one_mib)

        dummy_name = f"video_{index}_dummy_duration_{fake_seconds}s_id{index*10}{suffix}"
        dummy_path = os.path.join(base_dir, dummy_name)
        try:
            with open(dummy_path, "wb") as handle:
                handle.write(os.urandom(payload_size))
        except Exception as e:
            print(f"Error creating dummy file {dummy_name}: {e}")

    print(f"✅ Created {num_files} dummy files successfully. ffprobe will likely use fallback for these.")

# --- Main Application Class ---
class ColabVideoDownloader:
    """
    ipywidgets front end for browsing recorded videos and downloading them.

    The app scans a folder recursively for video files, keeps those whose
    duration is at least 600 seconds, and offers per-file download buttons
    and two batch buttons. Every download that is started is appended to a
    history file. Batch downloads skip paths already in that history, so
    repeated clicks advance through the list instead of re-sending the first
    file; clearing the history makes every file eligible again.
    """

    def __init__(self, download_dir, download_limit_per_batch, video_extensions, download_links_file):
        """
        Build the widgets, wire the callbacks and run the first folder scan.

        Args:
            download_dir: Folder scanned recursively for video files.
            download_limit_per_batch: Maximum downloads started per batch click.
            video_extensions: Suffix or tuple of suffixes passed to
                ``str.endswith`` on the lower-cased file name.
            download_links_file: Text file that receives one path per line
                for every download that is started.
        """
        self.download_dir = download_dir
        self.download_limit_per_batch = download_limit_per_batch
        self.video_extensions = video_extensions
        self.download_links_file = download_links_file
        self.video_files_info = []
        self.skip_checkboxes = []  # Checkbox widgets currently shown in the batch list
        # Maps id(checkbox) -> file_info. The ids stay valid because
        # skip_checkboxes keeps every keyed widget alive.
        self.checkbox_file_map = {}
        self.current_filter_text = ""
        self.ffprobe_available = bool(shutil.which('ffprobe'))  # Checked once at init

        # UI elements
        self.output_area = widgets.Output()
        self.dir_input = widgets.Text(
            value=self.download_dir,
            placeholder='Enter your download folder path',
            description='Folder:',
            # Every value change triggers a full rescan (ffprobe per file),
            # so only react when the edit is committed, not on each keystroke.
            continuous_update=False,
            layout=widgets.Layout(width='auto')
        )
        self.refresh_button = widgets.Button(
            description="🔄 Refresh Files",
            button_style='primary',
            tooltip="Scan the download folder again"
        )
        self.clear_links_button = widgets.Button(
            description="🗑️ Clear Download History",
            button_style='danger',
            tooltip="Clear downloaded_links.txt file"
        )
        self.filter_input = widgets.Text(
            value="",
            placeholder='Search filename...',
            description='Search:',
            layout=widgets.Layout(width='auto')
        )
        self.status_label = widgets.HTML(value="")  # ffprobe availability notice
        self.single_download_container = widgets.VBox([])
        self.batch_download_container = widgets.VBox([])
        self.download_history_output = widgets.Output(layout=widgets.Layout(max_height='150px', overflow_y='auto'))

        self._setup_ui()
        self._setup_event_handlers()
        if not self.ffprobe_available:
            self.status_label.value = "<p style='color:orange;'>⚠️ <b>Warning:</b> `ffprobe` not found. Video duration simulation will be used, which may not be accurate.</p>"
        else:
            self.status_label.value = "<p style='color:green;'>✅ `ffprobe` is available for accurate video duration.</p>"

        self._scan_files()  # Initial scan
        self._update_ui_display()
        self._display_download_history()

    def _setup_ui(self):
        """Assemble the static layout and store it in ``self.main_layout``."""
        header = widgets.HTML("<h1>🚀 Colab Video Downloader (Epic MVP + FFprobe)</h1>")
        instructions = widgets.HTML("""
            <p>Welcome to the advanced video downloader! You can:</p>
            <ul>
                <li><b>Specify Download Folder:</b> Enter the path to your video folder.</li>
                <li><b>Refresh Files:</b> Scan for new video files (duration from ffprobe if available).</li>
                <li><b>Download Single File:</b> Click the download button for each video.</li>
                <li><b>Batch Download:</b> Select files to skip and download the rest.</li>
                <li><b>Search:</b> Filter the video list by name.</li>
                <li><b>View Download History:</b> Check which files have already been downloaded.</li>
            </ul>
            <p><b>Note:</b> Videos shorter than 10 minutes (600 seconds) are skipped by default.</p>
        """)

        dir_control_box = widgets.HBox([self.dir_input, self.refresh_button])
        filter_box = widgets.HBox([self.filter_input, self.clear_links_button])

        self.main_layout = widgets.VBox([
            header,
            instructions,
            self.status_label,
            dir_control_box,
            filter_box,
            widgets.HTML("<h2>--- ⬇️ Single File Download ---</h2>"),
            self.single_download_container,
            widgets.HTML("<h2>--- 📦 Batch Download ---</h2>"),
            self.batch_download_container,
            widgets.HTML("<h2>--- 📜 Download History ---</h2>"),
            self.download_history_output,
            self.output_area  # General messages and download feedback
        ])

    def _setup_event_handlers(self):
        """Connect the permanent widgets to their callbacks."""
        self.refresh_button.on_click(self._on_refresh_button_click)
        self.clear_links_button.on_click(self._on_clear_links_button_click)
        self.filter_input.observe(self._on_filter_input_change, names='value')
        self.dir_input.observe(self._on_dir_input_change, names='value')

    def _on_dir_input_change(self, change):
        """Switch to the newly entered folder and rescan it."""
        self.download_dir = change['new']
        with self.output_area:
            clear_output(wait=True)
            print(f"Download folder changed to: {self.download_dir}")
        self._scan_files()
        self._update_ui_display()

    def _on_refresh_button_click(self, b):
        """Rescan the current folder and rebuild the file lists."""
        with self.output_area:
            clear_output(wait=True)
            print("🔄 Refreshing file list...")
        self._scan_files()
        self._update_ui_display()
        with self.output_area:  # Appends to the messages printed by the scan
            print("✅ File list refreshed!")

    def _on_clear_links_button_click(self, b):
        """Truncate the history file and refresh the history panel."""
        with self.output_area:
            clear_output(wait=True)
            try:
                with open(self.download_links_file, "w", encoding="utf-8") as history_file:
                    history_file.write("")
                print("✅ Download history cleared!")
                self._display_download_history()
            except Exception as e:
                print(f"❌ Error clearing history: {e}")

    def _on_filter_input_change(self, change):
        """Apply the search text (case-insensitively) to the displayed lists."""
        self.current_filter_text = change['new'].lower()
        self._update_ui_display()

    def _load_downloaded_paths(self):
        """Return the set of paths recorded in the history file (empty when the file is missing)."""
        try:
            with open(self.download_links_file, "r", encoding="utf-8") as history_file:
                return {line.strip() for line in history_file if line.strip()}
        except FileNotFoundError:
            return set()

    def _exclude_already_downloaded(self, files):
        """Return the entries of ``files`` whose path is not yet in the history, order preserved."""
        downloaded = self._load_downloaded_paths()
        return [file_info for file_info in files if file_info['path'] not in downloaded]

    def _record_download(self, path):
        """Append ``path`` to the history file."""
        with open(self.download_links_file, "a", encoding="utf-8") as history_file:
            history_file.write(f"{path}\n")

    def _scan_files(self):
        """
        Walk ``self.download_dir`` and rebuild ``self.video_files_info``.

        Only files matching ``self.video_extensions`` whose duration is at
        least 600 seconds are kept; the result is sorted by file name.
        """
        self.video_files_info = []
        # isdir rather than exists: a path that names a regular file would
        # make os.listdir raise below.
        if not os.path.isdir(self.download_dir):
            with self.output_area:
                print(f"⚠️ Folder {self.download_dir} not found!")
            return

        if not os.listdir(self.download_dir):
            with self.output_area:
                print(f"ℹ️ Folder {self.download_dir} is empty.")
            # No return: the walk below simply finds nothing.

        files_found_for_scan = []
        for root, _, files in os.walk(self.download_dir):
            for file in files:
                if file.lower().endswith(self.video_extensions):
                    files_found_for_scan.append(os.path.join(root, file))

        if not files_found_for_scan:
            with self.output_area:
                print(f"🤔 No video files ({', '.join(self.video_extensions)}) found in {self.download_dir}.")
            return

        with self.output_area:
            print(f"🔍 Found {len(files_found_for_scan)} potential video files. Retrieving info (this may take a moment)...")
            if not self.ffprobe_available:
                print("    (ffprobe not found, duration will be simulated)")

        temp_video_list = []
        for file_path in files_found_for_scan:
            info = get_video_info(file_path)  # ffprobe or fallback estimate
            if info and info.get("raw_duration_sec", 0) >= 600:  # 10-minute minimum
                temp_video_list.append(info)
            elif info:
                # Route the notice to the output widget like every other
                # scan message.
                with self.output_area:
                    print(f"    ⏩ Skipping file {info['name']} (Duration: {info['formatted_duration']} - too short)")
            # When info is None, get_video_info has already reported the error.

        self.video_files_info = sorted(temp_video_list, key=lambda x: x['name'])
        with self.output_area:
            print(f"🎬 Found {len(self.video_files_info)} video files (length >= 10 minutes) ready for download:")
            if not self.video_files_info:
                print("    ❌ No video files matching criteria in the specified folder.")

    def _update_ui_display(self):
        """Rebuild the single-download and batch-download lists from the scan results and the filter."""
        self.checkbox_file_map.clear()
        self.skip_checkboxes = []

        filtered_files = [
            file_info for file_info in self.video_files_info
            if self.current_filter_text in file_info['name'].lower()
        ]

        # --- Single File Download section ---
        if not filtered_files:
            single_widgets_children = [widgets.HTML("<p>🚫 No video files for single download.</p>")]
        else:
            single_widgets_children = [
                self._create_single_download_widget(video) for video in filtered_files
            ]
        self.single_download_container.children = single_widgets_children

        # --- Batch Download section ---
        if not filtered_files:
            self.batch_download_container.children = [widgets.HTML("<p>🚫 No video files for batch download.</p>")]
            return

        batch_checkbox_widgets = []
        for video in filtered_files:
            display_name = video['name']
            if len(display_name) > 60:  # Truncate long names in the checkbox label
                display_name = display_name[:57] + "..."

            # The tooltip carries the untruncated details.
            tooltip_text = (f"Full Name: {video['name']}\n"
                            f"Size: {video['formatted_size']}\n"
                            f"Duration: {video['formatted_duration']} ({video['raw_duration_sec']:.1f} seconds)\n"
                            f"Path: {video['path']}")

            checkbox_description = f"Skip: {display_name} ({video['formatted_size']}, {video['formatted_duration']})"

            checkbox = widgets.Checkbox(
                value=False,
                description=checkbox_description,
                indent=False,
                tooltip=tooltip_text,
                layout=widgets.Layout(width='98%', margin='1px 0')
            )
            self.skip_checkboxes.append(checkbox)
            self.checkbox_file_map[id(checkbox)] = video
            batch_checkbox_widgets.append(checkbox)

        # Scrollable area for the checkboxes
        skip_selection_vbox = widgets.VBox(children=batch_checkbox_widgets)
        scrollable_checkbox_area = widgets.Box(
            children=[skip_selection_vbox],
            layout=widgets.Layout(
                max_height='300px',
                overflow_y='auto',
                border='1px solid #ccc',
                padding='5px',
                width='100%'
            )
        )

        download_batch_button = widgets.Button(
            description=f"🚀 Start Selected Download ({self.download_limit_per_batch} files/batch)",
            button_style='success',
            tooltip=f"Download files *not* marked 'Skip', up to {self.download_limit_per_batch} files per click"
        )
        download_all_button = widgets.Button(
            description=f"⬇️ Download All Displayed ({self.download_limit_per_batch} files/batch)",
            button_style='warning',
            tooltip=f"Download *all* files currently displayed (ignores 'Skip' checkboxes), up to {self.download_limit_per_batch} files per click"
        )

        # The buttons are created fresh on every rebuild, so they carry no
        # earlier handlers that would need clearing.
        download_batch_button.on_click(self._on_download_batch_click)
        download_all_button.on_click(self._on_download_all_click)

        self.batch_download_container.children = [
            scrollable_checkbox_area,
            widgets.HBox([download_batch_button, download_all_button], layout=widgets.Layout(margin='5px 0'))
        ]

    def _create_single_download_widget(self, file_info):
        """Return a row holding a download button and an HTML label for one file."""
        import html

        # File names come from the filesystem and may contain characters
        # that are significant in HTML, so escape before embedding.
        safe_name = html.escape(file_info['name'])
        file_label_html = (f"<div style='padding-left: 5px; font-size:0.9em;'>"
                           f"📁 <b>{safe_name}</b><br>"
                           f"    <small>Size: {file_info['formatted_size']} | "
                           f"Duration: {file_info['formatted_duration']}"
                           f" ({file_info['raw_duration_sec']:.1f} seconds)</small>"
                           f"</div>")
        file_label = widgets.HTML(value=file_label_html)

        download_button = widgets.Button(description="⬇️ Download", button_style='info', layout=widgets.Layout(width='120px'))

        # The default argument binds this row's file_info to the handler.
        def on_button_click_specific(b, current_file_info=file_info):
            b.disabled = True  # Block repeat clicks while the download starts
            with self.output_area:
                clear_output(wait=True)
                print(f"⏳ Preparing to download: {current_file_info['name']}...")
                try:
                    google.colab.files.download(current_file_info['path'])
                    # Record the download once it has been started.
                    self._record_download(current_file_info['path'])
                    print(f"✅ Download of {current_file_info['name']} initiated! (Path saved)")
                    self._display_download_history()
                except Exception as e:
                    print(f"❌ Error downloading {current_file_info['name']}: {e}")
                finally:
                    b.disabled = False

        download_button.on_click(on_button_click_specific)

        return widgets.HBox([download_button, file_label], layout=widgets.Layout(margin='2px 0'))

    def _on_download_batch_click(self, b):
        """Start a batch download of every listed file whose 'Skip' box is unchecked."""
        b.disabled = True  # Re-enabled when the batch finishes or is rejected

        files_to_download = []
        for checkbox in self.skip_checkboxes:
            if not checkbox.value:  # Unchecked means "do not skip"
                file_info = self.checkbox_file_map.get(id(checkbox))
                if file_info:
                    files_to_download.append(file_info)

        with self.output_area:
            clear_output(wait=True)
            if not files_to_download:
                print("🚫 No files selected for download (or all files marked 'Skip').")
                b.disabled = False
                return

        self._initiate_batch_download(files_to_download, b)

    def _on_download_all_click(self, b):
        """Start a batch download of every file matching the current filter, ignoring 'Skip' boxes."""
        b.disabled = True

        files_to_download = [
            file_info for file_info in self.video_files_info
            if self.current_filter_text in file_info['name'].lower()
        ]

        with self.output_area:
            clear_output(wait=True)
            if not files_to_download:
                print("🚫 No files available for download in the current view.")
                b.disabled = False
                return
        self._initiate_batch_download(files_to_download, b)

    def _initiate_batch_download(self, files_to_download_list, button_widget):
        """
        Start up to ``self.download_limit_per_batch`` downloads from the list.

        Files whose path is already in the download history are skipped, so
        that clicking the same button again continues with the next files
        instead of repeating the first ones.

        Args:
            files_to_download_list: File-info dicts selected by the caller.
            button_widget: The button that triggered the batch; it is
                re-enabled when this method returns.
        """
        with self.output_area:  # The caller has already cleared this area
            try:
                pending_files = self._exclude_already_downloaded(files_to_download_list)
                already_downloaded = len(files_to_download_list) - len(pending_files)
                if already_downloaded:
                    print(f"⏭️ Skipping {already_downloaded} file(s) already in the download history "
                          "(clear the history to download them again).")
                if not pending_files:
                    print("✅ Every file in this selection has already been downloaded.")
                    return

                print(f"📦 Preparing {len(pending_files)} files for batch download...")

                downloaded_this_session = 0
                files_actually_triggered_for_download = []
                batch_total = min(len(pending_files), self.download_limit_per_batch)

                for i, file_info in enumerate(pending_files):
                    # Only successful starts count against the limit, so a
                    # failing file does not use up a batch slot.
                    if downloaded_this_session >= self.download_limit_per_batch:
                        print(f"\n⚠️ Batch download limit reached ({self.download_limit_per_batch} files).")
                        print("    Please complete the pending downloads, then click the download button again for the next batch.")
                        break

                    print(f"\n({downloaded_this_session + 1}/{batch_total}) Downloading: {file_info['name']}...")
                    try:
                        google.colab.files.download(file_info['path'])
                        self._record_download(file_info['path'])
                        print(f"✅ Download of {file_info['name']} initiated! (Path saved)")
                        files_actually_triggered_for_download.append(file_info['path'])
                        self._display_download_history()
                        downloaded_this_session += 1
                        # Short pause between downloads so the browser can
                        # keep up with the prompts.
                        if downloaded_this_session < self.download_limit_per_batch and (i + 1) < len(pending_files):
                            time.sleep(1.5)
                    except Exception as e:
                        print(f"❌ Error downloading {file_info['name']}: {e}")

                # --- Summary ---
                remaining_in_selection = len(pending_files) - len(files_actually_triggered_for_download)
                if not files_actually_triggered_for_download:
                    print("\n🚫 No downloads occurred in this batch (check errors above or limit).")
                elif remaining_in_selection > 0:
                    print(f"\n🎉 Batch download of {len(files_actually_triggered_for_download)} files initiated! {remaining_in_selection} files remaining in selected list.")
                    if downloaded_this_session >= self.download_limit_per_batch:
                        print("    Click the download button again for the next batch.")
                else:
                    print(f"\n🎉 All selected files ({len(files_actually_triggered_for_download)} files) have been processed for download!")
            finally:
                button_widget.disabled = False

    def _display_download_history(self):
        """Show the unique file names recorded in the history file in the history panel."""
        with self.download_history_output:
            clear_output(wait=True)
            try:
                with open(self.download_links_file, "r", encoding="utf-8") as history_file:
                    history_content = history_file.read().strip()
                # Only base names are listed, de-duplicated and sorted.
                unique_downloaded_files = sorted(
                    {os.path.basename(p) for p in history_content.split('\n') if p.strip()}
                )
                if unique_downloaded_files:
                    display_text = "Files previously initiated for download (filenames):\n- " + "\n- ".join(unique_downloaded_files)
                    print(display_text)
                else:
                    print("No files in download history yet.")
            except FileNotFoundError:
                print("Download history file not found (will be created on first download).")
            except Exception as e:
                print(f"❌ An error occurred in history display: {e}")

# Build the downloader from the module-level defaults (the constructor runs
# the first folder scan) and render its widget tree in the cell output.
app = ColabVideoDownloader(default_download_dir, download_limit_per_batch, video_extensions, download_links_file)
display(app.main_layout)


In [None]:
from google.colab import drive
import os
import shutil
import subprocess
from datetime import timedelta
import pandas as pd
import logging
from tqdm.notebook import tqdm # Use tqdm.notebook for Google Colab
import hashlib # For checksum verification

# --- Configuration ---
# Directory that holds the recorded videos to back up
SOURCE_DIR = '/content/DouyinLiveRecorder/downloads/抖音直播'
# Google Drive directory that receives the backups (adjust as needed)
TARGET_DIR = '/content/drive/MyDrive/8888/ColabDL'

# Advanced Configuration
ENABLE_CHECKSUM_VERIFICATION = True  # Compare source/destination MD5 after each copy or move
LOG_TO_FILE = True  # Also write log records to a file inside TARGET_DIR
LOG_FILE_NAME = 'backup_log.log'  # File name used when LOG_TO_FILE is True
DEFAULT_OPERATION = 'move'  # Fallback operation: 'copy' or 'move'
# --------------------

# Set up logging
# Drop handlers left over from a previous run of this cell so records are not
# emitted twice; iterate over a snapshot because removal mutates the list.
for stale_handler in list(logging.root.handlers):
    logging.root.removeHandler(stale_handler)
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)

# Function to install FFmpeg if not present
def install_ffmpeg():
    """
    Ensures ffprobe is available, installing FFmpeg through apt-get if it is not.

    FFmpeg includes ffprobe, which is necessary for getting video durations.
    Availability is probed by running ``ffprobe -h``; a missing executable or
    a non-zero exit status both count as "not installed".

    Raises:
        RuntimeError: If ffprobe is unavailable and the apt-get based
            installation fails, including the case where apt-get itself
            cannot be found.
    """
    try:
        subprocess.run(['ffprobe', '-h'], check=True, capture_output=True)
        logging.info("FFmpeg (ffprobe) is already installed.")
        return
    except (subprocess.CalledProcessError, FileNotFoundError):
        logging.warning("FFmpeg (ffprobe) not found. Attempting to install FFmpeg...")

    try:
        # Update apt-get and install ffmpeg
        subprocess.run(['apt-get', 'update'], check=True, capture_output=True)
        subprocess.run(['apt-get', 'install', '-y', 'ffmpeg'], check=True, capture_output=True)
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        # CalledProcessError carries the captured stderr bytes; FileNotFoundError
        # (apt-get missing) has none, so fall back to the exception text.
        captured_stderr = getattr(e, 'stderr', None)
        detail = captured_stderr.decode(errors='replace').strip() if captured_stderr else str(e)
        logging.error(f"Failed to install FFmpeg: {detail}. Please install it manually.")
        raise RuntimeError("FFmpeg installation failed.") from e
    logging.info("FFmpeg installed successfully.")

# Function to calculate MD5 checksum of a file
def calculate_md5(file_path, chunk_size=8192):
    """
    Computes the MD5 digest of a file by reading it in fixed-size pieces.

    Args:
        file_path (str): Location of the file to hash.
        chunk_size (int): Number of bytes requested per read.

    Returns:
        str: Hexadecimal MD5 digest, or None when the file cannot be read
            (the error is logged rather than raised).
    """
    try:
        digest = hashlib.md5()
        with open(file_path, 'rb') as stream:
            while True:
                block = stream.read(chunk_size)
                if not block:  # empty bytes marks end of file
                    break
                digest.update(block)
        return digest.hexdigest()
    except Exception as e:
        logging.error(f"Error calculating MD5 for {file_path}: {e}")
        return None

# Mount Google Drive at /content/drive. TARGET_DIR lives under this mount
# point, so a mount failure is logged and re-raised to stop the cell.
logging.info("Mounting Google Drive...")
try:
    drive.mount('/content/drive')
    logging.info("Google Drive mounted successfully.")
except Exception as e:
    logging.error(f"Failed to mount Google Drive: {e}")
    raise

# Ensure FFmpeg is installed (ffprobe is used later to read video durations)
install_ffmpeg()

# Check if source directory exists; abort early when there is nothing to scan
if not os.path.exists(SOURCE_DIR):
    logging.error(f"Source directory not found: {SOURCE_DIR}")
    raise FileNotFoundError(f"Source directory not found: {SOURCE_DIR}")
logging.info(f"Source directory found: {SOURCE_DIR}")

# Create target directory (if it doesn't exist); exist_ok=True makes re-running
# the cell against an existing directory a no-op
try:
    os.makedirs(TARGET_DIR, exist_ok=True)
    logging.info(f"Checked and created target directory: {TARGET_DIR}")
except Exception as e:
    logging.error(f"Failed to create target directory: {e}")
    raise

# Configure file logging if enabled. This happens only after TARGET_DIR exists
# because the log file is created inside it; records logged before this point
# are therefore not written to the file.
if LOG_TO_FILE:
    log_file_path = os.path.join(TARGET_DIR, LOG_FILE_NAME)
    file_handler = logging.FileHandler(log_file_path)
    # Same record format as the basicConfig call used for console output
    file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
    logging.getLogger().addHandler(file_handler)
    logging.info(f"Logging to file enabled: {log_file_path}")

# Get video file duration (using ffprobe)
def get_video_duration(file_path):
    """
    Retrieves the duration of a video file using ffprobe.

    Args:
        file_path (str): The path to the video file.

    Returns:
        float: The duration in seconds. 0.0 is returned (and the problem is
            logged) when ffprobe is missing, exits with an error, or reports
            something that is not a finite, non-negative number, so callers
            can filter such files out by duration.
    """
    # Local import keeps this function self-contained within the notebook cell.
    import math

    try:
        cmd = [
            'ffprobe', '-v', 'error', '-show_entries',
            'format=duration', '-of',
            'default=noprint_wrappers=1:nokey=1', file_path
        ]
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
        duration = float(result.stdout.strip())
        # float() accepts "nan"/"inf"; such values (and negatives) would break
        # timedelta-based formatting later, so treat them as unreadable.
        if not math.isfinite(duration) or duration < 0:
            raise ValueError(f"unusable duration value: {duration!r}")
        return duration # Return duration in seconds
    except (subprocess.CalledProcessError, ValueError) as e:
        logging.warning(f"Could not get duration for file {file_path}: {e}. Returning 0.0.")
        return 0.0 # Return 0.0 for error cases to allow filtering
    except FileNotFoundError:
        # This error should ideally be caught by install_ffmpeg, but kept for robustness
        logging.error(" 'ffprobe' not found. Please ensure FFmpeg is installed.")
        return 0.0

def format_duration(seconds):
    """
    Renders a number of seconds as the text form of a timedelta, dropping any
    fractional-second suffix (e.g. 3661.5 -> "1:01:01").
    """
    # str(timedelta) appends ".ffffff" only when microseconds are non-zero;
    # keeping the part before the first dot removes it in either case.
    whole_part, _, _ = str(timedelta(seconds=seconds)).partition('.')
    return whole_part

# Scan video files and return as DataFrame
def scan_video_files(directory):
    """
    Scans a directory tree for video files (.mp4, .ts) and collects their information.

    Args:
        directory (str): The directory to scan (sub-directories are included).

    Returns:
        pd.DataFrame: One row per video file with the columns "File Name",
            "Path", "Size (MB)", "DurationSeconds" and "Duration". The columns
            are present even when no file is found, so callers can filter on
            them without a KeyError.
    """
    # Declared explicitly so an empty scan still yields a frame with these columns.
    columns = ["File Name", "Path", "Size (MB)", "DurationSeconds", "Duration"]

    logging.info(f"Scanning video files in: {directory}")
    video_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if not file.endswith(('.mp4', '.ts')):
                continue
            file_path = os.path.join(root, file)
            try:
                size = os.path.getsize(file_path) / (1024 * 1024) # MB
                duration_seconds = get_video_duration(file_path)
                video_files.append({
                    "File Name": file,
                    "Path": file_path,
                    "Size (MB)": round(size, 2),
                    "DurationSeconds": duration_seconds, # Store raw seconds
                    "Duration": format_duration(duration_seconds) # Store formatted duration for display
                })
            except OSError as e:
                # The file may disappear or be unreadable between listing and stat
                logging.warning(f"Could not access file {file_path}: {e}. Skipping this file.")
    logging.info(f"Scan complete. Found {len(video_files)} video files.")
    return pd.DataFrame(video_files, columns=columns)

# Scan and display file information
df_raw = scan_video_files(SOURCE_DIR)

# Filter out files shorter than 10 minutes (600 seconds)
MIN_DURATION_SECONDS = 600

if df_raw.empty:
    # An empty scan result may not have a 'DurationSeconds' column at all, so
    # skip the filter instead of indexing into it.
    df = df_raw.copy()
    logging.warning(f"No MP4/TS video files found in: {SOURCE_DIR}")
    print("❌ No MP4/TS video files found in the source directory.") # Keep print for user visibility
else:
    df = df_raw[df_raw['DurationSeconds'] >= MIN_DURATION_SECONDS].copy()
    logging.info(f"Found {len(df_raw)} video files in total. {len(df)} files meet the minimum duration requirement (>= {MIN_DURATION_SECONDS/60} minutes).")

# Ask which operation to perform; anything other than 'copy' or 'move'
# (including an empty answer) falls back to DEFAULT_OPERATION.
print(f"\n❓ Do you want to 'copy' or 'move' the files? (default: {DEFAULT_OPERATION})")
logging.info("Prompting user for operation type.")
operation_choice = input("Your choice (copy/move): ").strip().lower()
if operation_choice != 'copy' and operation_choice != 'move':
    print(f"Invalid choice. Defaulting to '{DEFAULT_OPERATION}'.")
    operation_choice = DEFAULT_OPERATION
logging.info(f"User selected operation: {operation_choice}")

# Counters reported in the final summary; all start at zero
operation_performed_count = skipped_count = failed_count = 0
verified_count = checksum_mismatch_count = 0

# List the eligible files, let the user pick some, copy/move them to TARGET_DIR
# (optionally verifying MD5), and after a copy offer to delete the sources.
if len(df) > 0:
    print("\nAvailable video files (Index, File Name, Size (MB), Duration):")

    # Fixed column widths based on user's example
    idx_col_width = 9
    name_col_width = 50
    size_col_width = 12
    duration_col_width = 12

    table_rule = f"|{'-'*idx_col_width}|{'-'*name_col_width}|{'-'*size_col_width}|{'-'*duration_col_width}|"

    # Print header
    print(table_rule)
    print(f"|{'Index'.center(idx_col_width)}|{'File Name'.center(name_col_width)}|{'Size (MB)'.center(size_col_width)}|{'Duration'.center(duration_col_width)}|")
    print(table_rule)

    # Print data rows. The Index column shows each row's POSITION in df, not
    # its DataFrame label: df may be a filtered frame with gaps in its labels,
    # and the selection below is positional (df.iloc), so the number the user
    # reads must be the number that selects that row.
    for position, (_, row) in enumerate(df.iterrows()):
        idx_val = str(position)
        file_name_val = str(row['File Name'])
        size_val = f"{row['Size (MB)']:.2f}"
        duration_val = str(row['Duration'])

        # Truncate file name if too long
        if len(file_name_val) > name_col_width:
            file_name_val = file_name_val[:name_col_width-3] + "..."

        # Pad each value to its column width, centering them
        idx_padded = idx_val.center(idx_col_width)
        name_padded = file_name_val.center(name_col_width)
        size_padded = size_val.center(size_col_width)
        duration_padded = duration_val.center(duration_col_width)

        print(f"|{idx_padded}|{name_padded}|{size_padded}|{duration_padded}|")

    # Print footer
    print(table_rule)

    print(f"\n🔄 Please select files to {operation_choice} (enter Index numbers, comma-separated, or 'all' to {operation_choice} all):")
    logging.info("Prompting user for file selection.")
    choice = input("Your choice: ").strip()

    selected_files_paths = []
    if choice.lower() == 'all':
        selected_files_paths = df['Path'].tolist()
    else:
        processed_indices = set() # To store unique valid indices
        for item in choice.split(','):
            try:
                idx = int(item.strip())
                if 0 <= idx < len(df):
                    if idx not in processed_indices: # Avoid adding duplicates if user enters same index multiple times
                        selected_files_paths.append(df.iloc[idx]['Path'])
                        processed_indices.add(idx)
                else:
                    print(f"⚠️ Invalid Index number {idx}. It will be skipped.")
                    logging.warning(f"User entered invalid Index: {idx}")
            except ValueError:
                print(f"❌ Invalid input '{item.strip()}'. Please enter Index numbers or 'all'.")
                logging.error(f"User entered invalid input part: {item.strip()}")

    # Sources copied in THIS run whose copy is trusted (MD5 matched, or
    # verification is disabled). Only these may be offered for deletion:
    # skipped files and checksum mismatches must keep their source.
    deletable_sources = []

    if selected_files_paths:
        logging.info(f"Starting {operation_choice} of {len(selected_files_paths)} files.")
        print(f"\n{operation_choice.capitalize()}ing {len(selected_files_paths)} files...") # Keep print for user visibility
        for src in tqdm(selected_files_paths, desc=operation_choice.capitalize()):
            file_name = os.path.basename(src)
            dst = os.path.join(TARGET_DIR, file_name)

            if os.path.exists(dst):
                logging.info(f"Skipping existing file: {file_name}")
                print(f"⏩ Skipping: '{file_name}' already exists in the target.") # Keep print for user visibility
                skipped_count += 1
                continue

            try:
                # The source checksum must be taken before the operation: after
                # a move the source no longer exists.
                src_md5 = None
                if ENABLE_CHECKSUM_VERIFICATION:
                    src_md5 = calculate_md5(src)

                if operation_choice == 'copy':
                    shutil.copy2(src, dst)
                    logging.info(f"Successfully copied: {file_name}")
                else: # 'move'
                    shutil.move(src, dst)
                    logging.info(f"Successfully moved: {file_name}")
                operation_performed_count += 1

                transfer_trusted = True
                if ENABLE_CHECKSUM_VERIFICATION:
                    dst_md5 = calculate_md5(dst)

                    if src_md5 and dst_md5 and src_md5 == dst_md5:
                        logging.info(f"Checksum verified for {file_name}: MD5 Match.")
                        print(f"✅ {operation_choice.capitalize()}ed and Verified: '{file_name}' (MD5 Match)") # Keep print for user visibility
                        verified_count += 1
                    else:
                        transfer_trusted = False
                        logging.warning(f"Checksum mismatch for {file_name}. Source MD5: {src_md5}, Dest MD5: {dst_md5}")
                        print(f"⚠️ {operation_choice.capitalize()}ed but Checksum Mismatch: '{file_name}' (Source MD5: {src_md5}, Dest MD5: {dst_md5})") # Keep print for user visibility
                        checksum_mismatch_count += 1
                else:
                    print(f"✅ {operation_choice.capitalize()}ed: '{file_name}'") # Keep print for user visibility

                if operation_choice == 'copy' and transfer_trusted:
                    deletable_sources.append(src)

            except Exception as e:
                logging.error(f"Failed to {operation_choice}: {file_name} - {e}")
                print(f"❌ Failed to {operation_choice}: '{file_name}' - {e}") # Keep print for user visibility
                failed_count += 1
    else:
        logging.info(f"No files selected for {operation_choice}ing.")
        print(f"No files selected for {operation_choice}ing.") # Keep print for user visibility

    # Option to delete source files if operation was 'copy'
    if operation_choice == 'copy' and deletable_sources:
        print("\n❓ Do you want to delete the successfully copied source files? (y/n)")
        logging.info("Prompting user for source file deletion choice after copy operation.")
        delete_choice = input("Your choice: ").strip().lower()
        if delete_choice == 'y':
            deleted_count = 0
            logging.info("User chose to delete source files. Starting deletion...")
            print("\nDeleting source files...") # Keep print for user visibility
            # Re-check that the copy is still present right before deleting its source
            files_to_delete = [
                src for src in deletable_sources
                if os.path.exists(os.path.join(TARGET_DIR, os.path.basename(src)))
            ]

            for src in tqdm(files_to_delete, desc="Deleting"):
                file_name = os.path.basename(src)
                try:
                    os.remove(src)
                    logging.info(f"Successfully deleted source file: {file_name}")
                    print(f"🗑️ Deleted: '{file_name}'") # Keep print for user visibility
                    deleted_count += 1
                except Exception as e:
                    logging.warning(f"Failed to delete source file: {file_name} - {e}")
                    print(f"⚠️ Failed to delete: '{file_name}' - {e}") # Keep print for user visibility
            logging.info(f"Deletion summary: Deleted {deleted_count} files.")
            print(f"\n🗑️ Deleted {deleted_count} source files.") # Keep print for user visibility
        else:
            logging.info("User chose not to delete source files.")
            print("Source files were not deleted.") # Keep print for user visibility

# Summary Report: one log record with all counters, then the same figures
# printed line by line for the notebook user.
logging.info(f"Operation Summary: {operation_choice.capitalize()}ed={operation_performed_count}, Verified={verified_count}, Mismatch={checksum_mismatch_count}, Skipped={skipped_count}, Failed={failed_count}")

summary_lines = [
    "\n--- Summary Report ---",
    f"✅ Successfully {operation_choice}ed: {operation_performed_count} files",
]
if ENABLE_CHECKSUM_VERIFICATION:
    # Verification figures are only meaningful when checksums were computed
    summary_lines.append(f"✔️ Successfully verified (MD5 Match): {verified_count} files")
    summary_lines.append(f"❗ Checksum Mismatch ({operation_choice}ed but not verified): {checksum_mismatch_count} files")
summary_lines.append(f"⏩ Skipped (already exists in target): {skipped_count} files")
summary_lines.append(f"❌ Failed to {operation_choice}: {failed_count} files")
for summary_line in summary_lines:
    print(summary_line)

# Unmount Google Drive; a failure here is logged but does not abort the cell
logging.info("Unmounting Google Drive...")
try:
    drive.flush_and_unmount()
except Exception as e:
    logging.error(f"Failed to unmount Google Drive: {e}")
else:
    logging.info("Google Drive unmounted successfully.")

print("\n🎉 Operation complete!")
