# Douyin Live Recorder

Notebook นี้ใช้สำหรับรัน DouyinLiveRecorder เพื่อบันทึกวิดีโอไลฟ์จาก Douyin (TikTok จีน)

In [None]:
# Clone the DouyinLiveRecorder project and the l777k repository
# (the latter supplies the config files copied in the next cell)
!git clone https://github.com/ihmily/DouyinLiveRecorder.git
!git clone https://github.com/chaiz64/l777k.git

In [None]:
# Copy config.ini and URL_config.ini from the l777k checkout into the
# recorder's config/ directory
!cp l777k/config.ini DouyinLiveRecorder/config/
!cp l777k/URL_config.ini DouyinLiveRecorder/config/

In [None]:
# Change into the project folder
%cd DouyinLiveRecorder

# Install the Python dependencies listed in requirements.txt
!pip3 install -r requirements.txt

In [None]:
# Install ffmpeg (refresh the apt package index first)
!apt update
!apt install ffmpeg -y

In [None]:
# Run the program
!python main.py

In [None]:
import os
import shutil
from google.colab import files

# Folder to archive, and the archive path WITHOUT its ".zip" suffix:
# shutil.make_archive appends the extension itself, so the base name is kept
# separately instead of being derived with str.replace (which would strip
# every ".zip" occurrence anywhere in the path).
source_folder = "/content/DouyinLiveRecorder/downloads/抖音直播"
archive_base = "/content/douyin_live_backup"
zip_filename = archive_base + ".zip"

if not os.path.isdir(source_folder):
    # Nothing to archive; report it instead of failing inside shutil.
    print("❌ ไม่พบโฟลเดอร์ที่กำหนด")
else:
    # Compress the folder into a ZIP file; make_archive returns the path of
    # the archive it actually wrote.
    zip_filename = shutil.make_archive(archive_base, 'zip', source_folder)

    # Download the ZIP file through the browser.
    files.download(zip_filename)


In [None]:
import os
import IPython.display as display

# Convert a file size in bytes into a human-readable string
def convert_size(size_in_bytes):
    """Return *size_in_bytes* formatted with the unit B, KB, MB, GB or TB.

    Values below 1024 are shown as-is with the unit "B"; larger values are
    divided by the matching power of 1024 and shown with two decimals.
    Anything of 1024**4 bytes or more is expressed in TB.
    """
    if size_in_bytes < 1024:
        return f"{size_in_bytes} B"

    # power is the exponent of 1024 that belongs to each unit.
    for power, unit in enumerate(("KB", "MB", "GB"), start=1):
        if size_in_bytes < 1024 ** (power + 1):
            return f"{size_in_bytes / 1024 ** power:.2f} {unit}"

    return f"{size_in_bytes / 1024 ** 4:.2f} TB"

# Helpers for building safe HTML links (imported here so the cell's listing
# code is self-contained).
import html
from urllib.parse import quote

# Root folder where the recorded files are stored
base_folder = "/content/DouyinLiveRecorder/downloads/抖音直播"

# Check whether the folder exists
if not os.path.exists(base_folder):
    print("❌ ไม่พบโฟลเดอร์ที่กำหนด")
else:
    # Collect every matching file in the folder and its subfolders.
    # The loop variables are deliberately NOT called `files`: that name is
    # the google.colab module imported by other cells, and notebook cells
    # share one global namespace.
    file_paths = []
    for root, _, filenames in os.walk(base_folder):
        for filename in filenames:
            if filename.endswith(('.mp4', '.ts')):  # only .mp4 and .ts files
                full_path = os.path.join(root, filename)
                # Path relative to base_folder, used as the short display name
                relative_path = os.path.relpath(full_path, base_folder)
                file_paths.append((full_path, relative_path))

    # Check whether any file was found
    if not file_paths:
        print("❌ ไม่พบไฟล์ .mp4 หรือ .ts ในโฟลเดอร์")
    else:
        print(f"✅ พบ {len(file_paths)} ไฟล์ในโฟลเดอร์ '{base_folder}'\n")

        # Show a download link and the file size for each file
        for full_path, relative_path in sorted(file_paths):
            file_size = os.path.getsize(full_path)
            file_size_str = convert_size(file_size)  # human-readable size
            # File names come from stream titles, so percent-encode the URL
            # and HTML-escape everything placed into the markup.
            href = html.escape(quote(f"/files{full_path}"))
            label = html.escape(relative_path)
            display.display(display.HTML(
                f'<a href="{href}" download="{label}" target="_blank">📥 ดาวน์โหลด {label} ({file_size_str})</a><br>'
            ))


In [None]:
import os
from google.colab import files
import time
import math # For math.ceil

# --- Configuration ---
# Settings passed to scan_and_download_files() at the end of this cell.
config = dict(
    # Root folder that is scanned, including its subfolders.
    base_download_directory='/content/DouyinLiveRecorder/downloads/抖音直播/',
    # Maximum number of files per batch (1 = one file per step, continuous download).
    DOWNLOAD_LIMIT=1,
    # File in which the download progress is saved.
    PROGRESS_FILE='/content/download_progress.txt',
)

def get_last_downloaded_index(progress_file):
    """
    Load the saved download position from the progress file.

    Returns the stored integer when the file holds only digits (surrounding
    whitespace is ignored). Returns 0 when the file is missing, holds
    anything else, or cannot be read; in the last case a warning is printed.
    """
    try:
        with open(progress_file, 'r') as handle:
            saved = handle.read().strip()
        # Non-numeric content means "start from the beginning".
        return int(saved) if saved.isdigit() else 0
    except FileNotFoundError:
        # No progress recorded yet.
        return 0
    except Exception as e:
        print(f"Warning: Could not read progress file '{progress_file}'. Starting from beginning. Error: {e}")
        return 0

def update_last_downloaded_index(progress_file, index):
    """
    Persist the index of the next file to download.

    The progress file is overwritten with the textual form of *index*.
    A confirmation is printed on success; if the file cannot be written an
    error message is printed instead and no exception is raised.
    """
    try:
        with open(progress_file, 'w') as out:
            out.write(f"{index}")
    except Exception as e:
        print(f"Error: Could not save download progress to '{progress_file}'. Error: {e}")
    else:
        print(f"Download progress saved: Next file index is {index}.")

def format_time(seconds):
    """
    Render a duration in seconds as a compact string such as "1h 2m 3s".

    Zero-valued components are left out, except that a duration of zero is
    shown as "0s". Fractions of a second are truncated. None or a negative
    value yields "N/A".
    """
    if seconds is None or seconds < 0:
        return "N/A"

    hours, leftover = divmod(int(seconds), 3600)
    minutes, secs = divmod(leftover, 60)

    parts = [
        f"{amount}{suffix}"
        for amount, suffix in ((hours, "h"), (minutes, "m"))
        if amount > 0
    ]
    # Always show the seconds when nothing else would be displayed.
    if secs > 0 or not parts:
        parts.append(f"{secs}s")

    return " ".join(parts)

def format_size(bytes_size):
    """
    Formats a given number of bytes into a human-readable string (e.g., 1.23 GB).

    Args:
        bytes_size: Size in bytes (int or float), or None.

    Returns:
        str: "N/A" for None or negative input, "0 B" for zero, otherwise the
        value scaled by a power of 1024, rounded to two decimals and followed
        by the unit. Sizes below one byte stay in "B"; sizes of 1024**5 bytes
        or more are expressed in "TB", the largest supported unit.
    """
    if bytes_size is None or bytes_size < 0:
        return "N/A"

    units = ['B', 'KB', 'MB', 'GB', 'TB']
    if bytes_size == 0:
        return "0 B"

    # Pick the unit by exact comparison instead of a floating-point
    # logarithm: the loop cannot be thrown off by rounding at exact powers
    # of 1024, and it keeps the index inside the units list for both tiny
    # (< 1) and huge (>= 1024**5) values.
    i = 0
    while i < len(units) - 1 and bytes_size >= 1024 ** (i + 1):
        i += 1

    s = round(bytes_size / 1024 ** i, 2)
    return f"{s} {units[i]}"


def scan_and_download_files(config):
    """
    Scans a directory tree and downloads its files to the local machine.

    Files are collected in a deterministic order (sub-directories and file
    names are both sorted) so that the index stored in the progress file
    refers to the same file on every run, as long as the directory contents
    do not change. After listing the files and asking once for confirmation,
    the files are downloaded batch by batch without further prompts. Progress
    is saved after every batch so an interrupted run can resume. After each
    file the elapsed time, average step time and estimated remaining time
    are printed.

    A file whose download raises an error is reported and skipped. It still
    counts as processed, so the loop always moves on to the next file; the
    skipped files are listed again at the end.

    Downloads are triggered through the module-level `files` object imported
    from google.colab.

    Args:
        config (dict): Configuration parameters:
            'base_download_directory' (required): root folder to scan.
            'DOWNLOAD_LIMIT' (default 1): number of files per batch; values
                that are not integers >= 1 fall back to 1.
            'PROGRESS_FILE' (default '/content/download_progress.txt'):
                file holding the index of the next file to download.
            'DOWNLOAD_DELAY_SECONDS' (default 10): pause after each
                initiated download; negative values are treated as 0.

    Returns:
        None
    """
    base_directory = config.get('base_download_directory')
    download_limit = config.get('DOWNLOAD_LIMIT', 1)  # Default is 1 for this continuous mode
    progress_file = config.get('PROGRESS_FILE', '/content/download_progress.txt')
    # You can adjust this delay to a value that suits your browser.
    download_delay = max(0, config.get('DOWNLOAD_DELAY_SECONDS', 10))

    if not base_directory:
        print("Error: 'base_download_directory' is not specified in the configuration.")
        return

    if not os.path.isdir(base_directory):
        print(f"Error: Directory '{base_directory}' not found.")
        print("Please ensure the path is correct and the directory exists in your Colab environment.")
        return

    # A batch size below 1 would select an empty batch and never make progress.
    if not isinstance(download_limit, int) or download_limit < 1:
        print(f"Warning: Invalid DOWNLOAD_LIMIT {download_limit!r}. Using 1 instead.")
        download_limit = 1

    print(f"Scanning for files in: {base_directory}")
    found_files_info = []  # Tuples of (full_path, size_bytes)

    for root, dirnames, filenames in os.walk(base_directory):
        # os.walk yields entries in arbitrary order. Sorting dirnames in place
        # fixes the traversal order; sorting filenames fixes the order inside
        # each directory. Together they keep saved indices meaningful.
        dirnames.sort()
        for filename in sorted(filenames):
            full_path = os.path.join(root, filename)
            try:
                file_size = os.path.getsize(full_path)
            except OSError as e:
                print(f"Warning: Could not get size for '{full_path}': {e}. Skipping this file.")
                continue  # Skip files that cannot be accessed
            found_files_info.append((full_path, file_size))

    if not found_files_info:
        print(f"No files found in '{base_directory}' or its subdirectories.")
        return

    total_files = len(found_files_info)

    # --- Initial Prompt (only once at the beginning) ---
    start_index = get_last_downloaded_index(progress_file)

    if start_index >= total_files:
        print("\nAll available files have been downloaded!")
        print(f"The progress file '{progress_file}' indicates completion. To restart, you can manually delete this file in your Colab files.")
        return  # Everything was already downloaded

    print(f"\nFound {total_files} file(s) in total (starting from index {start_index}):")
    # Display all files with their status and size.
    for i, (f_path, f_size) in enumerate(found_files_info):
        relative_path = os.path.relpath(f_path, base_directory)
        if i < start_index:
            prefix = "  [DOWNLOADED] "  # Handled in an earlier run.
        elif i < start_index + download_limit:
            prefix = "  [TO DOWNLOAD] "  # Part of the first batch.
        else:
            prefix = "  [QUEUED] "  # Waiting for a later batch.
        print(f"{prefix}{i+1}. {relative_path} ({format_size(f_size)})")

    # Prompt for initial confirmation before starting the continuous download.
    user_input = input("\nDo you want to start downloading files? (yes/no/reset): ").lower().strip()
    if user_input == 'reset':
        update_last_downloaded_index(progress_file, 0)
        print("Download progress reset. Please run the cell again to start from the beginning.")
        return  # Let the user start afresh.
    elif user_input != 'yes':
        print("Download cancelled by user.")
        return

    print("\nStarting continuous download process...")

    # Metrics for the current session
    session_start_time = time.time()
    files_processed_in_session = 0
    total_processing_time_in_session = 0  # Time spent initiating downloads + delay
    skipped_files = []  # Relative paths of files whose download failed

    # --- Continuous Download Loop ---
    # The position is tracked in memory and only mirrored to the progress
    # file, so a failed save cannot make the loop repeat the same file.
    current_start_index = start_index
    while current_start_index < total_files:
        batch = found_files_info[current_start_index:current_start_index + download_limit]

        for i, (file_path, file_size) in enumerate(batch):
            file_processing_start_time = time.time()
            relative_path = os.path.relpath(file_path, base_directory)
            actual_file_number = current_start_index + i + 1
            print(f"[{actual_file_number}/{total_files}] Downloading: {relative_path} ({format_size(file_size)})")

            try:
                files.download(file_path)
            except FileNotFoundError:
                print(f"  Error: File not found at '{file_path}'. Skipping.")
                skipped_files.append(relative_path)
            except Exception as e:
                print(f"  An unexpected error occurred during download of '{file_path}': {e}. Skipping.")
                skipped_files.append(relative_path)
            else:
                print(f"  --> Download initiated for '{relative_path}'.")
                # Pause to prevent issues with rapid consecutive downloads.
                time.sleep(download_delay)

            files_processed_in_session += 1
            total_processing_time_in_session += time.time() - file_processing_start_time

            # --- Display Metrics ---
            elapsed_time = time.time() - session_start_time
            avg_time_per_file_processing = total_processing_time_in_session / files_processed_in_session
            remaining_files_count = total_files - actual_file_number
            estimated_time_remaining = avg_time_per_file_processing * remaining_files_count

            print(f"  Metrics: Elapsed: {format_time(elapsed_time)} | Avg. Step Time: {format_time(avg_time_per_file_processing)} | Est. Remaining: {format_time(estimated_time_remaining)}")

        # --- Update progress after the current batch is completed ---
        # Advance by the number of files processed (not only the successful
        # ones) so a failing file can never stall the loop or shift indices.
        current_start_index += len(batch)
        update_last_downloaded_index(progress_file, current_start_index)

        if current_start_index < total_files:
            print(f"\nProceeding to the next file...")

    if skipped_files:
        print(f"\nFinished. {len(skipped_files)} file(s) could not be downloaded and were skipped:")
        for skipped in skipped_files:
            print(f"  - {skipped}")
    else:
        print("\nAll available files have been downloaded!")
    print(f"The progress file '{progress_file}' indicates completion. To restart, you can manually delete this file.")

# --- Run the function ---
# Start the scan/download workflow with the settings defined in `config`.
scan_and_download_files(config)

