In [5]:
import requests
import os
import threading
from urllib.parse import urljoin

def download_ts_file(url, save_path):
    """Download a single .ts segment to ``save_path``.

    The response body is streamed to ``<save_path>.part`` and renamed to
    ``save_path`` only after it has been written completely, so an
    interrupted transfer never leaves a truncated file under the final name.

    :param url: URL of the segment to fetch.
    :param save_path: Destination path of the finished file.
    :return: True when the file was saved, False when any step failed.
        Errors are printed instead of raised so that a caller running this
        function in a worker thread is not interrupted.
    """
    partial_path = f"{save_path}.part"
    try:
        # stream=True keeps memory use bounded to one chunk at a time.
        with requests.get(url, timeout=50, stream=True) as response:
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)
        # Publish the file under its final name only once it is complete.
        os.replace(partial_path, save_path)
        print(f"Downloaded: {save_path}")
        return True
    except Exception as e:
        print(f"Error downloading {url}: {e}")
        # Best-effort removal of the incomplete temporary file (it may not
        # exist if the request failed before anything was written).
        try:
            os.remove(partial_path)
        except OSError:
            pass
        return False

def parse_m3u8(m3u8_url):
    """Fetch an M3U8 playlist and return the absolute URLs it lists.

    Every non-blank line that is not a ``#`` tag/comment is treated as a URI
    and resolved against the playlist URL, so both relative and absolute
    entries are supported. No check is made that an entry ends in ``.ts``.

    :param m3u8_url: URL of the playlist.
    :return: List of absolute URLs in playlist order; an empty list when the
        playlist cannot be fetched or parsed (the error is printed).
    """
    try:
        response = requests.get(m3u8_url, timeout=30)
        response.raise_for_status()

        ts_urls = []
        for raw_line in response.text.splitlines():
            # Strip before testing for '#', so indented tags are not
            # mistaken for segment URIs.
            entry = raw_line.strip()
            if not entry or entry.startswith('#'):
                continue
            # urljoin resolves against the directory of the playlist path
            # and ignores the playlist's own query string.
            ts_urls.append(urljoin(m3u8_url, entry))

        return ts_urls
    except Exception as e:
        print(f"Error parsing M3U8: {e}")
        return []

def download_video(m3u8_url, output_dir='video_parts', max_workers=8):
    """Download every entry of an M3U8 playlist into ``output_dir``.

    Entries are saved as ``part_0000.ts``, ``part_0001.ts``, ... in playlist
    order. Downloads run concurrently on a bounded thread pool rather than
    one thread per segment, so a long playlist does not open hundreds of
    simultaneous connections.

    :param m3u8_url: URL of the playlist to download.
    :param output_dir: Directory for the downloaded parts (created if missing).
    :param max_workers: Maximum number of simultaneous downloads; values
        below 1 are treated as 1.
    """
    # Local import: the surrounding cell does not import concurrent.futures.
    from concurrent.futures import ThreadPoolExecutor

    os.makedirs(output_dir, exist_ok=True)

    ts_urls = parse_m3u8(m3u8_url)

    # Leaving the ``with`` block waits for all submitted downloads to finish.
    with ThreadPoolExecutor(max_workers=max(1, max_workers)) as pool:
        for i, ts_url in enumerate(ts_urls):
            save_path = os.path.join(output_dir, f'part_{i:04d}.ts')
            pool.submit(download_ts_file, ts_url, save_path)

    # NOTE: this is the number of playlist entries processed; individual
    # failures are reported by the per-segment download messages.
    print(f"Downloaded {len(ts_urls)} video parts")

# Example usage: download every entry of one hard-coded playlist into the
# default 'video_parts' directory. `m3u8_url` is assigned at module level,
# so it stays defined in the kernel after this cell has run.
if __name__ == '__main__':
    m3u8_url = 'https://n3.tmo.livebox.cz/vod/tvs-vod/03fa885050e7e03bfb93df3d201a33af/mp4:./1325fd43-3a8c-4b6e-ba2f-c45eda0a64df-2.mp4/chunklist-v1-a1.m3u8'
    download_video(m3u8_url)

Downloaded: video_parts/part_0000.ts
Downloaded 1 video parts


In [7]:
import requests
import os

def download_file_from_network(webpage_url, file_name, output_dir='downloads'):
    """Download a file that a web page references by name.

    The page at ``webpage_url`` is fetched and its source is searched for a
    URL that ends in ``file_name``. The first such reference is resolved
    against the page URL, downloaded, and stored as ``output_dir/file_name``.

    Only references that appear literally in the fetched page source can be
    found; resources requested later by scripts are not visible here.

    :param webpage_url: URL of the page expected to reference the file.
    :param file_name: Name of the file to look for, e.g. ``'play-over.svg'``.
    :param output_dir: Directory to save into (created if missing).
    :return: Path of the saved file, or None when the file is not referenced
        or a request/write fails (the reason is printed).
    """
    # Local imports: the surrounding cell only imports requests and os.
    import re
    from urllib.parse import urljoin

    try:
        response = requests.get(webpage_url, timeout=30)
        response.raise_for_status()

        # A reference is the run of URL characters (no whitespace, quotes,
        # parentheses or angle brackets) that ends with the file name.
        url_chars = r"""[^\s"'()<>]*"""
        reference = re.search(url_chars + re.escape(file_name), response.text)
        if reference is None:
            print(f"File {file_name} not found in the network traffic.")
            return None

        # Resolve against the final page URL (after redirects) so relative
        # and root-relative references both work.
        file_url = urljoin(response.url, reference.group(0))

        file_response = requests.get(file_url, timeout=30)
        file_response.raise_for_status()

        os.makedirs(output_dir, exist_ok=True)

        save_path = os.path.join(output_dir, file_name)
        with open(save_path, 'wb') as f:
            f.write(file_response.content)

        print(f"Downloaded: {save_path}")
        return save_path

    except Exception as e:
        print(f"Error downloading file from network: {e}")
        return None

# Example usage: look for 'play-over.svg' on one hard-coded player page.
# In the recorded run below, the request for this page URL returned
# 404 Not Found, so nothing was downloaded.
webpage_url = 'https://player.tvs.livebox.cz/d734dddf-3312-4ec6-adb1-2ed39443a653'
file_name = 'play-over.svg'
download_file_from_network(webpage_url, file_name)


Error downloading file from network: 404 Client Error: Not Found for url: https://player.tvs.livebox.cz/d734dddf-3312-4ec6-adb1-2ed39443a653


The next cell downloads the numbered .ts segments (seg-1, seg-2, …) from a given server path.

In [28]:
import requests
import os

def download_file(url, output_filename=None):
    """Stream one file to disk while printing download progress.

    :param url: URL to download.
    :param output_filename: Destination path. When None, the text after the
        last ``/`` of ``url`` is used.
    :return: True when the file was written completely, False when the
        request or the write failed (the error is printed).
    """
    # If no output filename is provided, use the last part of the URL
    if output_filename is None:
        output_filename = url.split('/')[-1]

    # Browser-like request headers. Two groups from the copied browser
    # request are intentionally not sent:
    #  * 'authority', 'method', 'scheme' are HTTP/2 pseudo-headers
    #    (':authority', ...), not real header fields;
    #  * 'accept-encoding' is left to requests, which only advertises
    #    encodings it can actually decode. Forcing 'br, zstd' could yield a
    #    still-compressed file when the decoders are not installed.
    headers = {
        'accept': '*/*',
        'accept-language': 'cs-CZ,cs;q=0.9',
        'origin': 'https://player.tvs.livebox.cz',
        'priority': 'u=1, i',
        'referer': 'https://player.tvs.livebox.cz/',
        'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"macOS"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-site',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
    }

    try:
        # The timeout keeps a stalled server from hanging the cell forever;
        # the ``with`` block releases the connection when done.
        with requests.get(url, headers=headers, stream=True, timeout=30) as response:
            # Raise an error for bad status codes
            response.raise_for_status()

            # Total size is 0 when the server sends no Content-Length; the
            # progress display then stays at 0%.
            total_size = int(response.headers.get('content-length', 0))
            block_size = 64 * 1024  # 64 KiB per chunk / progress update
            downloaded_size = 0

            with open(output_filename, 'wb') as file:
                for data in response.iter_content(block_size):
                    file.write(data)
                    downloaded_size += len(data)

                    progress = (downloaded_size / total_size) * 100 if total_size > 0 else 0
                    print(f'Download progress for {output_filename}: {progress:.2f}%', end='\r')

        print(f'\nFile downloaded successfully: {output_filename}')
        return True

    except (requests.RequestException, OSError) as e:
        # OSError covers local failures such as an unwritable destination.
        print(f"An error occurred while downloading {url}: {e}")
        return False

def download_multiple_files(base_url, start=1, end=20, output_dir='downloaded_segments'):
    """Download consecutively numbered segments derived from a template URL.

    ``base_url`` must contain the placeholder ``seg-1-``; for every number in
    ``start``..``end`` (inclusive) the last occurrence of the placeholder is
    replaced by ``seg-<number>-`` and the result is downloaded to
    ``<output_dir>/seg-<number>-v1-a1.ts``. A summary is printed at the end.

    :param base_url: URL of segment 1, used as the template.
    :param start: First segment number.
    :param end: Last segment number (inclusive).
    :param output_dir: Directory for the downloaded segments (created if missing).
    :return: Tuple ``(successful_downloads, failed_downloads)`` of file names.
    :raises ValueError: If ``base_url`` does not contain ``seg-1-``; without
        it every iteration would fetch the same URL under a different name.
    """
    placeholder = 'seg-1-'
    # Split around the LAST occurrence so that text earlier in the URL that
    # happens to contain the placeholder is left untouched.
    url_prefix, found, url_suffix = base_url.rpartition(placeholder)
    if not found:
        raise ValueError(f"base_url must contain {placeholder!r}: {base_url}")

    os.makedirs(output_dir, exist_ok=True)

    successful_downloads = []
    failed_downloads = []

    for num in range(start, end + 1):
        current_url = f'{url_prefix}seg-{num}-{url_suffix}'
        segment_name = f'seg-{num}-v1-a1.ts'
        output_filename = os.path.join(output_dir, segment_name)

        if download_file(current_url, output_filename):
            successful_downloads.append(segment_name)
        else:
            failed_downloads.append(segment_name)

    # Print summary
    print("\nDownload Summary:")
    print(f"Successful downloads: {len(successful_downloads)}")
    print("Successful files:", successful_downloads)
    print(f"Failed downloads: {len(failed_downloads)}")
    print("Failed files:", failed_downloads)

    return successful_downloads, failed_downloads

# URL of the first segment (seg-1); download_multiple_files substitutes the
# segment number into it for each download.
base_url = 'https://n3.tmo.livebox.cz/vod/tvs-vod/f32a50c844e733ca6612c5125789249a//e5f089cc-3526-48a3-bb62-eabd09248250-2.mp4/seg-1-v1-a1.ts'

# Download using the function defaults, i.e. segments 1 to 20 (start=1, end=20)
download_multiple_files(base_url)

Download progress for downloaded_segments/seg-1-v1-a1.ts: 100.00%
File downloaded successfully: downloaded_segments/seg-1-v1-a1.ts
Download progress for downloaded_segments/seg-2-v1-a1.ts: 100.00%
File downloaded successfully: downloaded_segments/seg-2-v1-a1.ts
Download progress for downloaded_segments/seg-3-v1-a1.ts: 100.00%
File downloaded successfully: downloaded_segments/seg-3-v1-a1.ts
Download progress for downloaded_segments/seg-4-v1-a1.ts: 100.00%
File downloaded successfully: downloaded_segments/seg-4-v1-a1.ts
Download progress for downloaded_segments/seg-5-v1-a1.ts: 100.00%
File downloaded successfully: downloaded_segments/seg-5-v1-a1.ts
Download progress for downloaded_segments/seg-6-v1-a1.ts: 100.00%
File downloaded successfully: downloaded_segments/seg-6-v1-a1.ts
Download progress for downloaded_segments/seg-7-v1-a1.ts: 100.00%
File downloaded successfully: downloaded_segments/seg-7-v1-a1.ts
Download progress for downloaded_segments/seg-8-v1-a1.ts: 100.00%
File downloaded s

This part parses the HTML of the article page, builds a file name for the video from the article's date and title, and then merges the downloaded segments into a video with that name.


In [29]:

import requests
from bs4 import BeautifulSoup
import unicodedata
import re

def remove_diacritics(text):
    """
    Strip diacritical marks from a string.

    The text is decomposed with NFKD, which splits an accented letter into
    its base letter plus combining marks, and the combining marks are then
    dropped. Characters that have no such decomposition are left unchanged,
    so the result is not necessarily pure ASCII.

    :param text: Input string, possibly containing diacritical marks
    :return: The string with all combining marks removed
    """
    decomposed = unicodedata.normalize('NFKD', text)

    # combining() is 0 for base characters and non-zero for combining marks.
    base_characters = [
        symbol for symbol in decomposed if unicodedata.combining(symbol) == 0
    ]
    return ''.join(base_characters)

def format_title_for_url(title):
    """
    Turn an article title into a lowercase, hyphen-separated slug.

    Steps:
    1. Remove diacritics
    2. Collapse every run of characters other than ASCII letters and
       digits into a single hyphen
    3. Drop leading/trailing hyphens and convert to lowercase

    :param title: Original article title
    :return: Slug suitable for use in a URL or file name
    """
    ascii_like_title = remove_diacritics(title)

    # One hyphen per run of non-alphanumeric characters (spaces, punctuation, ...).
    non_alphanumeric_run = re.compile(r'[^a-zA-Z0-9]+')
    hyphenated = non_alphanumeric_run.sub('-', ascii_like_title)

    trimmed = hyphenated.strip('-')
    return trimmed.lower()

def scrape_article_info(html_content):
    """
    Extract article title and date from HTML content.

    The title is the text of the first ``<h1>``. The date is the first ten
    characters of the ``datetime`` attribute of the ``<time>`` element
    inside the element with class ``News-date``. Missing pieces are
    replaced by the placeholders "No title found" / "No date found"
    instead of raising.

    :param html_content: HTML content of the webpage as a string
    :return: Tuple of (article_title, formatted_url_title, formatted_date,
        formated_file_name), where formated_file_name is
        ``<formatted_date>_<formatted_url_title>``
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    # Extract article title from h1 tag
    heading = soup.find('h1')
    article_title = heading.get_text(strip=True) if heading else "No title found"

    # Format title for URL
    formatted_url_title = format_title_for_url(article_title)

    # Look up the container first: calling .find() on a missing container
    # would raise AttributeError before the fallback below could apply.
    date_container = soup.find(class_='News-date')
    date_element = date_container.find('time') if date_container is not None else None

    if date_element and date_element.has_attr('datetime'):
        # Keep just the date part (first 10 characters, YYYY-MM-DD)
        formatted_date = date_element['datetime'][:10]
    else:
        formatted_date = "No date found"

    formated_file_name = formatted_date + "_" + formatted_url_title
    return article_title, formatted_url_title, formatted_date, formated_file_name


#####################
import os
import subprocess
import shutil

def check_ffmpeg_installed():
    """Report whether an ``ffmpeg`` executable can be located on the system PATH."""
    # shutil.which returns the executable's path, or None when it is not found.
    ffmpeg_path = shutil.which('ffmpeg')
    return ffmpeg_path is not None

def merge_ts_segments(
    input_dir='downloaded_segments',
    output_dir='final_video',
    output_filename='test.mp4',
    overwrite=False,
):
    """Concatenate the .ts segments of ``input_dir`` into one re-encoded video.

    Segments are ordered by the number in their file name, written to a
    temporary ffmpeg concat list, and merged with ffmpeg. Video is
    re-encoded with libx264 (preset ``slow``, CRF 18); audio is copied.

    :param input_dir: Directory containing the ``.ts`` segments.
    :param output_dir: Directory for the result (created if missing);
        None means the current working directory.
    :param output_filename: File name of the merged video.
    :param overwrite: When True an existing output file is replaced
        (``-y``); when False ffmpeg exits with an error instead (``-n``).
        In both cases ffmpeg never waits for an interactive answer.
    :return: True if ffmpeg finished successfully, otherwise False
        (the reason is printed).
    """
    # Local import so the function does not depend on other cells' imports.
    import re

    def _segment_number(file_name):
        """Return the ordering number of a segment file, or None if it has none."""
        try:
            # Names like 'seg-12-v1-a1.ts': the number is the second field.
            return int(file_name.split('-')[1])
        except (IndexError, ValueError):
            # Other layouts (e.g. 'part_0003.ts'): use the first digit run.
            found = re.search(r'\d+', file_name)
            return int(found.group()) if found else None

    # If no output directory specified, use the current working directory
    if output_dir is None:
        output_dir = os.getcwd()

    os.makedirs(output_dir, exist_ok=True)
    full_output_path = os.path.join(output_dir, output_filename)

    if not check_ffmpeg_installed():
        print("Error: FFmpeg is not installed.")
        print("Please install FFmpeg:")
        print("- macOS: 'brew install ffmpeg'")
        print("- Ubuntu/Debian: 'sudo apt-get install ffmpeg'")
        print("- Windows: Download from ffmpeg.org and add to PATH")
        return False

    segments_list_path = os.path.join(input_dir, 'segments_list.txt')
    try:
        # Collect (number, name) pairs; files without a number are skipped
        # instead of aborting the whole merge.
        numbered_segments = []
        for name in os.listdir(input_dir):
            if not name.endswith('.ts'):
                continue
            number = _segment_number(name)
            if number is None:
                print(f"Skipping {name}: no segment number in its name.")
                continue
            numbered_segments.append((number, name))
        numbered_segments.sort(key=lambda pair: pair[0])
        ts_files = [name for _, name in numbered_segments]

        if not ts_files:
            print("No .ts files found in the directory.")
            return False

        # Write the list consumed by ffmpeg's concat demuxer. A single
        # quote inside a quoted path must be written as '\''.
        segments_root = os.path.abspath(input_dir)
        with open(segments_list_path, 'w') as f:
            for ts_file in ts_files:
                quoted_path = os.path.join(segments_root, ts_file).replace("'", "'\\''")
                f.write(f"file '{quoted_path}'\n")

        ffmpeg_command = [
            'ffmpeg',
            '-y' if overwrite else '-n',  # never prompt about an existing output
            '-f', 'concat',
            '-safe', '0',  # the list contains absolute paths
            '-i', segments_list_path,
            '-c:v', 'libx264',  # re-encode video as H.264
            '-preset', 'slow',  # slower encode, better compression
            '-crf', '18',  # constant rate factor (lower is higher quality)
            '-c:a', 'copy',  # copy audio without re-encoding
            full_output_path
        ]

        print(f"Starting video merge process to {full_output_path}...")
        result = subprocess.run(ffmpeg_command, capture_output=True, text=True)

        if result.returncode == 0:
            print(f"Successfully merged {len(ts_files)} segments into {full_output_path}")
            return True

        print("Error during merge:")
        print(result.stderr)
        return False

    except Exception as e:
        print(f"An error occurred: {e}")
        return False

    finally:
        # Remove the temporary list on every path, not only after success.
        try:
            os.remove(segments_list_path)
        except OSError:
            pass






# Example usage
def main(url='https://itvs24.cz/kunovice/sneci-vymenili-softbalove-dresy-za-saka'):
    """Name the merged video after an article and run the merge.

    Fetches the article page, derives ``<date>_<title-slug>`` from it,
    prints the extracted values, and merges the downloaded segments into
    ``<date>_<title-slug>.mp4``. If the page cannot be fetched, the error
    is printed and no merge is attempted.

    :param url: URL of the article page the video belongs to.
    """
    try:
        # Fetch the webpage
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes
    except requests.RequestException as e:
        print(f"Error fetching the webpage: {e}")
        # Without the page there is no file name to merge into.
        return

    # Scrape the information
    original_title, formatted_title, date, formated_file_name = scrape_article_info(response.text)

    print(f"Original Title: {original_title}")
    print(f"Formatted Title: {formatted_title}")
    print(f"Date: {date}")
    print(f"File Name: {formated_file_name}")

    merge_ts_segments(output_filename=formated_file_name + '.mp4')

# Run the scrape-and-merge workflow when this cell is executed.
if __name__ == '__main__':
    main()

Original Title: Šneci vyměnili softbalové dresy za saka
Formatted Title: sneci-vymenili-softbalove-dresy-za-saka
Date: 2023-12-20
File Name: 2023-12-20_sneci-vymenili-softbalove-dresy-za-saka
Starting video merge process to final_video/2023-12-20_sneci-vymenili-softbalove-dresy-za-saka.mp4...
Successfully merged 20 segments into final_video/2023-12-20_sneci-vymenili-softbalove-dresy-za-saka.mp4


In [3]:
import os
import subprocess
import shutil

def check_ffmpeg_installed():
    """Tell whether the ``ffmpeg`` command is available on the system PATH.

    NOTE(review): a function with this name is also defined in the previous
    cell; whichever cell was executed last provides the active definition.
    """
    located = shutil.which('ffmpeg')
    if located is None:
        return False
    return True

def merge_ts_segments(
    input_dir='downloaded_segments',
    output_dir='final_video',
    output_filename='merged_video.mp4',
    overwrite=False,
):
    """Concatenate the .ts segments of ``input_dir`` into one video file.

    Segments are ordered by the number in their file name, written to a
    temporary ffmpeg concat list, and merged with ffmpeg using stream copy
    (``-c copy``), i.e. without re-encoding.

    NOTE(review): a function with this name (a re-encoding variant) is also
    defined in the previous cell; whichever cell was executed last provides
    the active definition.

    :param input_dir: Directory containing the ``.ts`` segments.
    :param output_dir: Directory for the result (created if missing);
        None means the current working directory.
    :param output_filename: File name of the merged video.
    :param overwrite: When True an existing output file is replaced
        (``-y``); when False ffmpeg exits with an error instead (``-n``).
        In both cases ffmpeg never waits for an interactive answer.
    :return: True if ffmpeg finished successfully, otherwise False
        (the reason is printed).
    """
    # Local import: this cell does not import re.
    import re

    def _segment_number(file_name):
        """Return the ordering number of a segment file, or None if it has none."""
        try:
            # Names like 'seg-12-v1-a1.ts': the number is the second field.
            return int(file_name.split('-')[1])
        except (IndexError, ValueError):
            # Other layouts (e.g. 'part_0003.ts'): use the first digit run.
            found = re.search(r'\d+', file_name)
            return int(found.group()) if found else None

    # If no output directory specified, use the current working directory
    if output_dir is None:
        output_dir = os.getcwd()

    os.makedirs(output_dir, exist_ok=True)
    full_output_path = os.path.join(output_dir, output_filename)

    if not check_ffmpeg_installed():
        print("Error: FFmpeg is not installed.")
        print("Please install FFmpeg:")
        print("- macOS: 'brew install ffmpeg'")
        print("- Ubuntu/Debian: 'sudo apt-get install ffmpeg'")
        print("- Windows: Download from ffmpeg.org and add to PATH")
        return False

    segments_list_path = os.path.join(input_dir, 'segments_list.txt')
    try:
        # Collect (number, name) pairs; files without a number are skipped
        # instead of aborting the whole merge.
        numbered_segments = []
        for name in os.listdir(input_dir):
            if not name.endswith('.ts'):
                continue
            number = _segment_number(name)
            if number is None:
                print(f"Skipping {name}: no segment number in its name.")
                continue
            numbered_segments.append((number, name))
        numbered_segments.sort(key=lambda pair: pair[0])
        ts_files = [name for _, name in numbered_segments]

        if not ts_files:
            print("No .ts files found in the directory.")
            return False

        # Write the list consumed by ffmpeg's concat demuxer. A single
        # quote inside a quoted path must be written as '\''.
        segments_root = os.path.abspath(input_dir)
        with open(segments_list_path, 'w') as f:
            for ts_file in ts_files:
                quoted_path = os.path.join(segments_root, ts_file).replace("'", "'\\''")
                f.write(f"file '{quoted_path}'\n")

        # Construct the FFmpeg command
        ffmpeg_command = [
            'ffmpeg',
            '-y' if overwrite else '-n',  # never prompt about an existing output
            '-f', 'concat',
            '-safe', '0',  # the list contains absolute paths
            '-i', segments_list_path,
            '-c', 'copy',  # copy all streams without re-encoding
            full_output_path
        ]

        print(f"Starting video merge process to {full_output_path}...")
        result = subprocess.run(ffmpeg_command, capture_output=True, text=True)

        if result.returncode == 0:
            print(f"Successfully merged {len(ts_files)} segments into {full_output_path}")
            return True

        print("Error during merge:")
        print(result.stderr)
        return False

    except Exception as e:
        print(f"An error occurred: {e}")
        return False

    finally:
        # Remove the temporary list on every path, not only after success.
        try:
            os.remove(segments_list_path)
        except OSError:
            pass

# Example usage with different output options
def main():
    """Merge the downloaded segments using merge_ts_segments' default arguments."""
    # Option 1: Use the defaults - segments are read from 'downloaded_segments'
    # and the result is written to 'final_video/merged_video.mp4'
    merge_ts_segments()
    
    # Option 2: Specify a custom output directory
    # merge_ts_segments(output_dir='/path/to/your/output/directory')
    
    # Option 3: Specify both output directory and filename
    # merge_ts_segments(
    #     output_dir='/path/to/your/output/directory', 
    #     output_filename='my_custom_video.mp4'
    # )

# Run the merge with default settings when this cell is executed.
if __name__ == '__main__':
    main()

Starting video merge process to final_video/merged_video.mp4...
Successfully merged 16 segments into final_video/merged_video.mp4
