In [1]:
import whisper_timestamped as whisper

audio = whisper.load_audio("../data/Stabilizing Large Sparse Mixture-of-Experts Models.wav")

model = whisper.load_model("NbAiLab/whisper-large-v2-nob", device="cuda")

result = whisper.transcribe(model, audio, language="en")

import json
print(json.dumps(result, indent = 2, ensure_ascii = False))


Importing the dtw module. When using in academic works please cite:
  T. Giorgino. Computing and Visualizing Dynamic Time Warping Alignments in R: The dtw Package.
  J. Stat. Soft., doi:10.18637/jss.v031.i07.



  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 94676/94676 [03:34<00:00, 441.18frames/s]


{
  "text": " All right, so today we're going to be looking at AI and specifically how to make it a whole lot smarter, but without needing, you know, like a giant supercomputer. You're interested in these sparse expert models, right? And specifically this paper about STMOe, stable and transferable mixture of experts. It sounds kind of intimidating. I think the idea is actually really elegant. It is. Think about it this way. Instead of one massive AI brain, you know, trying to process everything. What if you had a team of specialized experts? What if you had a team of people who were able to make a smart AI? What if you had a team of people who were able to make a smart computer? What if you had a team of people who were able to make a smart computer? What if you had a team of people who were able to make a smart computer? What if you had a Each one incredibly good at their own thing. That's the core concept behind these sparse expert models. OK, so it's less like one giant dictionary. 

In [2]:
import io
with io.open('data.json', 'w', encoding='utf-8') as f:
  f.write(json.dumps(result, ensure_ascii=False))

In [3]:
import json

# Load JSON file
with open("data.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# Ensure the JSON is a list of dictionaries (records)
if isinstance(data, dict):
    data = [data]  # Convert to list format if it's a single object

print("Sample Record:", data[0])  

Sample Record: {'text': " All right, so today we're going to be looking at AI and specifically how to make it a whole lot smarter, but without needing, you know, like a giant supercomputer. You're interested in these sparse expert models, right? And specifically this paper about STMOe, stable and transferable mixture of experts. It sounds kind of intimidating. I think the idea is actually really elegant. It is. Think about it this way. Instead of one massive AI brain, you know, trying to process everything. What if you had a team of specialized experts? What if you had a team of people who were able to make a smart AI? What if you had a team of people who were able to make a smart computer? What if you had a team of people who were able to make a smart computer? What if you had a team of people who were able to make a smart computer? What if you had a Each one incredibly good at their own thing. That's the core concept behind these sparse expert models. OK, so it's less like one giant 

In [4]:
import sqlite3

DB_NAME = "transcriptions.db"

def create_tables():
    """Creates SQLite tables for storing transcription data and segments separately."""
    conn = sqlite3.connect(DB_NAME)
    cursor = conn.cursor()
    
    # Table for transcription metadata
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS transcriptions (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        text TEXT,
        language TEXT
    );
    """)

    # Table for individual segments linked to transcriptions
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS segments (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        transcription_id INTEGER,
        start REAL,
        end REAL,
        text TEXT,
        tokens TEXT,
        temperature REAL,
        avg_logprob REAL,
        compression_ratio REAL,
        no_speech_prob REAL,
        confidence REAL,
        words TEXT,
        FOREIGN KEY (transcription_id) REFERENCES transcriptions(id) ON DELETE CASCADE
    );
    """)

    conn.commit()
    conn.close()
    print("Tables created successfully.")

# Run this first to create the tables
create_tables()


Tables created successfully.


In [5]:
import json

def insert_transcription(data):
    """Inserts transcription metadata and segments into separate tables."""
    conn = sqlite3.connect(DB_NAME)
    cursor = conn.cursor()
    
    print("Text:", data["text"])

    # Insert into transcriptions table
    cursor.execute("""
    INSERT INTO transcriptions (text, language) 
    VALUES (?, ?)""",
    (data.get("text", ""), data.get("language", ""))
    )

    # Get the last inserted transcription ID
    transcription_id = cursor.lastrowid

    # Insert segments
    for segment in data.get("segments", []):
        cursor.execute("""
        INSERT INTO segments (
            transcription_id, start, end, text, tokens, temperature, 
            avg_logprob, compression_ratio, no_speech_prob, confidence, words
        ) 
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            transcription_id,
            segment.get("start", 0),
            segment.get("end", 0),
            segment.get("text", ""),
            json.dumps(segment.get("tokens", [])),  # Store as JSON string
            segment.get("temperature", 0),
            segment.get("avg_logprob", 0),
            segment.get("compression_ratio", 0),
            segment.get("no_speech_prob", 0),
            segment.get("confidence", 0),
            json.dumps(segment.get("words", []))  # Store words as JSON string
        ))

    conn.commit()
    conn.close()
    print(f"Transcription and {len(data.get('segments', []))} segments inserted successfully.")


# Insert the sample data
insert_transcription(data[0])


Text:  All right, so today we're going to be looking at AI and specifically how to make it a whole lot smarter, but without needing, you know, like a giant supercomputer. You're interested in these sparse expert models, right? And specifically this paper about STMOe, stable and transferable mixture of experts. It sounds kind of intimidating. I think the idea is actually really elegant. It is. Think about it this way. Instead of one massive AI brain, you know, trying to process everything. What if you had a team of specialized experts? What if you had a team of people who were able to make a smart AI? What if you had a team of people who were able to make a smart computer? What if you had a team of people who were able to make a smart computer? What if you had a team of people who were able to make a smart computer? What if you had a Each one incredibly good at their own thing. That's the core concept behind these sparse expert models. OK, so it's less like one giant dictionary. More li

In [6]:
import pandas as pd
# first few segments
conn = sqlite3.connect(DB_NAME)
query = "SELECT transcription_id , start, end, substr(text, 0, 30) as text FROM segments LIMIT 5"
df = pd.read_sql(query, conn)
print(df.head(5))



   transcription_id  start    end                           text
0                 1   0.00  31.36   All right, so today we're go
1                 1  31.36  36.08   Each one incredibly good at 
2                 1  36.16  47.08   OK, so it's less like one gi
3                 1  47.08  53.11   Earlier attempts at this kin
4                 1  53.11  57.81   reliable and also adaptable,


In [7]:
import sqlite3

def get_text_chunks(transcription_id: int, max_chunk_duration: float = 5.0):
    """
    Retrieve reasonable chunks of text from the database, grouping segments into logical sentence structures.

    Args:
        transcription_id (int): The transcription ID to query.
        max_chunk_duration (float): Maximum duration (in seconds) for each chunk.

    Returns:
        list of dict: A list of text chunks with metadata.
    """
    conn = sqlite3.connect("transcriptions.db")
    cursor = conn.cursor()

    # Fetch segments sorted by start time
    cursor.execute("""
        SELECT id, start, end, text
        FROM segments
        WHERE transcription_id = ?
        ORDER BY start ASC
    """, (transcription_id,))

    segments = cursor.fetchall()
    conn.close()

    chunks = []
    current_chunk = []
    current_start = None
    current_end = None
    current_duration = 0

    for seg in segments:
        seg_id, seg_start, seg_end, seg_text = seg
        seg_duration = seg_end - seg_start

        # If chunk is empty, initialize it
        if not current_chunk:
            current_start = seg_start
            current_end = seg_end
            current_duration = seg_duration
            current_chunk.append(seg_text)
            continue

        # Check if adding this segment exceeds the max chunk duration
        if current_duration + seg_duration > max_chunk_duration:
            # Finalize the current chunk before starting a new one
            chunks.append({
                "start": current_start,
                "end": current_end,
                "text": " ".join(current_chunk)
            })

            # Start a new chunk
            current_chunk = [seg_text]
            current_start = seg_start
            current_end = seg_end
            current_duration = seg_duration
        else:
            # Extend the current chunk
            current_chunk.append(seg_text)
            current_end = seg_end
            current_duration += seg_duration

    # Add the last chunk if it exists
    if current_chunk:
        chunks.append({
            "start": current_start,
            "end": current_end,
            "text": " ".join(current_chunk)
        })

    return chunks

# Example usage
transcription_id = 1  # Replace with actual transcription ID
chunks = get_text_chunks(transcription_id, max_chunk_duration=7.0)

# Print results
for chunk in chunks:
    print(f"[{chunk['start']} - {chunk['end']} sec]: {chunk['text']}")


[0.0 - 31.36 sec]:  All right, so today we're going to be looking at AI and specifically how to make it a whole lot smarter, but without needing, you know, like a giant supercomputer. You're interested in these sparse expert models, right? And specifically this paper about STMOe, stable and transferable mixture of experts. It sounds kind of intimidating. I think the idea is actually really elegant. It is. Think about it this way. Instead of one massive AI brain, you know, trying to process everything. What if you had a team of specialized experts? What if you had a team of people who were able to make a smart AI? What if you had a team of people who were able to make a smart computer? What if you had a team of people who were able to make a smart computer? What if you had a team of people who were able to make a smart computer? What if you had a
[31.36 - 36.08 sec]:  Each one incredibly good at their own thing. That's the core concept behind these sparse expert models.
[36.16 - 47.08 s

In [24]:
import logging
import requests

def chat_with_ollama(prompt, model_name="qwen2.5", ollama_base_url="http://localhost:11434"):
    """Chat with Ollama."""
    try:
        url = f"{ollama_base_url}/api/generate"
        data = {
            "prompt": prompt,
            "model": model_name,
            "stream": False
        }
        response = requests.post(url, json=data)
        
        # Check if the request was successful
        if response.status_code == 200:
            # Parse the JSON response
            response_json = response.json()
            print("Chat Response:")
            pretty_json = json.dumps(response_json, indent=4)
            logging.info(pretty_json)
            result = response_json["response"]
            print(f"For prompt: {prompt}\n result: {result}")
            return response_json["response"]
        else:
            print(f"Failed to generate embeddings. Status code: {response.status_code}")
            print("Response:", response.text)
            return None
    
    except requests.ConnectionError:
        print("Failed to connect to the Ollama server. Make sure it is running locally and the URL is correct.")
        return None
    except json.JSONDecodeError:
        print("Failed to parse JSON response from Ollama server.")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None


In [25]:
def store_summary_in_db(text, summary, image_prompt, db_name="summaries.db"):
    """
    Stores the original text and its summary in an SQLite database.

    Args:
        text: The original text.
        summary: The summarized concept.
        db_name: The name of the SQLite database file.
    """

    try:
        conn = sqlite3.connect(db_name)
        cursor = conn.cursor()

        # Create the table if it doesn't exist
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS text_summaries (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                original_text TEXT NOT NULL,
                summary TEXT NOT NULL,
                image_prompt TEXT  -- New column for image prompt
           )
        ''')

        # Insert the text and summary
        cursor.execute("INSERT INTO text_summaries (original_text, summary, image_prompt) VALUES (?,?,?)", (text, summary, image_prompt))

        conn.commit()
        print(f"Stored summary in database: {db_name}")

    except sqlite3.Error as e:
        print(f"Error accessing SQLite database: {e}")
    finally:
        if conn:
            conn.close()


In [28]:
import requests
import json


def summarize_text_concept(text_chunk):  
    """
    Summarizes the core concept of a text chunk using Ollama.

    Args:
        text_chunk: The text to summarize.
        ollama_base_url: The base URL of your Ollama instance.

    Returns:
        A string summarizing the main concept, or None if there's an error.
    """

    try:
        # Construct the prompt for Ollama.  This prompt is crucial for getting a concise, single-concept summary.
        prompt = f"""
        Summarize the following text into a single, overarching concept.  Focus on the main idea, even if multiple topics are touched upon.  Provide a concise, one or two-sentence summary of this core concept.

        Text:
        {text_chunk}

        Summary:
        """

        summary = chat_with_ollama(prompt)


        image_prompt_request = f"""
        Create a detailed and imaginative image prompt for the following text summary.  The prompt should be suitable for a high-quality image generation model like Stable Diffusion or DALL-E 3. Be specific about the style, composition, and key elements of the image. Aim for a visually compelling and evocative description that captures the essence of the summary.

        Summary:
        {summary}

        Image Prompt:
        """

        image_prompt = chat_with_ollama(image_prompt_request)


        store_summary_in_db(prompt, summary, image_prompt)

        return summary, image_prompt

    except requests.exceptions.RequestException as e:
        print(f"Error communicating with Ollama: {e}")
        return None
    except json.JSONDecodeError as e:
        print(f"Invalid JSON response from Ollama: {e}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None


# Print results
for chunk in chunks:
   summarize_text_concept(chunk['text'])




Chat Response:
For prompt: 
        Summarize the following text into a single, overarching concept.  Focus on the main idea, even if multiple topics are touched upon.  Provide a concise, one or two-sentence summary of this core concept.

        Text:
         All right, so today we're going to be looking at AI and specifically how to make it a whole lot smarter, but without needing, you know, like a giant supercomputer. You're interested in these sparse expert models, right? And specifically this paper about STMOe, stable and transferable mixture of experts. It sounds kind of intimidating. I think the idea is actually really elegant. It is. Think about it this way. Instead of one massive AI brain, you know, trying to process everything. What if you had a team of specialized experts? What if you had a team of people who were able to make a smart AI? What if you had a team of people who were able to make a smart computer? What if you had a team of people who were able to make a smart c

In [None]:
import requests
import json
import base64

def generate_image_sd_webui(prompt, sd_webui_url="http://127.0.0.1:7860"):  # Default SD WebUI URL
    """
    Generates an image using Stable Diffusion WebUI's API.

    Args:
        prompt: The image generation prompt.
        sd_webui_url: The URL of your running SD WebUI instance.

    Returns:
        The generated image as a base64 encoded string, or None on error.
    """
    try:
        payload = {
            "prompt": prompt,
            "steps": 20,  # Number of diffusion steps
            "width": 512,  # Image width
            "height": 512,  # Image height
            # ... other parameters as needed (see API docs) ...
        }

        response = requests.post(f"{sd_webui_url}/sdapi/v1/txt2img", json=payload)  # Use txt2img endpoint
        response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)

        r = response.json()

        if "images" in r and len(r["images"]) > 0:
            image_base64 = r["images"][0] # Get the first image (if multiple were generated)
            return image_base64
        else:
          print(f"Unexpected response format: {r}") # Handle unexpected JSON
          return None

    except requests.exceptions.RequestException as e:
        print(f"Error communicating with SD WebUI: {e}")
        return None
    except json.JSONDecodeError as e:
        print(f"Invalid JSON response from SD WebUI: {e}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None


def save_image(base64_image, filename="generated_image.png"):
    """Decodes and saves a base64 encoded image."""
    try:
        image_bytes = base64.b64decode(base64_image)
        with open(filename, "wb") as f:
            f.write(image_bytes)
        print(f"Image saved to {filename}")
    except Exception as e:
        print(f"Error saving image: {e}")



Image saved to generated_image.png


In [33]:
import requests
import json
import base64
from PIL import Image
from PIL.ExifTags import TAGS
import io

# ... (generate_image_sd_webui function remains the same) ...

def save_image_with_metadata(base64_image, prompt, filename="generated_image.png"):
    """Decodes, adds metadata, and saves a base64 encoded image."""
    try:
        image_bytes = base64.b64decode(base64_image)
        image = Image.open(io.BytesIO(image_bytes))  # Use BytesIO to open from memory

        # Create or get the EXIF data (if it exists)
        exif = image.getexif() or {}

        # Add the prompt as a user comment (or another EXIF tag)
        exif[37510] = prompt  # 37510 is the tag for user comment

        image.save(filename, exif=exif) # Save with EXIF data
        print(f"Image saved to {filename} with metadata.")

    except Exception as e:
        print(f"Error saving image with metadata: {e}")



# Example usage:
prompt = "A majestic dragon flying over a fantasy landscape"
image_base64 = generate_image_sd_webui(prompt)

if image_base64:
    save_image_with_metadata(image_base64, prompt) # Pass the prompt to the save function
else:
    print("Image generation failed.")

Error saving image with metadata: 'dict' object has no attribute 'startswith'


In [35]:
import sqlite3
import requests
import json
import base64
from PIL import Image
from PIL.ExifTags import TAGS
import io

#... (generate_image_sd_webui and save_image_with_metadata functions remain the same)...

def store_image_in_db(image_data, text_id, image_prompt, db_name="images.db"):
    """
    Stores the image data along with its associated text ID and prompt in an SQLite database.

    Args:
        image_data: The binary image data.
        text_id: The ID of the corresponding text entry in the text summaries database.
        image_prompt: The prompt used to generate the image.
        db_name: The name of the SQLite database file.
    """
    try:
        conn = sqlite3.connect(db_name)
        cursor = conn.cursor()

        # Create the table if it doesn't exist
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS images (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                text_id INTEGER NOT NULL,
                image_prompt TEXT NOT NULL,
                image_data BLOB NOT NULL,
                FOREIGN KEY (text_id) REFERENCES text_summaries(id)  -- Optional foreign key
            )
        ''')

        cursor.execute("INSERT INTO images (text_id, image_prompt, image_data) VALUES (?,?,?)",
                       (text_id, image_prompt, image_data))

        conn.commit()
        print(f"Stored image in database: {db_name}")

    except sqlite3.Error as e:
        print(f"Error accessing SQLite database: {e}")
    finally:
        if conn:
            conn.close()


def generate_images_from_db(db_name="summaries.db", sd_webui_url="http://127.0.0.1:7860"):
    """
    Retrieves prompts from the database, generates images, and stores them in a new database.

    Args:
        db_name: The name of the SQLite database file with text summaries.
        sd_webui_url: The URL of your running SD WebUI instance.
    """
    try:
        conn = sqlite3.connect(db_name)
        cursor = conn.cursor()

        cursor.execute("SELECT id, original_text, summary, image_prompt FROM text_summaries")
        rows = cursor.fetchall()

        for row in rows:
            id, original_text, summary, image_prompt = row
            print(f"Generating image for ID: {id}")

            if image_prompt:
                image_base64 = generate_image_sd_webui(image_prompt, sd_webui_url)
                if image_base64:
                    image_data = base64.b64decode(image_base64)  # Decode the image data
                    store_image_in_db(image_data, id, image_prompt)  # Store in image database
                else:
                    print(f"Image generation failed for ID: {id}")
            else:
                print(f"No image prompt found for ID: {id}")

    except sqlite3.Error as e:
        print(f"Error accessing SQLite database: {e}")
    finally:
        if conn:
            conn.close()


# Example usage:
generate_images_from_db()

Generating image for ID: 1
Stored image in database: images.db
Generating image for ID: 2
Stored image in database: images.db
Generating image for ID: 3
Stored image in database: images.db
Generating image for ID: 4
Stored image in database: images.db
Generating image for ID: 5
Stored image in database: images.db
Generating image for ID: 6
Stored image in database: images.db
Generating image for ID: 7
Stored image in database: images.db
Generating image for ID: 8
Stored image in database: images.db
Generating image for ID: 9
Stored image in database: images.db
Generating image for ID: 10
Stored image in database: images.db
Generating image for ID: 11
Stored image in database: images.db
Generating image for ID: 12
Stored image in database: images.db
Generating image for ID: 13
Stored image in database: images.db
Generating image for ID: 14
Stored image in database: images.db
Generating image for ID: 15
Stored image in database: images.db
Generating image for ID: 16
Stored image in datab

In [46]:
import sqlite3
import os
import io
from PIL import Image  # For image format checking/handling

def extract_images_from_db(db_name="images.db", output_folder="img"):
    """
    Extracts all images from the specified database and saves them to the output folder.

    Args:
        db_name: The name of the SQLite database file.
        output_folder: The path to the folder where images will be saved.
    """

    try:
        conn = sqlite3.connect(db_name)
        cursor = conn.cursor()

        # Create the output folder if it doesn't exist
        os.makedirs(output_folder, exist_ok=True)

        cursor.execute("SELECT id, image_data, image_prompt FROM images")  # Select ID, data, and prompt
        rows = cursor.fetchall()

        for row in rows:
            image_id, image_data, image_prompt = row
            try:
                # Use BytesIO to open image from memory (BLOB data)
                image = Image.open(io.BytesIO(image_data))

                # Determine file extension based on image format (more robust)
                format = image.format.lower() if image.format else "png" # Default to PNG if format is unknown
                filename = f"image_{image_id}.{format}"
                filepath = os.path.join(output_folder, filename)

                image.save(filepath)  # Save the image
                print(f"Image {image_id} saved to {filepath}")

                # Optionally, save the prompt to a text file alongside the image:
                prompt_filename = f"image_{image_id}_prompt.txt"
                prompt_filepath = os.path.join(output_folder, prompt_filename)
                with open(prompt_filepath, "w") as f:
                    f.write(image_prompt)
                print(f"Prompt for image {image_id} saved to {prompt_filepath}")

            except Exception as e:
                print(f"Error processing or saving image {image_id}: {e}")

    except sqlite3.Error as e:
        print(f"Error accessing SQLite database: {e}")
    finally:
        if conn:
            conn.close()


extract_images_from_db()  # Uses default database name and 


Image 1 saved to img\image_1.png
Prompt for image 1 saved to img\image_1_prompt.txt
Image 2 saved to img\image_2.png
Prompt for image 2 saved to img\image_2_prompt.txt
Image 3 saved to img\image_3.png
Prompt for image 3 saved to img\image_3_prompt.txt
Image 4 saved to img\image_4.png
Prompt for image 4 saved to img\image_4_prompt.txt
Image 5 saved to img\image_5.png
Prompt for image 5 saved to img\image_5_prompt.txt
Image 6 saved to img\image_6.png
Prompt for image 6 saved to img\image_6_prompt.txt
Image 7 saved to img\image_7.png
Prompt for image 7 saved to img\image_7_prompt.txt
Image 8 saved to img\image_8.png
Prompt for image 8 saved to img\image_8_prompt.txt
Image 9 saved to img\image_9.png
Prompt for image 9 saved to img\image_9_prompt.txt
Image 10 saved to img\image_10.png
Prompt for image 10 saved to img\image_10_prompt.txt
Image 11 saved to img\image_11.png
Prompt for image 11 saved to img\image_11_prompt.txt
Image 12 saved to img\image_12.png
Prompt for image 12 saved to img\

In [47]:
import sqlite3
import os
import io
from PIL import Image
import subprocess  # For running FFmpeg
import datetime  # For timestamp handling

def extract_and_merge_images(images_db="images.db", text_summaries_db="summaries.db", output_movie="merged_movie.mp4", output_folder="img"):
    """
    Extracts images, merges them into a video using FFmpeg, respecting timestamps.
    """
    try:
        conn_images = sqlite3.connect(images_db)
        cursor_images = conn_images.cursor()

        conn_text = sqlite3.connect(text_summaries_db)
        cursor_text = conn_text.cursor()

        os.makedirs(output_folder, exist_ok=True)  # Create output folder

        # Fetch image data and timestamps, ordered by ID for consistent merging
        cursor_text.execute("SELECT id, created_at FROM text_summaries ORDER BY id")
        text_data = cursor_text.fetchall()

        image_files = []  # List to store image filepaths and durations
        for text_id, timestamp_str in text_data:
            cursor_images.execute("SELECT image_data FROM images WHERE text_id = ?", (text_id,))
            image_row = cursor_images.fetchone()

            if image_row:
                image_data = image_row[0]
                try:
                    image = Image.open(io.BytesIO(image_data))
                    format = image.format.lower() if image.format else "png"
                    filename = f"image_{text_id}.{format}"
                    filepath = os.path.join(output_folder, filename)
                    image.save(filepath)
                    print(f"Image {text_id} saved to {filepath}")

                    # Convert timestamp string to datetime object if needed
                    if isinstance(timestamp_str, str):
                        timestamp = datetime.datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))  # Handle ISO format
                    else:
                        timestamp = timestamp_str  # Assume it's already a datetime object

                    image_files.append((filepath, timestamp))

                except Exception as e:
                    print(f"Error processing image {text_id}: {e}")
            else:
                print(f"No image found for text ID: {text_id}")

        # Create a text file with image filepaths and durations for FFmpeg
        with open("image_list.txt", "w") as f:
            prev_timestamp = None
            for filepath, timestamp in image_files:
                duration = 2  # Default display duration
                if prev_timestamp:
                    time_diff = timestamp - prev_timestamp
                    duration = time_diff.total_seconds() # Calculate the difference in seconds
                f.write(f"file '{filepath}'\n")
                f.write(f"duration {duration}\n")
                prev_timestamp = timestamp

        # Use FFmpeg to create the video from the image list
        command = [
            "ffmpeg",
            "-f", "concat",
            "-safe", "0",  # Allow unsafe file paths (if needed)
            "-i", "image_list.txt",
            "-vf", "scale=1280:720", # Example scaling. Adjust as needed
            "-c:v", "libx264",
            "-pix_fmt", "yuv420p",  # Important for compatibility
            output_movie
        ]

        subprocess.run(command)  # Run FFmpeg

        os.remove("image_list.txt")  # Clean up the list file

        print(f"Video created: {output_movie}")

    except (sqlite3.Error, OSError, ValueError, subprocess.CalledProcessError) as e:
        print(f"An error occurred: {e}")
    finally:
        conn_images.close() if conn_images else None
        conn_text.close() if conn_text else None


# Example usage:
extract_and_merge_images()

An error occurred: no such column: created_at


In [None]:
import sqlite3
import os
import io
from PIL import Image
import subprocess
import datetime

def extract_and_merge_images(images_db="images.db", transcriptions_db="transcriptions.db", output_movie="merged_movie.mp4", output_folder="img"):
    """
    Extracts images, merges them into a video using FFmpeg, respecting timestamps from transcriptions table.
    """
    try:
        conn_images = sqlite3.connect(images_db)
        cursor_images = conn_images.cursor()

        conn_transcriptions = sqlite3.connect(transcriptions_db)
        cursor_transcriptions = conn_transcriptions.cursor()

        os.makedirs(output_folder, exist_ok=True)

        # Fetch image data and timestamps from transcriptions table, ordered by ID
        cursor_transcriptions.execute("SELECT id, start, end FROM segments ORDER BY id")
        transcription_data = cursor_transcriptions.fetchall()

        image_files = []
        for transcription_id, start_time_str, end_time_str in transcription_data:
            cursor_images.execute("SELECT image_data FROM images WHERE text_id = ?", (transcription_id,))
            image_row = cursor_images.fetchone()

            if image_row:
                image_data = image_row[0]
                try:
                    image = Image.open(io.BytesIO(image_data))
                    format = image.format.lower() if image.format else "png"
                    filename = f"image_{transcription_id}.{format}"
                    filepath = os.path.join(output_folder, filename)
                    image.save(filepath)
                    print(f"Image {transcription_id} saved to {filepath}")

                    # Convert time strings to datetime.time objects (or timedelta if appropriate)
                    start_time = int(start_time_str) if start_time_str else 0
                    end_time = int(end_time_str) if end_time_str else 0

                    image_files.append((filepath, start_time, end_time))

                except Exception as e:
                    print(f"Error processing image {transcription_id}: {e}")
            else:
                print(f"No image found for transcription ID: {transcription_id}")

        # Create a text file with image filepaths and durations for FFmpeg
        with open("image_list.txt", "w") as f:
            for filepath, start_time, end_time in image_files:
                if start_time and end_time:
                    # Calculate duration from start and end times
                    duration = (end_time - start_time)
                    f.write(f"file '{filepath}'\n")
                    f.write(f"duration {duration}\n")
                else:
                    print(f"Missing start or end time for image: {filepath}")
                    duration = 5  # Default display duration
                    f.write(f"file '{filepath}'\n")
                    f.write(f"duration {duration}\n")

        # Use FFmpeg to create the video from the image list
        command = [
            "ffmpeg",
            "-f", "concat",
            "-safe", "0",  # Allow unsafe file paths (if needed)
            "-i", "image_list.txt",
            "-vf", "scale=1280:720",  # Example scaling. Adjust as needed
            "-c:v", "libx264",
            "-pix_fmt", "yuv420p",  # Important for compatibility
            "temp_video.mp4"
        ]

        subprocess.run(command, check=True)  # Run FFmpeg, check for errors

        os.remove("image_list.txt")  # Clean up the list file

        audio_file="audio.wav"
        # Add audio using MoviePy
        # Use FFmpeg to add the audio to the video
        audio_command = [
            "ffmpeg",
            "-i", "temp_video.mp4",
            "-i", audio_file,
            "-c:v", "copy",  # Copy video stream (no re-encoding)
            "-c:a", "aac",  # Encode audio to AAC
            "-shortest", # Use shortest stream as reference
            output_movie
        ]
        subprocess.run(audio_command, check=True)

        os.remove("temp_video.mp4")  # Clean up temporary file

        print(f"Video with audio created: {output_movie}")

    except (sqlite3.Error, OSError, ValueError, subprocess.CalledProcessError) as e:
        print(f"An error occurred: {e}")
    finally:
        if conn_images:
            conn_images.close()
        if conn_transcriptions:
            conn_transcriptions.close()


# Example usage:
extract_and_merge_images()

Image 1 saved to img\image_1.png
Image 2 saved to img\image_2.png
Image 3 saved to img\image_3.png
Image 4 saved to img\image_4.png
Image 5 saved to img\image_5.png
Image 6 saved to img\image_6.png
Image 7 saved to img\image_7.png
Image 8 saved to img\image_8.png
Image 9 saved to img\image_9.png
Image 10 saved to img\image_10.png
Image 11 saved to img\image_11.png
Image 12 saved to img\image_12.png
Image 13 saved to img\image_13.png
Image 14 saved to img\image_14.png
Image 15 saved to img\image_15.png
Image 16 saved to img\image_16.png
Image 17 saved to img\image_17.png
Image 18 saved to img\image_18.png
Image 19 saved to img\image_19.png
Image 20 saved to img\image_20.png
Image 21 saved to img\image_21.png
Image 22 saved to img\image_22.png
Image 23 saved to img\image_23.png
Image 24 saved to img\image_24.png
Image 25 saved to img\image_25.png
Image 26 saved to img\image_26.png
Image 27 saved to img\image_27.png
Image 28 saved to img\image_28.png
Image 29 saved to img\image_29.png
Ima