<a href="https://colab.research.google.com/github/alexanderkrauck/alexanderkrauck.github.io/blob/main/TranscribedVideoSlides.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install moviepy SpeechRecognition pydub opencv-python-headless numpy openai opencv-contrib-python fpdf python-docx moviepy

Collecting SpeechRecognition
  Downloading SpeechRecognition-3.11.0-py2.py3-none-any.whl.metadata (28 kB)
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Downloading SpeechRecognition-3.11.0-py2.py3-none-any.whl (32.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.8/32.8 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: fpdf
  Building wheel for fpdf (setup.py) ... [?25l[?25hdone
  Created wheel for fpdf: filename=fpdf-1.7.2-py2.py3-none-any.whl size=40704 sh

In [None]:
from google.colab import userdata
openai_api_key = userdata.get('OPENAI_KEY')

In [None]:
import cv2
import os
import numpy as np
from moviepy.editor import VideoFileClip
import openai
from tqdm import tqdm
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
from openai import OpenAI
from skimage.metrics import structural_similarity as ssim
import base64
from moviepy.editor import VideoFileClip

# Initialize OpenAI client
client = OpenAI(api_key=openai_api_key)
openai.api_key = openai_api_key

def detect_slide_changes(video_path, sample_rate=10, ssim_threshold=0.9):
    print("Starting slide change detection...")
    video = cv2.VideoCapture(video_path)
    fps = video.get(cv2.CAP_PROP_FPS)
    frame_interval = int(fps * sample_rate)
    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    duration = total_frames / fps

    print(f"Video FPS: {fps}")
    print(f"Total frames: {total_frames}")
    print(f"Video duration: {duration:.2f} seconds")
    print(f"Frame interval: {frame_interval} frames (every {sample_rate} seconds)")

    slide_changes = []
    frames = []
    timestamps = []

    success, prev_frame = video.read()
    if not success:
        print("Failed to read the first frame of the video.")
        return [], [], []

    prev_frame_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
    frame_count = frame_interval
    current_time = 0

    slide_changes.append(0)  # Start with the first frame
    frames.append(prev_frame)
    timestamps.append(current_time)

    print("Analyzing frames for slide changes...")
    with tqdm(total=total_frames, desc="Frames Processed", unit="frame") as pbar:
        while frame_count < total_frames:
            video.set(cv2.CAP_PROP_POS_FRAMES, frame_count)
            success, frame = video.read()
            if not success:
                print(f"Failed to read frame at position {frame_count}.")
                break

            current_time = frame_count / fps

            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            score, _ = ssim(prev_frame_gray, frame_gray, full=True)

            if score < ssim_threshold:
                # Significant change detected
                print(f"Slide change detected at {current_time:.2f} seconds (frame {frame_count}). SSIM: {score:.4f}")
                slide_changes.append(current_time)
                frames.append(frame)
                timestamps.append(current_time)
                prev_frame_gray = frame_gray
            else:
                # No significant change
                pass  # You can add additional logging here if desired

            prev_frame_gray = frame_gray
            increment = min(frame_interval, total_frames - frame_count)
            frame_count += frame_interval
            pbar.update(increment)

    video.release()

    # Append the last timestamp if not included
    if slide_changes[-1] != duration:
        slide_changes.append(duration)
        print(f"Adding final slide change at end of video ({duration:.2f} seconds).")

    print(f"Total slide changes detected: {len(slide_changes) - 1}")
    return slide_changes, frames, timestamps


# MoviePy replacement for extracting audio segment
def extract_audio_with_moviepy(video_path, start_time, end_time, output_audio_path):
    try:
        video = VideoFileClip(video_path).subclip(start_time, end_time)
        video.audio.write_audiofile(output_audio_path, codec="pcm_s16le", fps=16000, nbytes=2, buffersize=2000)
        print(f"Audio extracted to {output_audio_path}.")
    except Exception as e:
        print(f"Error extracting audio with moviepy: {e}")

def transcribe_audio(start_time, end_time, segment_index, video_path):
    print(f"Transcribing audio for segment {segment_index} from {start_time:.2f}s to {end_time:.2f}s...")
    temp_audio_file = f"temp_audio_{segment_index}.wav"
    # Use ffmpeg to extract the audio segment
    #ffmpeg_command = f"ffmpeg -y -i \"{video_path}\" -ss {start_time} -to {end_time} -vn -acodec pcm_s16le -ar 16000 -ac 1 \"{temp_audio_file}\" -loglevel quiet"
    #os.system(ffmpeg_command)
    extract_audio_with_moviepy(video_path, start_time, end_time, temp_audio_file)

    try:
        with open(temp_audio_file, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file
            )
        print(f"Transcription for segment {segment_index} completed.")
    except Exception as e:
        print(f"Error during transcription of segment {segment_index}: {e}")
        transcript = None

    os.remove(temp_audio_file)
    return transcript.text if transcript else ""

# Function to encode the frame image to base64
def encode_image_from_frame(frame):
    print("Encoding slide image to base64...")
    # Convert the frame to JPEG format
    _, buffer = cv2.imencode('.jpg', frame)
    base64_image = base64.b64encode(buffer).decode('utf-8')
    print("Image encoding completed.")
    return base64_image

def summarize_slide(transcription, base64_image, segment_index):
    print(f"Generating summary for segment {segment_index}...")
    # Prepare the message content
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Please provide a comprehensive summary that combines the information from the slide image and the transcription."
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    },
                },
                {
                    "type": "text",
                    "text": f"Transcription:\n{transcription}"
                },
            ],
        }
    ]

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages
        )
        summary = response.choices[0].message.content
        print(f"Summary generation for segment {segment_index} completed.")
    except Exception as e:
        print(f"Error during summary generation for segment {segment_index}: {e}")
        summary = ""
    return summary

def main(video_path, sample_rate=10, ssim_threshold=0.9):
    print("Starting main processing...")
    print("Step 1: Detecting slide changes...")
    slide_changes, frames, timestamps = detect_slide_changes(video_path, sample_rate, ssim_threshold)

    if not slide_changes:
        print("No slide changes detected. Exiting.")
        return []

    print("\nStep 2: Processing slides and audio...")
    results = []
    duration = VideoFileClip(video_path).duration

    for i in tqdm(range(len(slide_changes) - 1), desc="Segments Processed", unit="segment"):
        start_time = slide_changes[i]
        end_time = slide_changes[i+1]
        frame = frames[i]
        timestamp = start_time

        print(f"\nProcessing segment {i}: {start_time:.2f}s to {end_time:.2f}s")

        # Transcribe audio
        transcription = transcribe_audio(start_time, end_time, i, video_path)

        # Encode the slide image to base64
        base64_image = encode_image_from_frame(frame)

        # Summarize transcription and slide image
        summary = summarize_slide(transcription, base64_image, i)

        # Collect results
        results.append({
            'timestamp': timestamp,
            'frame': frame,
            'transcription': transcription,
            'summary': summary
        })

    print("\nProcessing completed.")
    return results




  if event.key is 'enter':



In [None]:
from docx import Document
from docx.shared import Inches
import matplotlib.pyplot as plt
import cv2
import os
import tempfile

def get_filename_without_extension(path):
    # Get the base name of the path (filename with extension)
    filename_with_ext = os.path.basename(path)
    # Split the extension from the filename
    filename_without_ext, _ = os.path.splitext(filename_with_ext)
    return filename_without_ext

def export_results(results, output_filename="my_analysis.docx"):
    """
    Export results to DOCX format

    Args:
        results: List of dictionaries containing timestamp, frame, transcription, and summary
        output_filename: Name of output file (should end with .docx)
    """
    # Create temporary directory for images
    with tempfile.TemporaryDirectory() as temp_dir:
        doc = Document()

        for idx, result in enumerate(results):
            # Save the frame as an image
            frame = result['frame']
            img_path = os.path.join(temp_dir, f'frame_{idx}.png')

            # Convert BGR to RGB and save
            plt.figure(figsize=(10, 6))
            plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            plt.axis('off')
            plt.savefig(img_path, bbox_inches='tight', pad_inches=0)
            plt.close()

            # Add timestamp
            doc.add_heading(f"Timestamp: {result['timestamp']:.2f} seconds", level=1)

            # Add image
            doc.add_picture(img_path, width=Inches(6))

            # Add summary
            doc.add_heading("Summary:", level=2)
            doc.add_paragraph(result['summary'])

            # Add transcription if available
            #if result.get('transcription'):
            #    doc.add_heading("Transcription:", level=2)
            #    doc.add_paragraph(result['transcription'])

            # Add separator except for the last item
            if idx < len(results) - 1:
                doc.add_paragraph("---")
                doc.add_page_break()

        # Save the document
        doc.save(output_filename)

        print(f"Document saved as {output_filename}")

# Example usage:
# export_results(results, "my_analysis.docx")

In [None]:
import os
from pathlib import Path
os.makedirs("outputs", exist_ok=True)

In [None]:
# Get all MP4 files in the directory and its subdirectories
directory = "/content"
mp4_files = list(Path(directory).rglob("*.mp4"))

if not mp4_files:
    print(f"No MP4 files found in {directory} or its subdirectories")
else:
    print(f"Found {len(mp4_files)} MP4 files to process")

    # Process each video
    for video_path in mp4_files:
        try:
            print(f"\nProcessing: {str(video_path)}")

            # Generate output filename
            output_file = "/content/outputs/"+ str(video_path).split("/")[-1].split(".")[0] + ".docx"

            # Process the video
            results = main(str(video_path), sample_rate=10, ssim_threshold=0.9)

            # Export results
            export_results(results, output_file)

            print(f"Successfully processed: {str(video_path)}")
            print(f"Output saved as: {output_file}")

        except Exception as e:
            print(f"Error processing {str(video_path)}: {str(e)}")
            continue

    print("\nProcessing complete!")

Found 44 MP4 files to process

Processing: /content/01 Stakeholder-Beziehungsmanagement_ Einführung.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 30.0
Total frames: 26163
Video duration: 872.10 seconds
Frame interval: 300 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   6%|▌         | 1500/26163 [00:04<01:12, 341.66frame/s]

Slide change detected at 50.00 seconds (frame 1500). SSIM: 0.5983


Frames Processed:  14%|█▍        | 3600/26163 [00:08<00:47, 477.36frame/s]

Slide change detected at 120.00 seconds (frame 3600). SSIM: 0.6093


Frames Processed:  29%|██▊       | 7500/26163 [00:16<00:32, 568.28frame/s]

Slide change detected at 250.00 seconds (frame 7500). SSIM: 0.8723


Frames Processed:  39%|███▉      | 10200/26163 [00:22<00:32, 489.14frame/s]

Slide change detected at 340.00 seconds (frame 10200). SSIM: 0.3846


Frames Processed:  45%|████▍     | 11700/26163 [00:24<00:27, 531.78frame/s]

Slide change detected at 390.00 seconds (frame 11700). SSIM: 0.3765


Frames Processed:  58%|█████▊    | 15300/26163 [00:32<00:27, 401.85frame/s]

Slide change detected at 510.00 seconds (frame 15300). SSIM: 0.8797


Frames Processed:  67%|██████▋   | 17400/26163 [00:36<00:18, 471.86frame/s]

Slide change detected at 580.00 seconds (frame 17400). SSIM: 0.8142


Frames Processed:  75%|███████▍  | 19500/26163 [00:40<00:12, 542.43frame/s]

Slide change detected at 650.00 seconds (frame 19500). SSIM: 0.7601


Frames Processed:  80%|████████  | 21000/26163 [00:43<00:10, 485.75frame/s]

Slide change detected at 700.00 seconds (frame 21000). SSIM: 0.7056


Frames Processed:  81%|████████▏ | 21300/26163 [00:44<00:10, 444.68frame/s]

Slide change detected at 710.00 seconds (frame 21300). SSIM: 0.7126


Frames Processed:  83%|████████▎ | 21600/26163 [00:45<00:11, 394.33frame/s]

Slide change detected at 720.00 seconds (frame 21600). SSIM: 0.7143


Frames Processed:  94%|█████████▍| 24600/26163 [00:51<00:03, 517.24frame/s]

Slide change detected at 820.00 seconds (frame 24600). SSIM: 0.7127


Frames Processed:  99%|█████████▉| 25863/26163 [00:54<00:00, 477.61frame/s]


Adding final slide change at end of video (872.10 seconds).
Total slide changes detected: 13

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/13 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 50.00s
Transcribing audio for segment 0 from 0.00s to 50.00s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   8%|▊         | 1/13 [00:08<01:38,  8.18s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 50.00s to 120.00s
Transcribing audio for segment 1 from 50.00s to 120.00s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  15%|█▌        | 2/13 [00:17<01:35,  8.65s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 120.00s to 250.00s
Transcribing audio for segment 2 from 120.00s to 250.00s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  23%|██▎       | 3/13 [00:34<02:07, 12.72s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 250.00s to 340.00s
Transcribing audio for segment 3 from 250.00s to 340.00s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  31%|███       | 4/13 [00:46<01:50, 12.26s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 340.00s to 390.00s
Transcribing audio for segment 4 from 340.00s to 390.00s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  38%|███▊      | 5/13 [00:53<01:23, 10.45s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 390.00s to 510.00s
Transcribing audio for segment 5 from 390.00s to 510.00s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  46%|████▌     | 6/13 [01:07<01:22, 11.79s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 510.00s to 580.00s
Transcribing audio for segment 6 from 510.00s to 580.00s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  54%|█████▍    | 7/13 [01:18<01:07, 11.27s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 580.00s to 650.00s
Transcribing audio for segment 7 from 580.00s to 650.00s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  62%|██████▏   | 8/13 [01:28<00:55, 11.04s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 650.00s to 700.00s
Transcribing audio for segment 8 from 650.00s to 700.00s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  69%|██████▉   | 9/13 [01:38<00:43, 10.79s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 700.00s to 710.00s
Transcribing audio for segment 9 from 700.00s to 710.00s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  77%|███████▋  | 10/13 [01:47<00:29,  9.98s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 710.00s to 720.00s
Transcribing audio for segment 10 from 710.00s to 720.00s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  85%|████████▍ | 11/13 [01:56<00:19,  9.67s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 720.00s to 820.00s
Transcribing audio for segment 11 from 720.00s to 820.00s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  92%|█████████▏| 12/13 [02:07<00:10, 10.35s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 820.00s to 872.10s
Transcribing audio for segment 12 from 820.00s to 872.10s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed: 100%|██████████| 13/13 [02:23<00:00, 11.00s/segment]

Summary generation for segment 12 completed.

Processing completed.





Document saved as /content/outputs/01 Stakeholder-Beziehungsmanagement_ Einführung.docx
Successfully processed: /content/01 Stakeholder-Beziehungsmanagement_ Einführung.mp4
Output saved as: /content/outputs/01 Stakeholder-Beziehungsmanagement_ Einführung.docx

Processing: /content/Video 1_Innovation und Management im Digitalen Zeitalter.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 29.999988573280913
Total frames: 92765
Video duration: 3092.17 seconds
Frame interval: 299 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|          | 598/92765 [00:01<02:58, 516.43frame/s]

Slide change detected at 19.93 seconds (frame 598). SSIM: 0.3873


Frames Processed:   1%|          | 897/92765 [00:01<02:33, 597.91frame/s]

Slide change detected at 29.90 seconds (frame 897). SSIM: 0.5390


Frames Processed:   3%|▎         | 2990/92765 [00:05<02:29, 600.92frame/s]

Slide change detected at 99.67 seconds (frame 2990). SSIM: 0.8808


Frames Processed:  10%|▉         | 9269/92765 [00:15<01:57, 708.22frame/s]

Slide change detected at 308.97 seconds (frame 9269). SSIM: 0.8478


Frames Processed:  13%|█▎        | 11960/92765 [00:18<01:55, 699.51frame/s]

Slide change detected at 398.67 seconds (frame 11960). SSIM: 0.8401


Frames Processed:  19%|█▊        | 17342/92765 [00:28<02:17, 549.73frame/s]

Slide change detected at 578.07 seconds (frame 17342). SSIM: 0.7097


Frames Processed:  24%|██▍       | 22126/92765 [00:37<02:13, 528.32frame/s]

Slide change detected at 737.53 seconds (frame 22126). SSIM: 0.7310


Frames Processed:  28%|██▊       | 26013/92765 [00:43<02:02, 545.51frame/s]

Slide change detected at 867.10 seconds (frame 26013). SSIM: 0.4502


Frames Processed:  30%|██▉       | 27807/92765 [00:47<02:02, 528.91frame/s]

Slide change detected at 926.90 seconds (frame 27807). SSIM: 0.4554


Frames Processed:  36%|███▌      | 33488/92765 [00:57<01:46, 555.70frame/s]

Slide change detected at 1116.27 seconds (frame 33488). SSIM: 0.7176


Frames Processed:  39%|███▉      | 36478/92765 [01:07<03:27, 270.93frame/s]

Slide change detected at 1215.93 seconds (frame 36478). SSIM: 0.7188


Frames Processed:  44%|████▍     | 41262/92765 [01:14<01:27, 587.68frame/s]

Slide change detected at 1375.40 seconds (frame 41262). SSIM: 0.8204


Frames Processed:  51%|█████     | 47541/92765 [01:25<00:59, 758.69frame/s]

Slide change detected at 1584.70 seconds (frame 47541). SSIM: 0.7403


Frames Processed:  52%|█████▏    | 48438/92765 [01:26<01:06, 664.95frame/s]

Slide change detected at 1614.60 seconds (frame 48438). SSIM: 0.7170


Frames Processed:  54%|█████▍    | 50531/92765 [01:30<01:22, 512.22frame/s]

Slide change detected at 1684.37 seconds (frame 50531). SSIM: 0.7509


Frames Processed:  58%|█████▊    | 53820/92765 [01:37<01:17, 500.74frame/s]

Slide change detected at 1794.00 seconds (frame 53820). SSIM: 0.8569


Frames Processed:  62%|██████▏   | 57109/92765 [01:43<00:58, 614.41frame/s]

Slide change detected at 1903.63 seconds (frame 57109). SSIM: 0.7788


Frames Processed:  65%|██████▌   | 60398/92765 [01:49<00:52, 615.62frame/s]

Slide change detected at 2013.27 seconds (frame 60398). SSIM: 0.6832


Frames Processed:  69%|██████▉   | 64285/92765 [01:54<00:39, 717.36frame/s]

Slide change detected at 2142.83 seconds (frame 64285). SSIM: 0.6502


Frames Processed:  73%|███████▎  | 67873/92765 [02:02<00:59, 418.73frame/s]

Slide change detected at 2262.43 seconds (frame 67873). SSIM: 0.7492


Frames Processed:  75%|███████▌  | 69667/92765 [02:05<00:42, 544.70frame/s]

Slide change detected at 2322.23 seconds (frame 69667). SSIM: 0.5720


Frames Processed:  78%|███████▊  | 72657/92765 [02:10<00:33, 595.49frame/s]

Slide change detected at 2421.90 seconds (frame 72657). SSIM: 0.6287


Frames Processed:  79%|███████▊  | 72956/92765 [02:11<00:34, 568.68frame/s]

Slide change detected at 2431.87 seconds (frame 72956). SSIM: 0.7166


Frames Processed:  85%|████████▍ | 78637/92765 [02:20<00:19, 713.88frame/s]

Slide change detected at 2621.23 seconds (frame 78637). SSIM: 0.6806


Frames Processed:  93%|█████████▎| 86411/92765 [02:34<00:11, 565.78frame/s]

Slide change detected at 2880.37 seconds (frame 86411). SSIM: 0.7958


Frames Processed:  98%|█████████▊| 91195/92765 [02:43<00:02, 529.86frame/s]

Slide change detected at 3039.83 seconds (frame 91195). SSIM: 0.8319


Frames Processed: 100%|█████████▉| 92466/92765 [02:45<00:00, 557.93frame/s]


Adding final slide change at end of video (3092.17 seconds).
Total slide changes detected: 27

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/27 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 19.93s
Transcribing audio for segment 0 from 0.00s to 19.93s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   4%|▎         | 1/27 [00:06<02:52,  6.64s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 19.93s to 29.90s
Transcribing audio for segment 1 from 19.93s to 29.90s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:   7%|▋         | 2/27 [00:15<03:18,  7.93s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 29.90s to 99.67s
Transcribing audio for segment 2 from 29.90s to 99.67s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  11%|█         | 3/27 [00:24<03:24,  8.51s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 99.67s to 308.97s
Transcribing audio for segment 3 from 99.67s to 308.97s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  15%|█▍        | 4/27 [00:40<04:17, 11.20s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 308.97s to 398.67s
Transcribing audio for segment 4 from 308.97s to 398.67s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  19%|█▊        | 5/27 [00:52<04:18, 11.74s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 398.67s to 578.07s
Transcribing audio for segment 5 from 398.67s to 578.07s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  22%|██▏       | 6/27 [01:11<04:55, 14.09s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 578.07s to 737.53s
Transcribing audio for segment 6 from 578.07s to 737.53s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  26%|██▌       | 7/27 [01:26<04:48, 14.41s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 737.53s to 867.10s
Transcribing audio for segment 7 from 737.53s to 867.10s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  30%|██▉       | 8/27 [01:40<04:31, 14.29s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 867.10s to 926.90s
Transcribing audio for segment 8 from 867.10s to 926.90s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  33%|███▎      | 9/27 [01:49<03:49, 12.75s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 926.90s to 1116.27s
Transcribing audio for segment 9 from 926.90s to 1116.27s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  37%|███▋      | 10/27 [02:07<04:03, 14.32s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 1116.27s to 1215.93s
Transcribing audio for segment 10 from 1116.27s to 1215.93s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  41%|████      | 11/27 [02:20<03:40, 13.79s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 1215.93s to 1375.40s
Transcribing audio for segment 11 from 1215.93s to 1375.40s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  44%|████▍     | 12/27 [02:38<03:44, 15.00s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 1375.40s to 1584.70s
Transcribing audio for segment 12 from 1375.40s to 1584.70s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  48%|████▊     | 13/27 [02:55<03:39, 15.68s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 1584.70s to 1614.60s
Transcribing audio for segment 13 from 1584.70s to 1614.60s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  52%|█████▏    | 14/27 [03:07<03:08, 14.50s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 1614.60s to 1684.37s
Transcribing audio for segment 14 from 1614.60s to 1684.37s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  56%|█████▌    | 15/27 [03:20<02:49, 14.16s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 1684.37s to 1794.00s
Transcribing audio for segment 15 from 1684.37s to 1794.00s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  59%|█████▉    | 16/27 [03:34<02:35, 14.18s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 1794.00s to 1903.63s
Transcribing audio for segment 16 from 1794.00s to 1903.63s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  63%|██████▎   | 17/27 [03:48<02:21, 14.18s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 1903.63s to 2013.27s
Transcribing audio for segment 17 from 1903.63s to 2013.27s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  67%|██████▋   | 18/27 [04:05<02:14, 14.96s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 2013.27s to 2142.83s
Transcribing audio for segment 18 from 2013.27s to 2142.83s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  70%|███████   | 19/27 [04:24<02:10, 16.30s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 2142.83s to 2262.43s
Transcribing audio for segment 19 from 2142.83s to 2262.43s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  74%|███████▍  | 20/27 [04:41<01:55, 16.44s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 2262.43s to 2322.23s
Transcribing audio for segment 20 from 2262.43s to 2322.23s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed:  78%|███████▊  | 21/27 [04:52<01:28, 14.81s/segment]

Summary generation for segment 20 completed.

Processing segment 21: 2322.23s to 2421.90s
Transcribing audio for segment 21 from 2322.23s to 2421.90s...
Transcription for segment 21 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 21...


Segments Processed:  81%|████████▏ | 22/27 [05:07<01:13, 14.77s/segment]

Summary generation for segment 21 completed.

Processing segment 22: 2421.90s to 2431.87s
Transcribing audio for segment 22 from 2421.90s to 2431.87s...
Transcription for segment 22 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 22...


Segments Processed:  85%|████████▌ | 23/27 [05:17<00:53, 13.28s/segment]

Summary generation for segment 22 completed.

Processing segment 23: 2431.87s to 2621.23s
Transcribing audio for segment 23 from 2431.87s to 2621.23s...
Transcription for segment 23 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 23...


Segments Processed:  89%|████████▉ | 24/27 [05:37<00:46, 15.50s/segment]

Summary generation for segment 23 completed.

Processing segment 24: 2621.23s to 2880.37s
Transcribing audio for segment 24 from 2621.23s to 2880.37s...
Transcription for segment 24 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 24...


Segments Processed:  93%|█████████▎| 25/27 [06:07<00:39, 19.74s/segment]

Summary generation for segment 24 completed.

Processing segment 25: 2880.37s to 3039.83s
Transcribing audio for segment 25 from 2880.37s to 3039.83s...
Transcription for segment 25 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 25...


Segments Processed:  96%|█████████▋| 26/27 [06:25<00:19, 19.22s/segment]

Summary generation for segment 25 completed.

Processing segment 26: 3039.83s to 3092.17s
Transcribing audio for segment 26 from 3039.83s to 3092.17s...
Transcription for segment 26 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 26...


Segments Processed: 100%|██████████| 27/27 [06:40<00:00, 14.83s/segment]

Summary generation for segment 26 completed.

Processing completed.





Document saved as /content/outputs/Video 1_Innovation und Management im Digitalen Zeitalter.docx
Successfully processed: /content/Video 1_Innovation und Management im Digitalen Zeitalter.mp4
Output saved as: /content/outputs/Video 1_Innovation und Management im Digitalen Zeitalter.docx

Processing: /content/Building High Performance Teams - Teil 02.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 9.476199372853793
Total frames: 9544
Video duration: 1007.15 seconds
Frame interval: 94 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   2%|▏         | 188/9544 [00:01<00:53, 175.88frame/s]

Slide change detected at 19.84 seconds (frame 188). SSIM: 0.5132


Frames Processed:   3%|▎         | 282/9544 [00:01<00:41, 224.04frame/s]

Slide change detected at 29.76 seconds (frame 282). SSIM: 0.8375


Frames Processed:   7%|▋         | 658/9544 [00:02<00:29, 298.99frame/s]

Slide change detected at 69.44 seconds (frame 658). SSIM: 0.8717


Frames Processed:  11%|█         | 1034/9544 [00:03<00:26, 324.15frame/s]

Slide change detected at 109.12 seconds (frame 1034). SSIM: 0.6960


Frames Processed:  16%|█▌        | 1504/9544 [00:05<00:24, 329.13frame/s]

Slide change detected at 158.71 seconds (frame 1504). SSIM: 0.6923


Frames Processed:  89%|████████▊ | 8460/9544 [00:29<00:03, 278.13frame/s]

Slide change detected at 892.76 seconds (frame 8460). SSIM: 0.8225


Frames Processed:  99%|█████████▉| 9450/9544 [00:33<00:00, 284.09frame/s]


Adding final slide change at end of video (1007.15 seconds).
Total slide changes detected: 7

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/7 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 19.84s
Transcribing audio for segment 0 from 0.00s to 19.84s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:  14%|█▍        | 1/7 [00:06<00:39,  6.57s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 19.84s to 29.76s
Transcribing audio for segment 1 from 19.84s to 29.76s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  29%|██▊       | 2/7 [00:14<00:37,  7.49s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 29.76s to 69.44s
Transcribing audio for segment 2 from 29.76s to 69.44s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  43%|████▎     | 3/7 [00:24<00:34,  8.57s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 69.44s to 109.12s
Transcribing audio for segment 3 from 69.44s to 109.12s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  57%|█████▋    | 4/7 [00:33<00:26,  8.76s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 109.12s to 158.71s
Transcribing audio for segment 4 from 109.12s to 158.71s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  71%|███████▏  | 5/7 [00:43<00:18,  9.19s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 158.71s to 892.76s
Transcribing audio for segment 5 from 158.71s to 892.76s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  86%|████████▌ | 6/7 [01:20<00:18, 18.68s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 892.76s to 1007.15s
Transcribing audio for segment 6 from 892.76s to 1007.15s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed: 100%|██████████| 7/7 [01:33<00:00, 13.34s/segment]

Summary generation for segment 6 completed.

Processing completed.





Document saved as /content/outputs/Building High Performance Teams - Teil 02.docx
Successfully processed: /content/Building High Performance Teams - Teil 02.mp4
Output saved as: /content/outputs/Building High Performance Teams - Teil 02.docx

Processing: /content/Building High Performance Teams - Teil 01.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 30.0
Total frames: 56027
Video duration: 1867.57 seconds
Frame interval: 300 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|          | 600/56027 [00:01<02:22, 389.25frame/s]

Slide change detected at 20.00 seconds (frame 600). SSIM: 0.5225


Frames Processed:   5%|▌         | 3000/56027 [00:07<02:00, 440.36frame/s]

Slide change detected at 100.00 seconds (frame 3000). SSIM: 0.4927


Frames Processed:  10%|█         | 5700/56027 [00:10<01:06, 757.29frame/s]

Slide change detected at 190.00 seconds (frame 5700). SSIM: 0.4953


Frames Processed:  18%|█▊        | 9900/56027 [00:15<00:58, 787.76frame/s]

Slide change detected at 330.00 seconds (frame 9900). SSIM: 0.8781


Frames Processed:  23%|██▎       | 12900/56027 [00:21<01:19, 543.99frame/s]

Slide change detected at 430.00 seconds (frame 12900). SSIM: 0.8052


Frames Processed:  29%|██▉       | 16200/56027 [00:26<01:04, 620.69frame/s]

Slide change detected at 540.00 seconds (frame 16200). SSIM: 0.7878


Frames Processed:  33%|███▎      | 18300/56027 [00:30<01:00, 618.67frame/s]

Slide change detected at 610.00 seconds (frame 18300). SSIM: 0.8702


Frames Processed:  36%|███▌      | 20100/56027 [00:34<01:29, 401.25frame/s]

Slide change detected at 670.00 seconds (frame 20100). SSIM: 0.8607


Frames Processed:  37%|███▋      | 21000/56027 [00:37<01:38, 357.01frame/s]

Slide change detected at 700.00 seconds (frame 21000). SSIM: 0.8939


Frames Processed:  52%|█████▏    | 29400/56027 [01:04<01:25, 309.82frame/s]

Slide change detected at 980.00 seconds (frame 29400). SSIM: 0.8479


Frames Processed:  56%|█████▌    | 31500/56027 [01:11<01:24, 291.48frame/s]

Slide change detected at 1050.00 seconds (frame 31500). SSIM: 0.8916


Frames Processed:  60%|█████▉    | 33600/56027 [01:23<02:18, 162.17frame/s]

Slide change detected at 1120.00 seconds (frame 33600). SSIM: 0.8823


Frames Processed:  61%|██████    | 33900/56027 [01:24<02:03, 178.95frame/s]

Slide change detected at 1130.00 seconds (frame 33900). SSIM: 0.8811


Frames Processed:  70%|██████▉   | 39000/56027 [01:46<00:56, 303.30frame/s]

Slide change detected at 1300.00 seconds (frame 39000). SSIM: 0.8072


Frames Processed:  84%|████████▎ | 46800/56027 [02:12<00:29, 310.74frame/s]

Slide change detected at 1560.00 seconds (frame 46800). SSIM: 0.7911


Frames Processed:  87%|████████▋ | 48900/56027 [02:19<00:25, 281.36frame/s]

Slide change detected at 1630.00 seconds (frame 48900). SSIM: 0.7860


Frames Processed:  97%|█████████▋| 54300/56027 [02:38<00:05, 294.98frame/s]

Slide change detected at 1810.00 seconds (frame 54300). SSIM: 0.8712


Frames Processed:  99%|█████████▉| 55727/56027 [02:42<00:00, 342.57frame/s]


Adding final slide change at end of video (1867.57 seconds).
Total slide changes detected: 18

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/18 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 20.00s
Transcribing audio for segment 0 from 0.00s to 20.00s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   6%|▌         | 1/18 [00:05<01:39,  5.85s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 20.00s to 100.00s
Transcribing audio for segment 1 from 20.00s to 100.00s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  11%|█         | 2/18 [00:16<02:21,  8.84s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 100.00s to 190.00s
Transcribing audio for segment 2 from 100.00s to 190.00s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  17%|█▋        | 3/18 [00:29<02:41, 10.80s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 190.00s to 330.00s
Transcribing audio for segment 3 from 190.00s to 330.00s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  22%|██▏       | 4/18 [00:41<02:34, 11.01s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 330.00s to 430.00s
Transcribing audio for segment 4 from 330.00s to 430.00s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  28%|██▊       | 5/18 [00:53<02:27, 11.31s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 430.00s to 540.00s
Transcribing audio for segment 5 from 430.00s to 540.00s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  33%|███▎      | 6/18 [01:08<02:30, 12.56s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 540.00s to 610.00s
Transcribing audio for segment 6 from 540.00s to 610.00s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  39%|███▉      | 7/18 [01:19<02:12, 12.08s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 610.00s to 670.00s
Transcribing audio for segment 7 from 610.00s to 670.00s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  44%|████▍     | 8/18 [01:31<02:00, 12.05s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 670.00s to 700.00s
Transcribing audio for segment 8 from 670.00s to 700.00s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  50%|█████     | 9/18 [01:37<01:33, 10.39s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 700.00s to 980.00s
Transcribing audio for segment 9 from 700.00s to 980.00s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  56%|█████▌    | 10/18 [02:01<01:56, 14.53s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 980.00s to 1050.00s
Transcribing audio for segment 10 from 980.00s to 1050.00s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  61%|██████    | 11/18 [02:15<01:39, 14.23s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 1050.00s to 1120.00s
Transcribing audio for segment 11 from 1050.00s to 1120.00s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  67%|██████▋   | 12/18 [02:30<01:27, 14.63s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 1120.00s to 1130.00s
Transcribing audio for segment 12 from 1120.00s to 1130.00s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  72%|███████▏  | 13/18 [02:39<01:03, 12.71s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 1130.00s to 1300.00s
Transcribing audio for segment 13 from 1130.00s to 1300.00s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  78%|███████▊  | 14/18 [02:56<00:56, 14.23s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 1300.00s to 1560.00s
Transcribing audio for segment 14 from 1300.00s to 1560.00s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  83%|████████▎ | 15/18 [03:17<00:48, 16.30s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 1560.00s to 1630.00s
Transcribing audio for segment 15 from 1560.00s to 1630.00s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  89%|████████▉ | 16/18 [03:30<00:30, 15.33s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 1630.00s to 1810.00s
Transcribing audio for segment 16 from 1630.00s to 1810.00s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  94%|█████████▍| 17/18 [03:47<00:15, 15.72s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 1810.00s to 1867.57s
Transcribing audio for segment 17 from 1810.00s to 1867.57s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed: 100%|██████████| 18/18 [03:58<00:00, 13.27s/segment]

Summary generation for segment 17 completed.

Processing completed.





Document saved as /content/outputs/Building High Performance Teams - Teil 01.docx
Successfully processed: /content/Building High Performance Teams - Teil 01.mp4
Output saved as: /content/outputs/Building High Performance Teams - Teil 01.docx

Processing: /content/Fabasoft Management Academy - Basismodul 1 - Dynamische Stabilität von Organisationen TEIL 2.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 29.999233609604644
Total frames: 81810
Video duration: 2727.07 seconds
Frame interval: 299 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   9%|▉         | 7176/81810 [00:27<04:38, 267.90frame/s]

Slide change detected at 239.21 seconds (frame 7176). SSIM: 0.8822


Frames Processed:  15%|█▌        | 12558/81810 [00:47<04:03, 284.72frame/s]

Slide change detected at 418.61 seconds (frame 12558). SSIM: 0.7730


Frames Processed:  21%|██        | 17342/81810 [01:05<04:04, 263.31frame/s]

Slide change detected at 578.08 seconds (frame 17342). SSIM: 0.8039


Frames Processed:  26%|██▌       | 21229/81810 [01:18<03:44, 270.34frame/s]

Slide change detected at 707.65 seconds (frame 21229). SSIM: 0.8950


Frames Processed:  30%|██▉       | 24219/81810 [01:28<03:12, 299.55frame/s]

Slide change detected at 807.32 seconds (frame 24219). SSIM: 0.8888


Frames Processed:  42%|████▏     | 34086/81810 [01:59<02:19, 341.01frame/s]

Slide change detected at 1136.23 seconds (frame 34086). SSIM: 0.8466


Frames Processed:  49%|████▉     | 40365/81810 [02:16<01:33, 444.72frame/s]

Slide change detected at 1345.53 seconds (frame 40365). SSIM: 0.7064


Frames Processed:  52%|█████▏    | 42159/81810 [02:19<01:11, 553.85frame/s]

Slide change detected at 1405.34 seconds (frame 42159). SSIM: 0.8912


Frames Processed:  58%|█████▊    | 47541/81810 [02:28<00:55, 612.90frame/s]

Slide change detected at 1584.74 seconds (frame 47541). SSIM: 0.8437


Frames Processed:  61%|██████    | 49933/81810 [02:32<00:47, 665.22frame/s]

Slide change detected at 1664.48 seconds (frame 49933). SSIM: 0.8548


Frames Processed:  67%|██████▋   | 55016/81810 [02:40<00:41, 651.97frame/s]

Slide change detected at 1833.91 seconds (frame 55016). SSIM: 0.7134


Frames Processed:  72%|███████▏  | 58903/81810 [02:49<01:02, 365.00frame/s]

Slide change detected at 1963.48 seconds (frame 58903). SSIM: 0.8586


Frames Processed:  76%|███████▌  | 61893/81810 [02:55<00:36, 543.61frame/s]

Slide change detected at 2063.15 seconds (frame 61893). SSIM: 0.8877


Frames Processed:  76%|███████▋  | 62491/81810 [02:56<00:35, 550.88frame/s]

Slide change detected at 2083.09 seconds (frame 62491). SSIM: 0.8648


Frames Processed:  80%|████████  | 65481/81810 [03:02<00:39, 418.04frame/s]

Slide change detected at 2182.76 seconds (frame 65481). SSIM: 0.8745


Frames Processed:  80%|████████  | 65780/81810 [03:03<00:36, 436.29frame/s]

Slide change detected at 2192.72 seconds (frame 65780). SSIM: 0.8667


Frames Processed:  82%|████████▏ | 67275/81810 [03:05<00:25, 565.18frame/s]

Slide change detected at 2242.56 seconds (frame 67275). SSIM: 0.8645


Frames Processed:  85%|████████▌ | 69667/81810 [03:09<00:19, 627.97frame/s]

Slide change detected at 2322.29 seconds (frame 69667). SSIM: 0.8471


Frames Processed:  88%|████████▊ | 71760/81810 [03:12<00:15, 650.52frame/s]

Slide change detected at 2392.06 seconds (frame 71760). SSIM: 0.8856


Frames Processed:  90%|█████████ | 73853/81810 [03:16<00:14, 546.44frame/s]

Slide change detected at 2461.83 seconds (frame 73853). SSIM: 0.8768


Frames Processed:  97%|█████████▋| 79235/81810 [03:24<00:03, 753.11frame/s]

Slide change detected at 2641.23 seconds (frame 79235). SSIM: 0.8994


Frames Processed: 100%|█████████▉| 81511/81810 [03:27<00:00, 392.01frame/s]


Adding final slide change at end of video (2727.07 seconds).
Total slide changes detected: 22

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/22 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 239.21s
Transcribing audio for segment 0 from 0.00s to 239.21s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   5%|▍         | 1/22 [00:18<06:18, 18.03s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 239.21s to 418.61s
Transcribing audio for segment 1 from 239.21s to 418.61s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:   9%|▉         | 2/22 [00:38<06:27, 19.40s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 418.61s to 578.08s
Transcribing audio for segment 2 from 418.61s to 578.08s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  14%|█▎        | 3/22 [00:56<05:56, 18.77s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 578.08s to 707.65s
Transcribing audio for segment 3 from 578.08s to 707.65s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  18%|█▊        | 4/22 [01:19<06:06, 20.37s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 707.65s to 807.32s
Transcribing audio for segment 4 from 707.65s to 807.32s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  23%|██▎       | 5/22 [01:42<06:04, 21.47s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 807.32s to 1136.23s
Transcribing audio for segment 5 from 807.32s to 1136.23s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  27%|██▋       | 6/22 [02:08<06:09, 23.12s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 1136.23s to 1345.53s
Transcribing audio for segment 6 from 1136.23s to 1345.53s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  32%|███▏      | 7/22 [02:34<06:00, 24.00s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 1345.53s to 1405.34s
Transcribing audio for segment 7 from 1345.53s to 1405.34s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  36%|███▋      | 8/22 [02:48<04:50, 20.77s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 1405.34s to 1584.74s
Transcribing audio for segment 8 from 1405.34s to 1584.74s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  41%|████      | 9/22 [03:06<04:16, 19.71s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 1584.74s to 1664.48s
Transcribing audio for segment 9 from 1584.74s to 1664.48s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  45%|████▌     | 10/22 [03:21<03:41, 18.42s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 1664.48s to 1833.91s
Transcribing audio for segment 10 from 1664.48s to 1833.91s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  50%|█████     | 11/22 [03:47<03:48, 20.78s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 1833.91s to 1963.48s
Transcribing audio for segment 11 from 1833.91s to 1963.48s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  55%|█████▍    | 12/22 [04:08<03:29, 20.90s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 1963.48s to 2063.15s
Transcribing audio for segment 12 from 1963.48s to 2063.15s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  59%|█████▉    | 13/22 [04:22<02:48, 18.77s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 2063.15s to 2083.09s
Transcribing audio for segment 13 from 2063.15s to 2083.09s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  64%|██████▎   | 14/22 [04:38<02:21, 17.75s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 2083.09s to 2182.76s
Transcribing audio for segment 14 from 2083.09s to 2182.76s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  68%|██████▊   | 15/22 [04:55<02:03, 17.69s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 2182.76s to 2192.72s
Transcribing audio for segment 15 from 2182.76s to 2192.72s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  73%|███████▎  | 16/22 [05:06<01:33, 15.52s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 2192.72s to 2242.56s
Transcribing audio for segment 16 from 2192.72s to 2242.56s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  77%|███████▋  | 17/22 [05:17<01:11, 14.30s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 2242.56s to 2322.29s
Transcribing audio for segment 17 from 2242.56s to 2322.29s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  82%|████████▏ | 18/22 [05:28<00:53, 13.32s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 2322.29s to 2392.06s
Transcribing audio for segment 18 from 2322.29s to 2392.06s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  86%|████████▋ | 19/22 [05:46<00:44, 14.68s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 2392.06s to 2461.83s
Transcribing audio for segment 19 from 2392.06s to 2461.83s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  91%|█████████ | 20/22 [06:03<00:30, 15.26s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 2461.83s to 2641.23s
Transcribing audio for segment 20 from 2461.83s to 2641.23s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed:  95%|█████████▌| 21/22 [06:23<00:16, 16.77s/segment]

Summary generation for segment 20 completed.

Processing segment 21: 2641.23s to 2727.07s
Transcribing audio for segment 21 from 2641.23s to 2727.07s...
Transcription for segment 21 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 21...


Segments Processed: 100%|██████████| 22/22 [06:38<00:00, 18.13s/segment]

Summary generation for segment 21 completed.

Processing completed.





Document saved as /content/outputs/Fabasoft Management Academy - Basismodul 1 - Dynamische Stabilität von Organisationen TEIL 2.docx
Successfully processed: /content/Fabasoft Management Academy - Basismodul 1 - Dynamische Stabilität von Organisationen TEIL 2.mp4
Output saved as: /content/outputs/Fabasoft Management Academy - Basismodul 1 - Dynamische Stabilität von Organisationen TEIL 2.docx

Processing: /content/Building High Performance Teams - Teil 03.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 30.0
Total frames: 54355
Video duration: 1811.83 seconds
Frame interval: 300 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|          | 600/54355 [00:03<05:09, 173.87frame/s]

Slide change detected at 20.00 seconds (frame 600). SSIM: 0.5253


Frames Processed:   4%|▍         | 2400/54355 [00:09<03:02, 284.26frame/s]

Slide change detected at 80.00 seconds (frame 2400). SSIM: 0.8568


Frames Processed:   6%|▌         | 3300/54355 [00:12<02:59, 284.97frame/s]

Slide change detected at 110.00 seconds (frame 3300). SSIM: 0.8386


Frames Processed:   7%|▋         | 3900/54355 [00:15<03:32, 237.89frame/s]

Slide change detected at 130.00 seconds (frame 3900). SSIM: 0.8970


Frames Processed:  10%|▉         | 5400/54355 [00:20<02:55, 279.46frame/s]

Slide change detected at 180.00 seconds (frame 5400). SSIM: 0.8868


Frames Processed:  15%|█▍        | 8100/54355 [00:25<01:19, 582.58frame/s]

Slide change detected at 270.00 seconds (frame 8100). SSIM: 0.8580


Frames Processed:  19%|█▉        | 10500/54355 [00:31<01:35, 460.45frame/s]

Slide change detected at 350.00 seconds (frame 10500). SSIM: 0.4795


Frames Processed:  21%|██        | 11400/54355 [00:32<01:22, 517.75frame/s]

Slide change detected at 380.00 seconds (frame 11400). SSIM: 0.5009


Frames Processed:  26%|██▋       | 14400/54355 [00:38<01:12, 551.65frame/s]

Slide change detected at 480.00 seconds (frame 14400). SSIM: 0.8587


Frames Processed:  33%|███▎      | 17700/54355 [00:47<01:37, 377.82frame/s]

Slide change detected at 590.00 seconds (frame 17700). SSIM: 0.8714


Frames Processed:  53%|█████▎    | 28800/54355 [01:08<00:53, 481.09frame/s]

Slide change detected at 960.00 seconds (frame 28800). SSIM: 0.8831


Frames Processed:  54%|█████▍    | 29400/54355 [01:10<01:00, 409.63frame/s]

Slide change detected at 980.00 seconds (frame 29400). SSIM: 0.7779


Frames Processed:  56%|█████▋    | 30600/54355 [01:12<00:48, 491.31frame/s]

Slide change detected at 1020.00 seconds (frame 30600). SSIM: 0.8188


Frames Processed:  68%|██████▊   | 36900/54355 [01:24<00:40, 427.18frame/s]

Slide change detected at 1230.00 seconds (frame 36900). SSIM: 0.8727


Frames Processed:  73%|███████▎  | 39600/54355 [01:29<00:26, 554.47frame/s]

Slide change detected at 1320.00 seconds (frame 39600). SSIM: 0.7710


Frames Processed:  76%|███████▌  | 41400/54355 [01:32<00:23, 556.39frame/s]

Slide change detected at 1380.00 seconds (frame 41400). SSIM: 0.8288


Frames Processed:  85%|████████▍ | 46200/54355 [01:42<00:14, 559.90frame/s]

Slide change detected at 1540.00 seconds (frame 46200). SSIM: 0.8130


Frames Processed:  94%|█████████▍| 51000/54355 [01:48<00:05, 653.17frame/s]

Slide change detected at 1700.00 seconds (frame 51000). SSIM: 0.7898


Frames Processed:  99%|█████████▉| 54055/54355 [01:54<00:00, 472.27frame/s]


Adding final slide change at end of video (1811.83 seconds).
Total slide changes detected: 19

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/19 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 20.00s
Transcribing audio for segment 0 from 0.00s to 20.00s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   5%|▌         | 1/19 [00:06<01:52,  6.27s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 20.00s to 80.00s
Transcribing audio for segment 1 from 20.00s to 80.00s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  11%|█         | 2/19 [00:20<03:05, 10.92s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 80.00s to 110.00s
Transcribing audio for segment 2 from 80.00s to 110.00s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  16%|█▌        | 3/19 [00:26<02:22,  8.89s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 110.00s to 130.00s
Transcribing audio for segment 3 from 110.00s to 130.00s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  21%|██        | 4/19 [00:33<01:57,  7.83s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 130.00s to 180.00s
Transcribing audio for segment 4 from 130.00s to 180.00s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  26%|██▋       | 5/19 [00:42<01:56,  8.33s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 180.00s to 270.00s
Transcribing audio for segment 5 from 180.00s to 270.00s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  32%|███▏      | 6/19 [00:53<02:01,  9.33s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 270.00s to 350.00s
Transcribing audio for segment 6 from 270.00s to 350.00s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  37%|███▋      | 7/19 [01:05<02:03, 10.27s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 350.00s to 380.00s
Transcribing audio for segment 7 from 350.00s to 380.00s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  42%|████▏     | 8/19 [01:13<01:42,  9.31s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 380.00s to 480.00s
Transcribing audio for segment 8 from 380.00s to 480.00s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  47%|████▋     | 9/19 [01:24<01:38,  9.89s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 480.00s to 590.00s
Transcribing audio for segment 9 from 480.00s to 590.00s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  53%|█████▎    | 10/19 [01:39<01:42, 11.43s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 590.00s to 960.00s
Transcribing audio for segment 10 from 590.00s to 960.00s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  58%|█████▊    | 11/19 [02:03<02:01, 15.25s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 960.00s to 980.00s
Transcribing audio for segment 11 from 960.00s to 980.00s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  63%|██████▎   | 12/19 [02:12<01:34, 13.48s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 980.00s to 1020.00s
Transcribing audio for segment 12 from 980.00s to 1020.00s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  68%|██████▊   | 13/19 [02:25<01:19, 13.27s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 1020.00s to 1230.00s
Transcribing audio for segment 13 from 1020.00s to 1230.00s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  74%|███████▎  | 14/19 [02:45<01:17, 15.42s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 1230.00s to 1320.00s
Transcribing audio for segment 14 from 1230.00s to 1320.00s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  79%|███████▉  | 15/19 [03:01<01:01, 15.43s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 1320.00s to 1380.00s
Transcribing audio for segment 15 from 1320.00s to 1380.00s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  84%|████████▍ | 16/19 [03:12<00:42, 14.24s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 1380.00s to 1540.00s
Transcribing audio for segment 16 from 1380.00s to 1540.00s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  89%|████████▉ | 17/19 [03:27<00:28, 14.39s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 1540.00s to 1700.00s
Transcribing audio for segment 17 from 1540.00s to 1700.00s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  95%|█████████▍| 18/19 [03:51<00:17, 17.33s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 1700.00s to 1811.83s
Transcribing audio for segment 18 from 1700.00s to 1811.83s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed: 100%|██████████| 19/19 [04:06<00:00, 12.96s/segment]

Summary generation for segment 18 completed.

Processing completed.





Document saved as /content/outputs/Building High Performance Teams - Teil 03.docx
Successfully processed: /content/Building High Performance Teams - Teil 03.mp4
Output saved as: /content/outputs/Building High Performance Teams - Teil 03.docx

Processing: /content/D Video Organizational Design Session 1 Ihre SWOT + HF Sätze Kohorte 1.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 15.437648736023062
Total frames: 3528
Video duration: 228.53 seconds
Frame interval: 154 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   4%|▍         | 154/3528 [00:00<00:17, 190.91frame/s]

Slide change detected at 9.98 seconds (frame 154). SSIM: 0.5835


Frames Processed:   9%|▊         | 308/3528 [00:01<00:13, 241.20frame/s]

Slide change detected at 19.95 seconds (frame 308). SSIM: 0.8655


Frames Processed:  13%|█▎        | 462/3528 [00:01<00:12, 243.96frame/s]

Slide change detected at 29.93 seconds (frame 462). SSIM: 0.8018


Frames Processed:  17%|█▋        | 616/3528 [00:02<00:11, 251.17frame/s]

Slide change detected at 39.90 seconds (frame 616). SSIM: 0.8669


Frames Processed:  22%|██▏       | 770/3528 [00:03<00:11, 238.71frame/s]

Slide change detected at 49.88 seconds (frame 770). SSIM: 0.5925


Frames Processed:  26%|██▌       | 924/3528 [00:03<00:11, 228.92frame/s]

Slide change detected at 59.85 seconds (frame 924). SSIM: 0.8497


Frames Processed:  31%|███       | 1078/3528 [00:04<00:10, 243.19frame/s]

Slide change detected at 69.83 seconds (frame 1078). SSIM: 0.6329


Frames Processed:  35%|███▍      | 1232/3528 [00:05<00:09, 243.34frame/s]

Slide change detected at 79.80 seconds (frame 1232). SSIM: 0.8936


Frames Processed:  39%|███▉      | 1386/3528 [00:05<00:08, 258.08frame/s]

Slide change detected at 89.78 seconds (frame 1386). SSIM: 0.5590


Frames Processed:  48%|████▊     | 1694/3528 [00:06<00:06, 283.21frame/s]

Slide change detected at 109.73 seconds (frame 1694). SSIM: 0.8764


Frames Processed:  52%|█████▏    | 1848/3528 [00:07<00:05, 304.85frame/s]

Slide change detected at 119.71 seconds (frame 1848). SSIM: 0.8605


Frames Processed:  61%|██████    | 2156/3528 [00:07<00:04, 342.31frame/s]

Slide change detected at 139.66 seconds (frame 2156). SSIM: 0.8693


Frames Processed:  65%|██████▌   | 2310/3528 [00:08<00:03, 345.43frame/s]

Slide change detected at 149.63 seconds (frame 2310). SSIM: 0.6330


Frames Processed:  70%|██████▉   | 2464/3528 [00:08<00:03, 306.18frame/s]

Slide change detected at 159.61 seconds (frame 2464). SSIM: 0.5989


Frames Processed:  74%|███████▍  | 2618/3528 [00:09<00:03, 295.00frame/s]

Slide change detected at 169.59 seconds (frame 2618). SSIM: 0.8456


Frames Processed:  79%|███████▊  | 2772/3528 [00:10<00:02, 252.07frame/s]

Slide change detected at 179.56 seconds (frame 2772). SSIM: 0.8007


Frames Processed:  83%|████████▎ | 2926/3528 [00:11<00:02, 227.47frame/s]

Slide change detected at 189.54 seconds (frame 2926). SSIM: 0.8710


Frames Processed:  87%|████████▋ | 3080/3528 [00:11<00:01, 231.08frame/s]

Slide change detected at 199.51 seconds (frame 3080). SSIM: 0.8535


Frames Processed:  96%|█████████▌| 3374/3528 [00:12<00:00, 261.27frame/s]

Slide change detected at 219.46 seconds (frame 3388). SSIM: 0.8814
Adding final slide change at end of video (228.53 seconds).
Total slide changes detected: 20

Step 2: Processing slides and audio...



Segments Processed:   0%|          | 0/20 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 9.98s
Transcribing audio for segment 0 from 0.00s to 9.98s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   5%|▌         | 1/20 [00:05<01:53,  5.97s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 9.98s to 19.95s
Transcribing audio for segment 1 from 9.98s to 19.95s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  10%|█         | 2/20 [00:11<01:39,  5.52s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 19.95s to 29.93s
Transcribing audio for segment 2 from 19.95s to 29.93s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  15%|█▌        | 3/20 [00:19<01:53,  6.66s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 29.93s to 39.90s
Transcribing audio for segment 3 from 29.93s to 39.90s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  20%|██        | 4/20 [00:23<01:31,  5.72s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 39.90s to 49.88s
Transcribing audio for segment 4 from 39.90s to 49.88s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  25%|██▌       | 5/20 [00:27<01:19,  5.28s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 49.88s to 59.85s
Transcribing audio for segment 5 from 49.88s to 59.85s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  30%|███       | 6/20 [00:33<01:14,  5.29s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 59.85s to 69.83s
Transcribing audio for segment 6 from 59.85s to 69.83s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  35%|███▌      | 7/20 [00:38<01:10,  5.43s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 69.83s to 79.80s
Transcribing audio for segment 7 from 69.83s to 79.80s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  40%|████      | 8/20 [00:45<01:07,  5.65s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 79.80s to 89.78s
Transcribing audio for segment 8 from 79.80s to 89.78s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  45%|████▌     | 9/20 [00:50<01:01,  5.60s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 89.78s to 109.73s
Transcribing audio for segment 9 from 89.78s to 109.73s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  50%|█████     | 10/20 [00:57<00:58,  5.88s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 109.73s to 119.71s
Transcribing audio for segment 10 from 109.73s to 119.71s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  55%|█████▌    | 11/20 [01:01<00:49,  5.50s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 119.71s to 139.66s
Transcribing audio for segment 11 from 119.71s to 139.66s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  60%|██████    | 12/20 [01:07<00:45,  5.69s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 139.66s to 149.63s
Transcribing audio for segment 12 from 139.66s to 149.63s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  65%|██████▌   | 13/20 [01:12<00:38,  5.51s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 149.63s to 159.61s
Transcribing audio for segment 13 from 149.63s to 159.61s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  70%|███████   | 14/20 [01:18<00:33,  5.61s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 159.61s to 169.59s
Transcribing audio for segment 14 from 159.61s to 169.59s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  75%|███████▌  | 15/20 [01:24<00:27,  5.59s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 169.59s to 179.56s
Transcribing audio for segment 15 from 169.59s to 179.56s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  80%|████████  | 16/20 [01:29<00:22,  5.56s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 179.56s to 189.54s
Transcribing audio for segment 16 from 179.56s to 189.54s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  85%|████████▌ | 17/20 [01:35<00:16,  5.50s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 189.54s to 199.51s
Transcribing audio for segment 17 from 189.54s to 199.51s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  90%|█████████ | 18/20 [01:41<00:11,  5.83s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 199.51s to 219.46s
Transcribing audio for segment 18 from 199.51s to 219.46s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  95%|█████████▌| 19/20 [01:47<00:05,  5.78s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 219.46s to 228.53s
Transcribing audio for segment 19 from 219.46s to 228.53s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed: 100%|██████████| 20/20 [01:53<00:00,  5.70s/segment]

Summary generation for segment 19 completed.

Processing completed.





Document saved as /content/outputs/D Video Organizational Design Session 1 Ihre SWOT + HF Sätze Kohorte 1.docx
Successfully processed: /content/D Video Organizational Design Session 1 Ihre SWOT + HF Sätze Kohorte 1.mp4
Output saved as: /content/outputs/D Video Organizational Design Session 1 Ihre SWOT + HF Sätze Kohorte 1.docx

Processing: /content/B Change Management_ Video 1 Einführung und meine Veränderung.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 29.81941867504418
Total frames: 27548
Video duration: 923.83 seconds
Frame interval: 298 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   3%|▎         | 894/27548 [00:00<00:19, 1369.66frame/s]

Slide change detected at 19.99 seconds (frame 596). SSIM: 0.4809


Frames Processed:   8%|▊         | 2086/27548 [00:01<00:16, 1516.63frame/s]

Slide change detected at 59.96 seconds (frame 1788). SSIM: 0.7356
Slide change detected at 69.95 seconds (frame 2086). SSIM: 0.5993


Frames Processed:  10%|▉         | 2682/27548 [00:01<00:15, 1560.04frame/s]

Slide change detected at 79.95 seconds (frame 2384). SSIM: 0.4505


Frames Processed:  14%|█▍        | 3874/27548 [00:02<00:14, 1628.41frame/s]

Slide change detected at 119.92 seconds (frame 3576). SSIM: 0.7880


Frames Processed:  18%|█▊        | 5066/27548 [00:03<00:14, 1570.44frame/s]

Slide change detected at 169.89 seconds (frame 5066). SSIM: 0.8404


Frames Processed:  25%|██▍       | 6854/27548 [00:04<00:13, 1554.66frame/s]

Slide change detected at 219.86 seconds (frame 6556). SSIM: 0.8968
Slide change detected at 229.85 seconds (frame 6854). SSIM: 0.8779


Frames Processed:  27%|██▋       | 7450/27548 [00:04<00:13, 1525.86frame/s]

Slide change detected at 249.84 seconds (frame 7450). SSIM: 0.6918


Frames Processed:  28%|██▊       | 7748/27548 [00:05<00:13, 1483.25frame/s]

Slide change detected at 259.83 seconds (frame 7748). SSIM: 0.6589


Frames Processed:  29%|██▉       | 8046/27548 [00:05<00:13, 1420.20frame/s]

Slide change detected at 269.82 seconds (frame 8046). SSIM: 0.5961


Frames Processed:  30%|███       | 8344/27548 [00:05<00:14, 1348.21frame/s]

Slide change detected at 279.82 seconds (frame 8344). SSIM: 0.8772


Frames Processed:  32%|███▏      | 8940/27548 [00:06<00:13, 1335.32frame/s]

Slide change detected at 299.80 seconds (frame 8940). SSIM: 0.8952


Frames Processed:  34%|███▎      | 9238/27548 [00:06<00:14, 1220.90frame/s]

Slide change detected at 309.80 seconds (frame 9238). SSIM: 0.8932


Frames Processed:  36%|███▌      | 9834/27548 [00:06<00:15, 1126.67frame/s]

Slide change detected at 329.79 seconds (frame 9834). SSIM: 0.8829


Frames Processed:  37%|███▋      | 10132/27548 [00:07<00:15, 1106.67frame/s]

Slide change detected at 339.78 seconds (frame 10132). SSIM: 0.8780


Frames Processed:  39%|███▉      | 10728/27548 [00:07<00:14, 1133.61frame/s]

Slide change detected at 359.77 seconds (frame 10728). SSIM: 0.8412


Frames Processed:  40%|████      | 11026/27548 [00:07<00:14, 1142.51frame/s]

Slide change detected at 369.76 seconds (frame 11026). SSIM: 0.7703


Frames Processed:  41%|████      | 11324/27548 [00:08<00:15, 1029.02frame/s]

Slide change detected at 379.75 seconds (frame 11324). SSIM: 0.6238


Frames Processed:  55%|█████▌    | 15198/27548 [00:11<00:08, 1508.09frame/s]

Slide change detected at 499.67 seconds (frame 14900). SSIM: 0.7818


Frames Processed:  65%|██████▍   | 17880/27548 [00:12<00:06, 1537.35frame/s]

Slide change detected at 589.62 seconds (frame 17582). SSIM: 0.4264
Slide change detected at 599.61 seconds (frame 17880). SSIM: 0.4473


Frames Processed:  69%|██████▉   | 19072/27548 [00:13<00:05, 1585.67frame/s]

Slide change detected at 629.59 seconds (frame 18774). SSIM: 0.5712


Frames Processed:  70%|███████   | 19370/27548 [00:13<00:05, 1612.86frame/s]

Slide change detected at 649.58 seconds (frame 19370). SSIM: 0.4291


Frames Processed:  72%|███████▏  | 19966/27548 [00:14<00:05, 1444.04frame/s]

Slide change detected at 669.56 seconds (frame 19966). SSIM: 0.4659


Frames Processed:  76%|███████▌  | 20860/27548 [00:14<00:04, 1363.48frame/s]

Slide change detected at 699.54 seconds (frame 20860). SSIM: 0.4222


Frames Processed:  81%|████████  | 22350/27548 [00:15<00:03, 1528.50frame/s]

Slide change detected at 739.52 seconds (frame 22052). SSIM: 0.6233


Frames Processed:  85%|████████▌ | 23542/27548 [00:16<00:02, 1666.94frame/s]

Slide change detected at 779.49 seconds (frame 23244). SSIM: 0.7166


Frames Processed:  87%|████████▋ | 23840/27548 [00:16<00:02, 1518.01frame/s]

Slide change detected at 799.48 seconds (frame 23840). SSIM: 0.6428


Frames Processed:  88%|████████▊ | 24138/27548 [00:17<00:02, 1372.55frame/s]

Slide change detected at 809.47 seconds (frame 24138). SSIM: 0.3884


Frames Processed:  91%|█████████ | 25032/27548 [00:17<00:01, 1313.19frame/s]

Slide change detected at 839.45 seconds (frame 25032). SSIM: 0.4116


Frames Processed:  92%|█████████▏| 25330/27548 [00:18<00:01, 1325.41frame/s]

Slide change detected at 849.45 seconds (frame 25330). SSIM: 0.5548


Frames Processed:  93%|█████████▎| 25628/27548 [00:18<00:01, 1351.47frame/s]

Slide change detected at 859.44 seconds (frame 25628). SSIM: 0.6158


Frames Processed:  97%|█████████▋| 26820/27548 [00:19<00:00, 1444.20frame/s]

Slide change detected at 889.42 seconds (frame 26522). SSIM: 0.5347


Frames Processed:  99%|█████████▉| 27250/27548 [00:19<00:00, 1400.81frame/s]


Slide change detected at 919.40 seconds (frame 27416). SSIM: 0.4379
Adding final slide change at end of video (923.83 seconds).
Total slide changes detected: 36

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/36 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 19.99s
Transcribing audio for segment 0 from 0.00s to 19.99s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   3%|▎         | 1/36 [00:11<06:27, 11.06s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 19.99s to 59.96s
Transcribing audio for segment 1 from 19.99s to 59.96s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:   6%|▌         | 2/36 [00:18<05:06,  9.00s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 59.96s to 69.95s
Transcribing audio for segment 2 from 59.96s to 69.95s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:   8%|▊         | 3/36 [00:25<04:30,  8.20s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 69.95s to 79.95s
Transcribing audio for segment 3 from 69.95s to 79.95s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  11%|█         | 4/36 [00:33<04:10,  7.83s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 79.95s to 119.92s
Transcribing audio for segment 4 from 79.95s to 119.92s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  14%|█▍        | 5/36 [00:42<04:25,  8.56s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 119.92s to 169.89s
Transcribing audio for segment 5 from 119.92s to 169.89s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  17%|█▋        | 6/36 [00:53<04:32,  9.09s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 169.89s to 219.86s
Transcribing audio for segment 6 from 169.89s to 219.86s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  19%|█▉        | 7/36 [01:06<05:03, 10.46s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 219.86s to 229.85s
Transcribing audio for segment 7 from 219.86s to 229.85s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  22%|██▏       | 8/36 [01:13<04:22,  9.39s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 229.85s to 249.84s
Transcribing audio for segment 8 from 229.85s to 249.84s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  25%|██▌       | 9/36 [01:20<03:51,  8.56s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 249.84s to 259.83s
Transcribing audio for segment 9 from 249.84s to 259.83s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  28%|██▊       | 10/36 [01:26<03:25,  7.89s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 259.83s to 269.82s
Transcribing audio for segment 10 from 259.83s to 269.82s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  31%|███       | 11/36 [01:32<03:05,  7.42s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 269.82s to 279.82s
Transcribing audio for segment 11 from 269.82s to 279.82s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  33%|███▎      | 12/36 [01:38<02:42,  6.79s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 279.82s to 299.80s
Transcribing audio for segment 12 from 279.82s to 299.80s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  36%|███▌      | 13/36 [01:44<02:34,  6.70s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 299.80s to 309.80s
Transcribing audio for segment 13 from 299.80s to 309.80s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  39%|███▉      | 14/36 [01:50<02:23,  6.52s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 309.80s to 329.79s
Transcribing audio for segment 14 from 309.80s to 329.79s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  42%|████▏     | 15/36 [01:58<02:25,  6.94s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 329.79s to 339.78s
Transcribing audio for segment 15 from 329.79s to 339.78s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  44%|████▍     | 16/36 [02:04<02:13,  6.69s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 339.78s to 359.77s
Transcribing audio for segment 16 from 339.78s to 359.77s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  47%|████▋     | 17/36 [02:13<02:19,  7.36s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 359.77s to 369.76s
Transcribing audio for segment 17 from 359.77s to 369.76s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  50%|█████     | 18/36 [02:21<02:16,  7.59s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 369.76s to 379.75s
Transcribing audio for segment 18 from 369.76s to 379.75s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  53%|█████▎    | 19/36 [02:29<02:08,  7.57s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 379.75s to 499.67s
Transcribing audio for segment 19 from 379.75s to 499.67s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  56%|█████▌    | 20/36 [02:47<02:53, 10.84s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 499.67s to 589.62s
Transcribing audio for segment 20 from 499.67s to 589.62s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed:  58%|█████▊    | 21/36 [03:04<03:08, 12.58s/segment]

Summary generation for segment 20 completed.

Processing segment 21: 589.62s to 599.61s
Transcribing audio for segment 21 from 589.62s to 599.61s...
Transcription for segment 21 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 21...


Segments Processed:  61%|██████    | 22/36 [03:11<02:33, 10.97s/segment]

Summary generation for segment 21 completed.

Processing segment 22: 599.61s to 629.59s
Transcribing audio for segment 22 from 599.61s to 629.59s...
Transcription for segment 22 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 22...


Segments Processed:  64%|██████▍   | 23/36 [03:24<02:30, 11.56s/segment]

Summary generation for segment 22 completed.

Processing segment 23: 629.59s to 649.58s
Transcribing audio for segment 23 from 629.59s to 649.58s...
Transcription for segment 23 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 23...


Segments Processed:  67%|██████▋   | 24/36 [03:33<02:08, 10.72s/segment]

Summary generation for segment 23 completed.

Processing segment 24: 649.58s to 669.56s
Transcribing audio for segment 24 from 649.58s to 669.56s...
Transcription for segment 24 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 24...


Segments Processed:  69%|██████▉   | 25/36 [03:40<01:47,  9.75s/segment]

Summary generation for segment 24 completed.

Processing segment 25: 669.56s to 699.54s
Transcribing audio for segment 25 from 669.56s to 699.54s...
Transcription for segment 25 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 25...


Segments Processed:  72%|███████▏  | 26/36 [03:48<01:30,  9.08s/segment]

Summary generation for segment 25 completed.

Processing segment 26: 699.54s to 739.52s
Transcribing audio for segment 26 from 699.54s to 739.52s...
Transcription for segment 26 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 26...


Segments Processed:  75%|███████▌  | 27/36 [03:57<01:22,  9.19s/segment]

Summary generation for segment 26 completed.

Processing segment 27: 739.52s to 779.49s
Transcribing audio for segment 27 from 739.52s to 779.49s...
Transcription for segment 27 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 27...


Segments Processed:  78%|███████▊  | 28/36 [04:09<01:18,  9.82s/segment]

Summary generation for segment 27 completed.

Processing segment 28: 779.49s to 799.48s
Transcribing audio for segment 28 from 779.49s to 799.48s...
Transcription for segment 28 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 28...


Segments Processed:  81%|████████  | 29/36 [04:16<01:02,  8.95s/segment]

Summary generation for segment 28 completed.

Processing segment 29: 799.48s to 809.47s
Transcribing audio for segment 29 from 799.48s to 809.47s...
Transcription for segment 29 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 29...


Segments Processed:  83%|████████▎ | 30/36 [04:23<00:50,  8.48s/segment]

Summary generation for segment 29 completed.

Processing segment 30: 809.47s to 839.45s
Transcribing audio for segment 30 from 809.47s to 839.45s...
Transcription for segment 30 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 30...


Segments Processed:  86%|████████▌ | 31/36 [04:31<00:42,  8.43s/segment]

Summary generation for segment 30 completed.

Processing segment 31: 839.45s to 849.45s
Transcribing audio for segment 31 from 839.45s to 849.45s...
Transcription for segment 31 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 31...


Segments Processed:  89%|████████▉ | 32/36 [04:40<00:33,  8.40s/segment]

Summary generation for segment 31 completed.

Processing segment 32: 849.45s to 859.44s
Transcribing audio for segment 32 from 849.45s to 859.44s...
Transcription for segment 32 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 32...


Segments Processed:  92%|█████████▏| 33/36 [04:50<00:26,  8.89s/segment]

Summary generation for segment 32 completed.

Processing segment 33: 859.44s to 889.42s
Transcribing audio for segment 33 from 859.44s to 889.42s...
Transcription for segment 33 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 33...


Segments Processed:  94%|█████████▍| 34/36 [04:59<00:17,  8.97s/segment]

Summary generation for segment 33 completed.

Processing segment 34: 889.42s to 919.40s
Transcribing audio for segment 34 from 889.42s to 919.40s...
Transcription for segment 34 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 34...


Segments Processed:  97%|█████████▋| 35/36 [05:07<00:08,  8.68s/segment]

Summary generation for segment 34 completed.

Processing segment 35: 919.40s to 923.83s
Transcribing audio for segment 35 from 919.40s to 923.83s...
Transcription for segment 35 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 35...


Segments Processed: 100%|██████████| 36/36 [05:13<00:00,  8.70s/segment]

Summary generation for segment 35 completed.

Processing completed.





Document saved as /content/outputs/B Change Management_ Video 1 Einführung und meine Veränderung.docx
Successfully processed: /content/B Change Management_ Video 1 Einführung und meine Veränderung.mp4
Output saved as: /content/outputs/B Change Management_ Video 1 Einführung und meine Veränderung.docx

Processing: /content/Managing High-Performance Teams - Teil 3.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 30.0
Total frames: 22211
Video duration: 740.37 seconds
Frame interval: 300 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|▏         | 300/22211 [00:00<00:57, 383.83frame/s]

Slide change detected at 10.00 seconds (frame 300). SSIM: 0.5468


Frames Processed:  11%|█         | 2400/22211 [00:04<00:38, 508.55frame/s]

Slide change detected at 80.00 seconds (frame 2400). SSIM: 0.8356


Frames Processed:  26%|██▌       | 5700/22211 [00:13<00:51, 318.40frame/s]

Slide change detected at 190.00 seconds (frame 5700). SSIM: 0.8103


Frames Processed:  31%|███       | 6900/22211 [00:16<00:40, 376.67frame/s]

Slide change detected at 230.00 seconds (frame 6900). SSIM: 0.8626


Frames Processed:  39%|███▉      | 8700/22211 [00:21<00:41, 324.04frame/s]

Slide change detected at 290.00 seconds (frame 8700). SSIM: 0.8588


Frames Processed:  55%|█████▌    | 12300/22211 [00:31<00:24, 401.11frame/s]

Slide change detected at 410.00 seconds (frame 12300). SSIM: 0.8508


Frames Processed:  76%|███████▌  | 16800/22211 [00:43<00:13, 398.22frame/s]

Slide change detected at 560.00 seconds (frame 16800). SSIM: 0.8865


Frames Processed:  85%|████████▌ | 18900/22211 [00:49<00:09, 355.17frame/s]

Slide change detected at 630.00 seconds (frame 18900). SSIM: 0.8073


Frames Processed:  95%|█████████▍| 21000/22211 [00:52<00:01, 689.89frame/s]

Slide change detected at 700.00 seconds (frame 21000). SSIM: 0.8646


Frames Processed:  99%|█████████▊| 21911/22211 [00:53<00:00, 406.88frame/s]


Adding final slide change at end of video (740.37 seconds).
Total slide changes detected: 10

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/10 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 10.00s
Transcribing audio for segment 0 from 0.00s to 10.00s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:  10%|█         | 1/10 [00:06<00:58,  6.46s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 10.00s to 80.00s
Transcribing audio for segment 1 from 10.00s to 80.00s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  20%|██        | 2/10 [00:16<01:07,  8.45s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 80.00s to 190.00s
Transcribing audio for segment 2 from 80.00s to 190.00s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  30%|███       | 3/10 [00:29<01:14, 10.67s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 190.00s to 230.00s
Transcribing audio for segment 3 from 190.00s to 230.00s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  40%|████      | 4/10 [00:38<01:00, 10.00s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 230.00s to 290.00s
Transcribing audio for segment 4 from 230.00s to 290.00s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  50%|█████     | 5/10 [00:47<00:47,  9.50s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 290.00s to 410.00s
Transcribing audio for segment 5 from 290.00s to 410.00s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  60%|██████    | 6/10 [01:00<00:42, 10.72s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 410.00s to 560.00s
Transcribing audio for segment 6 from 410.00s to 560.00s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  70%|███████   | 7/10 [01:13<00:34, 11.52s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 560.00s to 630.00s
Transcribing audio for segment 7 from 560.00s to 630.00s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  80%|████████  | 8/10 [01:24<00:22, 11.29s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 630.00s to 700.00s
Transcribing audio for segment 8 from 630.00s to 700.00s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  90%|█████████ | 9/10 [01:36<00:11, 11.62s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 700.00s to 740.37s
Transcribing audio for segment 9 from 700.00s to 740.37s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed: 100%|██████████| 10/10 [01:46<00:00, 10.61s/segment]

Summary generation for segment 9 completed.

Processing completed.





Document saved as /content/outputs/Managing High-Performance Teams - Teil 3.docx
Successfully processed: /content/Managing High-Performance Teams - Teil 3.mp4
Output saved as: /content/outputs/Managing High-Performance Teams - Teil 3.docx

Processing: /content/05 Zufriedenheit und Bindung.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 30.0
Total frames: 62358
Video duration: 2078.60 seconds
Frame interval: 300 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|          | 600/62358 [00:03<05:26, 189.19frame/s]

Slide change detected at 20.00 seconds (frame 600). SSIM: 0.5170


Frames Processed:   2%|▏         | 1200/62358 [00:06<05:36, 181.96frame/s]

Slide change detected at 40.00 seconds (frame 1200). SSIM: 0.7091


Frames Processed:   3%|▎         | 2100/62358 [00:10<04:33, 220.35frame/s]

Slide change detected at 70.00 seconds (frame 2100). SSIM: 0.7927


Frames Processed:  12%|█▏        | 7200/62358 [00:34<04:39, 197.07frame/s]

Slide change detected at 240.00 seconds (frame 7200). SSIM: 0.7663


Frames Processed:  13%|█▎        | 8400/62358 [00:39<04:07, 218.27frame/s]

Slide change detected at 280.00 seconds (frame 8400). SSIM: 0.7665


Frames Processed:  14%|█▍        | 8700/62358 [00:40<04:02, 221.05frame/s]

Slide change detected at 290.00 seconds (frame 8700). SSIM: 0.7671


Frames Processed:  17%|█▋        | 10500/62358 [00:49<04:00, 215.85frame/s]

Slide change detected at 350.00 seconds (frame 10500). SSIM: 0.6411


Frames Processed:  29%|██▉       | 18000/62358 [01:22<03:31, 209.30frame/s]

Slide change detected at 600.00 seconds (frame 18000). SSIM: 0.6195


Frames Processed:  40%|███▉      | 24900/62358 [01:54<02:55, 213.22frame/s]

Slide change detected at 830.00 seconds (frame 24900). SSIM: 0.7076


Frames Processed:  50%|█████     | 31200/62358 [02:24<02:18, 224.63frame/s]

Slide change detected at 1040.00 seconds (frame 31200). SSIM: 0.6860


Frames Processed:  58%|█████▊    | 36300/62358 [02:47<02:01, 213.64frame/s]

Slide change detected at 1210.00 seconds (frame 36300). SSIM: 0.7240


Frames Processed:  67%|██████▋   | 41700/62358 [03:12<01:46, 193.45frame/s]

Slide change detected at 1390.00 seconds (frame 41700). SSIM: 0.8109


Frames Processed:  70%|██████▉   | 43500/62358 [03:19<01:19, 236.28frame/s]

Slide change detected at 1450.00 seconds (frame 43500). SSIM: 0.8481


Frames Processed:  73%|███████▎  | 45600/62358 [03:29<01:13, 229.06frame/s]

Slide change detected at 1520.00 seconds (frame 45600). SSIM: 0.8481


Frames Processed:  82%|████████▏ | 51000/62358 [03:53<00:54, 209.22frame/s]

Slide change detected at 1700.00 seconds (frame 51000). SSIM: 0.8466


Frames Processed:  89%|████████▉ | 55500/62358 [04:12<00:27, 246.13frame/s]

Slide change detected at 1850.00 seconds (frame 55500). SSIM: 0.8907


Frames Processed:  89%|████████▉ | 55800/62358 [04:13<00:26, 243.72frame/s]

Slide change detected at 1860.00 seconds (frame 55800). SSIM: 0.8969


Frames Processed:  99%|█████████▊| 61500/62358 [04:38<00:03, 243.06frame/s]

Slide change detected at 2050.00 seconds (frame 61500). SSIM: 0.7937


Frames Processed: 100%|█████████▉| 62058/62358 [04:41<00:01, 220.81frame/s]


Adding final slide change at end of video (2078.60 seconds).
Total slide changes detected: 19

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/19 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 20.00s
Transcribing audio for segment 0 from 0.00s to 20.00s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   5%|▌         | 1/19 [00:06<01:48,  6.02s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 20.00s to 40.00s
Transcribing audio for segment 1 from 20.00s to 40.00s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  11%|█         | 2/19 [00:12<01:48,  6.40s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 40.00s to 70.00s
Transcribing audio for segment 2 from 40.00s to 70.00s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  16%|█▌        | 3/19 [00:19<01:45,  6.58s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 70.00s to 240.00s
Transcribing audio for segment 3 from 70.00s to 240.00s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  21%|██        | 4/19 [00:32<02:17,  9.17s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 240.00s to 280.00s
Transcribing audio for segment 4 from 240.00s to 280.00s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  26%|██▋       | 5/19 [00:39<01:58,  8.45s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 280.00s to 290.00s
Transcribing audio for segment 5 from 280.00s to 290.00s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  32%|███▏      | 6/19 [00:47<01:47,  8.27s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 290.00s to 350.00s
Transcribing audio for segment 6 from 290.00s to 350.00s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  37%|███▋      | 7/19 [00:58<01:50,  9.21s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 350.00s to 600.00s
Transcribing audio for segment 7 from 350.00s to 600.00s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  42%|████▏     | 8/19 [01:17<02:15, 12.36s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 600.00s to 830.00s
Transcribing audio for segment 8 from 600.00s to 830.00s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  47%|████▋     | 9/19 [01:34<02:17, 13.78s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 830.00s to 1040.00s
Transcribing audio for segment 9 from 830.00s to 1040.00s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  53%|█████▎    | 10/19 [01:56<02:26, 16.26s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 1040.00s to 1210.00s
Transcribing audio for segment 10 from 1040.00s to 1210.00s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  58%|█████▊    | 11/19 [02:12<02:09, 16.20s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 1210.00s to 1390.00s
Transcribing audio for segment 11 from 1210.00s to 1390.00s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  63%|██████▎   | 12/19 [02:29<01:53, 16.27s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 1390.00s to 1450.00s
Transcribing audio for segment 12 from 1390.00s to 1450.00s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  68%|██████▊   | 13/19 [02:39<01:27, 14.51s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 1450.00s to 1520.00s
Transcribing audio for segment 13 from 1450.00s to 1520.00s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  74%|███████▎  | 14/19 [02:52<01:09, 13.97s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 1520.00s to 1700.00s
Transcribing audio for segment 14 from 1520.00s to 1700.00s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  79%|███████▉  | 15/19 [03:12<01:02, 15.71s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 1700.00s to 1850.00s
Transcribing audio for segment 15 from 1700.00s to 1850.00s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  84%|████████▍ | 16/19 [03:32<00:51, 17.13s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 1850.00s to 1860.00s
Transcribing audio for segment 16 from 1850.00s to 1860.00s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  89%|████████▉ | 17/19 [03:42<00:29, 14.90s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 1860.00s to 2050.00s
Transcribing audio for segment 17 from 1860.00s to 2050.00s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  95%|█████████▍| 18/19 [04:01<00:16, 16.28s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 2050.00s to 2078.60s
Transcribing audio for segment 18 from 2050.00s to 2078.60s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed: 100%|██████████| 19/19 [04:11<00:00, 13.22s/segment]

Summary generation for segment 18 completed.

Processing completed.





Document saved as /content/outputs/05 Zufriedenheit und Bindung.docx
Successfully processed: /content/05 Zufriedenheit und Bindung.mp4
Output saved as: /content/outputs/05 Zufriedenheit und Bindung.docx

Processing: /content/Basic Finance - Teil 04.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 30.0
Total frames: 23934
Video duration: 797.80 seconds
Frame interval: 300 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:  10%|█         | 2400/23934 [00:09<01:12, 296.15frame/s]

Slide change detected at 80.00 seconds (frame 2400). SSIM: 0.5470


Frames Processed:  13%|█▎        | 3000/23934 [00:10<00:57, 364.70frame/s]

Slide change detected at 100.00 seconds (frame 3000). SSIM: 0.8766


Frames Processed:  21%|██▏       | 5100/23934 [00:13<00:31, 598.80frame/s]

Slide change detected at 170.00 seconds (frame 5100). SSIM: 0.8929


Frames Processed:  38%|███▊      | 9000/23934 [00:21<00:25, 594.79frame/s]

Slide change detected at 300.00 seconds (frame 9000). SSIM: 0.8754


Frames Processed:  59%|█████▉    | 14100/23934 [00:30<00:21, 453.39frame/s]

Slide change detected at 470.00 seconds (frame 14100). SSIM: 0.8407


Frames Processed:  70%|███████   | 16800/23934 [00:35<00:11, 604.34frame/s]

Slide change detected at 560.00 seconds (frame 16800). SSIM: 0.8067


Frames Processed:  85%|████████▌ | 20400/23934 [00:40<00:05, 629.14frame/s]

Slide change detected at 680.00 seconds (frame 20400). SSIM: 0.5102


Frames Processed:  88%|████████▊ | 21000/23934 [00:43<00:07, 380.07frame/s]

Slide change detected at 700.00 seconds (frame 21000). SSIM: 0.5199


Frames Processed:  90%|█████████ | 21600/23934 [00:45<00:06, 341.95frame/s]

Slide change detected at 720.00 seconds (frame 21600). SSIM: 0.8722


Frames Processed:  99%|█████████▊| 23634/23934 [00:49<00:00, 475.28frame/s]


Adding final slide change at end of video (797.80 seconds).
Total slide changes detected: 10

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/10 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 80.00s
Transcribing audio for segment 0 from 0.00s to 80.00s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:  10%|█         | 1/10 [00:11<01:45, 11.72s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 80.00s to 100.00s
Transcribing audio for segment 1 from 80.00s to 100.00s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  20%|██        | 2/10 [00:19<01:15,  9.47s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 100.00s to 170.00s
Transcribing audio for segment 2 from 100.00s to 170.00s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  30%|███       | 3/10 [00:31<01:14, 10.66s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 170.00s to 300.00s
Transcribing audio for segment 3 from 170.00s to 300.00s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  40%|████      | 4/10 [00:47<01:17, 12.86s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 300.00s to 470.00s
Transcribing audio for segment 4 from 300.00s to 470.00s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  50%|█████     | 5/10 [01:02<01:07, 13.41s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 470.00s to 560.00s
Transcribing audio for segment 5 from 470.00s to 560.00s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  60%|██████    | 6/10 [01:14<00:52, 13.13s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 560.00s to 680.00s
Transcribing audio for segment 6 from 560.00s to 680.00s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  70%|███████   | 7/10 [01:30<00:41, 13.93s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 680.00s to 700.00s
Transcribing audio for segment 7 from 680.00s to 700.00s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  80%|████████  | 8/10 [01:38<00:24, 12.08s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 700.00s to 720.00s
Transcribing audio for segment 8 from 700.00s to 720.00s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  90%|█████████ | 9/10 [01:46<00:10, 10.89s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 720.00s to 797.80s
Transcribing audio for segment 9 from 720.00s to 797.80s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed: 100%|██████████| 10/10 [01:57<00:00, 11.75s/segment]

Summary generation for segment 9 completed.

Processing completed.





Document saved as /content/outputs/Basic Finance - Teil 04.docx
Successfully processed: /content/Basic Finance - Teil 04.mp4
Output saved as: /content/outputs/Basic Finance - Teil 04.docx

Processing: /content/B Video Organizational Design： Teil 3 Reinventing Organizations.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 29.98659560851842
Total frames: 27255
Video duration: 908.91 seconds
Frame interval: 299 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   2%|▏         | 598/27255 [00:00<00:20, 1305.35frame/s]

Slide change detected at 19.94 seconds (frame 598). SSIM: 0.5002


Frames Processed:   5%|▌         | 1495/27255 [00:01<00:21, 1173.16frame/s]

Slide change detected at 49.86 seconds (frame 1495). SSIM: 0.7867


Frames Processed:  15%|█▌        | 4186/27255 [00:03<00:22, 1006.69frame/s]

Slide change detected at 139.60 seconds (frame 4186). SSIM: 0.7983


Frames Processed:  21%|██        | 5681/27255 [00:04<00:17, 1252.23frame/s]

Slide change detected at 189.45 seconds (frame 5681). SSIM: 0.8869


Frames Processed:  24%|██▍       | 6578/27255 [00:05<00:15, 1315.46frame/s]

Slide change detected at 219.36 seconds (frame 6578). SSIM: 0.8607


Frames Processed:  25%|██▌       | 6877/27255 [00:05<00:15, 1327.30frame/s]

Slide change detected at 229.34 seconds (frame 6877). SSIM: 0.5643


Frames Processed:  33%|███▎      | 8970/27255 [00:07<00:12, 1422.98frame/s]

Slide change detected at 299.13 seconds (frame 8970). SSIM: 0.4446


Frames Processed:  42%|████▏     | 11362/27255 [00:08<00:10, 1553.39frame/s]

Slide change detected at 368.93 seconds (frame 11063). SSIM: 0.7271


Frames Processed:  46%|████▌     | 12558/27255 [00:09<00:09, 1488.50frame/s]

Slide change detected at 418.79 seconds (frame 12558). SSIM: 0.7579


Frames Processed:  49%|████▉     | 13455/27255 [00:10<00:09, 1395.62frame/s]

Slide change detected at 448.70 seconds (frame 13455). SSIM: 0.8265


Frames Processed:  59%|█████▉    | 16146/27255 [00:12<00:07, 1435.81frame/s]

Slide change detected at 528.47 seconds (frame 15847). SSIM: 0.7567


Frames Processed:  66%|██████▌   | 17940/27255 [00:13<00:06, 1460.64frame/s]

Slide change detected at 588.30 seconds (frame 17641). SSIM: 0.7217
Slide change detected at 598.27 seconds (frame 17940). SSIM: 0.6456


Frames Processed:  72%|███████▏  | 19734/27255 [00:14<00:06, 1228.20frame/s]

Slide change detected at 658.09 seconds (frame 19734). SSIM: 0.7015


Frames Processed:  78%|███████▊  | 21229/27255 [00:16<00:05, 1194.14frame/s]

Slide change detected at 707.95 seconds (frame 21229). SSIM: 0.5993


Frames Processed:  94%|█████████▍| 25714/27255 [00:19<00:01, 1427.13frame/s]

Slide change detected at 857.52 seconds (frame 25714). SSIM: 0.5765


Frames Processed:  99%|█████████▉| 26956/27255 [00:20<00:00, 1300.04frame/s]

Slide change detected at 907.37 seconds (frame 27209). SSIM: 0.3914
Adding final slide change at end of video (908.91 seconds).
Total slide changes detected: 18

Step 2: Processing slides and audio...



Segments Processed:   0%|          | 0/18 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 19.94s
Transcribing audio for segment 0 from 0.00s to 19.94s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   6%|▌         | 1/18 [00:07<02:11,  7.76s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 19.94s to 49.86s
Transcribing audio for segment 1 from 19.94s to 49.86s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  11%|█         | 2/18 [00:17<02:18,  8.66s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 49.86s to 139.60s
Transcribing audio for segment 2 from 49.86s to 139.60s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  17%|█▋        | 3/18 [00:28<02:29,  9.96s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 139.60s to 189.45s
Transcribing audio for segment 3 from 139.60s to 189.45s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  22%|██▏       | 4/18 [00:36<02:10,  9.31s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 189.45s to 219.36s
Transcribing audio for segment 4 from 189.45s to 219.36s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  28%|██▊       | 5/18 [00:45<01:57,  9.05s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 219.36s to 229.34s
Transcribing audio for segment 5 from 219.36s to 229.34s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  33%|███▎      | 6/18 [00:55<01:54,  9.51s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 229.34s to 299.13s
Transcribing audio for segment 6 from 229.34s to 299.13s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  39%|███▉      | 7/18 [01:05<01:46,  9.64s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 299.13s to 368.93s
Transcribing audio for segment 7 from 299.13s to 368.93s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  44%|████▍     | 8/18 [01:15<01:36,  9.64s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 368.93s to 418.79s
Transcribing audio for segment 8 from 368.93s to 418.79s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  50%|█████     | 9/18 [01:27<01:34, 10.45s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 418.79s to 448.70s
Transcribing audio for segment 9 from 418.79s to 448.70s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  56%|█████▌    | 10/18 [01:38<01:24, 10.51s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 448.70s to 528.47s
Transcribing audio for segment 10 from 448.70s to 528.47s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  61%|██████    | 11/18 [01:49<01:15, 10.72s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 528.47s to 588.30s
Transcribing audio for segment 11 from 528.47s to 588.30s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  67%|██████▋   | 12/18 [01:58<01:00, 10.05s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 588.30s to 598.27s
Transcribing audio for segment 12 from 588.30s to 598.27s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  72%|███████▏  | 13/18 [02:05<00:45,  9.13s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 598.27s to 658.09s
Transcribing audio for segment 13 from 598.27s to 658.09s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  78%|███████▊  | 14/18 [02:15<00:37,  9.50s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 658.09s to 707.95s
Transcribing audio for segment 14 from 658.09s to 707.95s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  83%|████████▎ | 15/18 [02:26<00:30, 10.12s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 707.95s to 857.52s
Transcribing audio for segment 15 from 707.95s to 857.52s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  89%|████████▉ | 16/18 [02:43<00:24, 12.12s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 857.52s to 907.37s
Transcribing audio for segment 16 from 857.52s to 907.37s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  94%|█████████▍| 17/18 [02:53<00:11, 11.49s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 907.37s to 908.91s
Transcribing audio for segment 17 from 907.37s to 908.91s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed: 100%|██████████| 18/18 [02:59<00:00,  9.99s/segment]

Summary generation for segment 17 completed.

Processing completed.





Document saved as /content/outputs/B Video Organizational Design： Teil 3 Reinventing Organizations.docx
Successfully processed: /content/B Video Organizational Design： Teil 3 Reinventing Organizations.mp4
Output saved as: /content/outputs/B Video Organizational Design： Teil 3 Reinventing Organizations.docx

Processing: /content/06 Zusammenfassung.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 30.0
Total frames: 18184
Video duration: 606.13 seconds
Frame interval: 300 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   3%|▎         | 600/18184 [00:01<00:54, 322.14frame/s]

Slide change detected at 20.00 seconds (frame 600). SSIM: 0.5332


Frames Processed:  15%|█▍        | 2700/18184 [00:05<00:29, 523.35frame/s]

Slide change detected at 90.00 seconds (frame 2700). SSIM: 0.6995


Frames Processed:  48%|████▊     | 8700/18184 [00:18<00:17, 536.96frame/s]

Slide change detected at 290.00 seconds (frame 8700). SSIM: 0.8220


Frames Processed:  66%|██████▌   | 12000/18184 [00:25<00:15, 387.80frame/s]

Slide change detected at 400.00 seconds (frame 12000). SSIM: 0.7794


Frames Processed:  86%|████████▌ | 15600/18184 [00:32<00:06, 419.93frame/s]

Slide change detected at 520.00 seconds (frame 15600). SSIM: 0.8074


Frames Processed:  98%|█████████▊| 17884/18184 [00:38<00:00, 460.62frame/s]

Slide change detected at 600.00 seconds (frame 18000). SSIM: 0.6141
Adding final slide change at end of video (606.13 seconds).
Total slide changes detected: 7

Step 2: Processing slides and audio...



Segments Processed:   0%|          | 0/7 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 20.00s
Transcribing audio for segment 0 from 0.00s to 20.00s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:  14%|█▍        | 1/7 [00:06<00:38,  6.43s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 20.00s to 90.00s
Transcribing audio for segment 1 from 20.00s to 90.00s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  29%|██▊       | 2/7 [00:16<00:41,  8.34s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 90.00s to 290.00s
Transcribing audio for segment 2 from 90.00s to 290.00s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  43%|████▎     | 3/7 [00:29<00:42, 10.51s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 290.00s to 400.00s
Transcribing audio for segment 3 from 290.00s to 400.00s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  57%|█████▋    | 4/7 [00:42<00:34, 11.62s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 400.00s to 520.00s
Transcribing audio for segment 4 from 400.00s to 520.00s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  71%|███████▏  | 5/7 [00:53<00:22, 11.45s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 520.00s to 600.00s
Transcribing audio for segment 5 from 520.00s to 600.00s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  86%|████████▌ | 6/7 [01:03<00:10, 10.80s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 600.00s to 606.13s
Transcribing audio for segment 6 from 600.00s to 606.13s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed: 100%|██████████| 7/7 [01:08<00:00,  9.83s/segment]

Summary generation for segment 6 completed.

Processing completed.





Document saved as /content/outputs/06 Zusammenfassung.docx
Successfully processed: /content/06 Zusammenfassung.mp4
Output saved as: /content/outputs/06 Zusammenfassung.docx

Processing: /content/Brand Management 2_ Markenmodellierung.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 12.772246317120421
Total frames: 19089
Video duration: 1494.57 seconds
Frame interval: 127 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   3%|▎         | 508/19089 [00:01<00:58, 315.73frame/s]

Slide change detected at 39.77 seconds (frame 508). SSIM: 0.7746


Frames Processed:   5%|▌         | 1016/19089 [00:03<00:58, 307.52frame/s]

Slide change detected at 79.55 seconds (frame 1016). SSIM: 0.7875


Frames Processed:   6%|▌         | 1143/19089 [00:03<01:00, 296.72frame/s]

Slide change detected at 89.49 seconds (frame 1143). SSIM: 0.6852


Frames Processed:   7%|▋         | 1397/19089 [00:04<00:59, 298.51frame/s]

Slide change detected at 109.38 seconds (frame 1397). SSIM: 0.8289


Frames Processed:  12%|█▏        | 2286/19089 [00:07<00:53, 311.67frame/s]

Slide change detected at 178.98 seconds (frame 2286). SSIM: 0.7491


Frames Processed:  24%|██▍       | 4572/19089 [00:15<00:50, 288.10frame/s]

Slide change detected at 357.96 seconds (frame 4572). SSIM: 0.7437


Frames Processed:  43%|████▎     | 8128/19089 [00:27<00:34, 317.65frame/s]

Slide change detected at 636.38 seconds (frame 8128). SSIM: 0.1165


Frames Processed:  44%|████▍     | 8382/19089 [00:28<00:36, 295.33frame/s]

Slide change detected at 656.27 seconds (frame 8382). SSIM: 0.1375


Frames Processed:  47%|████▋     | 9017/19089 [00:30<00:32, 307.26frame/s]

Slide change detected at 705.98 seconds (frame 9017). SSIM: 0.6480


Frames Processed:  55%|█████▍    | 10414/19089 [00:35<00:34, 250.89frame/s]

Slide change detected at 815.36 seconds (frame 10414). SSIM: 0.5818


Frames Processed:  63%|██████▎   | 11938/19089 [00:41<00:23, 310.80frame/s]

Slide change detected at 934.68 seconds (frame 11938). SSIM: 0.6803


Frames Processed:  71%|███████   | 13589/19089 [00:46<00:16, 339.11frame/s]

Slide change detected at 1063.95 seconds (frame 13589). SSIM: 0.7577


Frames Processed:  77%|███████▋  | 14732/19089 [00:50<00:17, 247.11frame/s]

Slide change detected at 1153.44 seconds (frame 14732). SSIM: 0.7580


Frames Processed:  82%|████████▏ | 15748/19089 [00:53<00:10, 329.89frame/s]

Slide change detected at 1232.99 seconds (frame 15748). SSIM: 0.8138


Frames Processed:  92%|█████████▏| 17653/19089 [00:59<00:04, 327.53frame/s]

Slide change detected at 1382.14 seconds (frame 17653). SSIM: 0.8283


Frames Processed:  95%|█████████▌| 18161/19089 [01:01<00:03, 276.41frame/s]

Slide change detected at 1421.91 seconds (frame 18161). SSIM: 0.8773


Frames Processed:  97%|█████████▋| 18542/19089 [01:03<00:02, 223.68frame/s]

Slide change detected at 1451.74 seconds (frame 18542). SSIM: 0.7194


Frames Processed:  98%|█████████▊| 18669/19089 [01:03<00:01, 233.03frame/s]

Slide change detected at 1461.68 seconds (frame 18669). SSIM: 0.7199


Frames Processed:  99%|█████████▉| 18923/19089 [01:04<00:00, 274.77frame/s]

Slide change detected at 1481.57 seconds (frame 18923). SSIM: 0.7177


Frames Processed:  99%|█████████▉| 18962/19089 [01:05<00:00, 291.46frame/s]


Adding final slide change at end of video (1494.57 seconds).
Total slide changes detected: 20

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/20 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 39.77s
Transcribing audio for segment 0 from 0.00s to 39.77s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   5%|▌         | 1/20 [00:08<02:42,  8.53s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 39.77s to 79.55s
Transcribing audio for segment 1 from 39.77s to 79.55s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  10%|█         | 2/20 [00:17<02:34,  8.56s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 79.55s to 89.49s
Transcribing audio for segment 2 from 79.55s to 89.49s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  15%|█▌        | 3/20 [00:22<02:00,  7.09s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 89.49s to 109.38s
Transcribing audio for segment 3 from 89.49s to 109.38s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  20%|██        | 4/20 [00:36<02:38,  9.90s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 109.38s to 178.98s
Transcribing audio for segment 4 from 109.38s to 178.98s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  25%|██▌       | 5/20 [00:47<02:34, 10.32s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 178.98s to 357.96s
Transcribing audio for segment 5 from 178.98s to 357.96s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  30%|███       | 6/20 [01:02<02:44, 11.74s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 357.96s to 636.38s
Transcribing audio for segment 6 from 357.96s to 636.38s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  35%|███▌      | 7/20 [01:20<03:01, 13.99s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 636.38s to 656.27s
Transcribing audio for segment 7 from 636.38s to 656.27s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  40%|████      | 8/20 [01:28<02:23, 11.96s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 656.27s to 705.98s
Transcribing audio for segment 8 from 656.27s to 705.98s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  45%|████▌     | 9/20 [01:35<01:54, 10.38s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 705.98s to 815.36s
Transcribing audio for segment 9 from 705.98s to 815.36s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  50%|█████     | 10/20 [01:47<01:50, 11.03s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 815.36s to 934.68s
Transcribing audio for segment 10 from 815.36s to 934.68s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  55%|█████▌    | 11/20 [01:59<01:42, 11.36s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 934.68s to 1063.95s
Transcribing audio for segment 11 from 934.68s to 1063.95s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  60%|██████    | 12/20 [02:14<01:37, 12.18s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 1063.95s to 1153.44s
Transcribing audio for segment 12 from 1063.95s to 1153.44s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  65%|██████▌   | 13/20 [02:25<01:22, 11.83s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 1153.44s to 1232.99s
Transcribing audio for segment 13 from 1153.44s to 1232.99s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  70%|███████   | 14/20 [02:35<01:08, 11.45s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 1232.99s to 1382.14s
Transcribing audio for segment 14 from 1232.99s to 1382.14s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  75%|███████▌  | 15/20 [02:49<01:01, 12.26s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 1382.14s to 1421.91s
Transcribing audio for segment 15 from 1382.14s to 1421.91s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  80%|████████  | 16/20 [02:57<00:43, 10.95s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 1421.91s to 1451.74s
Transcribing audio for segment 16 from 1421.91s to 1451.74s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  85%|████████▌ | 17/20 [03:06<00:31, 10.35s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 1451.74s to 1461.68s
Transcribing audio for segment 17 from 1451.74s to 1461.68s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  90%|█████████ | 18/20 [03:16<00:20, 10.20s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 1461.68s to 1481.57s
Transcribing audio for segment 18 from 1461.68s to 1481.57s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  95%|█████████▌| 19/20 [03:27<00:10, 10.44s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 1481.57s to 1494.57s
Transcribing audio for segment 19 from 1481.57s to 1494.57s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed: 100%|██████████| 20/20 [03:33<00:00, 10.70s/segment]

Summary generation for segment 19 completed.

Processing completed.





Document saved as /content/outputs/Brand Management 2_ Markenmodellierung.docx
Successfully processed: /content/Brand Management 2_ Markenmodellierung.mp4
Output saved as: /content/outputs/Brand Management 2_ Markenmodellierung.docx

Processing: /content/Video 2_Innovation Management and Processes.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 29.99998234981723
Total frames: 60056
Video duration: 2001.87 seconds
Frame interval: 299 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|          | 598/60056 [00:01<02:39, 372.38frame/s]

Slide change detected at 19.93 seconds (frame 598). SSIM: 0.4719


Frames Processed:   3%|▎         | 2093/60056 [00:04<01:41, 572.27frame/s]

Slide change detected at 69.77 seconds (frame 2093). SSIM: 0.5437


Frames Processed:   5%|▍         | 2990/60056 [00:05<01:56, 491.06frame/s]

Slide change detected at 99.67 seconds (frame 2990). SSIM: 0.5363


Frames Processed:   7%|▋         | 4485/60056 [00:08<01:31, 607.54frame/s]

Slide change detected at 149.50 seconds (frame 4485). SSIM: 0.8148


Frames Processed:   9%|▉         | 5681/60056 [00:10<01:34, 573.39frame/s]

Slide change detected at 189.37 seconds (frame 5681). SSIM: 0.7593


Frames Processed:  13%|█▎        | 8073/60056 [00:15<01:25, 609.60frame/s]

Slide change detected at 269.10 seconds (frame 8073). SSIM: 0.7765


Frames Processed:  25%|██▍       | 14950/60056 [00:26<01:45, 428.75frame/s]

Slide change detected at 498.33 seconds (frame 14950). SSIM: 0.8618


Frames Processed:  36%|███▌      | 21528/60056 [00:37<01:11, 542.65frame/s]

Slide change detected at 717.60 seconds (frame 21528). SSIM: 0.8448


Frames Processed:  45%|████▍     | 26910/60056 [00:46<00:48, 684.53frame/s]

Slide change detected at 897.00 seconds (frame 26910). SSIM: 0.8228


Frames Processed:  46%|████▋     | 27807/60056 [00:48<00:50, 633.95frame/s]

Slide change detected at 926.90 seconds (frame 27807). SSIM: 0.8041


Frames Processed:  49%|████▉     | 29601/60056 [00:50<00:51, 594.39frame/s]

Slide change detected at 986.70 seconds (frame 29601). SSIM: 0.7934


Frames Processed:  56%|█████▋    | 33787/60056 [00:58<00:47, 549.15frame/s]

Slide change detected at 1126.23 seconds (frame 33787). SSIM: 0.7901


Frames Processed:  58%|█████▊    | 34983/60056 [01:00<00:46, 544.45frame/s]

Slide change detected at 1166.10 seconds (frame 34983). SSIM: 0.7872


Frames Processed:  60%|█████▉    | 35880/60056 [01:02<00:43, 558.56frame/s]

Slide change detected at 1196.00 seconds (frame 35880). SSIM: 0.8477


Frames Processed:  66%|██████▌   | 39468/60056 [01:09<00:35, 584.75frame/s]

Slide change detected at 1315.60 seconds (frame 39468). SSIM: 0.7884


Frames Processed:  70%|██████▉   | 41860/60056 [01:13<00:30, 599.13frame/s]

Slide change detected at 1395.33 seconds (frame 41860). SSIM: 0.8463


Frames Processed:  74%|███████▍  | 44551/60056 [01:17<00:23, 671.28frame/s]

Slide change detected at 1485.03 seconds (frame 44551). SSIM: 0.7672


Frames Processed:  78%|███████▊  | 46943/60056 [01:21<00:20, 639.73frame/s]

Slide change detected at 1564.77 seconds (frame 46943). SSIM: 0.7223


Frames Processed:  81%|████████  | 48737/60056 [01:24<00:18, 611.75frame/s]

Slide change detected at 1624.57 seconds (frame 48737). SSIM: 0.7770


Frames Processed:  90%|████████▉ | 53820/60056 [01:34<00:12, 519.66frame/s]

Slide change detected at 1794.00 seconds (frame 53820). SSIM: 0.8544


Frames Processed:  98%|█████████▊| 58903/60056 [01:42<00:01, 621.96frame/s]

Slide change detected at 1963.43 seconds (frame 58903). SSIM: 0.8450


Frames Processed: 100%|█████████▉| 59757/60056 [01:43<00:00, 575.86frame/s]


Adding final slide change at end of video (2001.87 seconds).
Total slide changes detected: 22

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/22 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 19.93s
Transcribing audio for segment 0 from 0.00s to 19.93s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   5%|▍         | 1/22 [00:06<02:15,  6.45s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 19.93s to 69.77s
Transcribing audio for segment 1 from 19.93s to 69.77s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:   9%|▉         | 2/22 [00:14<02:32,  7.62s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 69.77s to 99.67s
Transcribing audio for segment 2 from 69.77s to 99.67s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  14%|█▎        | 3/22 [00:22<02:25,  7.66s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 99.67s to 149.50s
Transcribing audio for segment 3 from 99.67s to 149.50s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  18%|█▊        | 4/22 [00:30<02:19,  7.74s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 149.50s to 189.37s
Transcribing audio for segment 4 from 149.50s to 189.37s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  23%|██▎       | 5/22 [00:38<02:15,  7.95s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 189.37s to 269.10s
Transcribing audio for segment 5 from 189.37s to 269.10s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  27%|██▋       | 6/22 [00:47<02:12,  8.29s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 269.10s to 498.33s
Transcribing audio for segment 6 from 269.10s to 498.33s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  32%|███▏      | 7/22 [01:05<02:48, 11.23s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 498.33s to 717.60s
Transcribing audio for segment 7 from 498.33s to 717.60s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  36%|███▋      | 8/22 [01:22<03:06, 13.32s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 717.60s to 897.00s
Transcribing audio for segment 8 from 717.60s to 897.00s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  41%|████      | 9/22 [01:38<03:01, 13.94s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 897.00s to 926.90s
Transcribing audio for segment 9 from 897.00s to 926.90s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  45%|████▌     | 10/22 [01:47<02:29, 12.47s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 926.90s to 986.70s
Transcribing audio for segment 10 from 926.90s to 986.70s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  50%|█████     | 11/22 [01:57<02:09, 11.76s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 986.70s to 1126.23s
Transcribing audio for segment 11 from 986.70s to 1126.23s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  55%|█████▍    | 12/22 [02:14<02:12, 13.27s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 1126.23s to 1166.10s
Transcribing audio for segment 12 from 1126.23s to 1166.10s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  59%|█████▉    | 13/22 [02:23<01:49, 12.19s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 1166.10s to 1196.00s
Transcribing audio for segment 13 from 1166.10s to 1196.00s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  64%|██████▎   | 14/22 [02:32<01:29, 11.19s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 1196.00s to 1315.60s
Transcribing audio for segment 14 from 1196.00s to 1315.60s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  68%|██████▊   | 15/22 [02:49<01:30, 12.90s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 1315.60s to 1395.33s
Transcribing audio for segment 15 from 1315.60s to 1395.33s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  73%|███████▎  | 16/22 [03:04<01:20, 13.37s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 1395.33s to 1485.03s
Transcribing audio for segment 16 from 1395.33s to 1485.03s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  77%|███████▋  | 17/22 [03:15<01:04, 12.87s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 1485.03s to 1564.77s
Transcribing audio for segment 17 from 1485.03s to 1564.77s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  82%|████████▏ | 18/22 [03:26<00:49, 12.32s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 1564.77s to 1624.57s
Transcribing audio for segment 18 from 1564.77s to 1624.57s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  86%|████████▋ | 19/22 [03:42<00:39, 13.21s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 1624.57s to 1794.00s
Transcribing audio for segment 19 from 1624.57s to 1794.00s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  91%|█████████ | 20/22 [03:59<00:28, 14.49s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 1794.00s to 1963.43s
Transcribing audio for segment 20 from 1794.00s to 1963.43s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed:  95%|█████████▌| 21/22 [04:15<00:14, 14.93s/segment]

Summary generation for segment 20 completed.

Processing segment 21: 1963.43s to 2001.87s
Transcribing audio for segment 21 from 1963.43s to 2001.87s...
Transcription for segment 21 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 21...


Segments Processed: 100%|██████████| 22/22 [04:25<00:00, 12.05s/segment]

Summary generation for segment 21 completed.

Processing completed.





Document saved as /content/outputs/Video 2_Innovation Management and Processes.docx
Successfully processed: /content/Video 2_Innovation Management and Processes.mp4
Output saved as: /content/outputs/Video 2_Innovation Management and Processes.docx

Processing: /content/B Video Organizational Design： Teil 2 Organisationsformen mit Vorteilen und Nachteilen.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 13.87093123594234
Total frames: 10895
Video duration: 785.46 seconds
Frame interval: 138 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|▏         | 138/10895 [00:00<00:26, 399.29frame/s]

Slide change detected at 9.95 seconds (frame 138). SSIM: 0.4010


Frames Processed:   4%|▍         | 414/10895 [00:00<00:18, 570.98frame/s]

Slide change detected at 19.90 seconds (frame 276). SSIM: 0.4356


Frames Processed:  10%|█         | 1104/10895 [00:01<00:14, 671.79frame/s]

Slide change detected at 79.59 seconds (frame 1104). SSIM: 0.6475


Frames Processed:  14%|█▍        | 1518/10895 [00:02<00:13, 695.40frame/s]

Slide change detected at 99.49 seconds (frame 1380). SSIM: 0.6583


Frames Processed:  18%|█▊        | 1932/10895 [00:03<00:13, 674.78frame/s]

Slide change detected at 129.34 seconds (frame 1794). SSIM: 0.7836


Frames Processed:  19%|█▉        | 2070/10895 [00:03<00:13, 660.26frame/s]

Slide change detected at 149.23 seconds (frame 2070). SSIM: 0.7734


Frames Processed:  22%|██▏       | 2346/10895 [00:03<00:13, 646.11frame/s]

Slide change detected at 169.13 seconds (frame 2346). SSIM: 0.7309


Frames Processed:  25%|██▌       | 2760/10895 [00:04<00:14, 565.56frame/s]

Slide change detected at 198.98 seconds (frame 2760). SSIM: 0.6626


Frames Processed:  29%|██▉       | 3174/10895 [00:05<00:19, 400.58frame/s]

Slide change detected at 228.82 seconds (frame 3174). SSIM: 0.6923


Frames Processed:  33%|███▎      | 3588/10895 [00:06<00:14, 497.93frame/s]

Slide change detected at 258.67 seconds (frame 3588). SSIM: 0.8103


Frames Processed:  34%|███▍      | 3726/10895 [00:06<00:14, 511.71frame/s]

Slide change detected at 268.62 seconds (frame 3726). SSIM: 0.8096


Frames Processed:  35%|███▌      | 3864/10895 [00:06<00:13, 519.88frame/s]

Slide change detected at 278.57 seconds (frame 3864). SSIM: 0.7409


Frames Processed:  41%|████      | 4416/10895 [00:07<00:13, 471.45frame/s]

Slide change detected at 318.36 seconds (frame 4416). SSIM: 0.7401


Frames Processed:  42%|████▏     | 4554/10895 [00:08<00:15, 404.91frame/s]

Slide change detected at 328.31 seconds (frame 4554). SSIM: 0.7619


Frames Processed:  48%|████▊     | 5244/10895 [00:11<00:20, 270.67frame/s]

Slide change detected at 378.06 seconds (frame 5244). SSIM: 0.6191


Frames Processed:  70%|██████▉   | 7590/10895 [00:16<00:04, 704.94frame/s]

Slide change detected at 537.24 seconds (frame 7452). SSIM: 0.6887
Slide change detected at 547.19 seconds (frame 7590). SSIM: 0.6991


Frames Processed:  91%|█████████ | 9936/10895 [00:19<00:01, 729.10frame/s]

Slide change detected at 706.37 seconds (frame 9798). SSIM: 0.5858
Slide change detected at 716.32 seconds (frame 9936). SSIM: 0.6854


Frames Processed:  99%|█████████▊| 10757/10895 [00:20<00:00, 518.90frame/s]


Slide change detected at 776.01 seconds (frame 10764). SSIM: 0.4020
Adding final slide change at end of video (785.46 seconds).
Total slide changes detected: 21

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/21 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 9.95s
Transcribing audio for segment 0 from 0.00s to 9.95s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   5%|▍         | 1/21 [00:07<02:21,  7.07s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 9.95s to 19.90s
Transcribing audio for segment 1 from 9.95s to 19.90s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  10%|▉         | 2/21 [00:14<02:14,  7.07s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 19.90s to 79.59s
Transcribing audio for segment 2 from 19.90s to 79.59s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  14%|█▍        | 3/21 [00:25<02:41,  8.96s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 79.59s to 99.49s
Transcribing audio for segment 3 from 79.59s to 99.49s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  19%|█▉        | 4/21 [00:35<02:37,  9.24s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 99.49s to 129.34s
Transcribing audio for segment 4 from 99.49s to 129.34s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  24%|██▍       | 5/21 [00:41<02:12,  8.29s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 129.34s to 149.23s
Transcribing audio for segment 5 from 129.34s to 149.23s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  29%|██▊       | 6/21 [00:48<01:58,  7.88s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 149.23s to 169.13s
Transcribing audio for segment 6 from 149.23s to 169.13s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  33%|███▎      | 7/21 [00:55<01:43,  7.39s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 169.13s to 198.98s
Transcribing audio for segment 7 from 169.13s to 198.98s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  38%|███▊      | 8/21 [01:01<01:32,  7.13s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 198.98s to 228.82s
Transcribing audio for segment 8 from 198.98s to 228.82s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  43%|████▎     | 9/21 [01:10<01:30,  7.57s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 228.82s to 258.67s
Transcribing audio for segment 9 from 228.82s to 258.67s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  48%|████▊     | 10/21 [01:18<01:25,  7.74s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 258.67s to 268.62s
Transcribing audio for segment 10 from 258.67s to 268.62s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  52%|█████▏    | 11/21 [01:25<01:16,  7.70s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 268.62s to 278.57s
Transcribing audio for segment 11 from 268.62s to 278.57s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  57%|█████▋    | 12/21 [01:32<01:05,  7.31s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 278.57s to 318.36s
Transcribing audio for segment 12 from 278.57s to 318.36s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  62%|██████▏   | 13/21 [01:40<00:59,  7.47s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 318.36s to 328.31s
Transcribing audio for segment 13 from 318.36s to 328.31s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  67%|██████▋   | 14/21 [01:48<00:53,  7.61s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 328.31s to 378.06s
Transcribing audio for segment 14 from 328.31s to 378.06s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  71%|███████▏  | 15/21 [01:59<00:51,  8.63s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 378.06s to 537.24s
Transcribing audio for segment 15 from 378.06s to 537.24s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  76%|███████▌  | 16/21 [02:15<00:54, 10.83s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 537.24s to 547.19s
Transcribing audio for segment 16 from 537.24s to 547.19s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  81%|████████  | 17/21 [02:23<00:39,  9.97s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 547.19s to 706.37s
Transcribing audio for segment 17 from 547.19s to 706.37s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  86%|████████▌ | 18/21 [02:43<00:39, 13.07s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 706.37s to 716.32s
Transcribing audio for segment 18 from 706.37s to 716.32s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  90%|█████████ | 19/21 [02:52<00:23, 11.95s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 716.32s to 776.01s
Transcribing audio for segment 19 from 716.32s to 776.01s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  95%|█████████▌| 20/21 [03:01<00:11, 11.11s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 776.01s to 785.46s
Transcribing audio for segment 20 from 776.01s to 785.46s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed: 100%|██████████| 21/21 [03:08<00:00,  8.98s/segment]

Summary generation for segment 20 completed.

Processing completed.





Document saved as /content/outputs/B Video Organizational Design： Teil 2 Organisationsformen mit Vorteilen und Nachteilen.docx
Successfully processed: /content/B Video Organizational Design： Teil 2 Organisationsformen mit Vorteilen und Nachteilen.mp4
Output saved as: /content/outputs/B Video Organizational Design： Teil 2 Organisationsformen mit Vorteilen und Nachteilen.docx

Processing: /content/Building High Performance Teams - Teil 04.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 30.0
Total frames: 86139
Video duration: 2871.30 seconds
Frame interval: 300 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|          | 600/86139 [00:02<05:30, 258.49frame/s]

Slide change detected at 20.00 seconds (frame 600). SSIM: 0.4853


Frames Processed:   2%|▏         | 1800/86139 [00:05<04:02, 348.43frame/s]

Slide change detected at 60.00 seconds (frame 1800). SSIM: 0.8387


Frames Processed:   2%|▏         | 2100/86139 [00:06<03:30, 399.52frame/s]

Slide change detected at 70.00 seconds (frame 2100). SSIM: 0.8786


Frames Processed:   6%|▌         | 4800/86139 [00:12<02:47, 486.95frame/s]

Slide change detected at 160.00 seconds (frame 4800). SSIM: 0.8486


Frames Processed:  63%|██████▎   | 54000/86139 [01:45<01:02, 510.76frame/s]

Slide change detected at 1800.00 seconds (frame 54000). SSIM: 0.8267


Frames Processed:  66%|██████▌   | 56700/86139 [01:49<00:51, 567.36frame/s]

Slide change detected at 1890.00 seconds (frame 56700). SSIM: 0.7985


Frames Processed:  71%|███████   | 61200/86139 [01:58<00:47, 526.79frame/s]

Slide change detected at 2040.00 seconds (frame 61200). SSIM: 0.8949


Frames Processed:  95%|█████████▌| 82200/86139 [02:40<00:08, 482.11frame/s]

Slide change detected at 2740.00 seconds (frame 82200). SSIM: 0.8603


Frames Processed: 100%|█████████▉| 85839/86139 [02:53<00:00, 495.99frame/s]


Adding final slide change at end of video (2871.30 seconds).
Total slide changes detected: 9

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/9 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 20.00s
Transcribing audio for segment 0 from 0.00s to 20.00s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:  11%|█         | 1/9 [00:07<00:59,  7.43s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 20.00s to 60.00s
Transcribing audio for segment 1 from 20.00s to 60.00s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  22%|██▏       | 2/9 [00:18<01:07,  9.64s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 60.00s to 70.00s
Transcribing audio for segment 2 from 60.00s to 70.00s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  33%|███▎      | 3/9 [00:23<00:46,  7.69s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 70.00s to 160.00s
Transcribing audio for segment 3 from 70.00s to 160.00s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  44%|████▍     | 4/9 [00:36<00:48,  9.68s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 160.00s to 1800.00s
Transcribing audio for segment 4 from 160.00s to 1800.00s...
Error during transcription of segment 4: Error code: 413 - {'error': {'message': '413: Maximum content size limit (26214400) exceeded (26260171 bytes read)', 'type': 'server_error', 'param': None, 'code': None}}
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  56%|█████▌    | 5/9 [00:46<00:39,  9.87s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 1800.00s to 1890.00s
Transcribing audio for segment 5 from 1800.00s to 1890.00s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  67%|██████▋   | 6/9 [00:59<00:32, 10.77s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 1890.00s to 2040.00s
Transcribing audio for segment 6 from 1890.00s to 2040.00s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  78%|███████▊  | 7/9 [01:16<00:25, 12.77s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 2040.00s to 2740.00s
Transcribing audio for segment 7 from 2040.00s to 2740.00s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  89%|████████▉ | 8/9 [01:59<00:22, 22.50s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 2740.00s to 2871.30s
Transcribing audio for segment 8 from 2740.00s to 2871.30s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed: 100%|██████████| 9/9 [02:16<00:00, 15.20s/segment]

Summary generation for segment 8 completed.

Processing completed.





Document saved as /content/outputs/Building High Performance Teams - Teil 04.docx
Successfully processed: /content/Building High Performance Teams - Teil 04.mp4
Output saved as: /content/outputs/Building High Performance Teams - Teil 04.docx

Processing: /content/Brand Management 4_ Markenorientierte Unternehmensführung.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 12.87104754642496
Total frames: 22485
Video duration: 1746.94 seconds
Frame interval: 128 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   3%|▎         | 640/22485 [00:02<01:15, 288.71frame/s]

Slide change detected at 49.72 seconds (frame 640). SSIM: 0.6182


Frames Processed:   3%|▎         | 768/22485 [00:02<01:20, 270.91frame/s]

Slide change detected at 59.67 seconds (frame 768). SSIM: 0.7425


Frames Processed:   5%|▌         | 1152/22485 [00:04<01:12, 292.29frame/s]

Slide change detected at 89.50 seconds (frame 1152). SSIM: 0.6759


Frames Processed:   9%|▊         | 1920/22485 [00:06<01:16, 268.23frame/s]

Slide change detected at 149.17 seconds (frame 1920). SSIM: 0.6547


Frames Processed:  11%|█         | 2432/22485 [00:08<01:22, 244.33frame/s]

Slide change detected at 188.95 seconds (frame 2432). SSIM: 0.7224


Frames Processed:  14%|█▎        | 3072/22485 [00:12<01:32, 210.45frame/s]

Slide change detected at 238.68 seconds (frame 3072). SSIM: 0.7948


Frames Processed:  17%|█▋        | 3840/22485 [00:14<01:05, 283.22frame/s]

Slide change detected at 298.34 seconds (frame 3840). SSIM: 0.1156


Frames Processed:  20%|██        | 4608/22485 [00:17<01:03, 282.21frame/s]

Slide change detected at 358.01 seconds (frame 4608). SSIM: 0.1090


Frames Processed:  24%|██▍       | 5504/22485 [00:20<00:57, 296.66frame/s]

Slide change detected at 427.63 seconds (frame 5504). SSIM: 0.5303


Frames Processed:  28%|██▊       | 6400/22485 [00:23<01:08, 236.29frame/s]

Slide change detected at 497.24 seconds (frame 6400). SSIM: 0.5840


Frames Processed:  29%|██▉       | 6528/22485 [00:24<01:08, 233.20frame/s]

Slide change detected at 507.18 seconds (frame 6528). SSIM: 0.8942


Frames Processed:  36%|███▋      | 8192/22485 [00:29<00:48, 294.58frame/s]

Slide change detected at 636.47 seconds (frame 8192). SSIM: 0.7818


Frames Processed:  40%|████      | 9088/22485 [00:32<00:47, 279.63frame/s]

Slide change detected at 706.08 seconds (frame 9088). SSIM: 0.7711


Frames Processed:  46%|████▌     | 10368/22485 [00:37<00:50, 239.60frame/s]

Slide change detected at 805.53 seconds (frame 10368). SSIM: 0.6458


Frames Processed:  52%|█████▏    | 11776/22485 [00:42<00:36, 295.99frame/s]

Slide change detected at 914.92 seconds (frame 11776). SSIM: 0.6095


Frames Processed:  57%|█████▋    | 12928/22485 [00:46<00:29, 328.48frame/s]

Slide change detected at 1004.42 seconds (frame 12928). SSIM: 0.8788


Frames Processed:  71%|███████   | 15872/22485 [00:57<00:23, 276.69frame/s]

Slide change detected at 1233.16 seconds (frame 15872). SSIM: 0.8043


Frames Processed:  74%|███████▍  | 16640/22485 [00:59<00:19, 304.11frame/s]

Slide change detected at 1292.82 seconds (frame 16640). SSIM: 0.8337


Frames Processed:  78%|███████▊  | 17536/22485 [01:02<00:21, 231.02frame/s]

Slide change detected at 1362.44 seconds (frame 17536). SSIM: 0.7657


Frames Processed:  97%|█████████▋| 21888/22485 [01:18<00:02, 220.86frame/s]

Slide change detected at 1700.56 seconds (frame 21888). SSIM: 0.6490


Frames Processed:  99%|█████████▉| 22357/22485 [01:20<00:00, 276.43frame/s]


Adding final slide change at end of video (1746.94 seconds).
Total slide changes detected: 21

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/21 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 49.72s
Transcribing audio for segment 0 from 0.00s to 49.72s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   5%|▍         | 1/21 [00:07<02:24,  7.25s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 49.72s to 59.67s
Transcribing audio for segment 1 from 49.72s to 59.67s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  10%|▉         | 2/21 [00:12<01:57,  6.17s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 59.67s to 89.50s
Transcribing audio for segment 2 from 59.67s to 89.50s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  14%|█▍        | 3/21 [00:20<02:03,  6.87s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 89.50s to 149.17s
Transcribing audio for segment 3 from 89.50s to 149.17s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  19%|█▉        | 4/21 [00:29<02:12,  7.79s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 149.17s to 188.95s
Transcribing audio for segment 4 from 149.17s to 188.95s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  24%|██▍       | 5/21 [00:37<02:06,  7.88s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 188.95s to 238.68s
Transcribing audio for segment 5 from 188.95s to 238.68s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  29%|██▊       | 6/21 [00:46<02:04,  8.33s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 238.68s to 298.34s
Transcribing audio for segment 6 from 238.68s to 298.34s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  33%|███▎      | 7/21 [00:58<02:13,  9.55s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 298.34s to 358.01s
Transcribing audio for segment 7 from 298.34s to 358.01s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  38%|███▊      | 8/21 [01:08<02:04,  9.58s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 358.01s to 427.63s
Transcribing audio for segment 8 from 358.01s to 427.63s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  43%|████▎     | 9/21 [01:19<02:00, 10.04s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 427.63s to 497.24s
Transcribing audio for segment 9 from 427.63s to 497.24s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  48%|████▊     | 10/21 [01:29<01:49, 10.00s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 497.24s to 507.18s
Transcribing audio for segment 10 from 497.24s to 507.18s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  52%|█████▏    | 11/21 [01:37<01:32,  9.25s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 507.18s to 636.47s
Transcribing audio for segment 11 from 507.18s to 636.47s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  57%|█████▋    | 12/21 [01:48<01:29,  9.90s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 636.47s to 706.08s
Transcribing audio for segment 12 from 636.47s to 706.08s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  62%|██████▏   | 13/21 [01:59<01:22, 10.29s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 706.08s to 805.53s
Transcribing audio for segment 13 from 706.08s to 805.53s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  67%|██████▋   | 14/21 [02:09<01:11, 10.26s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 805.53s to 914.92s
Transcribing audio for segment 14 from 805.53s to 914.92s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  71%|███████▏  | 15/21 [02:22<01:06, 11.01s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 914.92s to 1004.42s
Transcribing audio for segment 15 from 914.92s to 1004.42s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  76%|███████▌  | 16/21 [02:37<01:00, 12.14s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 1004.42s to 1233.16s
Transcribing audio for segment 16 from 1004.42s to 1233.16s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  81%|████████  | 17/21 [02:53<00:53, 13.43s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 1233.16s to 1292.82s
Transcribing audio for segment 17 from 1233.16s to 1292.82s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  86%|████████▌ | 18/21 [03:03<00:36, 12.28s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 1292.82s to 1362.44s
Transcribing audio for segment 18 from 1292.82s to 1362.44s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  90%|█████████ | 19/21 [03:13<00:23, 11.51s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 1362.44s to 1700.56s
Transcribing audio for segment 19 from 1362.44s to 1700.56s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  95%|█████████▌| 20/21 [03:35<00:14, 14.72s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 1700.56s to 1746.94s
Transcribing audio for segment 20 from 1700.56s to 1746.94s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed: 100%|██████████| 21/21 [03:45<00:00, 10.72s/segment]

Summary generation for segment 20 completed.

Processing completed.





Document saved as /content/outputs/Brand Management 4_ Markenorientierte Unternehmensführung.docx
Successfully processed: /content/Brand Management 4_ Markenorientierte Unternehmensführung.mp4
Output saved as: /content/outputs/Brand Management 4_ Markenorientierte Unternehmensführung.docx

Processing: /content/Managing High-Performance Teams - Teil 01.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 30.0
Total frames: 50053
Video duration: 1668.43 seconds
Frame interval: 300 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   2%|▏         | 1200/50053 [00:04<02:50, 286.58frame/s]

Slide change detected at 40.00 seconds (frame 1200). SSIM: 0.5375


Frames Processed:   6%|▌         | 3000/50053 [00:10<02:39, 294.96frame/s]

Slide change detected at 100.00 seconds (frame 3000). SSIM: 0.8377


Frames Processed:   7%|▋         | 3300/50053 [00:11<02:18, 338.05frame/s]

Slide change detected at 110.00 seconds (frame 3300). SSIM: 0.8602


Frames Processed:  15%|█▍        | 7500/50053 [00:19<01:19, 533.49frame/s]

Slide change detected at 250.00 seconds (frame 7500). SSIM: 0.8148


Frames Processed:  16%|█▌        | 8100/50053 [00:21<01:46, 395.31frame/s]

Slide change detected at 270.00 seconds (frame 8100). SSIM: 0.7904


Frames Processed:  18%|█▊        | 9000/50053 [00:24<01:57, 348.85frame/s]

Slide change detected at 300.00 seconds (frame 9000). SSIM: 0.8014


Frames Processed:  20%|██        | 10200/50053 [00:27<01:42, 387.90frame/s]

Slide change detected at 340.00 seconds (frame 10200). SSIM: 0.8278


Frames Processed:  23%|██▎       | 11400/50053 [00:30<01:43, 374.26frame/s]

Slide change detected at 380.00 seconds (frame 11400). SSIM: 0.8148


Frames Processed:  27%|██▋       | 13500/50053 [00:35<01:34, 386.33frame/s]

Slide change detected at 450.00 seconds (frame 13500). SSIM: 0.8703


Frames Processed:  31%|███       | 15300/50053 [00:39<01:10, 495.89frame/s]

Slide change detected at 510.00 seconds (frame 15300). SSIM: 0.8948


Frames Processed:  37%|███▋      | 18300/50053 [00:44<01:00, 527.22frame/s]

Slide change detected at 610.00 seconds (frame 18300). SSIM: 0.8546


Frames Processed:  45%|████▍     | 22500/50053 [00:53<00:52, 527.95frame/s]

Slide change detected at 750.00 seconds (frame 22500). SSIM: 0.8919


Frames Processed:  54%|█████▍    | 27000/50053 [01:03<00:56, 407.81frame/s]

Slide change detected at 900.00 seconds (frame 27000). SSIM: 0.8886


Frames Processed:  71%|███████   | 35400/50053 [01:20<00:28, 516.98frame/s]

Slide change detected at 1180.00 seconds (frame 35400). SSIM: 0.8799


Frames Processed:  79%|███████▉  | 39600/50053 [01:29<00:26, 396.16frame/s]

Slide change detected at 1320.00 seconds (frame 39600). SSIM: 0.8968


Frames Processed:  91%|█████████ | 45300/50053 [01:40<00:11, 419.53frame/s]

Slide change detected at 1510.00 seconds (frame 45300). SSIM: 0.8537


Frames Processed:  99%|█████████▉| 49753/50053 [01:53<00:00, 440.23frame/s]


Adding final slide change at end of video (1668.43 seconds).
Total slide changes detected: 17

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/17 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 40.00s
Transcribing audio for segment 0 from 0.00s to 40.00s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   6%|▌         | 1/17 [00:07<02:06,  7.92s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 40.00s to 100.00s
Transcribing audio for segment 1 from 40.00s to 100.00s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  12%|█▏        | 2/17 [00:16<02:07,  8.52s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 100.00s to 110.00s
Transcribing audio for segment 2 from 100.00s to 110.00s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  18%|█▊        | 3/17 [00:23<01:45,  7.54s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 110.00s to 250.00s
Transcribing audio for segment 3 from 110.00s to 250.00s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  24%|██▎       | 4/17 [00:36<02:08,  9.86s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 250.00s to 270.00s
Transcribing audio for segment 4 from 250.00s to 270.00s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  29%|██▉       | 5/17 [00:44<01:47,  8.99s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 270.00s to 300.00s
Transcribing audio for segment 5 from 270.00s to 300.00s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  35%|███▌      | 6/17 [00:52<01:37,  8.87s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 300.00s to 340.00s
Transcribing audio for segment 6 from 300.00s to 340.00s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  41%|████      | 7/17 [01:00<01:25,  8.56s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 340.00s to 380.00s
Transcribing audio for segment 7 from 340.00s to 380.00s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  47%|████▋     | 8/17 [01:09<01:18,  8.77s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 380.00s to 450.00s
Transcribing audio for segment 8 from 380.00s to 450.00s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  53%|█████▎    | 9/17 [01:19<01:13,  9.14s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 450.00s to 510.00s
Transcribing audio for segment 9 from 450.00s to 510.00s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  59%|█████▉    | 10/17 [01:29<01:04,  9.17s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 510.00s to 610.00s
Transcribing audio for segment 10 from 510.00s to 610.00s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  65%|██████▍   | 11/17 [01:39<00:56,  9.45s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 610.00s to 750.00s
Transcribing audio for segment 11 from 610.00s to 750.00s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  71%|███████   | 12/17 [01:51<00:51, 10.30s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 750.00s to 900.00s
Transcribing audio for segment 12 from 750.00s to 900.00s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  76%|███████▋  | 13/17 [02:07<00:48, 12.00s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 900.00s to 1180.00s
Transcribing audio for segment 13 from 900.00s to 1180.00s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  82%|████████▏ | 14/17 [02:28<00:44, 14.76s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 1180.00s to 1320.00s
Transcribing audio for segment 14 from 1180.00s to 1320.00s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  88%|████████▊ | 15/17 [02:44<00:30, 15.09s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 1320.00s to 1510.00s
Transcribing audio for segment 15 from 1320.00s to 1510.00s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  94%|█████████▍| 16/17 [03:01<00:15, 15.68s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 1510.00s to 1668.43s
Transcribing audio for segment 16 from 1510.00s to 1668.43s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed: 100%|██████████| 17/17 [03:18<00:00, 11.68s/segment]

Summary generation for segment 16 completed.

Processing completed.





Document saved as /content/outputs/Managing High-Performance Teams - Teil 01.docx
Successfully processed: /content/Managing High-Performance Teams - Teil 01.mp4
Output saved as: /content/outputs/Managing High-Performance Teams - Teil 01.docx

Processing: /content/B Video Organizational Design： Teil 4 Organisationsentwicklung.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 29.907668348229166
Total frames: 19880
Video duration: 664.71 seconds
Frame interval: 299 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   5%|▍         | 897/19880 [00:00<00:12, 1480.70frame/s]

Slide change detected at 19.99 seconds (frame 598). SSIM: 0.3725
Slide change detected at 29.99 seconds (frame 897). SSIM: 0.3573


Frames Processed:  15%|█▌        | 2990/19880 [00:01<00:10, 1628.84frame/s]

Slide change detected at 89.98 seconds (frame 2691). SSIM: 0.7591


Frames Processed:  20%|█▉        | 3887/19880 [00:02<00:09, 1659.50frame/s]

Slide change detected at 129.97 seconds (frame 3887). SSIM: 0.7625


Frames Processed:  26%|██▌       | 5083/19880 [00:03<00:10, 1465.49frame/s]

Slide change detected at 169.96 seconds (frame 5083). SSIM: 0.7125


Frames Processed:  29%|██▊       | 5681/19880 [00:03<00:09, 1431.66frame/s]

Slide change detected at 189.95 seconds (frame 5681). SSIM: 0.7061


Frames Processed:  33%|███▎      | 6578/19880 [00:04<00:09, 1436.47frame/s]

Slide change detected at 219.94 seconds (frame 6578). SSIM: 0.4936


Frames Processed:  41%|████      | 8073/19880 [00:05<00:08, 1403.90frame/s]

Slide change detected at 269.93 seconds (frame 8073). SSIM: 0.4459


Frames Processed:  57%|█████▋    | 11362/19880 [00:08<00:06, 1234.72frame/s]

Slide change detected at 379.90 seconds (frame 11362). SSIM: 0.7642


Frames Processed:  65%|██████▍   | 12857/19880 [00:09<00:05, 1266.42frame/s]

Slide change detected at 429.89 seconds (frame 12857). SSIM: 0.8099


Frames Processed:  74%|███████▎  | 14651/19880 [00:10<00:03, 1402.66frame/s]

Slide change detected at 489.87 seconds (frame 14651). SSIM: 0.8352


Frames Processed:  81%|████████  | 16146/19880 [00:11<00:02, 1360.94frame/s]

Slide change detected at 539.86 seconds (frame 16146). SSIM: 0.6674


Frames Processed:  90%|█████████ | 17940/19880 [00:12<00:01, 1457.97frame/s]

Slide change detected at 589.85 seconds (frame 17641). SSIM: 0.6598


Frames Processed:  98%|█████████▊| 19581/19880 [00:14<00:00, 1392.24frame/s]

Slide change detected at 659.83 seconds (frame 19734). SSIM: 0.4843
Adding final slide change at end of video (664.71 seconds).
Total slide changes detected: 15

Step 2: Processing slides and audio...



Segments Processed:   0%|          | 0/15 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 19.99s
Transcribing audio for segment 0 from 0.00s to 19.99s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   7%|▋         | 1/15 [00:07<01:40,  7.16s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 19.99s to 29.99s
Transcribing audio for segment 1 from 19.99s to 29.99s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  13%|█▎        | 2/15 [00:15<01:43,  7.97s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 29.99s to 89.98s
Transcribing audio for segment 2 from 29.99s to 89.98s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  20%|██        | 3/15 [00:24<01:40,  8.39s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 89.98s to 129.97s
Transcribing audio for segment 3 from 89.98s to 129.97s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  27%|██▋       | 4/15 [00:33<01:33,  8.52s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 129.97s to 169.96s
Transcribing audio for segment 4 from 129.97s to 169.96s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  33%|███▎      | 5/15 [00:47<01:44, 10.49s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 169.96s to 189.95s
Transcribing audio for segment 5 from 169.96s to 189.95s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  40%|████      | 6/15 [00:54<01:23,  9.28s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 189.95s to 219.94s
Transcribing audio for segment 6 from 189.95s to 219.94s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  47%|████▋     | 7/15 [01:04<01:16,  9.51s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 219.94s to 269.93s
Transcribing audio for segment 7 from 219.94s to 269.93s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  53%|█████▎    | 8/15 [01:15<01:11, 10.23s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 269.93s to 379.90s
Transcribing audio for segment 8 from 269.93s to 379.90s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  60%|██████    | 9/15 [01:27<01:04, 10.67s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 379.90s to 429.89s
Transcribing audio for segment 9 from 379.90s to 429.89s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  67%|██████▋   | 10/15 [01:42<00:59, 11.88s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 429.89s to 489.87s
Transcribing audio for segment 10 from 429.89s to 489.87s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  73%|███████▎  | 11/15 [01:53<00:46, 11.63s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 489.87s to 539.86s
Transcribing audio for segment 11 from 489.87s to 539.86s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  80%|████████  | 12/15 [02:04<00:34, 11.56s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 539.86s to 589.85s
Transcribing audio for segment 12 from 539.86s to 589.85s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  87%|████████▋ | 13/15 [02:14<00:21, 10.90s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 589.85s to 659.83s
Transcribing audio for segment 13 from 589.85s to 659.83s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  93%|█████████▎| 14/15 [02:24<00:10, 10.64s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 659.83s to 664.71s
Transcribing audio for segment 14 from 659.83s to 664.71s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed: 100%|██████████| 15/15 [02:32<00:00, 10.16s/segment]

Summary generation for segment 14 completed.

Processing completed.





Document saved as /content/outputs/B Video Organizational Design： Teil 4 Organisationsentwicklung.docx
Successfully processed: /content/B Video Organizational Design： Teil 4 Organisationsentwicklung.mp4
Output saved as: /content/outputs/B Video Organizational Design： Teil 4 Organisationsentwicklung.docx

Processing: /content/Video 4_Interview Prof Füller.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 29.999981115459452
Total frames: 31772
Video duration: 1059.07 seconds
Frame interval: 299 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:  99%|█████████▉| 31473/31772 [01:05<00:00, 479.54frame/s]


Adding final slide change at end of video (1059.07 seconds).
Total slide changes detected: 1

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/1 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 1059.07s
Transcribing audio for segment 0 from 0.00s to 1059.07s...
Error during transcription of segment 0: Error code: 413 - {'error': {'message': '413: Maximum content size limit (26214400) exceeded (26262329 bytes read)', 'type': 'server_error', 'param': None, 'code': None}}
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed: 100%|██████████| 1/1 [00:09<00:00,  9.27s/segment]

Summary generation for segment 0 completed.

Processing completed.





Document saved as /content/outputs/Video 4_Interview Prof Füller.docx
Successfully processed: /content/Video 4_Interview Prof Füller.mp4
Output saved as: /content/outputs/Video 4_Interview Prof Füller.docx

Processing: /content/B Change Management_ Video 4 7 Basisprozesse nach Glasl.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 29.971223657453745
Total frames: 41897
Video duration: 1397.91 seconds
Frame interval: 299 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|▏         | 598/41897 [00:00<00:45, 909.70frame/s]

Slide change detected at 19.95 seconds (frame 598). SSIM: 0.4289


Frames Processed:   4%|▍         | 1794/41897 [00:01<00:31, 1290.87frame/s]

Slide change detected at 59.86 seconds (frame 1794). SSIM: 0.6178


Frames Processed:   6%|▌         | 2392/41897 [00:01<00:29, 1344.30frame/s]

Slide change detected at 79.81 seconds (frame 2392). SSIM: 0.6741


Frames Processed:   9%|▊         | 3588/41897 [00:03<00:38, 991.54frame/s] 

Slide change detected at 119.71 seconds (frame 3588). SSIM: 0.7269


Frames Processed:  25%|██▍       | 10465/41897 [00:12<00:21, 1470.53frame/s]

Slide change detected at 339.19 seconds (frame 10166). SSIM: 0.5883


Frames Processed:  26%|██▋       | 11063/41897 [00:12<00:21, 1454.08frame/s]

Slide change detected at 369.12 seconds (frame 11063). SSIM: 0.7794


Frames Processed:  32%|███▏      | 13455/41897 [00:14<00:18, 1502.58frame/s]

Slide change detected at 448.93 seconds (frame 13455). SSIM: 0.6287


Frames Processed:  33%|███▎      | 13754/41897 [00:14<00:19, 1416.81frame/s]

Slide change detected at 458.91 seconds (frame 13754). SSIM: 0.4979


Frames Processed:  36%|███▌      | 14950/41897 [00:15<00:19, 1350.75frame/s]

Slide change detected at 498.81 seconds (frame 14950). SSIM: 0.5483


Frames Processed:  38%|███▊      | 15847/41897 [00:16<00:18, 1397.78frame/s]

Slide change detected at 528.74 seconds (frame 15847). SSIM: 0.8251


Frames Processed:  40%|███▉      | 16744/41897 [00:16<00:17, 1444.21frame/s]

Slide change detected at 548.69 seconds (frame 16445). SSIM: 0.7488


Frames Processed:  48%|████▊     | 20033/41897 [00:18<00:13, 1609.91frame/s]

Slide change detected at 658.43 seconds (frame 19734). SSIM: 0.6036
Slide change detected at 668.41 seconds (frame 20033). SSIM: 0.6911


Frames Processed:  50%|████▉     | 20930/41897 [00:19<00:13, 1591.15frame/s]

Slide change detected at 688.36 seconds (frame 20631). SSIM: 0.5571
Slide change detected at 698.34 seconds (frame 20930). SSIM: 0.6598


Frames Processed:  58%|█████▊    | 24219/41897 [00:22<00:15, 1138.19frame/s]

Slide change detected at 808.08 seconds (frame 24219). SSIM: 0.3303


Frames Processed:  59%|█████▊    | 24518/41897 [00:22<00:15, 1104.25frame/s]

Slide change detected at 818.05 seconds (frame 24518). SSIM: 0.6923


Frames Processed:  61%|██████▏   | 25714/41897 [00:23<00:15, 1031.60frame/s]

Slide change detected at 857.96 seconds (frame 25714). SSIM: 0.2868


Frames Processed:  63%|██████▎   | 26312/41897 [00:24<00:14, 1094.38frame/s]

Slide change detected at 877.91 seconds (frame 26312). SSIM: 0.5011


Frames Processed:  64%|██████▍   | 26910/41897 [00:24<00:13, 1091.92frame/s]

Slide change detected at 897.86 seconds (frame 26910). SSIM: 0.6906


Frames Processed:  66%|██████▌   | 27508/41897 [00:25<00:11, 1226.98frame/s]

Slide change detected at 907.84 seconds (frame 27209). SSIM: 0.8002


Frames Processed:  67%|██████▋   | 28106/41897 [00:25<00:10, 1363.12frame/s]

Slide change detected at 927.79 seconds (frame 27807). SSIM: 0.7915


Frames Processed:  69%|██████▉   | 29003/41897 [00:26<00:08, 1485.42frame/s]

Slide change detected at 957.72 seconds (frame 28704). SSIM: 0.7522


Frames Processed:  74%|███████▎  | 30797/41897 [00:27<00:06, 1650.37frame/s]

Slide change detected at 1017.58 seconds (frame 30498). SSIM: 0.6363


Frames Processed:  78%|███████▊  | 32591/41897 [00:28<00:06, 1428.92frame/s]

Slide change detected at 1087.41 seconds (frame 32591). SSIM: 0.6569


Frames Processed:  79%|███████▊  | 32890/41897 [00:28<00:06, 1389.38frame/s]

Slide change detected at 1097.39 seconds (frame 32890). SSIM: 0.7320


Frames Processed:  83%|████████▎ | 34983/41897 [00:30<00:04, 1457.72frame/s]

Slide change detected at 1157.24 seconds (frame 34684). SSIM: 0.6094
Slide change detected at 1167.22 seconds (frame 34983). SSIM: 0.3921


Frames Processed:  85%|████████▍ | 35581/41897 [00:30<00:04, 1462.27frame/s]

Slide change detected at 1177.20 seconds (frame 35282). SSIM: 0.4815


Frames Processed:  87%|████████▋ | 36478/41897 [00:31<00:03, 1467.64frame/s]

Slide change detected at 1207.12 seconds (frame 36179). SSIM: 0.6353
Slide change detected at 1217.10 seconds (frame 36478). SSIM: 0.4247


Frames Processed:  88%|████████▊ | 36777/41897 [00:31<00:03, 1483.40frame/s]

Slide change detected at 1227.08 seconds (frame 36777). SSIM: 0.4838


Frames Processed:  90%|████████▉ | 37674/41897 [00:31<00:02, 1519.10frame/s]

Slide change detected at 1247.03 seconds (frame 37375). SSIM: 0.6292
Slide change detected at 1257.01 seconds (frame 37674). SSIM: 0.6498


Frames Processed:  93%|█████████▎| 38870/41897 [00:32<00:01, 1587.37frame/s]

Slide change detected at 1286.93 seconds (frame 38571). SSIM: 0.7792


Frames Processed:  94%|█████████▍| 39468/41897 [00:33<00:01, 1528.59frame/s]

Slide change detected at 1306.89 seconds (frame 39169). SSIM: 0.5196
Slide change detected at 1316.86 seconds (frame 39468). SSIM: 0.6248


Frames Processed:  96%|█████████▌| 40066/41897 [00:33<00:01, 1526.79frame/s]

Slide change detected at 1336.82 seconds (frame 40066). SSIM: 0.5496


Frames Processed:  96%|█████████▋| 40365/41897 [00:33<00:01, 1419.24frame/s]

Slide change detected at 1346.79 seconds (frame 40365). SSIM: 0.6348


Frames Processed:  98%|█████████▊| 41262/41897 [00:34<00:00, 1378.39frame/s]

Slide change detected at 1376.72 seconds (frame 41262). SSIM: 0.4827


Frames Processed:  99%|█████████▉| 41561/41897 [00:34<00:00, 1337.27frame/s]

Slide change detected at 1386.70 seconds (frame 41561). SSIM: 0.4207


Frames Processed:  99%|█████████▉| 41598/41897 [00:34<00:00, 1193.83frame/s]

Slide change detected at 1396.67 seconds (frame 41860). SSIM: 0.5191
Adding final slide change at end of video (1397.91 seconds).
Total slide changes detected: 43

Step 2: Processing slides and audio...



Segments Processed:   0%|          | 0/43 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 19.95s
Transcribing audio for segment 0 from 0.00s to 19.95s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   2%|▏         | 1/43 [00:06<04:22,  6.24s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 19.95s to 59.86s
Transcribing audio for segment 1 from 19.95s to 59.86s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:   5%|▍         | 2/43 [00:15<05:18,  7.77s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 59.86s to 79.81s
Transcribing audio for segment 2 from 59.86s to 79.81s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:   7%|▋         | 3/43 [00:20<04:33,  6.84s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 79.81s to 119.71s
Transcribing audio for segment 3 from 79.81s to 119.71s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:   9%|▉         | 4/43 [00:28<04:34,  7.04s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 119.71s to 339.19s
Transcribing audio for segment 4 from 119.71s to 339.19s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  12%|█▏        | 5/43 [00:48<07:24, 11.71s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 339.19s to 369.12s
Transcribing audio for segment 5 from 339.19s to 369.12s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  14%|█▍        | 6/43 [00:56<06:31, 10.58s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 369.12s to 448.93s
Transcribing audio for segment 6 from 369.12s to 448.93s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  16%|█▋        | 7/43 [01:09<06:44, 11.25s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 448.93s to 458.91s
Transcribing audio for segment 7 from 448.93s to 458.91s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  19%|█▊        | 8/43 [01:14<05:28,  9.38s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 458.91s to 498.81s
Transcribing audio for segment 8 from 458.91s to 498.81s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  21%|██        | 9/43 [01:23<05:13,  9.23s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 498.81s to 528.74s
Transcribing audio for segment 9 from 498.81s to 528.74s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  23%|██▎       | 10/43 [01:32<05:04,  9.23s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 528.74s to 548.69s
Transcribing audio for segment 10 from 528.74s to 548.69s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  26%|██▌       | 11/43 [01:42<05:00,  9.40s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 548.69s to 658.43s
Transcribing audio for segment 11 from 548.69s to 658.43s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  28%|██▊       | 12/43 [01:55<05:25, 10.50s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 658.43s to 668.41s
Transcribing audio for segment 12 from 658.43s to 668.41s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  30%|███       | 13/43 [02:02<04:47,  9.59s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 668.41s to 688.36s
Transcribing audio for segment 13 from 668.41s to 688.36s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  33%|███▎      | 14/43 [02:14<04:53, 10.13s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 688.36s to 698.34s
Transcribing audio for segment 14 from 688.36s to 698.34s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  35%|███▍      | 15/43 [02:20<04:13,  9.06s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 698.34s to 808.08s
Transcribing audio for segment 15 from 698.34s to 808.08s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  37%|███▋      | 16/43 [02:32<04:28,  9.95s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 808.08s to 818.05s
Transcribing audio for segment 16 from 808.08s to 818.05s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  40%|███▉      | 17/43 [02:41<04:06,  9.48s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 818.05s to 857.96s
Transcribing audio for segment 17 from 818.05s to 857.96s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  42%|████▏     | 18/43 [02:49<03:44,  8.98s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 857.96s to 877.91s
Transcribing audio for segment 18 from 857.96s to 877.91s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  44%|████▍     | 19/43 [02:55<03:20,  8.34s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 877.91s to 897.86s
Transcribing audio for segment 19 from 877.91s to 897.86s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  47%|████▋     | 20/43 [03:05<03:21,  8.75s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 897.86s to 907.84s
Transcribing audio for segment 20 from 897.86s to 907.84s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed:  49%|████▉     | 21/43 [03:13<03:06,  8.47s/segment]

Summary generation for segment 20 completed.

Processing segment 21: 907.84s to 927.79s
Transcribing audio for segment 21 from 907.84s to 927.79s...
Transcription for segment 21 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 21...


Segments Processed:  51%|█████     | 22/43 [03:27<03:34, 10.24s/segment]

Summary generation for segment 21 completed.

Processing segment 22: 927.79s to 957.72s
Transcribing audio for segment 22 from 927.79s to 957.72s...
Transcription for segment 22 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 22...


Segments Processed:  53%|█████▎    | 23/43 [03:39<03:31, 10.59s/segment]

Summary generation for segment 22 completed.

Processing segment 23: 957.72s to 1017.58s
Transcribing audio for segment 23 from 957.72s to 1017.58s...
Transcription for segment 23 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 23...


Segments Processed:  56%|█████▌    | 24/43 [03:49<03:17, 10.42s/segment]

Summary generation for segment 23 completed.

Processing segment 24: 1017.58s to 1087.41s
Transcribing audio for segment 24 from 1017.58s to 1087.41s...
Transcription for segment 24 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 24...


Segments Processed:  58%|█████▊    | 25/43 [03:58<02:59,  9.97s/segment]

Summary generation for segment 24 completed.

Processing segment 25: 1087.41s to 1097.39s
Transcribing audio for segment 25 from 1087.41s to 1097.39s...
Transcription for segment 25 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 25...


Segments Processed:  60%|██████    | 26/43 [04:05<02:36,  9.20s/segment]

Summary generation for segment 25 completed.

Processing segment 26: 1097.39s to 1157.24s
Transcribing audio for segment 26 from 1097.39s to 1157.24s...
Transcription for segment 26 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 26...


Segments Processed:  63%|██████▎   | 27/43 [04:17<02:37,  9.87s/segment]

Summary generation for segment 26 completed.

Processing segment 27: 1157.24s to 1167.22s
Transcribing audio for segment 27 from 1157.24s to 1167.22s...
Transcription for segment 27 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 27...


Segments Processed:  65%|██████▌   | 28/43 [04:27<02:29,  9.95s/segment]

Summary generation for segment 27 completed.

Processing segment 28: 1167.22s to 1177.20s
Transcribing audio for segment 28 from 1167.22s to 1177.20s...
Transcription for segment 28 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 28...


Segments Processed:  67%|██████▋   | 29/43 [04:34<02:09,  9.22s/segment]

Summary generation for segment 28 completed.

Processing segment 29: 1177.20s to 1207.12s
Transcribing audio for segment 29 from 1177.20s to 1207.12s...
Transcription for segment 29 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 29...


Segments Processed:  70%|██████▉   | 30/43 [04:45<02:04,  9.59s/segment]

Summary generation for segment 29 completed.

Processing segment 30: 1207.12s to 1217.10s
Transcribing audio for segment 30 from 1207.12s to 1217.10s...
Transcription for segment 30 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 30...


Segments Processed:  72%|███████▏  | 31/43 [05:03<02:25, 12.09s/segment]

Summary generation for segment 30 completed.

Processing segment 31: 1217.10s to 1227.08s
Transcribing audio for segment 31 from 1217.10s to 1227.08s...
Transcription for segment 31 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 31...


Segments Processed:  74%|███████▍  | 32/43 [05:10<01:57, 10.64s/segment]

Summary generation for segment 31 completed.

Processing segment 32: 1227.08s to 1247.03s
Transcribing audio for segment 32 from 1227.08s to 1247.03s...
Transcription for segment 32 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 32...


Segments Processed:  77%|███████▋  | 33/43 [05:22<01:51, 11.17s/segment]

Summary generation for segment 32 completed.

Processing segment 33: 1247.03s to 1257.01s
Transcribing audio for segment 33 from 1247.03s to 1257.01s...
Transcription for segment 33 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 33...


Segments Processed:  79%|███████▉  | 34/43 [05:35<01:44, 11.64s/segment]

Summary generation for segment 33 completed.

Processing segment 34: 1257.01s to 1286.93s
Transcribing audio for segment 34 from 1257.01s to 1286.93s...
Transcription for segment 34 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 34...


Segments Processed:  81%|████████▏ | 35/43 [05:45<01:28, 11.04s/segment]

Summary generation for segment 34 completed.

Processing segment 35: 1286.93s to 1306.89s
Transcribing audio for segment 35 from 1286.93s to 1306.89s...
Transcription for segment 35 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 35...


Segments Processed:  84%|████████▎ | 36/43 [05:52<01:09,  9.94s/segment]

Summary generation for segment 35 completed.

Processing segment 36: 1306.89s to 1316.86s
Transcribing audio for segment 36 from 1306.89s to 1316.86s...
Transcription for segment 36 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 36...


Segments Processed:  86%|████████▌ | 37/43 [06:02<01:00, 10.10s/segment]

Summary generation for segment 36 completed.

Processing segment 37: 1316.86s to 1336.82s
Transcribing audio for segment 37 from 1316.86s to 1336.82s...
Transcription for segment 37 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 37...


Segments Processed:  88%|████████▊ | 38/43 [06:13<00:50, 10.10s/segment]

Summary generation for segment 37 completed.

Processing segment 38: 1336.82s to 1346.79s
Transcribing audio for segment 38 from 1336.82s to 1346.79s...
Transcription for segment 38 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 38...


Segments Processed:  91%|█████████ | 39/43 [06:21<00:38,  9.59s/segment]

Summary generation for segment 38 completed.

Processing segment 39: 1346.79s to 1376.72s
Transcribing audio for segment 39 from 1346.79s to 1376.72s...
Transcription for segment 39 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 39...


Segments Processed:  93%|█████████▎| 40/43 [06:29<00:27,  9.07s/segment]

Summary generation for segment 39 completed.

Processing segment 40: 1376.72s to 1386.70s
Transcribing audio for segment 40 from 1376.72s to 1386.70s...
Transcription for segment 40 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 40...


Segments Processed:  95%|█████████▌| 41/43 [06:37<00:17,  8.67s/segment]

Summary generation for segment 40 completed.

Processing segment 41: 1386.70s to 1396.67s
Transcribing audio for segment 41 from 1386.70s to 1396.67s...
Transcription for segment 41 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 41...


Segments Processed:  98%|█████████▊| 42/43 [06:48<00:09,  9.49s/segment]

Summary generation for segment 41 completed.

Processing segment 42: 1396.67s to 1397.91s
Transcribing audio for segment 42 from 1396.67s to 1397.91s...
Transcription for segment 42 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 42...


Segments Processed: 100%|██████████| 43/43 [06:54<00:00,  9.64s/segment]

Summary generation for segment 42 completed.

Processing completed.





Document saved as /content/outputs/B Change Management_ Video 4 7 Basisprozesse nach Glasl.docx
Successfully processed: /content/B Change Management_ Video 4 7 Basisprozesse nach Glasl.mp4
Output saved as: /content/outputs/B Change Management_ Video 4 7 Basisprozesse nach Glasl.docx

Processing: /content/Basic Finance - Teil 02.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 30.0
Total frames: 18013
Video duration: 600.43 seconds
Frame interval: 300 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   2%|▏         | 300/18013 [00:00<00:50, 349.16frame/s]

Slide change detected at 10.00 seconds (frame 300). SSIM: 0.5267


Frames Processed:   5%|▍         | 900/18013 [00:02<00:51, 330.56frame/s]

Slide change detected at 30.00 seconds (frame 900). SSIM: 0.8496


Frames Processed:   8%|▊         | 1500/18013 [00:04<00:45, 362.77frame/s]

Slide change detected at 50.00 seconds (frame 1500). SSIM: 0.8391


Frames Processed:  23%|██▎       | 4200/18013 [00:08<00:19, 722.93frame/s]

Slide change detected at 140.00 seconds (frame 4200). SSIM: 0.8341


Frames Processed:  45%|████▍     | 8100/18013 [00:13<00:14, 688.89frame/s]

Slide change detected at 270.00 seconds (frame 8100). SSIM: 0.8744


Frames Processed:  52%|█████▏    | 9300/18013 [00:15<00:12, 674.01frame/s]

Slide change detected at 310.00 seconds (frame 9300). SSIM: 0.7874


Frames Processed:  95%|█████████▍| 17100/18013 [00:28<00:01, 488.78frame/s]

Slide change detected at 570.00 seconds (frame 17100). SSIM: 0.7500


Frames Processed:  98%|█████████▊| 17713/18013 [00:32<00:00, 540.94frame/s]


Adding final slide change at end of video (600.43 seconds).
Total slide changes detected: 8

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/8 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 10.00s
Transcribing audio for segment 0 from 0.00s to 10.00s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:  12%|█▎        | 1/8 [00:07<00:52,  7.48s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 10.00s to 30.00s
Transcribing audio for segment 1 from 10.00s to 30.00s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  25%|██▌       | 2/8 [00:13<00:40,  6.77s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 30.00s to 50.00s
Transcribing audio for segment 2 from 30.00s to 50.00s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  38%|███▊      | 3/8 [00:20<00:34,  6.88s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 50.00s to 140.00s
Transcribing audio for segment 3 from 50.00s to 140.00s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  50%|█████     | 4/8 [00:30<00:31,  7.93s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 140.00s to 270.00s
Transcribing audio for segment 4 from 140.00s to 270.00s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  62%|██████▎   | 5/8 [00:41<00:27,  9.17s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 270.00s to 310.00s
Transcribing audio for segment 5 from 270.00s to 310.00s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  75%|███████▌  | 6/8 [00:52<00:19,  9.67s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 310.00s to 570.00s
Transcribing audio for segment 6 from 310.00s to 570.00s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  88%|████████▊ | 7/8 [01:14<00:13, 13.77s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 570.00s to 600.43s
Transcribing audio for segment 7 from 570.00s to 600.43s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed: 100%|██████████| 8/8 [01:22<00:00, 10.33s/segment]

Summary generation for segment 7 completed.

Processing completed.





Document saved as /content/outputs/Basic Finance - Teil 02.docx
Successfully processed: /content/Basic Finance - Teil 02.mp4
Output saved as: /content/outputs/Basic Finance - Teil 02.docx

Processing: /content/B Change Management_ Video 2 Erste Modelle.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 29.942840129612645
Total frames: 22845
Video duration: 762.95 seconds
Frame interval: 299 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   3%|▎         | 598/22845 [00:00<00:14, 1512.03frame/s]

Slide change detected at 9.99 seconds (frame 299). SSIM: 0.4689


Frames Processed:  10%|█         | 2392/22845 [00:01<00:12, 1670.71frame/s]

Slide change detected at 69.90 seconds (frame 2093). SSIM: 0.8388


Frames Processed:  13%|█▎        | 2990/22845 [00:01<00:12, 1603.22frame/s]

Slide change detected at 99.86 seconds (frame 2990). SSIM: 0.8760


Frames Processed:  29%|██▉       | 6578/22845 [00:04<00:11, 1451.02frame/s]

Slide change detected at 209.70 seconds (frame 6279). SSIM: 0.8217


Frames Processed:  37%|███▋      | 8372/22845 [00:05<00:11, 1265.97frame/s]

Slide change detected at 279.60 seconds (frame 8372). SSIM: 0.5568


Frames Processed:  39%|███▉      | 8970/22845 [00:06<00:11, 1197.78frame/s]

Slide change detected at 299.57 seconds (frame 8970). SSIM: 0.5452


Frames Processed:  42%|████▏     | 9568/22845 [00:06<00:11, 1194.68frame/s]

Slide change detected at 319.54 seconds (frame 9568). SSIM: 0.5990


Frames Processed:  46%|████▌     | 10465/22845 [00:07<00:10, 1184.23frame/s]

Slide change detected at 349.50 seconds (frame 10465). SSIM: 0.4421


Frames Processed:  47%|████▋     | 10764/22845 [00:07<00:10, 1205.27frame/s]

Slide change detected at 359.48 seconds (frame 10764). SSIM: 0.4447


Frames Processed:  50%|████▉     | 11362/22845 [00:08<00:09, 1186.66frame/s]

Slide change detected at 379.46 seconds (frame 11362). SSIM: 0.8075


Frames Processed:  59%|█████▉    | 13455/22845 [00:09<00:06, 1456.39frame/s]

Slide change detected at 449.36 seconds (frame 13455). SSIM: 0.5321


Frames Processed:  60%|██████    | 13754/22845 [00:09<00:06, 1440.02frame/s]

Slide change detected at 459.34 seconds (frame 13754). SSIM: 0.5131


Frames Processed:  72%|███████▏  | 16445/22845 [00:11<00:04, 1395.58frame/s]

Slide change detected at 549.21 seconds (frame 16445). SSIM: 0.7506


Frames Processed:  84%|████████▍ | 19136/22845 [00:13<00:02, 1535.49frame/s]

Slide change detected at 629.10 seconds (frame 18837). SSIM: 0.6019
Slide change detected at 639.08 seconds (frame 19136). SSIM: 0.8023


Frames Processed:  86%|████████▋ | 19734/22845 [00:14<00:01, 1579.82frame/s]

Slide change detected at 649.07 seconds (frame 19435). SSIM: 0.6165


Frames Processed:  94%|█████████▍| 21528/22845 [00:15<00:00, 1615.00frame/s]

Slide change detected at 718.97 seconds (frame 21528). SSIM: 0.5776


Frames Processed:  96%|█████████▌| 21827/22845 [00:15<00:00, 1430.37frame/s]

Slide change detected at 728.96 seconds (frame 21827). SSIM: 0.6254


Frames Processed:  97%|█████████▋| 22126/22845 [00:15<00:00, 1393.31frame/s]

Slide change detected at 738.94 seconds (frame 22126). SSIM: 0.8773


Frames Processed:  98%|█████████▊| 22425/22845 [00:15<00:00, 1364.00frame/s]

Slide change detected at 748.93 seconds (frame 22425). SSIM: 0.6414


Frames Processed:  99%|█████████▊| 22546/22845 [00:16<00:00, 1403.46frame/s]

Slide change detected at 758.91 seconds (frame 22724). SSIM: 0.5610
Adding final slide change at end of video (762.95 seconds).
Total slide changes detected: 22

Step 2: Processing slides and audio...



Segments Processed:   0%|          | 0/22 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 9.99s
Transcribing audio for segment 0 from 0.00s to 9.99s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   5%|▍         | 1/22 [00:05<01:58,  5.64s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 9.99s to 69.90s
Transcribing audio for segment 1 from 9.99s to 69.90s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:   9%|▉         | 2/22 [00:13<02:17,  6.88s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 69.90s to 99.86s
Transcribing audio for segment 2 from 69.90s to 99.86s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  14%|█▎        | 3/22 [00:22<02:30,  7.94s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 99.86s to 209.70s
Transcribing audio for segment 3 from 99.86s to 209.70s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  18%|█▊        | 4/22 [00:43<03:53, 12.99s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 209.70s to 279.60s
Transcribing audio for segment 4 from 209.70s to 279.60s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  23%|██▎       | 5/22 [00:54<03:31, 12.45s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 279.60s to 299.57s
Transcribing audio for segment 5 from 279.60s to 299.57s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  27%|██▋       | 6/22 [01:02<02:55, 10.94s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 299.57s to 319.54s
Transcribing audio for segment 6 from 299.57s to 319.54s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  32%|███▏      | 7/22 [01:09<02:23,  9.58s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 319.54s to 349.50s
Transcribing audio for segment 7 from 319.54s to 349.50s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  36%|███▋      | 8/22 [01:18<02:11,  9.42s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 349.50s to 359.48s
Transcribing audio for segment 8 from 349.50s to 359.48s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  41%|████      | 9/22 [01:25<01:52,  8.65s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 359.48s to 379.46s
Transcribing audio for segment 9 from 359.48s to 379.46s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  45%|████▌     | 10/22 [01:33<01:41,  8.50s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 379.46s to 449.36s
Transcribing audio for segment 10 from 379.46s to 449.36s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  50%|█████     | 11/22 [01:46<01:49,  9.91s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 449.36s to 459.34s
Transcribing audio for segment 11 from 449.36s to 459.34s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  55%|█████▍    | 12/22 [01:57<01:41, 10.17s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 459.34s to 549.21s
Transcribing audio for segment 12 from 459.34s to 549.21s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  59%|█████▉    | 13/22 [02:07<01:30, 10.05s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 549.21s to 629.10s
Transcribing audio for segment 13 from 549.21s to 629.10s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  64%|██████▎   | 14/22 [02:19<01:24, 10.56s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 629.10s to 639.08s
Transcribing audio for segment 14 from 629.10s to 639.08s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  68%|██████▊   | 15/22 [02:25<01:05,  9.39s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 639.08s to 649.07s
Transcribing audio for segment 15 from 639.08s to 649.07s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  73%|███████▎  | 16/22 [02:33<00:53,  8.87s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 649.07s to 718.97s
Transcribing audio for segment 16 from 649.07s to 718.97s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  77%|███████▋  | 17/22 [02:43<00:46,  9.32s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 718.97s to 728.96s
Transcribing audio for segment 17 from 718.97s to 728.96s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  82%|████████▏ | 18/22 [02:51<00:35,  8.75s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 728.96s to 738.94s
Transcribing audio for segment 18 from 728.96s to 738.94s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  86%|████████▋ | 19/22 [02:58<00:24,  8.16s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 738.94s to 748.93s
Transcribing audio for segment 19 from 738.94s to 748.93s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  91%|█████████ | 20/22 [03:10<00:19,  9.52s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 748.93s to 758.91s
Transcribing audio for segment 20 from 748.93s to 758.91s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed:  95%|█████████▌| 21/22 [03:15<00:08,  8.22s/segment]

Summary generation for segment 20 completed.

Processing segment 21: 758.91s to 762.95s
Transcribing audio for segment 21 from 758.91s to 762.95s...
Transcription for segment 21 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 21...


Segments Processed: 100%|██████████| 22/22 [03:22<00:00,  9.20s/segment]

Summary generation for segment 21 completed.

Processing completed.





Document saved as /content/outputs/B Change Management_ Video 2 Erste Modelle.docx
Successfully processed: /content/B Change Management_ Video 2 Erste Modelle.mp4
Output saved as: /content/outputs/B Change Management_ Video 2 Erste Modelle.docx

Processing: /content/Brand Management 3_ Brand Experience Management.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 12.423773760734939
Total frames: 27080
Video duration: 2179.69 seconds
Frame interval: 124 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|          | 248/27080 [00:01<02:07, 211.20frame/s]

Slide change detected at 19.96 seconds (frame 248). SSIM: 0.6898


Frames Processed:   1%|▏         | 372/27080 [00:01<01:50, 241.70frame/s]

Slide change detected at 29.94 seconds (frame 372). SSIM: 0.8798


Frames Processed:   6%|▌         | 1612/27080 [00:05<01:29, 284.90frame/s]

Slide change detected at 129.75 seconds (frame 1612). SSIM: 0.7758


Frames Processed:  10%|▉         | 2604/27080 [00:09<01:33, 260.78frame/s]

Slide change detected at 209.60 seconds (frame 2604). SSIM: 0.7514


Frames Processed:  12%|█▏        | 3224/27080 [00:12<01:24, 281.58frame/s]

Slide change detected at 259.50 seconds (frame 3224). SSIM: 0.7045


Frames Processed:  16%|█▌        | 4216/27080 [00:15<01:13, 311.42frame/s]

Slide change detected at 339.35 seconds (frame 4216). SSIM: 0.6502


Frames Processed:  19%|█▉        | 5084/27080 [00:18<01:12, 302.97frame/s]

Slide change detected at 409.22 seconds (frame 5084). SSIM: 0.8594


Frames Processed:  21%|██        | 5580/27080 [00:20<01:28, 244.20frame/s]

Slide change detected at 449.14 seconds (frame 5580). SSIM: 0.6990


Frames Processed:  27%|██▋       | 7316/27080 [00:26<01:08, 290.24frame/s]

Slide change detected at 588.87 seconds (frame 7316). SSIM: 0.7664


Frames Processed:  33%|███▎      | 8804/27080 [00:31<01:03, 288.98frame/s]

Slide change detected at 708.64 seconds (frame 8804). SSIM: 0.8640


Frames Processed:  36%|███▌      | 9796/27080 [00:35<01:09, 248.60frame/s]

Slide change detected at 788.49 seconds (frame 9796). SSIM: 0.8392


Frames Processed:  44%|████▍     | 11904/27080 [00:42<00:50, 302.14frame/s]

Slide change detected at 958.16 seconds (frame 11904). SSIM: 0.5160


Frames Processed:  45%|████▌     | 12276/27080 [00:44<00:54, 269.43frame/s]

Slide change detected at 988.11 seconds (frame 12276). SSIM: 0.3974


Frames Processed:  46%|████▌     | 12524/27080 [00:45<00:52, 276.99frame/s]

Slide change detected at 1008.07 seconds (frame 12524). SSIM: 0.4387


Frames Processed:  49%|████▉     | 13268/27080 [00:48<01:05, 211.72frame/s]

Slide change detected at 1067.95 seconds (frame 13268). SSIM: 0.5444


Frames Processed:  50%|████▉     | 13516/27080 [00:49<00:58, 232.87frame/s]

Slide change detected at 1087.91 seconds (frame 13516). SSIM: 0.3665


Frames Processed:  52%|█████▏    | 14012/27080 [00:51<00:48, 268.79frame/s]

Slide change detected at 1127.84 seconds (frame 14012). SSIM: 0.2226


Frames Processed:  54%|█████▎    | 14508/27080 [00:53<00:44, 283.54frame/s]

Slide change detected at 1167.76 seconds (frame 14508). SSIM: 0.1866


Frames Processed:  55%|█████▍    | 14880/27080 [00:54<00:46, 260.93frame/s]

Slide change detected at 1197.70 seconds (frame 14880). SSIM: 0.2773


Frames Processed:  56%|█████▋    | 15252/27080 [00:55<00:46, 252.66frame/s]

Slide change detected at 1227.65 seconds (frame 15252). SSIM: 0.2161


Frames Processed:  57%|█████▋    | 15500/27080 [00:57<00:46, 247.96frame/s]

Slide change detected at 1247.61 seconds (frame 15500). SSIM: 0.1849


Frames Processed:  58%|█████▊    | 15624/27080 [00:57<00:47, 242.51frame/s]

Slide change detected at 1257.59 seconds (frame 15624). SSIM: 0.8448


Frames Processed:  60%|█████▉    | 16120/27080 [00:59<00:44, 246.35frame/s]

Slide change detected at 1297.51 seconds (frame 16120). SSIM: 0.2625


Frames Processed:  62%|██████▏   | 16740/27080 [01:02<00:44, 234.37frame/s]

Slide change detected at 1347.42 seconds (frame 16740). SSIM: 0.3241


Frames Processed:  64%|██████▎   | 17236/27080 [01:04<00:34, 281.40frame/s]

Slide change detected at 1387.34 seconds (frame 17236). SSIM: 0.6615


Frames Processed:  65%|██████▌   | 17732/27080 [01:05<00:35, 266.33frame/s]

Slide change detected at 1427.26 seconds (frame 17732). SSIM: 0.8268


Frames Processed:  66%|██████▋   | 17980/27080 [01:06<00:34, 261.21frame/s]

Slide change detected at 1447.23 seconds (frame 17980). SSIM: 0.5569


Frames Processed:  71%|███████   | 19220/27080 [01:11<00:26, 300.01frame/s]

Slide change detected at 1547.03 seconds (frame 19220). SSIM: 0.6593


Frames Processed:  71%|███████▏  | 19344/27080 [01:11<00:25, 297.61frame/s]

Slide change detected at 1557.01 seconds (frame 19344). SSIM: 0.7998


Frames Processed:  72%|███████▏  | 19468/27080 [01:12<00:26, 287.42frame/s]

Slide change detected at 1567.00 seconds (frame 19468). SSIM: 0.5667


Frames Processed:  75%|███████▌  | 20336/27080 [01:16<00:28, 236.76frame/s]

Slide change detected at 1636.86 seconds (frame 20336). SSIM: 0.6814


Frames Processed:  81%|████████  | 21948/27080 [01:21<00:16, 320.46frame/s]

Slide change detected at 1766.61 seconds (frame 21948). SSIM: 0.7355


Frames Processed:  84%|████████▍ | 22816/27080 [01:23<00:13, 314.23frame/s]

Slide change detected at 1836.48 seconds (frame 22816). SSIM: 0.5675


Frames Processed:  86%|████████▌ | 23312/27080 [01:25<00:12, 301.28frame/s]

Slide change detected at 1876.40 seconds (frame 23312). SSIM: 0.3028


Frames Processed:  89%|████████▉ | 24056/27080 [01:29<00:13, 226.72frame/s]

Slide change detected at 1936.29 seconds (frame 24056). SSIM: 0.3022


Frames Processed:  91%|█████████ | 24552/27080 [01:31<00:10, 251.41frame/s]

Slide change detected at 1976.21 seconds (frame 24552). SSIM: 0.3566


Frames Processed:  94%|█████████▍| 25544/27080 [01:34<00:05, 281.75frame/s]

Slide change detected at 2056.06 seconds (frame 25544). SSIM: 0.3681


Frames Processed:  95%|█████████▌| 25792/27080 [01:35<00:04, 277.32frame/s]

Slide change detected at 2076.02 seconds (frame 25792). SSIM: 0.4558


Frames Processed:  99%|█████████▉| 26908/27080 [01:39<00:00, 267.63frame/s]

Slide change detected at 2165.85 seconds (frame 26908). SSIM: 0.5802


Frames Processed: 100%|█████████▉| 26956/27080 [01:39<00:00, 269.64frame/s]


Adding final slide change at end of video (2179.69 seconds).
Total slide changes detected: 40

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/40 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 19.96s
Transcribing audio for segment 0 from 0.00s to 19.96s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   2%|▎         | 1/40 [00:06<04:01,  6.18s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 19.96s to 29.94s
Transcribing audio for segment 1 from 19.96s to 29.94s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:   5%|▌         | 2/40 [00:13<04:19,  6.84s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 29.94s to 129.75s
Transcribing audio for segment 2 from 29.94s to 129.75s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:   8%|▊         | 3/40 [00:25<05:39,  9.16s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 129.75s to 209.60s
Transcribing audio for segment 3 from 129.75s to 209.60s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  10%|█         | 4/40 [00:37<06:11, 10.31s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 209.60s to 259.50s
Transcribing audio for segment 4 from 209.60s to 259.50s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  12%|█▎        | 5/40 [00:45<05:35,  9.58s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 259.50s to 339.35s
Transcribing audio for segment 5 from 259.50s to 339.35s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  15%|█▌        | 6/40 [00:57<05:55, 10.47s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 339.35s to 409.22s
Transcribing audio for segment 6 from 339.35s to 409.22s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  18%|█▊        | 7/40 [01:13<06:35, 11.97s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 409.22s to 449.14s
Transcribing audio for segment 7 from 409.22s to 449.14s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  20%|██        | 8/40 [01:21<05:49, 10.93s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 449.14s to 588.87s
Transcribing audio for segment 8 from 449.14s to 588.87s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  22%|██▎       | 9/40 [01:39<06:45, 13.07s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 588.87s to 708.64s
Transcribing audio for segment 9 from 588.87s to 708.64s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  25%|██▌       | 10/40 [01:54<06:46, 13.54s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 708.64s to 788.49s
Transcribing audio for segment 10 from 708.64s to 788.49s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  28%|██▊       | 11/40 [02:05<06:14, 12.93s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 788.49s to 958.16s
Transcribing audio for segment 11 from 788.49s to 958.16s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  30%|███       | 12/40 [02:22<06:36, 14.17s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 958.16s to 988.11s
Transcribing audio for segment 12 from 958.16s to 988.11s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  32%|███▎      | 13/40 [02:32<05:47, 12.86s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 988.11s to 1008.07s
Transcribing audio for segment 13 from 988.11s to 1008.07s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  35%|███▌      | 14/40 [02:40<05:00, 11.54s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 1008.07s to 1067.95s
Transcribing audio for segment 14 from 1008.07s to 1067.95s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  38%|███▊      | 15/40 [02:53<04:59, 11.97s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 1067.95s to 1087.91s
Transcribing audio for segment 15 from 1067.95s to 1087.91s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  40%|████      | 16/40 [03:05<04:42, 11.78s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 1087.91s to 1127.84s
Transcribing audio for segment 16 from 1087.91s to 1127.84s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  42%|████▎     | 17/40 [03:14<04:14, 11.08s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 1127.84s to 1167.76s
Transcribing audio for segment 17 from 1127.84s to 1167.76s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  45%|████▌     | 18/40 [03:23<03:50, 10.47s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 1167.76s to 1197.70s
Transcribing audio for segment 18 from 1167.76s to 1197.70s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  48%|████▊     | 19/40 [03:31<03:21,  9.58s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 1197.70s to 1227.65s
Transcribing audio for segment 19 from 1197.70s to 1227.65s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  50%|█████     | 20/40 [03:40<03:07,  9.39s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 1227.65s to 1247.61s
Transcribing audio for segment 20 from 1227.65s to 1247.61s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed:  52%|█████▎    | 21/40 [03:47<02:48,  8.86s/segment]

Summary generation for segment 20 completed.

Processing segment 21: 1247.61s to 1257.59s
Transcribing audio for segment 21 from 1247.61s to 1257.59s...
Transcription for segment 21 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 21...


Segments Processed:  55%|█████▌    | 22/40 [03:55<02:33,  8.52s/segment]

Summary generation for segment 21 completed.

Processing segment 22: 1257.59s to 1297.51s
Transcribing audio for segment 22 from 1257.59s to 1297.51s...
Transcription for segment 22 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 22...


Segments Processed:  57%|█████▊    | 23/40 [04:05<02:29,  8.81s/segment]

Summary generation for segment 22 completed.

Processing segment 23: 1297.51s to 1347.42s
Transcribing audio for segment 23 from 1297.51s to 1347.42s...
Transcription for segment 23 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 23...


Segments Processed:  60%|██████    | 24/40 [04:14<02:24,  9.05s/segment]

Summary generation for segment 23 completed.

Processing segment 24: 1347.42s to 1387.34s
Transcribing audio for segment 24 from 1347.42s to 1387.34s...
Transcription for segment 24 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 24...


Segments Processed:  62%|██████▎   | 25/40 [04:24<02:18,  9.26s/segment]

Summary generation for segment 24 completed.

Processing segment 25: 1387.34s to 1427.26s
Transcribing audio for segment 25 from 1387.34s to 1427.26s...
Transcription for segment 25 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 25...


Segments Processed:  65%|██████▌   | 26/40 [04:34<02:12,  9.46s/segment]

Summary generation for segment 25 completed.

Processing segment 26: 1427.26s to 1447.23s
Transcribing audio for segment 26 from 1427.26s to 1447.23s...
Transcription for segment 26 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 26...


Segments Processed:  68%|██████▊   | 27/40 [04:45<02:08,  9.91s/segment]

Summary generation for segment 26 completed.

Processing segment 27: 1447.23s to 1547.03s
Transcribing audio for segment 27 from 1447.23s to 1547.03s...
Transcription for segment 27 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 27...


Segments Processed:  70%|███████   | 28/40 [04:59<02:12, 11.07s/segment]

Summary generation for segment 27 completed.

Processing segment 28: 1547.03s to 1557.01s
Transcribing audio for segment 28 from 1547.03s to 1557.01s...
Transcription for segment 28 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 28...


Segments Processed:  72%|███████▎  | 29/40 [05:09<01:59, 10.89s/segment]

Summary generation for segment 28 completed.

Processing segment 29: 1557.01s to 1567.00s
Transcribing audio for segment 29 from 1557.01s to 1567.00s...
Transcription for segment 29 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 29...


Segments Processed:  75%|███████▌  | 30/40 [05:21<01:51, 11.18s/segment]

Summary generation for segment 29 completed.

Processing segment 30: 1567.00s to 1636.86s
Transcribing audio for segment 30 from 1567.00s to 1636.86s...
Transcription for segment 30 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 30...


Segments Processed:  78%|███████▊  | 31/40 [05:35<01:47, 11.91s/segment]

Summary generation for segment 30 completed.

Processing segment 31: 1636.86s to 1766.61s
Transcribing audio for segment 31 from 1636.86s to 1766.61s...
Transcription for segment 31 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 31...


Segments Processed:  80%|████████  | 32/40 [05:49<01:40, 12.59s/segment]

Summary generation for segment 31 completed.

Processing segment 32: 1766.61s to 1836.48s
Transcribing audio for segment 32 from 1766.61s to 1836.48s...
Transcription for segment 32 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 32...


Segments Processed:  82%|████████▎ | 33/40 [05:58<01:21, 11.68s/segment]

Summary generation for segment 32 completed.

Processing segment 33: 1836.48s to 1876.40s
Transcribing audio for segment 33 from 1836.48s to 1876.40s...
Transcription for segment 33 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 33...


Segments Processed:  85%|████████▌ | 34/40 [06:08<01:07, 11.20s/segment]

Summary generation for segment 33 completed.

Processing segment 34: 1876.40s to 1936.29s
Transcribing audio for segment 34 from 1876.40s to 1936.29s...
Transcription for segment 34 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 34...


Segments Processed:  88%|████████▊ | 35/40 [06:20<00:56, 11.39s/segment]

Summary generation for segment 34 completed.

Processing segment 35: 1936.29s to 1976.21s
Transcribing audio for segment 35 from 1936.29s to 1976.21s...
Transcription for segment 35 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 35...


Segments Processed:  90%|█████████ | 36/40 [06:32<00:45, 11.40s/segment]

Summary generation for segment 35 completed.

Processing segment 36: 1976.21s to 2056.06s
Transcribing audio for segment 36 from 1976.21s to 2056.06s...
Transcription for segment 36 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 36...


Segments Processed:  92%|█████████▎| 37/40 [06:43<00:34, 11.37s/segment]

Summary generation for segment 36 completed.

Processing segment 37: 2056.06s to 2076.02s
Transcribing audio for segment 37 from 2056.06s to 2076.02s...
Transcription for segment 37 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 37...


Segments Processed:  95%|█████████▌| 38/40 [06:52<00:21, 10.59s/segment]

Summary generation for segment 37 completed.

Processing segment 38: 2076.02s to 2165.85s
Transcribing audio for segment 38 from 2076.02s to 2165.85s...
Transcription for segment 38 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 38...


Segments Processed:  98%|█████████▊| 39/40 [07:03<00:10, 10.89s/segment]

Summary generation for segment 38 completed.

Processing segment 39: 2165.85s to 2179.69s
Transcribing audio for segment 39 from 2165.85s to 2179.69s...
Transcription for segment 39 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 39...


Segments Processed: 100%|██████████| 40/40 [07:12<00:00, 10.80s/segment]

Summary generation for segment 39 completed.

Processing completed.





Document saved as /content/outputs/Brand Management 3_ Brand Experience Management.docx
Successfully processed: /content/Brand Management 3_ Brand Experience Management.mp4
Output saved as: /content/outputs/Brand Management 3_ Brand Experience Management.docx

Processing: /content/03 Kategorisierung und Priorisierung.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 30.0
Total frames: 53309
Video duration: 1776.97 seconds
Frame interval: 300 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|          | 300/53309 [00:01<05:04, 174.01frame/s]

Slide change detected at 10.00 seconds (frame 300). SSIM: 0.8913


Frames Processed:   1%|          | 600/53309 [00:03<04:28, 196.17frame/s]

Slide change detected at 20.00 seconds (frame 600). SSIM: 0.3182


Frames Processed:   2%|▏         | 900/53309 [00:04<04:05, 213.07frame/s]

Slide change detected at 30.00 seconds (frame 900). SSIM: 0.7045


Frames Processed:   6%|▌         | 3300/53309 [00:15<03:55, 211.92frame/s]

Slide change detected at 110.00 seconds (frame 3300). SSIM: 0.8215


Frames Processed:  12%|█▏        | 6300/53309 [00:28<03:32, 221.47frame/s]

Slide change detected at 210.00 seconds (frame 6300). SSIM: 0.6936


Frames Processed:  25%|██▍       | 13200/53309 [00:59<02:46, 240.41frame/s]

Slide change detected at 440.00 seconds (frame 13200). SSIM: 0.7338


Frames Processed:  26%|██▌       | 13800/53309 [01:01<02:43, 242.23frame/s]

Slide change detected at 460.00 seconds (frame 13800). SSIM: 0.8121


Frames Processed:  41%|████      | 21900/53309 [01:37<02:12, 236.75frame/s]

Slide change detected at 730.00 seconds (frame 21900). SSIM: 0.7429


Frames Processed:  46%|████▌     | 24600/53309 [01:49<02:01, 235.43frame/s]

Slide change detected at 820.00 seconds (frame 24600). SSIM: 0.7542


Frames Processed:  61%|██████    | 32400/53309 [02:24<01:50, 189.35frame/s]

Slide change detected at 1080.00 seconds (frame 32400). SSIM: 0.8702


Frames Processed:  67%|██████▋   | 35700/53309 [02:39<01:24, 208.37frame/s]

Slide change detected at 1190.00 seconds (frame 35700). SSIM: 0.7984


Frames Processed:  77%|███████▋  | 40800/53309 [03:00<00:42, 293.82frame/s]

Slide change detected at 1360.00 seconds (frame 40800). SSIM: 0.8110


Frames Processed:  87%|████████▋ | 46200/53309 [03:11<00:13, 544.31frame/s]

Slide change detected at 1540.00 seconds (frame 46200). SSIM: 0.7997


Frames Processed:  95%|█████████▌| 50700/53309 [03:20<00:05, 453.29frame/s]

Slide change detected at 1690.00 seconds (frame 50700). SSIM: 0.7743


Frames Processed:  99%|█████████▉| 53009/53309 [03:25<00:01, 257.82frame/s]


Adding final slide change at end of video (1776.97 seconds).
Total slide changes detected: 15

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/15 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 10.00s
Transcribing audio for segment 0 from 0.00s to 10.00s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   7%|▋         | 1/15 [00:12<02:55, 12.56s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 10.00s to 20.00s
Transcribing audio for segment 1 from 10.00s to 20.00s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  13%|█▎        | 2/15 [00:23<02:34, 11.88s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 20.00s to 30.00s
Transcribing audio for segment 2 from 20.00s to 30.00s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  20%|██        | 3/15 [00:32<02:06, 10.56s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 30.00s to 110.00s
Transcribing audio for segment 3 from 30.00s to 110.00s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  27%|██▋       | 4/15 [00:41<01:49,  9.94s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 110.00s to 210.00s
Transcribing audio for segment 4 from 110.00s to 210.00s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  33%|███▎      | 5/15 [00:51<01:38,  9.81s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 210.00s to 440.00s
Transcribing audio for segment 5 from 210.00s to 440.00s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  40%|████      | 6/15 [01:13<02:06, 14.03s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 440.00s to 460.00s
Transcribing audio for segment 6 from 440.00s to 460.00s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  47%|████▋     | 7/15 [01:21<01:36, 12.07s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 460.00s to 730.00s
Transcribing audio for segment 7 from 460.00s to 730.00s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  53%|█████▎    | 8/15 [01:39<01:37, 13.99s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 730.00s to 820.00s
Transcribing audio for segment 8 from 730.00s to 820.00s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  60%|██████    | 9/15 [01:53<01:23, 13.95s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 820.00s to 1080.00s
Transcribing audio for segment 9 from 820.00s to 1080.00s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  67%|██████▋   | 10/15 [02:13<01:19, 15.82s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 1080.00s to 1190.00s
Transcribing audio for segment 10 from 1080.00s to 1190.00s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  73%|███████▎  | 11/15 [02:28<01:02, 15.51s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 1190.00s to 1360.00s
Transcribing audio for segment 11 from 1190.00s to 1360.00s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  80%|████████  | 12/15 [02:46<00:48, 16.26s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 1360.00s to 1540.00s
Transcribing audio for segment 12 from 1360.00s to 1540.00s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  87%|████████▋ | 13/15 [03:03<00:32, 16.49s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 1540.00s to 1690.00s
Transcribing audio for segment 13 from 1540.00s to 1690.00s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  93%|█████████▎| 14/15 [03:18<00:15, 15.97s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 1690.00s to 1776.97s
Transcribing audio for segment 14 from 1690.00s to 1776.97s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed: 100%|██████████| 15/15 [03:32<00:00, 14.17s/segment]

Summary generation for segment 14 completed.

Processing completed.





Document saved as /content/outputs/03 Kategorisierung und Priorisierung.docx
Successfully processed: /content/03 Kategorisierung und Priorisierung.mp4
Output saved as: /content/outputs/03 Kategorisierung und Priorisierung.docx

Processing: /content/Modul 12 Teil C - Operative Steuerung.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 29.99437278271674
Total frames: 34945
Video duration: 1165.05 seconds
Frame interval: 299 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   2%|▏         | 598/34945 [00:02<02:38, 216.31frame/s]

Slide change detected at 19.94 seconds (frame 598). SSIM: 0.4400


Frames Processed:   7%|▋         | 2392/34945 [00:10<02:08, 253.22frame/s]

Slide change detected at 79.75 seconds (frame 2392). SSIM: 0.7724


Frames Processed:  24%|██▍       | 8372/34945 [00:18<00:33, 798.95frame/s]

Slide change detected at 279.12 seconds (frame 8372). SSIM: 0.7668


Frames Processed:  30%|██▉       | 10465/34945 [00:26<01:25, 287.37frame/s]

Slide change detected at 348.90 seconds (frame 10465). SSIM: 0.7219


Frames Processed:  39%|███▊      | 13455/34945 [00:34<01:02, 346.58frame/s]

Slide change detected at 448.58 seconds (frame 13455). SSIM: 0.7113


Frames Processed:  57%|█████▋    | 20033/34945 [00:52<00:42, 352.00frame/s]

Slide change detected at 667.89 seconds (frame 20033). SSIM: 0.7838


Frames Processed:  73%|███████▎  | 25415/34945 [01:13<00:38, 247.69frame/s]

Slide change detected at 847.33 seconds (frame 25415). SSIM: 0.8227


Frames Processed:  79%|███████▊  | 27508/34945 [01:20<00:21, 345.46frame/s]

Slide change detected at 917.11 seconds (frame 27508). SSIM: 0.8223


Frames Processed:  94%|█████████▍| 32890/34945 [01:32<00:04, 504.78frame/s]

Slide change detected at 1096.54 seconds (frame 32890). SSIM: 0.8088


Frames Processed:  99%|█████████▉| 34646/34945 [01:35<00:00, 361.11frame/s]


Adding final slide change at end of video (1165.05 seconds).
Total slide changes detected: 10

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/10 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 19.94s
Transcribing audio for segment 0 from 0.00s to 19.94s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:  10%|█         | 1/10 [00:06<00:58,  6.55s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 19.94s to 79.75s
Transcribing audio for segment 1 from 19.94s to 79.75s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  20%|██        | 2/10 [00:17<01:11,  8.90s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 79.75s to 279.12s
Transcribing audio for segment 2 from 79.75s to 279.12s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  30%|███       | 3/10 [00:36<01:34, 13.56s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 279.12s to 348.90s
Transcribing audio for segment 3 from 279.12s to 348.90s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  40%|████      | 4/10 [00:46<01:13, 12.21s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 348.90s to 448.58s
Transcribing audio for segment 4 from 348.90s to 448.58s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  50%|█████     | 5/10 [01:00<01:04, 12.84s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 448.58s to 667.89s
Transcribing audio for segment 5 from 448.58s to 667.89s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  60%|██████    | 6/10 [01:18<00:57, 14.49s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 667.89s to 847.33s
Transcribing audio for segment 6 from 667.89s to 847.33s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  70%|███████   | 7/10 [01:33<00:44, 14.88s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 847.33s to 917.11s
Transcribing audio for segment 7 from 847.33s to 917.11s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  80%|████████  | 8/10 [01:46<00:28, 14.22s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 917.11s to 1096.54s
Transcribing audio for segment 8 from 917.11s to 1096.54s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  90%|█████████ | 9/10 [02:01<00:14, 14.60s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 1096.54s to 1165.05s
Transcribing audio for segment 9 from 1096.54s to 1165.05s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed: 100%|██████████| 10/10 [02:12<00:00, 13.21s/segment]

Summary generation for segment 9 completed.

Processing completed.





Document saved as /content/outputs/Modul 12 Teil C - Operative Steuerung.docx
Successfully processed: /content/Modul 12 Teil C - Operative Steuerung.mp4
Output saved as: /content/outputs/Modul 12 Teil C - Operative Steuerung.docx

Processing: /content/Modul 12 Teil B - Strategische Steuerung.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 29.995907421683086
Total frames: 132429
Video duration: 4414.90 seconds
Frame interval: 299 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   0%|          | 299/132429 [00:01<11:37, 189.33frame/s]

Slide change detected at 9.97 seconds (frame 299). SSIM: 0.4279


Frames Processed:   1%|▏         | 1794/132429 [00:07<08:34, 254.14frame/s]

Slide change detected at 59.81 seconds (frame 1794). SSIM: 0.7681


Frames Processed:  18%|█▊        | 23920/132429 [00:48<03:23, 533.12frame/s]

Slide change detected at 797.44 seconds (frame 23920). SSIM: 0.7969


Frames Processed:  23%|██▎       | 31096/132429 [01:03<04:31, 372.77frame/s]

Slide change detected at 1036.67 seconds (frame 31096). SSIM: 0.7870


Frames Processed:  26%|██▋       | 34983/132429 [01:19<05:46, 280.84frame/s]

Slide change detected at 1166.26 seconds (frame 34983). SSIM: 0.7132


Frames Processed:  31%|███       | 40664/132429 [01:30<03:58, 383.97frame/s]

Slide change detected at 1355.65 seconds (frame 40664). SSIM: 0.7030


Frames Processed:  33%|███▎      | 44252/132429 [01:43<05:08, 286.18frame/s]

Slide change detected at 1475.27 seconds (frame 44252). SSIM: 0.7528


Frames Processed:  36%|███▌      | 47242/132429 [01:47<02:08, 660.62frame/s]

Slide change detected at 1574.95 seconds (frame 47242). SSIM: 0.7332


Frames Processed:  37%|███▋      | 48737/132429 [01:50<01:57, 711.71frame/s]

Slide change detected at 1624.79 seconds (frame 48737). SSIM: 0.7016


Frames Processed:  38%|███▊      | 50531/132429 [01:52<02:04, 659.97frame/s]

Slide change detected at 1684.60 seconds (frame 50531). SSIM: 0.7574


Frames Processed:  41%|████      | 53820/132429 [01:58<02:40, 488.31frame/s]

Slide change detected at 1794.24 seconds (frame 53820). SSIM: 0.7338


Frames Processed:  47%|████▋     | 62192/132429 [02:27<03:37, 322.53frame/s]

Slide change detected at 2073.35 seconds (frame 62192). SSIM: 0.7497


Frames Processed:  48%|████▊     | 63089/132429 [02:30<03:26, 335.64frame/s]

Slide change detected at 2103.25 seconds (frame 63089). SSIM: 0.7510


Frames Processed:  50%|████▉     | 65780/132429 [02:41<04:31, 245.83frame/s]

Slide change detected at 2192.97 seconds (frame 65780). SSIM: 0.7482


Frames Processed:  51%|█████▏    | 67873/132429 [02:51<04:39, 231.06frame/s]

Slide change detected at 2262.74 seconds (frame 67873). SSIM: 0.7449


Frames Processed:  56%|█████▌    | 74152/132429 [03:17<05:13, 185.95frame/s]

Slide change detected at 2472.07 seconds (frame 74152). SSIM: 0.7006


Frames Processed:  56%|█████▌    | 74451/132429 [03:19<04:45, 203.27frame/s]

Slide change detected at 2482.04 seconds (frame 74451). SSIM: 0.7002


Frames Processed:  57%|█████▋    | 75647/132429 [03:23<03:44, 252.54frame/s]

Slide change detected at 2521.91 seconds (frame 75647). SSIM: 0.7049


Frames Processed:  60%|██████    | 79534/132429 [03:37<02:46, 317.08frame/s]

Slide change detected at 2651.50 seconds (frame 79534). SSIM: 0.6465


Frames Processed:  64%|██████▎   | 84318/132429 [03:48<01:28, 543.29frame/s]

Slide change detected at 2810.98 seconds (frame 84318). SSIM: 0.6929


Frames Processed:  64%|██████▍   | 85215/132429 [03:50<01:21, 577.04frame/s]

Slide change detected at 2840.89 seconds (frame 85215). SSIM: 0.7581


Frames Processed:  67%|██████▋   | 88504/132429 [03:56<01:22, 532.58frame/s]

Slide change detected at 2950.54 seconds (frame 88504). SSIM: 0.7674


Frames Processed:  70%|██████▉   | 92690/132429 [04:03<01:21, 488.43frame/s]

Slide change detected at 3090.09 seconds (frame 92690). SSIM: 0.5888


Frames Processed:  75%|███████▍  | 98670/132429 [04:25<01:53, 296.66frame/s]

Slide change detected at 3289.45 seconds (frame 98670). SSIM: 0.7210


Frames Processed:  76%|███████▌  | 100165/132429 [04:29<01:11, 448.47frame/s]

Slide change detected at 3339.29 seconds (frame 100165). SSIM: 0.6463


Frames Processed:  79%|███████▊  | 104052/132429 [04:38<01:32, 305.41frame/s]

Slide change detected at 3468.87 seconds (frame 104052). SSIM: 0.7405


Frames Processed:  83%|████████▎ | 109434/132429 [04:47<00:44, 520.47frame/s]

Slide change detected at 3648.30 seconds (frame 109434). SSIM: 0.7431


Frames Processed:  87%|████████▋ | 115414/132429 [05:05<01:05, 258.23frame/s]

Slide change detected at 3847.66 seconds (frame 115414). SSIM: 0.7595


Frames Processed:  90%|█████████ | 119301/132429 [05:15<00:47, 277.75frame/s]

Slide change detected at 3977.24 seconds (frame 119301). SSIM: 0.7873


Frames Processed:  92%|█████████▏| 122291/132429 [05:27<00:42, 238.40frame/s]

Slide change detected at 4076.92 seconds (frame 122291). SSIM: 0.7541


Frames Processed:  98%|█████████▊| 129168/132429 [05:53<00:13, 234.69frame/s]

Slide change detected at 4306.19 seconds (frame 129168). SSIM: 0.6749


Frames Processed:  98%|█████████▊| 130065/132429 [05:57<00:09, 243.55frame/s]

Slide change detected at 4336.09 seconds (frame 130065). SSIM: 0.6821


Frames Processed:  98%|█████████▊| 130364/132429 [05:58<00:08, 246.92frame/s]

Slide change detected at 4346.06 seconds (frame 130364). SSIM: 0.6846


Frames Processed:  99%|█████████▉| 130962/132429 [06:00<00:05, 254.22frame/s]

Slide change detected at 4366.00 seconds (frame 130962). SSIM: 0.6801


Frames Processed: 100%|█████████▉| 132130/132429 [06:04<00:00, 362.24frame/s]

Slide change detected at 4405.87 seconds (frame 132158). SSIM: 0.8000
Adding final slide change at end of video (4414.90 seconds).
Total slide changes detected: 36

Step 2: Processing slides and audio...



Segments Processed:   0%|          | 0/36 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 9.97s
Transcribing audio for segment 0 from 0.00s to 9.97s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   3%|▎         | 1/36 [00:06<03:54,  6.69s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 9.97s to 59.81s
Transcribing audio for segment 1 from 9.97s to 59.81s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:   6%|▌         | 2/36 [00:19<05:59, 10.57s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 59.81s to 797.44s
Transcribing audio for segment 2 from 59.81s to 797.44s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:   8%|▊         | 3/36 [01:04<14:23, 26.18s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 797.44s to 1036.67s
Transcribing audio for segment 3 from 797.44s to 1036.67s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  11%|█         | 4/36 [01:31<14:04, 26.40s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 1036.67s to 1166.26s
Transcribing audio for segment 4 from 1036.67s to 1166.26s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  14%|█▍        | 5/36 [01:46<11:34, 22.41s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 1166.26s to 1355.65s
Transcribing audio for segment 5 from 1166.26s to 1355.65s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  17%|█▋        | 6/36 [02:07<10:55, 21.84s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 1355.65s to 1475.27s
Transcribing audio for segment 6 from 1355.65s to 1475.27s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  19%|█▉        | 7/36 [02:20<09:11, 19.03s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 1475.27s to 1574.95s
Transcribing audio for segment 7 from 1475.27s to 1574.95s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  22%|██▏       | 8/36 [02:35<08:18, 17.79s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 1574.95s to 1624.79s
Transcribing audio for segment 8 from 1574.95s to 1624.79s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  25%|██▌       | 9/36 [02:49<07:23, 16.41s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 1624.79s to 1684.60s
Transcribing audio for segment 9 from 1624.79s to 1684.60s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  28%|██▊       | 10/36 [03:06<07:11, 16.61s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 1684.60s to 1794.24s
Transcribing audio for segment 10 from 1684.60s to 1794.24s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  31%|███       | 11/36 [03:22<06:55, 16.61s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 1794.24s to 2073.35s
Transcribing audio for segment 11 from 1794.24s to 2073.35s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  33%|███▎      | 12/36 [03:43<07:08, 17.85s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 2073.35s to 2103.25s
Transcribing audio for segment 12 from 2073.35s to 2103.25s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  36%|███▌      | 13/36 [03:55<06:05, 15.90s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 2103.25s to 2192.97s
Transcribing audio for segment 13 from 2103.25s to 2192.97s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  39%|███▉      | 14/36 [04:10<05:44, 15.68s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 2192.97s to 2262.74s
Transcribing audio for segment 14 from 2192.97s to 2262.74s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  42%|████▏     | 15/36 [04:23<05:14, 14.98s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 2262.74s to 2472.07s
Transcribing audio for segment 15 from 2262.74s to 2472.07s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  44%|████▍     | 16/36 [04:41<05:19, 15.98s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 2472.07s to 2482.04s
Transcribing audio for segment 16 from 2472.07s to 2482.04s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  47%|████▋     | 17/36 [04:51<04:26, 14.01s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 2482.04s to 2521.91s
Transcribing audio for segment 17 from 2482.04s to 2521.91s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  50%|█████     | 18/36 [05:05<04:11, 13.98s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 2521.91s to 2651.50s
Transcribing audio for segment 18 from 2521.91s to 2651.50s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  53%|█████▎    | 19/36 [05:21<04:07, 14.58s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 2651.50s to 2810.98s
Transcribing audio for segment 19 from 2651.50s to 2810.98s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  56%|█████▌    | 20/36 [05:40<04:16, 16.06s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 2810.98s to 2840.89s
Transcribing audio for segment 20 from 2810.98s to 2840.89s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed:  58%|█████▊    | 21/36 [05:53<03:44, 14.98s/segment]

Summary generation for segment 20 completed.

Processing segment 21: 2840.89s to 2950.54s
Transcribing audio for segment 21 from 2840.89s to 2950.54s...
Transcription for segment 21 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 21...


Segments Processed:  61%|██████    | 22/36 [06:11<03:44, 16.02s/segment]

Summary generation for segment 21 completed.

Processing segment 22: 2950.54s to 3090.09s
Transcribing audio for segment 22 from 2950.54s to 3090.09s...
Transcription for segment 22 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 22...


Segments Processed:  64%|██████▍   | 23/36 [06:32<03:48, 17.61s/segment]

Summary generation for segment 22 completed.

Processing segment 23: 3090.09s to 3289.45s
Transcribing audio for segment 23 from 3090.09s to 3289.45s...
Transcription for segment 23 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 23...


Segments Processed:  67%|██████▋   | 24/36 [06:53<03:42, 18.51s/segment]

Summary generation for segment 23 completed.

Processing segment 24: 3289.45s to 3339.29s
Transcribing audio for segment 24 from 3289.45s to 3339.29s...
Transcription for segment 24 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 24...


Segments Processed:  69%|██████▉   | 25/36 [07:08<03:12, 17.53s/segment]

Summary generation for segment 24 completed.

Processing segment 25: 3339.29s to 3468.87s
Transcribing audio for segment 25 from 3339.29s to 3468.87s...
Transcription for segment 25 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 25...


Segments Processed:  72%|███████▏  | 26/36 [07:27<02:59, 17.90s/segment]

Summary generation for segment 25 completed.

Processing segment 26: 3468.87s to 3648.30s
Transcribing audio for segment 26 from 3468.87s to 3648.30s...
Transcription for segment 26 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 26...


Segments Processed:  75%|███████▌  | 27/36 [07:49<02:53, 19.23s/segment]

Summary generation for segment 26 completed.

Processing segment 27: 3648.30s to 3847.66s
Transcribing audio for segment 27 from 3648.30s to 3847.66s...
Transcription for segment 27 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 27...


Segments Processed:  78%|███████▊  | 28/36 [08:09<02:33, 19.23s/segment]

Summary generation for segment 27 completed.

Processing segment 28: 3847.66s to 3977.24s
Transcribing audio for segment 28 from 3847.66s to 3977.24s...
Transcription for segment 28 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 28...


Segments Processed:  81%|████████  | 29/36 [08:28<02:14, 19.22s/segment]

Summary generation for segment 28 completed.

Processing segment 29: 3977.24s to 4076.92s
Transcribing audio for segment 29 from 3977.24s to 4076.92s...
Transcription for segment 29 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 29...


Segments Processed:  83%|████████▎ | 30/36 [08:46<01:53, 18.92s/segment]

Summary generation for segment 29 completed.

Processing segment 30: 4076.92s to 4306.19s
Transcribing audio for segment 30 from 4076.92s to 4306.19s...
Transcription for segment 30 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 30...


Segments Processed:  86%|████████▌ | 31/36 [09:09<01:40, 20.10s/segment]

Summary generation for segment 30 completed.

Processing segment 31: 4306.19s to 4336.09s
Transcribing audio for segment 31 from 4306.19s to 4336.09s...
Transcription for segment 31 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 31...


Segments Processed:  89%|████████▉ | 32/36 [09:26<01:16, 19.13s/segment]

Summary generation for segment 31 completed.

Processing segment 32: 4336.09s to 4346.06s
Transcribing audio for segment 32 from 4336.09s to 4346.06s...
Transcription for segment 32 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 32...


Segments Processed:  92%|█████████▏| 33/36 [09:37<00:50, 16.79s/segment]

Summary generation for segment 32 completed.

Processing segment 33: 4346.06s to 4366.00s
Transcribing audio for segment 33 from 4346.06s to 4366.00s...
Transcription for segment 33 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 33...


Segments Processed:  94%|█████████▍| 34/36 [09:52<00:32, 16.17s/segment]

Summary generation for segment 33 completed.

Processing segment 34: 4366.00s to 4405.87s
Transcribing audio for segment 34 from 4366.00s to 4405.87s...
Transcription for segment 34 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 34...


Segments Processed:  97%|█████████▋| 35/36 [10:05<00:15, 15.41s/segment]

Summary generation for segment 34 completed.

Processing segment 35: 4405.87s to 4414.90s
Transcribing audio for segment 35 from 4405.87s to 4414.90s...
Transcription for segment 35 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 35...


Segments Processed: 100%|██████████| 36/36 [10:17<00:00, 17.14s/segment]

Summary generation for segment 35 completed.

Processing completed.





Document saved as /content/outputs/Modul 12 Teil B - Strategische Steuerung.docx
Successfully processed: /content/Modul 12 Teil B - Strategische Steuerung.mp4
Output saved as: /content/outputs/Modul 12 Teil B - Strategische Steuerung.docx

Processing: /content/Brand Management 5_ Markenportfolio-Management.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 12.084670513810503
Total frames: 30385
Video duration: 2514.34 seconds
Frame interval: 120 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|          | 360/30385 [00:01<01:54, 262.63frame/s]

Slide change detected at 29.79 seconds (frame 360). SSIM: 0.6025


Frames Processed:   3%|▎         | 960/30385 [00:03<01:38, 298.07frame/s]

Slide change detected at 79.44 seconds (frame 960). SSIM: 0.6675


Frames Processed:   4%|▍         | 1200/30385 [00:04<01:39, 293.37frame/s]

Slide change detected at 99.30 seconds (frame 1200). SSIM: 0.8478


Frames Processed:   7%|▋         | 2160/30385 [00:07<01:40, 281.04frame/s]

Slide change detected at 178.74 seconds (frame 2160). SSIM: 0.8996


Frames Processed:   8%|▊         | 2520/30385 [00:09<01:58, 234.17frame/s]

Slide change detected at 208.53 seconds (frame 2520). SSIM: 0.8815


Frames Processed:  10%|█         | 3120/30385 [00:11<01:48, 252.04frame/s]

Slide change detected at 258.18 seconds (frame 3120). SSIM: 0.8104


Frames Processed:  16%|█▌        | 4800/30385 [00:17<01:23, 305.10frame/s]

Slide change detected at 397.20 seconds (frame 4800). SSIM: 0.8427


Frames Processed:  17%|█▋        | 5280/30385 [00:19<01:21, 308.61frame/s]

Slide change detected at 436.92 seconds (frame 5280). SSIM: 0.7919


Frames Processed:  20%|██        | 6120/30385 [00:22<01:34, 256.15frame/s]

Slide change detected at 506.43 seconds (frame 6120). SSIM: 0.8148


Frames Processed:  23%|██▎       | 6960/30385 [00:25<01:31, 256.58frame/s]

Slide change detected at 575.94 seconds (frame 6960). SSIM: 0.7333


Frames Processed:  25%|██▍       | 7560/30385 [00:27<01:21, 280.57frame/s]

Slide change detected at 625.59 seconds (frame 7560). SSIM: 0.8060


Frames Processed:  32%|███▏      | 9600/30385 [00:34<01:18, 263.90frame/s]

Slide change detected at 794.39 seconds (frame 9600). SSIM: 0.8018


Frames Processed:  36%|███▌      | 10920/30385 [00:40<01:09, 281.32frame/s]

Slide change detected at 903.62 seconds (frame 10920). SSIM: 0.8760


Frames Processed:  41%|████      | 12480/30385 [00:45<01:04, 276.74frame/s]

Slide change detected at 1032.71 seconds (frame 12480). SSIM: 0.8478


Frames Processed:  42%|████▏     | 12720/30385 [00:46<01:02, 284.20frame/s]

Slide change detected at 1052.57 seconds (frame 12720). SSIM: 0.7362


Frames Processed:  48%|████▊     | 14520/30385 [00:53<01:01, 258.23frame/s]

Slide change detected at 1201.52 seconds (frame 14520). SSIM: 0.7670


Frames Processed:  53%|█████▎    | 15960/30385 [00:58<00:46, 307.39frame/s]

Slide change detected at 1320.68 seconds (frame 15960). SSIM: 0.8165


Frames Processed:  55%|█████▍    | 16680/30385 [01:01<00:52, 260.40frame/s]

Slide change detected at 1380.26 seconds (frame 16680). SSIM: 0.7994


Frames Processed:  56%|█████▌    | 16920/30385 [01:02<00:58, 230.99frame/s]

Slide change detected at 1400.12 seconds (frame 16920). SSIM: 0.7577


Frames Processed:  59%|█████▉    | 17880/30385 [01:06<00:45, 273.38frame/s]

Slide change detected at 1479.56 seconds (frame 17880). SSIM: 0.4429


Frames Processed:  60%|█████▉    | 18120/30385 [01:07<00:46, 265.84frame/s]

Slide change detected at 1499.42 seconds (frame 18120). SSIM: 0.4286


Frames Processed:  62%|██████▏   | 18960/30385 [01:10<00:41, 272.79frame/s]

Slide change detected at 1568.93 seconds (frame 18960). SSIM: 0.6282


Frames Processed:  69%|██████▉   | 21000/30385 [01:18<00:38, 242.78frame/s]

Slide change detected at 1737.74 seconds (frame 21000). SSIM: 0.7994


Frames Processed:  70%|██████▉   | 21120/30385 [01:18<00:36, 256.73frame/s]

Slide change detected at 1747.67 seconds (frame 21120). SSIM: 0.8228


Frames Processed:  73%|███████▎  | 22080/30385 [01:21<00:28, 289.18frame/s]

Slide change detected at 1827.11 seconds (frame 22080). SSIM: 0.7187


Frames Processed:  76%|███████▌  | 23160/30385 [01:25<00:26, 272.95frame/s]

Slide change detected at 1916.48 seconds (frame 23160). SSIM: 0.7732


Frames Processed:  79%|███████▊  | 23880/30385 [01:28<00:28, 228.02frame/s]

Slide change detected at 1976.06 seconds (frame 23880). SSIM: 0.7992


Frames Processed:  81%|████████  | 24480/30385 [01:31<00:23, 248.89frame/s]

Slide change detected at 2025.71 seconds (frame 24480). SSIM: 0.7769


Frames Processed:  84%|████████▎ | 25440/30385 [01:34<00:16, 300.17frame/s]

Slide change detected at 2105.15 seconds (frame 25440). SSIM: 0.7766


Frames Processed:  85%|████████▌ | 25920/30385 [01:36<00:15, 283.66frame/s]

Slide change detected at 2144.87 seconds (frame 25920). SSIM: 0.6644


Frames Processed:  87%|████████▋ | 26400/30385 [01:37<00:14, 279.32frame/s]

Slide change detected at 2184.59 seconds (frame 26400). SSIM: 0.7615


Frames Processed:  88%|████████▊ | 26880/30385 [01:39<00:12, 282.72frame/s]

Slide change detected at 2224.31 seconds (frame 26880). SSIM: 0.6792


Frames Processed:  91%|█████████ | 27720/30385 [01:43<00:12, 208.59frame/s]

Slide change detected at 2293.82 seconds (frame 27720). SSIM: 0.7310


Frames Processed:  92%|█████████▏| 28080/30385 [01:44<00:09, 242.46frame/s]

Slide change detected at 2323.60 seconds (frame 28080). SSIM: 0.8071


Frames Processed:  96%|█████████▌| 29040/30385 [01:48<00:04, 287.97frame/s]

Slide change detected at 2403.04 seconds (frame 29040). SSIM: 0.8049


Frames Processed: 100%|█████████▉| 30265/30385 [01:52<00:00, 268.20frame/s]


Adding final slide change at end of video (2514.34 seconds).
Total slide changes detected: 36

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/36 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 29.79s
Transcribing audio for segment 0 from 0.00s to 29.79s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   3%|▎         | 1/36 [00:09<05:16,  9.04s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 29.79s to 79.44s
Transcribing audio for segment 1 from 29.79s to 79.44s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:   6%|▌         | 2/36 [00:18<05:15,  9.29s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 79.44s to 99.30s
Transcribing audio for segment 2 from 79.44s to 99.30s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:   8%|▊         | 3/36 [00:27<04:56,  9.00s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 99.30s to 178.74s
Transcribing audio for segment 3 from 99.30s to 178.74s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  11%|█         | 4/36 [00:35<04:42,  8.84s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 178.74s to 208.53s
Transcribing audio for segment 4 from 178.74s to 208.53s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  14%|█▍        | 5/36 [00:43<04:27,  8.63s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 208.53s to 258.18s
Transcribing audio for segment 5 from 208.53s to 258.18s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  17%|█▋        | 6/36 [00:51<04:09,  8.30s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 258.18s to 397.20s
Transcribing audio for segment 6 from 258.18s to 397.20s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  19%|█▉        | 7/36 [01:06<04:59, 10.32s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 397.20s to 436.92s
Transcribing audio for segment 7 from 397.20s to 436.92s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  22%|██▏       | 8/36 [01:14<04:34,  9.82s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 436.92s to 506.43s
Transcribing audio for segment 8 from 436.92s to 506.43s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  25%|██▌       | 9/36 [01:26<04:36, 10.23s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 506.43s to 575.94s
Transcribing audio for segment 9 from 506.43s to 575.94s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  28%|██▊       | 10/36 [01:38<04:41, 10.83s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 575.94s to 625.59s
Transcribing audio for segment 10 from 575.94s to 625.59s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  31%|███       | 11/36 [01:47<04:20, 10.43s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 625.59s to 794.39s
Transcribing audio for segment 11 from 625.59s to 794.39s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  33%|███▎      | 12/36 [02:04<05:00, 12.51s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 794.39s to 903.62s
Transcribing audio for segment 12 from 794.39s to 903.62s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  36%|███▌      | 13/36 [02:20<05:05, 13.28s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 903.62s to 1032.71s
Transcribing audio for segment 13 from 903.62s to 1032.71s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  39%|███▉      | 14/36 [02:33<04:56, 13.48s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 1032.71s to 1052.57s
Transcribing audio for segment 14 from 1032.71s to 1052.57s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  42%|████▏     | 15/36 [02:44<04:23, 12.55s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 1052.57s to 1201.52s
Transcribing audio for segment 15 from 1052.57s to 1201.52s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  44%|████▍     | 16/36 [03:03<04:47, 14.38s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 1201.52s to 1320.68s
Transcribing audio for segment 16 from 1201.52s to 1320.68s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  47%|████▋     | 17/36 [03:16<04:29, 14.20s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 1320.68s to 1380.26s
Transcribing audio for segment 17 from 1320.68s to 1380.26s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  50%|█████     | 18/36 [03:26<03:49, 12.75s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 1380.26s to 1400.12s
Transcribing audio for segment 18 from 1380.26s to 1400.12s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  53%|█████▎    | 19/36 [03:34<03:12, 11.31s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 1400.12s to 1479.56s
Transcribing audio for segment 19 from 1400.12s to 1479.56s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  56%|█████▌    | 20/36 [05:08<09:38, 36.18s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 1479.56s to 1499.42s
Transcribing audio for segment 20 from 1479.56s to 1499.42s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed:  58%|█████▊    | 21/36 [05:15<06:54, 27.61s/segment]

Summary generation for segment 20 completed.

Processing segment 21: 1499.42s to 1568.93s
Transcribing audio for segment 21 from 1499.42s to 1568.93s...
Transcription for segment 21 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 21...


Segments Processed:  61%|██████    | 22/36 [05:26<05:15, 22.54s/segment]

Summary generation for segment 21 completed.

Processing segment 22: 1568.93s to 1737.74s
Transcribing audio for segment 22 from 1568.93s to 1737.74s...
Transcription for segment 22 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 22...


Segments Processed:  64%|██████▍   | 23/36 [05:43<04:31, 20.89s/segment]

Summary generation for segment 22 completed.

Processing segment 23: 1737.74s to 1747.67s
Transcribing audio for segment 23 from 1737.74s to 1747.67s...
Transcription for segment 23 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 23...


Segments Processed:  67%|██████▋   | 24/36 [05:53<03:30, 17.52s/segment]

Summary generation for segment 23 completed.

Processing segment 24: 1747.67s to 1827.11s
Transcribing audio for segment 24 from 1747.67s to 1827.11s...
Transcription for segment 24 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 24...


Segments Processed:  69%|██████▉   | 25/36 [06:07<03:02, 16.59s/segment]

Summary generation for segment 24 completed.

Processing segment 25: 1827.11s to 1916.48s
Transcribing audio for segment 25 from 1827.11s to 1916.48s...
Transcription for segment 25 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 25...


Segments Processed:  72%|███████▏  | 26/36 [06:26<02:51, 17.19s/segment]

Summary generation for segment 25 completed.

Processing segment 26: 1916.48s to 1976.06s
Transcribing audio for segment 26 from 1916.48s to 1976.06s...
Transcription for segment 26 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 26...


Segments Processed:  75%|███████▌  | 27/36 [06:38<02:21, 15.67s/segment]

Summary generation for segment 26 completed.

Processing segment 27: 1976.06s to 2025.71s
Transcribing audio for segment 27 from 1976.06s to 2025.71s...
Transcription for segment 27 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 27...


Segments Processed:  78%|███████▊  | 28/36 [06:49<01:55, 14.43s/segment]

Summary generation for segment 27 completed.

Processing segment 28: 2025.71s to 2105.15s
Transcribing audio for segment 28 from 2025.71s to 2105.15s...
Transcription for segment 28 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 28...


Segments Processed:  81%|████████  | 29/36 [07:04<01:40, 14.31s/segment]

Summary generation for segment 28 completed.

Processing segment 29: 2105.15s to 2144.87s
Transcribing audio for segment 29 from 2105.15s to 2144.87s...
Transcription for segment 29 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 29...


Segments Processed:  83%|████████▎ | 30/36 [07:16<01:22, 13.74s/segment]

Summary generation for segment 29 completed.

Processing segment 30: 2144.87s to 2184.59s
Transcribing audio for segment 30 from 2144.87s to 2184.59s...
Transcription for segment 30 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 30...


Segments Processed:  86%|████████▌ | 31/36 [07:27<01:04, 12.91s/segment]

Summary generation for segment 30 completed.

Processing segment 31: 2184.59s to 2224.31s
Transcribing audio for segment 31 from 2184.59s to 2224.31s...
Transcription for segment 31 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 31...


Segments Processed:  89%|████████▉ | 32/36 [07:37<00:48, 12.09s/segment]

Summary generation for segment 31 completed.

Processing segment 32: 2224.31s to 2293.82s
Transcribing audio for segment 32 from 2224.31s to 2293.82s...
Transcription for segment 32 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 32...


Segments Processed:  92%|█████████▏| 33/36 [07:48<00:34, 11.65s/segment]

Summary generation for segment 32 completed.

Processing segment 33: 2293.82s to 2323.60s
Transcribing audio for segment 33 from 2293.82s to 2323.60s...
Transcription for segment 33 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 33...


Segments Processed:  94%|█████████▍| 34/36 [07:57<00:22, 11.08s/segment]

Summary generation for segment 33 completed.

Processing segment 34: 2323.60s to 2403.04s
Transcribing audio for segment 34 from 2323.60s to 2403.04s...
Transcription for segment 34 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 34...


Segments Processed:  97%|█████████▋| 35/36 [08:12<00:11, 11.97s/segment]

Summary generation for segment 34 completed.

Processing segment 35: 2403.04s to 2514.34s
Transcribing audio for segment 35 from 2403.04s to 2514.34s...
Transcription for segment 35 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 35...


Segments Processed: 100%|██████████| 36/36 [08:24<00:00, 14.02s/segment]

Summary generation for segment 35 completed.

Processing completed.





Document saved as /content/outputs/Brand Management 5_ Markenportfolio-Management.docx
Successfully processed: /content/Brand Management 5_ Markenportfolio-Management.mp4
Output saved as: /content/outputs/Brand Management 5_ Markenportfolio-Management.docx

Processing: /content/B Change Management_ Video 3 Weitere Modelle.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 29.98016308772221
Total frames: 31808
Video duration: 1060.97 seconds
Frame interval: 299 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|          | 299/31808 [00:00<00:34, 903.43frame/s]

Slide change detected at 9.97 seconds (frame 299). SSIM: 0.4501


Frames Processed:   2%|▏         | 598/31808 [00:00<00:31, 1005.85frame/s]

Slide change detected at 19.95 seconds (frame 598). SSIM: 0.4334


Frames Processed:   5%|▍         | 1495/31808 [00:01<00:26, 1140.27frame/s]

Slide change detected at 49.87 seconds (frame 1495). SSIM: 0.8781


Frames Processed:   9%|▉         | 2990/31808 [00:02<00:27, 1042.67frame/s]

Slide change detected at 99.73 seconds (frame 2990). SSIM: 0.8426


Frames Processed:  33%|███▎      | 10465/31808 [00:07<00:13, 1622.25frame/s]

Slide change detected at 339.09 seconds (frame 10166). SSIM: 0.8059


Frames Processed:  37%|███▋      | 11661/31808 [00:08<00:13, 1518.65frame/s]

Slide change detected at 388.96 seconds (frame 11661). SSIM: 0.5527


Frames Processed:  39%|███▊      | 12259/31808 [00:09<00:13, 1427.80frame/s]

Slide change detected at 408.90 seconds (frame 12259). SSIM: 0.5770


Frames Processed:  59%|█████▉    | 18837/31808 [00:13<00:07, 1634.44frame/s]

Slide change detected at 618.34 seconds (frame 18538). SSIM: 0.7910
Slide change detected at 628.32 seconds (frame 18837). SSIM: 0.7858


Frames Processed:  61%|██████    | 19435/31808 [00:13<00:07, 1555.83frame/s]

Slide change detected at 638.29 seconds (frame 19136). SSIM: 0.7774


Frames Processed:  64%|██████▍   | 20332/31808 [00:14<00:08, 1381.28frame/s]

Slide change detected at 678.18 seconds (frame 20332). SSIM: 0.7196


Frames Processed:  66%|██████▌   | 20930/31808 [00:15<00:08, 1355.52frame/s]

Slide change detected at 698.13 seconds (frame 20930). SSIM: 0.7810


Frames Processed:  69%|██████▊   | 21827/31808 [00:15<00:08, 1169.95frame/s]

Slide change detected at 728.05 seconds (frame 21827). SSIM: 0.7927


Frames Processed:  75%|███████▌  | 23920/31808 [00:17<00:06, 1162.90frame/s]

Slide change detected at 797.86 seconds (frame 23920). SSIM: 0.4485


Frames Processed:  76%|███████▌  | 24219/31808 [00:17<00:06, 1213.23frame/s]

Slide change detected at 807.83 seconds (frame 24219). SSIM: 0.4362


Frames Processed:  81%|████████  | 25714/31808 [00:19<00:04, 1410.49frame/s]

Slide change detected at 847.73 seconds (frame 25415). SSIM: 0.8801


Frames Processed:  84%|████████▎ | 26611/31808 [00:19<00:03, 1417.71frame/s]

Slide change detected at 877.65 seconds (frame 26312). SSIM: 0.7743


Frames Processed:  86%|████████▌ | 27209/31808 [00:20<00:03, 1445.36frame/s]

Slide change detected at 907.57 seconds (frame 27209). SSIM: 0.5077


Frames Processed:  86%|████████▋ | 27508/31808 [00:20<00:02, 1439.83frame/s]

Slide change detected at 917.54 seconds (frame 27508). SSIM: 0.5583


Frames Processed:  89%|████████▉ | 28405/31808 [00:20<00:02, 1505.83frame/s]

Slide change detected at 937.49 seconds (frame 28106). SSIM: 0.6492


Frames Processed:  97%|█████████▋| 30797/31808 [00:22<00:00, 1610.84frame/s]

Slide change detected at 1017.27 seconds (frame 30498). SSIM: 0.8018


Frames Processed:  99%|█████████▉| 31509/31808 [00:23<00:00, 1369.06frame/s]

Slide change detected at 1057.17 seconds (frame 31694). SSIM: 0.5941
Adding final slide change at end of video (1060.97 seconds).
Total slide changes detected: 23

Step 2: Processing slides and audio...



Segments Processed:   0%|          | 0/23 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 9.97s
Transcribing audio for segment 0 from 0.00s to 9.97s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   4%|▍         | 1/23 [00:06<02:19,  6.34s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 9.97s to 19.95s
Transcribing audio for segment 1 from 9.97s to 19.95s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:   9%|▊         | 2/23 [00:13<02:22,  6.78s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 19.95s to 49.87s
Transcribing audio for segment 2 from 19.95s to 49.87s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  13%|█▎        | 3/23 [00:21<02:27,  7.36s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 49.87s to 99.73s
Transcribing audio for segment 3 from 49.87s to 99.73s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  17%|█▋        | 4/23 [00:30<02:30,  7.94s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 99.73s to 339.09s
Transcribing audio for segment 4 from 99.73s to 339.09s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  22%|██▏       | 5/23 [00:52<03:53, 13.00s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 339.09s to 388.96s
Transcribing audio for segment 5 from 339.09s to 388.96s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  26%|██▌       | 6/23 [01:02<03:26, 12.14s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 388.96s to 408.90s
Transcribing audio for segment 6 from 388.96s to 408.90s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  30%|███       | 7/23 [01:09<02:46, 10.40s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 408.90s to 618.34s
Transcribing audio for segment 7 from 408.90s to 618.34s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  35%|███▍      | 8/23 [01:25<03:04, 12.32s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 618.34s to 628.32s
Transcribing audio for segment 8 from 618.34s to 628.32s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  39%|███▉      | 9/23 [01:33<02:29, 10.67s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 628.32s to 638.29s
Transcribing audio for segment 9 from 628.32s to 638.29s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  43%|████▎     | 10/23 [01:39<02:02,  9.41s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 638.29s to 678.18s
Transcribing audio for segment 10 from 638.29s to 678.18s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  48%|████▊     | 11/23 [01:49<01:56,  9.69s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 678.18s to 698.13s
Transcribing audio for segment 11 from 678.18s to 698.13s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  52%|█████▏    | 12/23 [01:55<01:33,  8.47s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 698.13s to 728.05s
Transcribing audio for segment 12 from 698.13s to 728.05s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  57%|█████▋    | 13/23 [02:04<01:26,  8.60s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 728.05s to 797.86s
Transcribing audio for segment 13 from 728.05s to 797.86s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  61%|██████    | 14/23 [02:15<01:23,  9.25s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 797.86s to 807.83s
Transcribing audio for segment 14 from 797.86s to 807.83s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  65%|██████▌   | 15/23 [02:21<01:07,  8.38s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 807.83s to 847.73s
Transcribing audio for segment 15 from 807.83s to 847.73s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  70%|██████▉   | 16/23 [02:30<00:59,  8.47s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 847.73s to 877.65s
Transcribing audio for segment 16 from 847.73s to 877.65s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  74%|███████▍  | 17/23 [02:38<00:49,  8.26s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 877.65s to 907.57s
Transcribing audio for segment 17 from 877.65s to 907.57s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  78%|███████▊  | 18/23 [02:46<00:42,  8.44s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 907.57s to 917.54s
Transcribing audio for segment 18 from 907.57s to 917.54s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  83%|████████▎ | 19/23 [02:53<00:30,  7.73s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 917.54s to 937.49s
Transcribing audio for segment 19 from 917.54s to 937.49s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  87%|████████▋ | 20/23 [03:00<00:22,  7.57s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 937.49s to 1017.27s
Transcribing audio for segment 20 from 937.49s to 1017.27s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed:  91%|█████████▏| 21/23 [03:14<00:19,  9.52s/segment]

Summary generation for segment 20 completed.

Processing segment 21: 1017.27s to 1057.17s
Transcribing audio for segment 21 from 1017.27s to 1057.17s...
Transcription for segment 21 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 21...


Segments Processed:  96%|█████████▌| 22/23 [03:23<00:09,  9.35s/segment]

Summary generation for segment 21 completed.

Processing segment 22: 1057.17s to 1060.97s
Transcribing audio for segment 22 from 1057.17s to 1060.97s...
Transcription for segment 22 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 22...


Segments Processed: 100%|██████████| 23/23 [03:30<00:00,  9.15s/segment]

Summary generation for segment 22 completed.

Processing completed.





Document saved as /content/outputs/B Change Management_ Video 3 Weitere Modelle.docx
Successfully processed: /content/B Change Management_ Video 3 Weitere Modelle.mp4
Output saved as: /content/outputs/B Change Management_ Video 3 Weitere Modelle.docx

Processing: /content/Modul 12 Teil A - Einführung.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 29.994153678316028
Total frames: 70723
Video duration: 2357.89 seconds
Frame interval: 299 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|▏         | 897/70723 [00:05<06:23, 182.31frame/s]

Slide change detected at 29.91 seconds (frame 897). SSIM: 0.5145


Frames Processed:   2%|▏         | 1495/70723 [00:07<05:20, 216.12frame/s]

Slide change detected at 49.84 seconds (frame 1495). SSIM: 0.6130


Frames Processed:   8%|▊         | 5980/70723 [00:26<04:11, 257.28frame/s]

Slide change detected at 199.37 seconds (frame 5980). SSIM: 0.8011


Frames Processed:  15%|█▌        | 10764/70723 [00:46<04:05, 244.73frame/s]

Slide change detected at 358.87 seconds (frame 10764). SSIM: 0.7919


Frames Processed:  23%|██▎       | 16445/70723 [01:08<03:07, 290.02frame/s]

Slide change detected at 548.27 seconds (frame 16445). SSIM: 0.7523


Frames Processed:  30%|███       | 21229/70723 [01:16<01:44, 475.59frame/s]

Slide change detected at 707.77 seconds (frame 21229). SSIM: 0.7825


Frames Processed:  52%|█████▏    | 36478/70723 [02:06<01:26, 393.63frame/s]

Slide change detected at 1216.17 seconds (frame 36478). SSIM: 0.7673


Frames Processed:  55%|█████▍    | 38870/70723 [02:13<01:28, 361.34frame/s]

Slide change detected at 1295.92 seconds (frame 38870). SSIM: 0.7797


Frames Processed:  58%|█████▊    | 41262/70723 [02:17<00:48, 611.71frame/s]

Slide change detected at 1375.67 seconds (frame 41262). SSIM: 0.7621


Frames Processed:  62%|██████▏   | 43953/70723 [02:22<00:49, 541.45frame/s]

Slide change detected at 1465.39 seconds (frame 43953). SSIM: 0.8095


Frames Processed:  65%|██████▌   | 46046/70723 [02:28<01:27, 281.66frame/s]

Slide change detected at 1535.17 seconds (frame 46046). SSIM: 0.7982


Frames Processed:  66%|██████▌   | 46345/70723 [02:29<01:30, 269.17frame/s]

Slide change detected at 1545.13 seconds (frame 46345). SSIM: 0.8176


Frames Processed:  70%|██████▉   | 49335/70723 [02:40<01:24, 254.55frame/s]

Slide change detected at 1644.82 seconds (frame 49335). SSIM: 0.8203


Frames Processed:  73%|███████▎  | 51428/70723 [02:47<01:01, 312.54frame/s]

Slide change detected at 1714.60 seconds (frame 51428). SSIM: 0.7798


Frames Processed:  75%|███████▌  | 53222/70723 [02:52<00:45, 382.97frame/s]

Slide change detected at 1774.41 seconds (frame 53222). SSIM: 0.7978


Frames Processed:  80%|███████▉  | 56511/70723 [03:00<00:38, 365.98frame/s]

Slide change detected at 1884.07 seconds (frame 56511). SSIM: 0.8282


Frames Processed:  84%|████████▎ | 59202/70723 [03:12<00:41, 280.26frame/s]

Slide change detected at 1973.78 seconds (frame 59202). SSIM: 0.7796


Frames Processed:  90%|█████████ | 63986/70723 [03:26<00:16, 397.47frame/s]

Slide change detected at 2133.28 seconds (frame 63986). SSIM: 0.8049


Frames Processed:  91%|█████████▏| 64584/70723 [03:27<00:13, 451.29frame/s]

Slide change detected at 2153.22 seconds (frame 64584). SSIM: 0.8549


Frames Processed:  92%|█████████▏| 64883/70723 [03:28<00:12, 460.10frame/s]

Slide change detected at 2163.19 seconds (frame 64883). SSIM: 0.8534


Frames Processed:  93%|█████████▎| 66079/70723 [03:30<00:09, 468.14frame/s]

Slide change detected at 2203.06 seconds (frame 66079). SSIM: 0.7855


Frames Processed: 100%|█████████▉| 70424/70723 [03:43<00:00, 315.78frame/s]


Adding final slide change at end of video (2357.89 seconds).
Total slide changes detected: 22

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/22 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 29.91s
Transcribing audio for segment 0 from 0.00s to 29.91s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   5%|▍         | 1/22 [00:07<02:38,  7.54s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 29.91s to 49.84s
Transcribing audio for segment 1 from 29.91s to 49.84s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:   9%|▉         | 2/22 [00:16<02:44,  8.24s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 49.84s to 199.37s
Transcribing audio for segment 2 from 49.84s to 199.37s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  14%|█▎        | 3/22 [00:31<03:36, 11.41s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 199.37s to 358.87s
Transcribing audio for segment 3 from 199.37s to 358.87s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  18%|█▊        | 4/22 [00:45<03:42, 12.35s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 358.87s to 548.27s
Transcribing audio for segment 4 from 358.87s to 548.27s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  23%|██▎       | 5/22 [01:00<03:49, 13.53s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 548.27s to 707.77s
Transcribing audio for segment 5 from 548.27s to 707.77s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  27%|██▋       | 6/22 [01:17<03:53, 14.57s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 707.77s to 1216.17s
Transcribing audio for segment 6 from 707.77s to 1216.17s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  32%|███▏      | 7/22 [01:52<05:17, 21.15s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 1216.17s to 1295.92s
Transcribing audio for segment 7 from 1216.17s to 1295.92s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  36%|███▋      | 8/22 [02:03<04:11, 17.95s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 1295.92s to 1375.67s
Transcribing audio for segment 8 from 1295.92s to 1375.67s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  41%|████      | 9/22 [02:14<03:26, 15.88s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 1375.67s to 1465.39s
Transcribing audio for segment 9 from 1375.67s to 1465.39s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  45%|████▌     | 10/22 [02:26<02:54, 14.54s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 1465.39s to 1535.17s
Transcribing audio for segment 10 from 1465.39s to 1535.17s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  50%|█████     | 11/22 [02:38<02:34, 14.02s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 1535.17s to 1545.13s
Transcribing audio for segment 11 from 1535.17s to 1545.13s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  55%|█████▍    | 12/22 [02:48<02:06, 12.63s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 1545.13s to 1644.82s
Transcribing audio for segment 12 from 1545.13s to 1644.82s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  59%|█████▉    | 13/22 [03:04<02:02, 13.59s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 1644.82s to 1714.60s
Transcribing audio for segment 13 from 1644.82s to 1714.60s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  64%|██████▎   | 14/22 [03:16<01:44, 13.07s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 1714.60s to 1774.41s
Transcribing audio for segment 14 from 1714.60s to 1774.41s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  68%|██████▊   | 15/22 [03:26<01:26, 12.33s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 1774.41s to 1884.07s
Transcribing audio for segment 15 from 1774.41s to 1884.07s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  73%|███████▎  | 16/22 [03:43<01:21, 13.56s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 1884.07s to 1973.78s
Transcribing audio for segment 16 from 1884.07s to 1973.78s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  77%|███████▋  | 17/22 [03:56<01:06, 13.36s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 1973.78s to 2133.28s
Transcribing audio for segment 17 from 1973.78s to 2133.28s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  82%|████████▏ | 18/22 [04:09<00:53, 13.37s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 2133.28s to 2153.22s
Transcribing audio for segment 18 from 2133.28s to 2153.22s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  86%|████████▋ | 19/22 [04:19<00:37, 12.53s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 2153.22s to 2163.19s
Transcribing audio for segment 19 from 2153.22s to 2163.19s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  91%|█████████ | 20/22 [04:27<00:22, 11.05s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 2163.19s to 2203.06s
Transcribing audio for segment 20 from 2163.19s to 2203.06s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed:  95%|█████████▌| 21/22 [04:36<00:10, 10.34s/segment]

Summary generation for segment 20 completed.

Processing segment 21: 2203.06s to 2357.89s
Transcribing audio for segment 21 from 2203.06s to 2357.89s...
Transcription for segment 21 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 21...


Segments Processed: 100%|██████████| 22/22 [04:53<00:00, 13.35s/segment]

Summary generation for segment 21 completed.

Processing completed.





Document saved as /content/outputs/Modul 12 Teil A - Einführung.docx
Successfully processed: /content/Modul 12 Teil A - Einführung.mp4
Output saved as: /content/outputs/Modul 12 Teil A - Einführung.docx

Processing: /content/02 Stakeholder Identifizieren.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 30.0
Total frames: 25628
Video duration: 854.27 seconds
Frame interval: 300 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   2%|▏         | 600/25628 [00:03<02:09, 192.73frame/s]

Slide change detected at 20.00 seconds (frame 600). SSIM: 0.3094


Frames Processed:   6%|▌         | 1500/25628 [00:08<02:16, 177.00frame/s]

Slide change detected at 50.00 seconds (frame 1500). SSIM: 0.6463


Frames Processed:  23%|██▎       | 6000/25628 [00:27<01:24, 232.95frame/s]

Slide change detected at 200.00 seconds (frame 6000). SSIM: 0.7301


Frames Processed:  36%|███▋      | 9300/25628 [00:42<01:07, 240.83frame/s]

Slide change detected at 310.00 seconds (frame 9300). SSIM: 0.6055


Frames Processed:  37%|███▋      | 9600/25628 [00:43<01:04, 246.64frame/s]

Slide change detected at 320.00 seconds (frame 9600). SSIM: 0.6755


Frames Processed:  39%|███▊      | 9900/25628 [00:44<01:00, 261.03frame/s]

Slide change detected at 330.00 seconds (frame 9900). SSIM: 0.6736


Frames Processed:  42%|████▏     | 10800/25628 [00:48<01:01, 239.30frame/s]

Slide change detected at 360.00 seconds (frame 10800). SSIM: 0.6756


Frames Processed:  43%|████▎     | 11100/25628 [00:49<00:55, 261.10frame/s]

Slide change detected at 370.00 seconds (frame 11100). SSIM: 0.6972


Frames Processed:  44%|████▍     | 11400/25628 [00:50<00:52, 272.11frame/s]

Slide change detected at 380.00 seconds (frame 11400). SSIM: 0.7270


Frames Processed:  47%|████▋     | 12000/25628 [00:52<00:46, 293.44frame/s]

Slide change detected at 400.00 seconds (frame 12000). SSIM: 0.6690


Frames Processed:  48%|████▊     | 12300/25628 [00:53<00:45, 294.57frame/s]

Slide change detected at 410.00 seconds (frame 12300). SSIM: 0.6991


Frames Processed:  49%|████▉     | 12600/25628 [00:54<00:44, 294.13frame/s]

Slide change detected at 420.00 seconds (frame 12600). SSIM: 0.7394


Frames Processed:  50%|█████     | 12900/25628 [00:54<00:41, 304.12frame/s]

Slide change detected at 430.00 seconds (frame 12900). SSIM: 0.7170


Frames Processed:  55%|█████▌    | 14100/25628 [00:59<00:49, 235.08frame/s]

Slide change detected at 470.00 seconds (frame 14100). SSIM: 0.6892


Frames Processed:  56%|█████▌    | 14400/25628 [01:01<00:51, 216.74frame/s]

Slide change detected at 480.00 seconds (frame 14400). SSIM: 0.6260


Frames Processed:  70%|███████   | 18000/25628 [01:15<00:33, 226.59frame/s]

Slide change detected at 600.00 seconds (frame 18000). SSIM: 0.8223


Frames Processed:  89%|████████▉ | 22800/25628 [01:34<00:10, 262.35frame/s]

Slide change detected at 760.00 seconds (frame 22800). SSIM: 0.7778


Frames Processed:  99%|█████████▉| 25328/25628 [01:45<00:01, 239.60frame/s]


Adding final slide change at end of video (854.27 seconds).
Total slide changes detected: 18

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/18 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 20.00s
Transcribing audio for segment 0 from 0.00s to 20.00s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   6%|▌         | 1/18 [00:05<01:38,  5.79s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 20.00s to 50.00s
Transcribing audio for segment 1 from 20.00s to 50.00s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  11%|█         | 2/18 [00:14<01:56,  7.26s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 50.00s to 200.00s
Transcribing audio for segment 2 from 50.00s to 200.00s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  17%|█▋        | 3/18 [00:28<02:39, 10.65s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 200.00s to 310.00s
Transcribing audio for segment 3 from 200.00s to 310.00s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  22%|██▏       | 4/18 [00:40<02:34, 11.01s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 310.00s to 320.00s
Transcribing audio for segment 4 from 310.00s to 320.00s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  28%|██▊       | 5/18 [00:51<02:23, 11.06s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 320.00s to 330.00s
Transcribing audio for segment 5 from 320.00s to 330.00s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  33%|███▎      | 6/18 [01:01<02:07, 10.65s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 330.00s to 360.00s
Transcribing audio for segment 6 from 330.00s to 360.00s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  39%|███▉      | 7/18 [01:14<02:05, 11.45s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 360.00s to 370.00s
Transcribing audio for segment 7 from 360.00s to 370.00s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  44%|████▍     | 8/18 [01:22<01:42, 10.25s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 370.00s to 380.00s
Transcribing audio for segment 8 from 370.00s to 380.00s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  50%|█████     | 9/18 [01:29<01:24,  9.35s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 380.00s to 400.00s
Transcribing audio for segment 9 from 380.00s to 400.00s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  56%|█████▌    | 10/18 [01:39<01:15,  9.47s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 400.00s to 410.00s
Transcribing audio for segment 10 from 400.00s to 410.00s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  61%|██████    | 11/18 [01:48<01:05,  9.35s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 410.00s to 420.00s
Transcribing audio for segment 11 from 410.00s to 420.00s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  67%|██████▋   | 12/18 [01:56<00:53,  8.85s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 420.00s to 430.00s
Transcribing audio for segment 12 from 420.00s to 430.00s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  72%|███████▏  | 13/18 [02:02<00:40,  8.18s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 430.00s to 470.00s
Transcribing audio for segment 13 from 430.00s to 470.00s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  78%|███████▊  | 14/18 [02:10<00:32,  8.00s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 470.00s to 480.00s
Transcribing audio for segment 14 from 470.00s to 480.00s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  83%|████████▎ | 15/18 [02:17<00:23,  7.92s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 480.00s to 600.00s
Transcribing audio for segment 15 from 480.00s to 600.00s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  89%|████████▉ | 16/18 [02:31<00:19,  9.73s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 600.00s to 760.00s
Transcribing audio for segment 16 from 600.00s to 760.00s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  94%|█████████▍| 17/18 [02:46<00:11, 11.26s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 760.00s to 854.27s
Transcribing audio for segment 17 from 760.00s to 854.27s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed: 100%|██████████| 18/18 [02:57<00:00,  9.88s/segment]

Summary generation for segment 17 completed.

Processing completed.





Document saved as /content/outputs/02 Stakeholder Identifizieren.docx
Successfully processed: /content/02 Stakeholder Identifizieren.mp4
Output saved as: /content/outputs/02 Stakeholder Identifizieren.docx

Processing: /content/D Video Organizational Design Session 1 Ihre SWOT + HF Sätze Kohorte 2.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 29.707116376486532
Total frames: 11476
Video duration: 386.30 seconds
Frame interval: 297 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   3%|▎         | 297/11476 [00:00<00:10, 1039.47frame/s]

Slide change detected at 10.00 seconds (frame 297). SSIM: 0.8983


Frames Processed:   5%|▌         | 594/11476 [00:00<00:09, 1175.04frame/s]

Slide change detected at 20.00 seconds (frame 594). SSIM: 0.8892


Frames Processed:   8%|▊         | 891/11476 [00:00<00:08, 1256.75frame/s]

Slide change detected at 29.99 seconds (frame 891). SSIM: 0.8605


Frames Processed:  13%|█▎        | 1485/11476 [00:01<00:07, 1340.63frame/s]

Slide change detected at 39.99 seconds (frame 1188). SSIM: 0.8735
Slide change detected at 49.99 seconds (frame 1485). SSIM: 0.8430


Frames Processed:  16%|█▌        | 1782/11476 [00:01<00:07, 1227.42frame/s]

Slide change detected at 59.99 seconds (frame 1782). SSIM: 0.8027


Frames Processed:  18%|█▊        | 2079/11476 [00:01<00:07, 1204.77frame/s]

Slide change detected at 69.98 seconds (frame 2079). SSIM: 0.8421


Frames Processed:  21%|██        | 2376/11476 [00:01<00:07, 1182.43frame/s]

Slide change detected at 79.98 seconds (frame 2376). SSIM: 0.8133


Frames Processed:  23%|██▎       | 2673/11476 [00:02<00:07, 1185.63frame/s]

Slide change detected at 89.98 seconds (frame 2673). SSIM: 0.8333


Frames Processed:  26%|██▌       | 2970/11476 [00:02<00:07, 1198.57frame/s]

Slide change detected at 99.98 seconds (frame 2970). SSIM: 0.8646


Frames Processed:  28%|██▊       | 3267/11476 [00:02<00:06, 1224.96frame/s]

Slide change detected at 109.97 seconds (frame 3267). SSIM: 0.8331


Frames Processed:  31%|███       | 3564/11476 [00:02<00:06, 1258.90frame/s]

Slide change detected at 119.97 seconds (frame 3564). SSIM: 0.8689


Frames Processed:  34%|███▎      | 3861/11476 [00:03<00:06, 1191.83frame/s]

Slide change detected at 129.97 seconds (frame 3861). SSIM: 0.8155


Frames Processed:  36%|███▌      | 4158/11476 [00:03<00:06, 1141.62frame/s]

Slide change detected at 139.97 seconds (frame 4158). SSIM: 0.8270


Frames Processed:  39%|███▉      | 4455/11476 [00:03<00:06, 1149.18frame/s]

Slide change detected at 149.96 seconds (frame 4455). SSIM: 0.8448


Frames Processed:  41%|████▏     | 4752/11476 [00:03<00:05, 1151.45frame/s]

Slide change detected at 159.96 seconds (frame 4752). SSIM: 0.8441


Frames Processed:  44%|████▍     | 5049/11476 [00:04<00:05, 1153.84frame/s]

Slide change detected at 169.96 seconds (frame 5049). SSIM: 0.8641


Frames Processed:  47%|████▋     | 5346/11476 [00:04<00:05, 1162.77frame/s]

Slide change detected at 179.96 seconds (frame 5346). SSIM: 0.7826


Frames Processed:  49%|████▉     | 5643/11476 [00:04<00:04, 1203.57frame/s]

Slide change detected at 189.95 seconds (frame 5643). SSIM: 0.8621


Frames Processed:  52%|█████▏    | 5940/11476 [00:05<00:04, 1127.03frame/s]

Slide change detected at 199.95 seconds (frame 5940). SSIM: 0.8549


Frames Processed:  54%|█████▍    | 6237/11476 [00:05<00:04, 1072.71frame/s]

Slide change detected at 209.95 seconds (frame 6237). SSIM: 0.8502


Frames Processed:  57%|█████▋    | 6534/11476 [00:05<00:04, 1048.61frame/s]

Slide change detected at 219.95 seconds (frame 6534). SSIM: 0.8409


Frames Processed:  60%|█████▉    | 6831/11476 [00:05<00:04, 1080.18frame/s]

Slide change detected at 229.94 seconds (frame 6831). SSIM: 0.8728


Frames Processed:  62%|██████▏   | 7128/11476 [00:06<00:03, 1090.87frame/s]

Slide change detected at 239.94 seconds (frame 7128). SSIM: 0.8405


Frames Processed:  65%|██████▍   | 7425/11476 [00:06<00:04, 993.12frame/s] 

Slide change detected at 249.94 seconds (frame 7425). SSIM: 0.8365


Frames Processed:  67%|██████▋   | 7722/11476 [00:06<00:04, 925.92frame/s]

Slide change detected at 259.94 seconds (frame 7722). SSIM: 0.8507


Frames Processed:  70%|██████▉   | 8019/11476 [00:07<00:03, 890.83frame/s]

Slide change detected at 269.94 seconds (frame 8019). SSIM: 0.8270


Frames Processed:  75%|███████▌  | 8613/11476 [00:07<00:03, 890.14frame/s]

Slide change detected at 289.93 seconds (frame 8613). SSIM: 0.8490


Frames Processed:  78%|███████▊  | 8910/11476 [00:08<00:02, 867.61frame/s]

Slide change detected at 299.93 seconds (frame 8910). SSIM: 0.8468


Frames Processed:  80%|████████  | 9207/11476 [00:08<00:02, 930.71frame/s]

Slide change detected at 309.93 seconds (frame 9207). SSIM: 0.8657


Frames Processed:  83%|████████▎ | 9504/11476 [00:08<00:01, 1024.80frame/s]

Slide change detected at 319.92 seconds (frame 9504). SSIM: 0.8824


Frames Processed:  88%|████████▊ | 10098/11476 [00:09<00:01, 1196.54frame/s]

Slide change detected at 329.92 seconds (frame 9801). SSIM: 0.8545
Slide change detected at 339.92 seconds (frame 10098). SSIM: 0.8498


Frames Processed:  91%|█████████ | 10395/11476 [00:09<00:00, 1144.98frame/s]

Slide change detected at 349.92 seconds (frame 10395). SSIM: 0.8156


Frames Processed:  93%|█████████▎| 10692/11476 [00:09<00:00, 1175.35frame/s]

Slide change detected at 359.91 seconds (frame 10692). SSIM: 0.8412


Frames Processed:  96%|█████████▌| 10989/11476 [00:09<00:00, 1189.60frame/s]

Slide change detected at 369.91 seconds (frame 10989). SSIM: 0.8122


Frames Processed:  97%|█████████▋| 11179/11476 [00:10<00:00, 1096.75frame/s]

Slide change detected at 379.91 seconds (frame 11286). SSIM: 0.8832
Adding final slide change at end of video (386.30 seconds).
Total slide changes detected: 38

Step 2: Processing slides and audio...



Segments Processed:   0%|          | 0/38 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 10.00s
Transcribing audio for segment 0 from 0.00s to 10.00s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   3%|▎         | 1/38 [00:06<04:00,  6.51s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 10.00s to 20.00s
Transcribing audio for segment 1 from 10.00s to 20.00s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:   5%|▌         | 2/38 [00:12<03:37,  6.04s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 20.00s to 29.99s
Transcribing audio for segment 2 from 20.00s to 29.99s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:   8%|▊         | 3/38 [00:18<03:32,  6.06s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 29.99s to 39.99s
Transcribing audio for segment 3 from 29.99s to 39.99s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  11%|█         | 4/38 [00:24<03:31,  6.22s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 39.99s to 49.99s
Transcribing audio for segment 4 from 39.99s to 49.99s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  13%|█▎        | 5/38 [00:29<03:08,  5.72s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 49.99s to 59.99s
Transcribing audio for segment 5 from 49.99s to 59.99s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  16%|█▌        | 6/38 [00:35<03:03,  5.72s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 59.99s to 69.98s
Transcribing audio for segment 6 from 59.99s to 69.98s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  18%|█▊        | 7/38 [00:41<03:02,  5.90s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 69.98s to 79.98s
Transcribing audio for segment 7 from 69.98s to 79.98s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  21%|██        | 8/38 [00:46<02:50,  5.68s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 79.98s to 89.98s
Transcribing audio for segment 8 from 79.98s to 89.98s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  24%|██▎       | 9/38 [00:53<02:56,  6.10s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 89.98s to 99.98s
Transcribing audio for segment 9 from 89.98s to 99.98s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  26%|██▋       | 10/38 [01:01<03:03,  6.54s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 99.98s to 109.97s
Transcribing audio for segment 10 from 99.98s to 109.97s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  29%|██▉       | 11/38 [01:08<02:58,  6.60s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 109.97s to 119.97s
Transcribing audio for segment 11 from 109.97s to 119.97s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  32%|███▏      | 12/38 [01:14<02:49,  6.53s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 119.97s to 129.97s
Transcribing audio for segment 12 from 119.97s to 129.97s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  34%|███▍      | 13/38 [01:19<02:32,  6.11s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 129.97s to 139.97s
Transcribing audio for segment 13 from 129.97s to 139.97s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  37%|███▋      | 14/38 [01:26<02:29,  6.25s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 139.97s to 149.96s
Transcribing audio for segment 14 from 139.97s to 149.96s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  39%|███▉      | 15/38 [01:31<02:20,  6.11s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 149.96s to 159.96s
Transcribing audio for segment 15 from 149.96s to 159.96s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  42%|████▏     | 16/38 [01:38<02:17,  6.25s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 159.96s to 169.96s
Transcribing audio for segment 16 from 159.96s to 169.96s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  45%|████▍     | 17/38 [01:44<02:11,  6.26s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 169.96s to 179.96s
Transcribing audio for segment 17 from 169.96s to 179.96s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  47%|████▋     | 18/38 [01:50<02:00,  6.03s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 179.96s to 189.95s
Transcribing audio for segment 18 from 179.96s to 189.95s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  50%|█████     | 19/38 [01:56<01:54,  6.04s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 189.95s to 199.95s
Transcribing audio for segment 19 from 189.95s to 199.95s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  53%|█████▎    | 20/38 [02:00<01:40,  5.58s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 199.95s to 209.95s
Transcribing audio for segment 20 from 199.95s to 209.95s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed:  55%|█████▌    | 21/38 [02:05<01:31,  5.39s/segment]

Summary generation for segment 20 completed.

Processing segment 21: 209.95s to 219.95s
Transcribing audio for segment 21 from 209.95s to 219.95s...
Transcription for segment 21 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 21...


Segments Processed:  58%|█████▊    | 22/38 [02:13<01:35,  5.98s/segment]

Summary generation for segment 21 completed.

Processing segment 22: 219.95s to 229.94s
Transcribing audio for segment 22 from 219.95s to 229.94s...
Transcription for segment 22 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 22...


Segments Processed:  61%|██████    | 23/38 [02:19<01:30,  6.04s/segment]

Summary generation for segment 22 completed.

Processing segment 23: 229.94s to 239.94s
Transcribing audio for segment 23 from 229.94s to 239.94s...
Transcription for segment 23 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 23...


Segments Processed:  63%|██████▎   | 24/38 [02:25<01:26,  6.15s/segment]

Summary generation for segment 23 completed.

Processing segment 24: 239.94s to 249.94s
Transcribing audio for segment 24 from 239.94s to 249.94s...
Transcription for segment 24 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 24...


Segments Processed:  66%|██████▌   | 25/38 [02:30<01:12,  5.59s/segment]

Summary generation for segment 24 completed.

Processing segment 25: 249.94s to 259.94s
Transcribing audio for segment 25 from 249.94s to 259.94s...
Transcription for segment 25 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 25...


Segments Processed:  68%|██████▊   | 26/38 [02:36<01:09,  5.81s/segment]

Summary generation for segment 25 completed.

Processing segment 26: 259.94s to 269.94s
Transcribing audio for segment 26 from 259.94s to 269.94s...
Transcription for segment 26 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 26...


Segments Processed:  71%|███████   | 27/38 [02:42<01:05,  5.96s/segment]

Summary generation for segment 26 completed.

Processing segment 27: 269.94s to 289.93s
Transcribing audio for segment 27 from 269.94s to 289.93s...
Transcription for segment 27 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 27...


Segments Processed:  74%|███████▎  | 28/38 [02:50<01:05,  6.51s/segment]

Summary generation for segment 27 completed.

Processing segment 28: 289.93s to 299.93s
Transcribing audio for segment 28 from 289.93s to 299.93s...
Transcription for segment 28 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 28...


Segments Processed:  76%|███████▋  | 29/38 [02:57<01:00,  6.74s/segment]

Summary generation for segment 28 completed.

Processing segment 29: 299.93s to 309.93s
Transcribing audio for segment 29 from 299.93s to 309.93s...
Transcription for segment 29 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 29...


Segments Processed:  79%|███████▉  | 30/38 [03:03<00:52,  6.57s/segment]

Summary generation for segment 29 completed.

Processing segment 30: 309.93s to 319.92s
Transcribing audio for segment 30 from 309.93s to 319.92s...
Transcription for segment 30 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 30...


Segments Processed:  82%|████████▏ | 31/38 [03:11<00:47,  6.80s/segment]

Summary generation for segment 30 completed.

Processing segment 31: 319.92s to 329.92s
Transcribing audio for segment 31 from 319.92s to 329.92s...
Transcription for segment 31 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 31...


Segments Processed:  84%|████████▍ | 32/38 [03:18<00:40,  6.79s/segment]

Summary generation for segment 31 completed.

Processing segment 32: 329.92s to 339.92s
Transcribing audio for segment 32 from 329.92s to 339.92s...
Transcription for segment 32 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 32...


Segments Processed:  87%|████████▋ | 33/38 [03:24<00:33,  6.68s/segment]

Summary generation for segment 32 completed.

Processing segment 33: 339.92s to 349.92s
Transcribing audio for segment 33 from 339.92s to 349.92s...
Transcription for segment 33 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 33...


Segments Processed:  89%|████████▉ | 34/38 [03:29<00:24,  6.23s/segment]

Summary generation for segment 33 completed.

Processing segment 34: 349.92s to 359.91s
Transcribing audio for segment 34 from 349.92s to 359.91s...
Transcription for segment 34 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 34...


Segments Processed:  92%|█████████▏| 35/38 [03:37<00:19,  6.60s/segment]

Summary generation for segment 34 completed.

Processing segment 35: 359.91s to 369.91s
Transcribing audio for segment 35 from 359.91s to 369.91s...
Transcription for segment 35 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 35...


Segments Processed:  95%|█████████▍| 36/38 [03:42<00:12,  6.37s/segment]

Summary generation for segment 35 completed.

Processing segment 36: 369.91s to 379.91s
Transcribing audio for segment 36 from 369.91s to 379.91s...
Transcription for segment 36 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 36...


Segments Processed:  97%|█████████▋| 37/38 [03:49<00:06,  6.34s/segment]

Summary generation for segment 36 completed.

Processing segment 37: 379.91s to 386.30s
Transcribing audio for segment 37 from 379.91s to 386.30s...
Transcription for segment 37 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 37...


Segments Processed: 100%|██████████| 38/38 [03:55<00:00,  6.19s/segment]

Summary generation for segment 37 completed.

Processing completed.





Document saved as /content/outputs/D Video Organizational Design Session 1 Ihre SWOT + HF Sätze Kohorte 2.docx
Successfully processed: /content/D Video Organizational Design Session 1 Ihre SWOT + HF Sätze Kohorte 2.mp4
Output saved as: /content/outputs/D Video Organizational Design Session 1 Ihre SWOT + HF Sätze Kohorte 2.docx

Processing: /content/Fabasoft Management Academy - Basismodul 1 - Dynamische Stabilität von Organisationen TEIL 1.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 29.999787866139407
Total frames: 98522
Video duration: 3284.09 seconds
Frame interval: 299 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|          | 598/98522 [00:02<06:26, 253.41frame/s]

Slide change detected at 19.93 seconds (frame 598). SSIM: 0.6308


Frames Processed:   4%|▍         | 4186/98522 [00:15<05:30, 285.70frame/s]

Slide change detected at 139.53 seconds (frame 4186). SSIM: 0.6337


Frames Processed:   8%|▊         | 7475/98522 [00:27<05:18, 285.99frame/s]

Slide change detected at 249.17 seconds (frame 7475). SSIM: 0.5000


Frames Processed:  15%|█▍        | 14352/98522 [00:53<04:52, 287.77frame/s]

Slide change detected at 478.40 seconds (frame 14352). SSIM: 0.4584


Frames Processed:  16%|█▌        | 15847/98522 [00:57<04:12, 327.39frame/s]

Slide change detected at 528.24 seconds (frame 15847). SSIM: 0.5290


Frames Processed:  23%|██▎       | 23023/98522 [01:20<03:47, 331.58frame/s]

Slide change detected at 767.44 seconds (frame 23023). SSIM: 0.5530


Frames Processed:  29%|██▉       | 28405/98522 [01:36<03:17, 355.16frame/s]

Slide change detected at 946.84 seconds (frame 28405). SSIM: 0.5539


Frames Processed:  33%|███▎      | 32890/98522 [01:48<02:44, 398.36frame/s]

Slide change detected at 1096.34 seconds (frame 32890). SSIM: 0.7995


Frames Processed:  38%|███▊      | 37674/98522 [02:01<02:28, 408.58frame/s]

Slide change detected at 1255.81 seconds (frame 37674). SSIM: 0.7784


Frames Processed:  42%|████▏     | 40963/98522 [02:10<02:33, 375.83frame/s]

Slide change detected at 1365.44 seconds (frame 40963). SSIM: 0.8737


Frames Processed:  44%|████▍     | 43355/98522 [02:16<02:15, 406.73frame/s]

Slide change detected at 1445.18 seconds (frame 43355). SSIM: 0.8452


Frames Processed:  46%|████▌     | 45448/98522 [02:22<02:32, 346.98frame/s]

Slide change detected at 1514.94 seconds (frame 45448). SSIM: 0.8866


Frames Processed:  49%|████▉     | 48139/98522 [02:28<01:56, 433.54frame/s]

Slide change detected at 1604.64 seconds (frame 48139). SSIM: 0.8790


Frames Processed:  54%|█████▍    | 53222/98522 [02:40<01:38, 458.35frame/s]

Slide change detected at 1774.08 seconds (frame 53222). SSIM: 0.8562


Frames Processed:  57%|█████▋    | 56212/98522 [02:48<01:57, 360.81frame/s]

Slide change detected at 1873.75 seconds (frame 56212). SSIM: 0.8667


Frames Processed:  61%|██████    | 59800/98522 [02:55<01:16, 507.44frame/s]

Slide change detected at 1993.35 seconds (frame 59800). SSIM: 0.7876


Frames Processed:  63%|██████▎   | 61893/98522 [03:00<01:33, 392.46frame/s]

Slide change detected at 2063.11 seconds (frame 61893). SSIM: 0.7606


Frames Processed:  70%|███████   | 69368/98522 [03:14<01:01, 470.50frame/s]

Slide change detected at 2312.28 seconds (frame 69368). SSIM: 0.8810


Frames Processed:  75%|███████▍  | 73853/98522 [03:21<00:38, 641.19frame/s]

Slide change detected at 2461.78 seconds (frame 73853). SSIM: 0.8405


Frames Processed:  77%|███████▋  | 75946/98522 [03:25<00:39, 566.30frame/s]

Slide change detected at 2531.55 seconds (frame 75946). SSIM: 0.8472


Frames Processed:  84%|████████▍ | 82524/98522 [03:34<00:21, 751.02frame/s]

Slide change detected at 2750.82 seconds (frame 82524). SSIM: 0.8096


Frames Processed:  86%|████████▌ | 84617/98522 [03:37<00:19, 700.18frame/s]

Slide change detected at 2820.59 seconds (frame 84617). SSIM: 0.8322


Frames Processed:  90%|████████▉ | 88205/98522 [03:42<00:12, 830.88frame/s]

Slide change detected at 2940.19 seconds (frame 88205). SSIM: 0.8888


Frames Processed:  98%|█████████▊| 96876/98522 [04:06<00:07, 231.30frame/s]

Slide change detected at 3229.22 seconds (frame 96876). SSIM: 0.8153


Frames Processed: 100%|█████████▉| 98223/98522 [04:11<00:00, 390.89frame/s]


Adding final slide change at end of video (3284.09 seconds).
Total slide changes detected: 25

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/25 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 19.93s
Transcribing audio for segment 0 from 0.00s to 19.93s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   4%|▍         | 1/25 [00:05<02:04,  5.17s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 19.93s to 139.53s
Transcribing audio for segment 1 from 19.93s to 139.53s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:   8%|▊         | 2/25 [00:18<03:43,  9.72s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 139.53s to 249.17s
Transcribing audio for segment 2 from 139.53s to 249.17s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  12%|█▏        | 3/25 [00:29<03:46, 10.30s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 249.17s to 478.40s
Transcribing audio for segment 3 from 249.17s to 478.40s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  16%|█▌        | 4/25 [00:46<04:36, 13.18s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 478.40s to 528.24s
Transcribing audio for segment 4 from 478.40s to 528.24s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  20%|██        | 5/25 [00:54<03:45, 11.25s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 528.24s to 767.44s
Transcribing audio for segment 5 from 528.24s to 767.44s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  24%|██▍       | 6/25 [01:15<04:33, 14.41s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 767.44s to 946.84s
Transcribing audio for segment 6 from 767.44s to 946.84s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  28%|██▊       | 7/25 [01:31<04:28, 14.94s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 946.84s to 1096.34s
Transcribing audio for segment 7 from 946.84s to 1096.34s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  32%|███▏      | 8/25 [01:44<04:07, 14.57s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 1096.34s to 1255.81s
Transcribing audio for segment 8 from 1096.34s to 1255.81s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  36%|███▌      | 9/25 [01:59<03:54, 14.67s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 1255.81s to 1365.44s
Transcribing audio for segment 9 from 1255.81s to 1365.44s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  40%|████      | 10/25 [02:13<03:35, 14.35s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 1365.44s to 1445.18s
Transcribing audio for segment 10 from 1365.44s to 1445.18s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  44%|████▍     | 11/25 [02:23<03:01, 12.94s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 1445.18s to 1514.94s
Transcribing audio for segment 11 from 1445.18s to 1514.94s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  48%|████▊     | 12/25 [02:35<02:44, 12.66s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 1514.94s to 1604.64s
Transcribing audio for segment 12 from 1514.94s to 1604.64s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  52%|█████▏    | 13/25 [02:45<02:22, 11.86s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 1604.64s to 1774.08s
Transcribing audio for segment 13 from 1604.64s to 1774.08s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  56%|█████▌    | 14/25 [03:04<02:34, 14.04s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 1774.08s to 1873.75s
Transcribing audio for segment 14 from 1774.08s to 1873.75s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  60%|██████    | 15/25 [03:18<02:20, 14.10s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 1873.75s to 1993.35s
Transcribing audio for segment 15 from 1873.75s to 1993.35s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  64%|██████▍   | 16/25 [04:52<05:43, 38.20s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 1993.35s to 2063.11s
Transcribing audio for segment 16 from 1993.35s to 2063.11s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  68%|██████▊   | 17/25 [05:04<04:03, 30.43s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 2063.11s to 2312.28s
Transcribing audio for segment 17 from 2063.11s to 2312.28s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  72%|███████▏  | 18/25 [05:23<03:08, 26.99s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 2312.28s to 2461.78s
Transcribing audio for segment 18 from 2312.28s to 2461.78s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  76%|███████▌  | 19/25 [05:43<02:28, 24.68s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 2461.78s to 2531.55s
Transcribing audio for segment 19 from 2461.78s to 2531.55s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  80%|████████  | 20/25 [05:54<01:43, 20.62s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 2531.55s to 2750.82s
Transcribing audio for segment 20 from 2531.55s to 2750.82s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed:  84%|████████▍ | 21/25 [06:16<01:24, 21.02s/segment]

Summary generation for segment 20 completed.

Processing segment 21: 2750.82s to 2820.59s
Transcribing audio for segment 21 from 2750.82s to 2820.59s...
Transcription for segment 21 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 21...


Segments Processed:  88%|████████▊ | 22/25 [06:28<00:54, 18.29s/segment]

Summary generation for segment 21 completed.

Processing segment 22: 2820.59s to 2940.19s
Transcribing audio for segment 22 from 2820.59s to 2940.19s...
Transcription for segment 22 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 22...


Segments Processed:  92%|█████████▏| 23/25 [06:41<00:33, 16.88s/segment]

Summary generation for segment 22 completed.

Processing segment 23: 2940.19s to 3229.22s
Transcribing audio for segment 23 from 2940.19s to 3229.22s...
Transcription for segment 23 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 23...


Segments Processed:  96%|█████████▌| 24/25 [07:05<00:18, 18.96s/segment]

Summary generation for segment 23 completed.

Processing segment 24: 3229.22s to 3284.09s
Transcribing audio for segment 24 from 3229.22s to 3284.09s...
Transcription for segment 24 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 24...


Segments Processed: 100%|██████████| 25/25 [07:21<00:00, 17.66s/segment]

Summary generation for segment 24 completed.

Processing completed.





Document saved as /content/outputs/Fabasoft Management Academy - Basismodul 1 - Dynamische Stabilität von Organisationen TEIL 1.docx
Successfully processed: /content/Fabasoft Management Academy - Basismodul 1 - Dynamische Stabilität von Organisationen TEIL 1.mp4
Output saved as: /content/outputs/Fabasoft Management Academy - Basismodul 1 - Dynamische Stabilität von Organisationen TEIL 1.docx

Processing: /content/Managing High-Performance Teams - Teil 02.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 30.0
Total frames: 52075
Video duration: 1735.83 seconds
Frame interval: 300 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|          | 300/52075 [00:00<02:39, 324.78frame/s]

Slide change detected at 10.00 seconds (frame 300). SSIM: 0.4908


Frames Processed:   3%|▎         | 1500/52075 [00:03<01:46, 474.79frame/s]

Slide change detected at 50.00 seconds (frame 1500). SSIM: 0.7127


Frames Processed:  10%|▉         | 5100/52075 [00:09<01:12, 647.95frame/s]

Slide change detected at 170.00 seconds (frame 5100). SSIM: 0.8526


Frames Processed:  17%|█▋        | 8700/52075 [00:15<01:32, 469.11frame/s]

Slide change detected at 290.00 seconds (frame 8700). SSIM: 0.8638


Frames Processed:  32%|███▏      | 16800/52075 [00:29<01:14, 471.76frame/s]

Slide change detected at 560.00 seconds (frame 16800). SSIM: 0.8816


Frames Processed:  37%|███▋      | 19200/52075 [00:32<00:51, 640.16frame/s]

Slide change detected at 640.00 seconds (frame 19200). SSIM: 0.8961


Frames Processed:  37%|███▋      | 19500/52075 [00:33<00:52, 624.86frame/s]

Slide change detected at 650.00 seconds (frame 19500). SSIM: 0.8077


Frames Processed:  49%|████▉     | 25500/52075 [00:43<00:45, 577.73frame/s]

Slide change detected at 850.00 seconds (frame 25500). SSIM: 0.8105


Frames Processed:  89%|████████▊ | 46200/52075 [01:18<00:09, 632.47frame/s]

Slide change detected at 1540.00 seconds (frame 46200). SSIM: 0.8950


Frames Processed:  96%|█████████▌| 50100/52075 [01:25<00:03, 628.43frame/s]

Slide change detected at 1670.00 seconds (frame 50100). SSIM: 0.8310


Frames Processed:  99%|█████████▉| 51775/52075 [01:27<00:00, 589.02frame/s]


Adding final slide change at end of video (1735.83 seconds).
Total slide changes detected: 11

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/11 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 10.00s
Transcribing audio for segment 0 from 0.00s to 10.00s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   9%|▉         | 1/11 [00:06<01:05,  6.53s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 10.00s to 50.00s
Transcribing audio for segment 1 from 10.00s to 50.00s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  18%|█▊        | 2/11 [00:17<01:19,  8.87s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 50.00s to 170.00s
Transcribing audio for segment 2 from 50.00s to 170.00s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  27%|██▋       | 3/11 [00:30<01:26, 10.80s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 170.00s to 290.00s
Transcribing audio for segment 3 from 170.00s to 290.00s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  36%|███▋      | 4/11 [00:42<01:20, 11.45s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 290.00s to 560.00s
Transcribing audio for segment 4 from 290.00s to 560.00s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  45%|████▌     | 5/11 [01:04<01:32, 15.39s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 560.00s to 640.00s
Transcribing audio for segment 5 from 560.00s to 640.00s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  55%|█████▍    | 6/11 [01:17<01:12, 14.49s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 640.00s to 650.00s
Transcribing audio for segment 6 from 640.00s to 650.00s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  64%|██████▎   | 7/11 [01:25<00:49, 12.39s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 650.00s to 850.00s
Transcribing audio for segment 7 from 650.00s to 850.00s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  73%|███████▎  | 8/11 [01:47<00:45, 15.25s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 850.00s to 1540.00s
Transcribing audio for segment 8 from 850.00s to 1540.00s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  82%|████████▏ | 9/11 [02:25<00:44, 22.34s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 1540.00s to 1670.00s
Transcribing audio for segment 9 from 1540.00s to 1670.00s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  91%|█████████ | 10/11 [02:39<00:19, 19.76s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 1670.00s to 1735.83s
Transcribing audio for segment 10 from 1670.00s to 1735.83s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed: 100%|██████████| 11/11 [02:52<00:00, 15.65s/segment]

Summary generation for segment 10 completed.

Processing completed.





Document saved as /content/outputs/Managing High-Performance Teams - Teil 02.docx
Successfully processed: /content/Managing High-Performance Teams - Teil 02.mp4
Output saved as: /content/outputs/Managing High-Performance Teams - Teil 02.docx

Processing: /content/Basic Finance - Teil 01.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 30.0
Total frames: 64449
Video duration: 2148.30 seconds
Frame interval: 300 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|          | 600/64449 [00:01<03:06, 342.44frame/s]

Slide change detected at 20.00 seconds (frame 600). SSIM: 0.5446


Frames Processed:   4%|▎         | 2400/64449 [00:05<02:05, 493.50frame/s]

Slide change detected at 80.00 seconds (frame 2400). SSIM: 0.8589


Frames Processed:  10%|█         | 6600/64449 [00:11<01:17, 748.13frame/s]

Slide change detected at 220.00 seconds (frame 6600). SSIM: 0.8025


Frames Processed:  15%|█▍        | 9600/64449 [00:15<01:08, 796.65frame/s]

Slide change detected at 320.00 seconds (frame 9600). SSIM: 0.8143


Frames Processed:  20%|█▉        | 12600/64449 [00:19<01:07, 766.13frame/s]

Slide change detected at 420.00 seconds (frame 12600). SSIM: 0.8806


Frames Processed:  25%|██▍       | 15900/64449 [00:25<01:48, 448.21frame/s]

Slide change detected at 530.00 seconds (frame 15900). SSIM: 0.8546


Frames Processed:  30%|██▉       | 19200/64449 [00:36<02:35, 291.37frame/s]

Slide change detected at 640.00 seconds (frame 19200). SSIM: 0.7994


Frames Processed:  33%|███▎      | 21000/64449 [00:41<02:01, 356.56frame/s]

Slide change detected at 700.00 seconds (frame 21000). SSIM: 0.8333


Frames Processed:  44%|████▍     | 28200/64449 [01:02<02:02, 295.56frame/s]

Slide change detected at 940.00 seconds (frame 28200). SSIM: 0.7699


Frames Processed:  46%|████▌     | 29700/64449 [01:06<01:23, 414.81frame/s]

Slide change detected at 990.00 seconds (frame 29700). SSIM: 0.6939


Frames Processed:  49%|████▉     | 31500/64449 [01:08<00:56, 581.03frame/s]

Slide change detected at 1050.00 seconds (frame 31500). SSIM: 0.6892


Frames Processed:  58%|█████▊    | 37200/64449 [01:22<01:00, 452.31frame/s]

Slide change detected at 1240.00 seconds (frame 37200). SSIM: 0.8185


Frames Processed:  62%|██████▏   | 39900/64449 [01:29<01:07, 364.09frame/s]

Slide change detected at 1330.00 seconds (frame 39900). SSIM: 0.7656


Frames Processed:  68%|██████▊   | 44100/64449 [01:35<00:24, 847.36frame/s]

Slide change detected at 1470.00 seconds (frame 44100). SSIM: 0.7995


Frames Processed:  69%|██████▉   | 44700/64449 [01:35<00:23, 844.87frame/s]

Slide change detected at 1490.00 seconds (frame 44700). SSIM: 0.8067


Frames Processed:  70%|██████▉   | 45000/64449 [01:36<00:23, 828.34frame/s]

Slide change detected at 1500.00 seconds (frame 45000). SSIM: 0.8028


Frames Processed:  74%|███████▍  | 48000/64449 [01:39<00:23, 709.61frame/s]

Slide change detected at 1600.00 seconds (frame 48000). SSIM: 0.8994


Frames Processed:  76%|███████▋  | 49200/64449 [01:42<00:27, 546.90frame/s]

Slide change detected at 1640.00 seconds (frame 49200). SSIM: 0.8504


Frames Processed:  86%|████████▌ | 55200/64449 [01:56<00:21, 438.72frame/s]

Slide change detected at 1840.00 seconds (frame 55200). SSIM: 0.7783


Frames Processed:  96%|█████████▋| 62100/64449 [02:07<00:04, 520.02frame/s]

Slide change detected at 2070.00 seconds (frame 62100). SSIM: 0.7789


Frames Processed:  97%|█████████▋| 62700/64449 [02:08<00:03, 532.14frame/s]

Slide change detected at 2090.00 seconds (frame 62700). SSIM: 0.7807


Frames Processed:  98%|█████████▊| 63000/64449 [02:09<00:02, 541.08frame/s]

Slide change detected at 2100.00 seconds (frame 63000). SSIM: 0.8500


Frames Processed: 100%|█████████▉| 64149/64449 [02:11<00:00, 489.34frame/s]


Adding final slide change at end of video (2148.30 seconds).
Total slide changes detected: 23

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/23 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 20.00s
Transcribing audio for segment 0 from 0.00s to 20.00s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   4%|▍         | 1/23 [00:06<02:30,  6.83s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 20.00s to 80.00s
Transcribing audio for segment 1 from 20.00s to 80.00s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:   9%|▊         | 2/23 [00:16<02:57,  8.45s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 80.00s to 220.00s
Transcribing audio for segment 2 from 80.00s to 220.00s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  13%|█▎        | 3/23 [00:33<04:05, 12.29s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 220.00s to 320.00s
Transcribing audio for segment 3 from 220.00s to 320.00s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  17%|█▋        | 4/23 [00:46<03:57, 12.50s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 320.00s to 420.00s
Transcribing audio for segment 4 from 320.00s to 420.00s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  22%|██▏       | 5/23 [01:00<03:54, 13.03s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 420.00s to 530.00s
Transcribing audio for segment 5 from 420.00s to 530.00s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  26%|██▌       | 6/23 [01:14<03:51, 13.63s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 530.00s to 640.00s
Transcribing audio for segment 6 from 530.00s to 640.00s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  30%|███       | 7/23 [01:28<03:37, 13.57s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 640.00s to 700.00s
Transcribing audio for segment 7 from 640.00s to 700.00s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  35%|███▍      | 8/23 [01:39<03:12, 12.81s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 700.00s to 940.00s
Transcribing audio for segment 8 from 700.00s to 940.00s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  39%|███▉      | 9/23 [01:58<03:28, 14.86s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 940.00s to 990.00s
Transcribing audio for segment 9 from 940.00s to 990.00s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  43%|████▎     | 10/23 [02:08<02:51, 13.19s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 990.00s to 1050.00s
Transcribing audio for segment 10 from 990.00s to 1050.00s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  48%|████▊     | 11/23 [02:19<02:29, 12.46s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 1050.00s to 1240.00s
Transcribing audio for segment 11 from 1050.00s to 1240.00s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  52%|█████▏    | 12/23 [02:40<02:46, 15.15s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 1240.00s to 1330.00s
Transcribing audio for segment 12 from 1240.00s to 1330.00s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  57%|█████▋    | 13/23 [02:56<02:34, 15.46s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 1330.00s to 1470.00s
Transcribing audio for segment 13 from 1330.00s to 1470.00s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  61%|██████    | 14/23 [03:12<02:20, 15.60s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 1470.00s to 1490.00s
Transcribing audio for segment 14 from 1470.00s to 1490.00s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  65%|██████▌   | 15/23 [03:24<01:55, 14.47s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 1490.00s to 1500.00s
Transcribing audio for segment 15 from 1490.00s to 1500.00s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  70%|██████▉   | 16/23 [03:35<01:33, 13.36s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 1500.00s to 1600.00s
Transcribing audio for segment 16 from 1500.00s to 1600.00s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  74%|███████▍  | 17/23 [03:48<01:20, 13.37s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 1600.00s to 1640.00s
Transcribing audio for segment 17 from 1600.00s to 1640.00s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  78%|███████▊  | 18/23 [03:57<01:00, 12.14s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 1640.00s to 1840.00s
Transcribing audio for segment 18 from 1640.00s to 1840.00s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  83%|████████▎ | 19/23 [04:18<00:58, 14.58s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 1840.00s to 2070.00s
Transcribing audio for segment 19 from 1840.00s to 2070.00s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  87%|████████▋ | 20/23 [04:41<00:51, 17.14s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 2070.00s to 2090.00s
Transcribing audio for segment 20 from 2070.00s to 2090.00s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed:  91%|█████████▏| 21/23 [04:58<00:34, 17.25s/segment]

Summary generation for segment 20 completed.

Processing segment 21: 2090.00s to 2100.00s
Transcribing audio for segment 21 from 2090.00s to 2100.00s...
Transcription for segment 21 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 21...


Segments Processed:  96%|█████████▌| 22/23 [05:09<00:15, 15.23s/segment]

Summary generation for segment 21 completed.

Processing segment 22: 2100.00s to 2148.30s
Transcribing audio for segment 22 from 2100.00s to 2148.30s...
Transcription for segment 22 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 22...


Segments Processed: 100%|██████████| 23/23 [05:21<00:00, 13.97s/segment]

Summary generation for segment 22 completed.

Processing completed.





Document saved as /content/outputs/Basic Finance - Teil 01.docx
Successfully processed: /content/Basic Finance - Teil 01.mp4
Output saved as: /content/outputs/Basic Finance - Teil 01.docx

Processing: /content/Brand Management 1_ Einleitung.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 30.0
Total frames: 101107
Video duration: 3370.23 seconds
Frame interval: 300 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   4%|▍         | 3900/101107 [00:18<07:16, 222.78frame/s]

Slide change detected at 130.00 seconds (frame 3900). SSIM: 0.6358


Frames Processed:   4%|▍         | 4200/101107 [00:19<08:09, 198.01frame/s]

Slide change detected at 140.00 seconds (frame 4200). SSIM: 0.6378


Frames Processed:  18%|█▊        | 18300/101107 [01:19<05:00, 275.87frame/s]

Slide change detected at 610.00 seconds (frame 18300). SSIM: 0.4561


Frames Processed:  19%|█▉        | 19500/101107 [01:22<03:41, 368.23frame/s]

Slide change detected at 650.00 seconds (frame 19500). SSIM: 0.3772


Frames Processed:  20%|█▉        | 19800/101107 [01:22<03:26, 393.61frame/s]

Slide change detected at 660.00 seconds (frame 19800). SSIM: 0.6681


Frames Processed:  20%|█▉        | 20100/101107 [01:23<03:14, 416.36frame/s]

Slide change detected at 670.00 seconds (frame 20100). SSIM: 0.7453


Frames Processed:  20%|██        | 20400/101107 [01:24<03:16, 410.18frame/s]

Slide change detected at 680.00 seconds (frame 20400). SSIM: 0.8060


Frames Processed:  20%|██        | 20700/101107 [01:25<03:31, 379.68frame/s]

Slide change detected at 690.00 seconds (frame 20700). SSIM: 0.8145


Frames Processed:  21%|██        | 21000/101107 [01:25<03:42, 359.61frame/s]

Slide change detected at 700.00 seconds (frame 21000). SSIM: 0.8996


Frames Processed:  21%|██        | 21300/101107 [01:26<03:49, 347.22frame/s]

Slide change detected at 710.00 seconds (frame 21300). SSIM: 0.8828


Frames Processed:  22%|██▏       | 22500/101107 [01:29<02:58, 439.29frame/s]

Slide change detected at 750.00 seconds (frame 22500). SSIM: 0.8235


Frames Processed:  23%|██▎       | 22800/101107 [01:30<03:16, 398.85frame/s]

Slide change detected at 760.00 seconds (frame 22800). SSIM: 0.8416


Frames Processed:  23%|██▎       | 23100/101107 [01:31<03:26, 377.48frame/s]

Slide change detected at 770.00 seconds (frame 23100). SSIM: 0.2477


Frames Processed:  23%|██▎       | 23400/101107 [01:32<03:29, 371.75frame/s]

Slide change detected at 780.00 seconds (frame 23400). SSIM: 0.2680


Frames Processed:  23%|██▎       | 23700/101107 [01:32<03:32, 363.82frame/s]

Slide change detected at 790.00 seconds (frame 23700). SSIM: 0.8534


Frames Processed:  24%|██▎       | 24000/101107 [01:33<03:33, 360.84frame/s]

Slide change detected at 800.00 seconds (frame 24000). SSIM: 0.2216


Frames Processed:  25%|██▍       | 24900/101107 [01:36<03:35, 354.45frame/s]

Slide change detected at 830.00 seconds (frame 24900). SSIM: 0.2489


Frames Processed:  25%|██▍       | 25200/101107 [01:37<03:42, 340.74frame/s]

Slide change detected at 840.00 seconds (frame 25200). SSIM: 0.8404


Frames Processed:  25%|██▌       | 25500/101107 [01:38<04:22, 288.27frame/s]

Slide change detected at 850.00 seconds (frame 25500). SSIM: 0.7828


Frames Processed:  26%|██▌       | 25800/101107 [01:40<04:43, 265.27frame/s]

Slide change detected at 860.00 seconds (frame 25800). SSIM: 0.8383


Frames Processed:  26%|██▌       | 26100/101107 [01:41<04:46, 261.98frame/s]

Slide change detected at 870.00 seconds (frame 26100). SSIM: 0.2688


Frames Processed:  26%|██▌       | 26400/101107 [01:42<04:35, 271.39frame/s]

Slide change detected at 880.00 seconds (frame 26400). SSIM: 0.2571


Frames Processed:  27%|██▋       | 27000/101107 [01:44<04:22, 282.18frame/s]

Slide change detected at 900.00 seconds (frame 27000). SSIM: 0.7956


Frames Processed:  27%|██▋       | 27300/101107 [01:45<04:38, 264.87frame/s]

Slide change detected at 910.00 seconds (frame 27300). SSIM: 0.5694


Frames Processed:  30%|██▉       | 30000/101107 [01:56<05:18, 223.10frame/s]

Slide change detected at 1000.00 seconds (frame 30000). SSIM: 0.3000


Frames Processed:  31%|███▏      | 31800/101107 [02:06<06:52, 168.07frame/s]

Slide change detected at 1060.00 seconds (frame 31800). SSIM: 0.2063


Frames Processed:  32%|███▏      | 32700/101107 [02:10<05:51, 194.37frame/s]

Slide change detected at 1090.00 seconds (frame 32700). SSIM: 0.1060


Frames Processed:  34%|███▎      | 33900/101107 [02:15<05:08, 217.97frame/s]

Slide change detected at 1130.00 seconds (frame 33900). SSIM: 0.0914


Frames Processed:  35%|███▌      | 35700/101107 [02:24<04:57, 220.13frame/s]

Slide change detected at 1190.00 seconds (frame 35700). SSIM: 0.6945


Frames Processed:  39%|███▉      | 39300/101107 [02:39<03:44, 274.71frame/s]

Slide change detected at 1310.00 seconds (frame 39300). SSIM: 0.4326


Frames Processed:  40%|███▉      | 40200/101107 [02:42<03:01, 336.09frame/s]

Slide change detected at 1340.00 seconds (frame 40200). SSIM: 0.5041


Frames Processed:  41%|████      | 41100/101107 [02:45<03:39, 273.34frame/s]

Slide change detected at 1370.00 seconds (frame 41100). SSIM: 0.3102


Frames Processed:  41%|████      | 41400/101107 [02:46<03:42, 268.27frame/s]

Slide change detected at 1380.00 seconds (frame 41400). SSIM: 0.3351


Frames Processed:  43%|████▎     | 43500/101107 [02:52<02:38, 363.64frame/s]

Slide change detected at 1450.00 seconds (frame 43500). SSIM: 0.4805


Frames Processed:  45%|████▌     | 45900/101107 [02:59<03:03, 301.44frame/s]

Slide change detected at 1530.00 seconds (frame 45900). SSIM: 0.8136


Frames Processed:  51%|█████     | 51300/101107 [03:13<02:24, 344.98frame/s]

Slide change detected at 1710.00 seconds (frame 51300). SSIM: 0.8958


Frames Processed:  54%|█████▎    | 54300/101107 [03:22<02:39, 293.73frame/s]

Slide change detected at 1810.00 seconds (frame 54300). SSIM: 0.5921


Frames Processed:  60%|██████    | 60900/101107 [03:41<01:54, 350.35frame/s]

Slide change detected at 2030.00 seconds (frame 60900). SSIM: 0.8278


Frames Processed:  63%|██████▎   | 63600/101107 [03:47<01:30, 413.83frame/s]

Slide change detected at 2120.00 seconds (frame 63600). SSIM: 0.8488


Frames Processed:  69%|██████▉   | 69900/101107 [04:04<01:26, 360.31frame/s]

Slide change detected at 2330.00 seconds (frame 69900). SSIM: 0.8483


Frames Processed:  73%|███████▎  | 73800/101107 [04:13<01:06, 407.92frame/s]

Slide change detected at 2460.00 seconds (frame 73800). SSIM: 0.6204


Frames Processed:  74%|███████▍  | 75300/101107 [04:18<01:32, 279.39frame/s]

Slide change detected at 2510.00 seconds (frame 75300). SSIM: 0.3736


Frames Processed:  76%|███████▋  | 77100/101107 [04:24<01:12, 328.89frame/s]

Slide change detected at 2570.00 seconds (frame 77100). SSIM: 0.2785


Frames Processed:  79%|███████▊  | 79500/101107 [04:31<01:17, 280.15frame/s]

Slide change detected at 2650.00 seconds (frame 79500). SSIM: 0.2672


Frames Processed:  80%|████████  | 81000/101107 [04:35<00:58, 345.57frame/s]

Slide change detected at 2700.00 seconds (frame 81000). SSIM: 0.4266


Frames Processed:  82%|████████▏ | 83400/101107 [04:40<00:35, 504.76frame/s]

Slide change detected at 2780.00 seconds (frame 83400). SSIM: 0.4352


Frames Processed:  84%|████████▍ | 84900/101107 [04:44<00:48, 335.06frame/s]

Slide change detected at 2830.00 seconds (frame 84900). SSIM: 0.2844


Frames Processed:  87%|████████▋ | 88200/101107 [04:54<00:43, 299.86frame/s]

Slide change detected at 2940.00 seconds (frame 88200). SSIM: 0.2778


Frames Processed:  88%|████████▊ | 89400/101107 [05:00<00:51, 225.74frame/s]

Slide change detected at 2980.00 seconds (frame 89400). SSIM: 0.4422


Frames Processed:  90%|█████████ | 91200/101107 [05:07<00:41, 240.50frame/s]

Slide change detected at 3040.00 seconds (frame 91200). SSIM: 0.3957


Frames Processed:  92%|█████████▏| 93000/101107 [05:15<00:28, 280.62frame/s]

Slide change detected at 3100.00 seconds (frame 93000). SSIM: 0.4585


Frames Processed:  94%|█████████▍| 95100/101107 [05:18<00:10, 580.87frame/s]

Slide change detected at 3170.00 seconds (frame 95100). SSIM: 0.7747


Frames Processed:  94%|█████████▍| 95400/101107 [05:19<00:09, 589.99frame/s]

Slide change detected at 3180.00 seconds (frame 95400). SSIM: 0.7149


Frames Processed:  96%|█████████▌| 97200/101107 [05:23<00:10, 368.08frame/s]

Slide change detected at 3240.00 seconds (frame 97200). SSIM: 0.4528


Frames Processed:  97%|█████████▋| 98100/101107 [05:28<00:13, 222.51frame/s]

Slide change detected at 3270.00 seconds (frame 98100). SSIM: 0.3442


Frames Processed:  98%|█████████▊| 99000/101107 [05:32<00:10, 209.33frame/s]

Slide change detected at 3300.00 seconds (frame 99000). SSIM: 0.2760


Frames Processed:  99%|█████████▉| 99900/101107 [05:36<00:04, 251.37frame/s]

Slide change detected at 3330.00 seconds (frame 99900). SSIM: 0.3692


Frames Processed: 100%|█████████▉| 100807/101107 [05:38<00:01, 297.85frame/s]


Adding final slide change at end of video (3370.23 seconds).
Total slide changes detected: 58

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/58 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 130.00s
Transcribing audio for segment 0 from 0.00s to 130.00s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   2%|▏         | 1/58 [00:18<17:32, 18.47s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 130.00s to 140.00s
Transcribing audio for segment 1 from 130.00s to 140.00s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:   3%|▎         | 2/58 [00:24<10:34, 11.33s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 140.00s to 610.00s
Transcribing audio for segment 2 from 140.00s to 610.00s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:   5%|▌         | 3/58 [00:48<15:40, 17.10s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 610.00s to 650.00s
Transcribing audio for segment 3 from 610.00s to 650.00s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:   7%|▋         | 4/58 [00:57<12:23, 13.76s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 650.00s to 660.00s
Transcribing audio for segment 4 from 650.00s to 660.00s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:   9%|▊         | 5/58 [01:03<09:45, 11.05s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 660.00s to 670.00s
Transcribing audio for segment 5 from 660.00s to 670.00s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  10%|█         | 6/58 [01:10<08:14,  9.51s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 670.00s to 680.00s
Transcribing audio for segment 6 from 670.00s to 680.00s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  12%|█▏        | 7/58 [01:17<07:22,  8.67s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 680.00s to 690.00s
Transcribing audio for segment 7 from 680.00s to 690.00s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  14%|█▍        | 8/58 [01:25<07:01,  8.43s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 690.00s to 700.00s
Transcribing audio for segment 8 from 690.00s to 700.00s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  16%|█▌        | 9/58 [01:34<07:08,  8.75s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 700.00s to 710.00s
Transcribing audio for segment 9 from 700.00s to 710.00s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  17%|█▋        | 10/58 [01:41<06:35,  8.23s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 710.00s to 750.00s
Transcribing audio for segment 10 from 710.00s to 750.00s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  19%|█▉        | 11/58 [01:50<06:41,  8.55s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 750.00s to 760.00s
Transcribing audio for segment 11 from 750.00s to 760.00s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  21%|██        | 12/58 [01:57<06:12,  8.10s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 760.00s to 770.00s
Transcribing audio for segment 12 from 760.00s to 770.00s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  22%|██▏       | 13/58 [02:05<05:57,  7.95s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 770.00s to 780.00s
Transcribing audio for segment 13 from 770.00s to 780.00s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  24%|██▍       | 14/58 [02:11<05:20,  7.28s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 780.00s to 790.00s
Transcribing audio for segment 14 from 780.00s to 790.00s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  26%|██▌       | 15/58 [02:19<05:23,  7.53s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 790.00s to 800.00s
Transcribing audio for segment 15 from 790.00s to 800.00s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  28%|██▊       | 16/58 [02:28<05:36,  8.02s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 800.00s to 830.00s
Transcribing audio for segment 16 from 800.00s to 830.00s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  29%|██▉       | 17/58 [02:36<05:22,  7.86s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 830.00s to 840.00s
Transcribing audio for segment 17 from 830.00s to 840.00s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  31%|███       | 18/58 [02:42<05:00,  7.52s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 840.00s to 850.00s
Transcribing audio for segment 18 from 840.00s to 850.00s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  33%|███▎      | 19/58 [02:48<04:31,  6.95s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 850.00s to 860.00s
Transcribing audio for segment 19 from 850.00s to 860.00s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  34%|███▍      | 20/58 [02:55<04:24,  6.95s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 860.00s to 870.00s
Transcribing audio for segment 20 from 860.00s to 870.00s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed:  36%|███▌      | 21/58 [03:02<04:15,  6.89s/segment]

Summary generation for segment 20 completed.

Processing segment 21: 870.00s to 880.00s
Transcribing audio for segment 21 from 870.00s to 880.00s...
Transcription for segment 21 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 21...


Segments Processed:  38%|███▊      | 22/58 [03:09<04:18,  7.18s/segment]

Summary generation for segment 21 completed.

Processing segment 22: 880.00s to 900.00s
Transcribing audio for segment 22 from 880.00s to 900.00s...
Transcription for segment 22 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 22...


Segments Processed:  40%|███▉      | 23/58 [03:18<04:28,  7.67s/segment]

Summary generation for segment 22 completed.

Processing segment 23: 900.00s to 910.00s
Transcribing audio for segment 23 from 900.00s to 910.00s...
Transcription for segment 23 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 23...


Segments Processed:  41%|████▏     | 24/58 [03:26<04:21,  7.68s/segment]

Summary generation for segment 23 completed.

Processing segment 24: 910.00s to 1000.00s
Transcribing audio for segment 24 from 910.00s to 1000.00s...
Transcription for segment 24 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 24...


Segments Processed:  43%|████▎     | 25/58 [03:37<04:49,  8.78s/segment]

Summary generation for segment 24 completed.

Processing segment 25: 1000.00s to 1060.00s
Transcribing audio for segment 25 from 1000.00s to 1060.00s...
Transcription for segment 25 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 25...


Segments Processed:  45%|████▍     | 26/58 [03:48<05:00,  9.40s/segment]

Summary generation for segment 25 completed.

Processing segment 26: 1060.00s to 1090.00s
Transcribing audio for segment 26 from 1060.00s to 1090.00s...
Transcription for segment 26 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 26...


Segments Processed:  47%|████▋     | 27/58 [03:59<05:08,  9.94s/segment]

Summary generation for segment 26 completed.

Processing segment 27: 1090.00s to 1130.00s
Transcribing audio for segment 27 from 1090.00s to 1130.00s...
Transcription for segment 27 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 27...


Segments Processed:  48%|████▊     | 28/58 [04:07<04:40,  9.36s/segment]

Summary generation for segment 27 completed.

Processing segment 28: 1130.00s to 1190.00s
Transcribing audio for segment 28 from 1130.00s to 1190.00s...
Transcription for segment 28 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 28...


Segments Processed:  50%|█████     | 29/58 [04:19<04:50, 10.02s/segment]

Summary generation for segment 28 completed.

Processing segment 29: 1190.00s to 1310.00s
Transcribing audio for segment 29 from 1190.00s to 1310.00s...
Transcription for segment 29 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 29...


Segments Processed:  52%|█████▏    | 30/58 [04:34<05:19, 11.41s/segment]

Summary generation for segment 29 completed.

Processing segment 30: 1310.00s to 1340.00s
Transcribing audio for segment 30 from 1310.00s to 1340.00s...
Transcription for segment 30 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 30...


Segments Processed:  53%|█████▎    | 31/58 [04:41<04:38, 10.31s/segment]

Summary generation for segment 30 completed.

Processing segment 31: 1340.00s to 1370.00s
Transcribing audio for segment 31 from 1340.00s to 1370.00s...
Transcription for segment 31 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 31...


Segments Processed:  55%|█████▌    | 32/58 [04:50<04:17,  9.90s/segment]

Summary generation for segment 31 completed.

Processing segment 32: 1370.00s to 1380.00s
Transcribing audio for segment 32 from 1370.00s to 1380.00s...
Transcription for segment 32 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 32...


Segments Processed:  57%|█████▋    | 33/58 [04:59<03:58,  9.55s/segment]

Summary generation for segment 32 completed.

Processing segment 33: 1380.00s to 1450.00s
Transcribing audio for segment 33 from 1380.00s to 1450.00s...
Transcription for segment 33 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 33...


Segments Processed:  59%|█████▊    | 34/58 [05:12<04:14, 10.60s/segment]

Summary generation for segment 33 completed.

Processing segment 34: 1450.00s to 1530.00s
Transcribing audio for segment 34 from 1450.00s to 1530.00s...
Transcription for segment 34 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 34...


Segments Processed:  60%|██████    | 35/58 [05:33<05:16, 13.77s/segment]

Summary generation for segment 34 completed.

Processing segment 35: 1530.00s to 1710.00s
Transcribing audio for segment 35 from 1530.00s to 1710.00s...
Transcription for segment 35 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 35...


Segments Processed:  62%|██████▏   | 36/58 [05:50<05:24, 14.74s/segment]

Summary generation for segment 35 completed.

Processing segment 36: 1710.00s to 1810.00s
Transcribing audio for segment 36 from 1710.00s to 1810.00s...
Transcription for segment 36 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 36...


Segments Processed:  64%|██████▍   | 37/58 [06:07<05:25, 15.51s/segment]

Summary generation for segment 36 completed.

Processing segment 37: 1810.00s to 2030.00s
Transcribing audio for segment 37 from 1810.00s to 2030.00s...
Transcription for segment 37 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 37...


Segments Processed:  66%|██████▌   | 38/58 [06:29<05:43, 17.19s/segment]

Summary generation for segment 37 completed.

Processing segment 38: 2030.00s to 2120.00s
Transcribing audio for segment 38 from 2030.00s to 2120.00s...
Transcription for segment 38 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 38...


Segments Processed:  67%|██████▋   | 39/58 [06:42<05:03, 15.96s/segment]

Summary generation for segment 38 completed.

Processing segment 39: 2120.00s to 2330.00s
Transcribing audio for segment 39 from 2120.00s to 2330.00s...
Transcription for segment 39 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 39...


Segments Processed:  69%|██████▉   | 40/58 [07:03<05:14, 17.46s/segment]

Summary generation for segment 39 completed.

Processing segment 40: 2330.00s to 2460.00s
Transcribing audio for segment 40 from 2330.00s to 2460.00s...
Transcription for segment 40 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 40...


Segments Processed:  71%|███████   | 41/58 [07:18<04:46, 16.88s/segment]

Summary generation for segment 40 completed.

Processing segment 41: 2460.00s to 2510.00s
Transcribing audio for segment 41 from 2460.00s to 2510.00s...
Transcription for segment 41 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 41...


Segments Processed:  72%|███████▏  | 42/58 [07:29<04:01, 15.12s/segment]

Summary generation for segment 41 completed.

Processing segment 42: 2510.00s to 2570.00s
Transcribing audio for segment 42 from 2510.00s to 2570.00s...
Transcription for segment 42 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 42...


Segments Processed:  74%|███████▍  | 43/58 [07:40<03:29, 13.97s/segment]

Summary generation for segment 42 completed.

Processing segment 43: 2570.00s to 2650.00s
Transcribing audio for segment 43 from 2570.00s to 2650.00s...
Transcription for segment 43 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 43...


Segments Processed:  76%|███████▌  | 44/58 [08:02<03:48, 16.34s/segment]

Summary generation for segment 43 completed.

Processing segment 44: 2650.00s to 2700.00s
Transcribing audio for segment 44 from 2650.00s to 2700.00s...
Transcription for segment 44 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 44...


Segments Processed:  78%|███████▊  | 45/58 [08:14<03:14, 14.97s/segment]

Summary generation for segment 44 completed.

Processing segment 45: 2700.00s to 2780.00s
Transcribing audio for segment 45 from 2700.00s to 2780.00s...
Transcription for segment 45 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 45...


Segments Processed:  79%|███████▉  | 46/58 [08:31<03:04, 15.40s/segment]

Summary generation for segment 45 completed.

Processing segment 46: 2780.00s to 2830.00s
Transcribing audio for segment 46 from 2780.00s to 2830.00s...
Transcription for segment 46 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 46...


Segments Processed:  81%|████████  | 47/58 [08:46<02:49, 15.41s/segment]

Summary generation for segment 46 completed.

Processing segment 47: 2830.00s to 2940.00s
Transcribing audio for segment 47 from 2830.00s to 2940.00s...
Transcription for segment 47 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 47...


Segments Processed:  83%|████████▎ | 48/58 [09:04<02:41, 16.16s/segment]

Summary generation for segment 47 completed.

Processing segment 48: 2940.00s to 2980.00s
Transcribing audio for segment 48 from 2940.00s to 2980.00s...
Transcription for segment 48 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 48...


Segments Processed:  84%|████████▍ | 49/58 [09:16<02:14, 14.94s/segment]

Summary generation for segment 48 completed.

Processing segment 49: 2980.00s to 3040.00s
Transcribing audio for segment 49 from 2980.00s to 3040.00s...
Transcription for segment 49 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 49...


Segments Processed:  86%|████████▌ | 50/58 [09:28<01:53, 14.20s/segment]

Summary generation for segment 49 completed.

Processing segment 50: 3040.00s to 3100.00s
Transcribing audio for segment 50 from 3040.00s to 3100.00s...
Transcription for segment 50 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 50...


Segments Processed:  88%|████████▊ | 51/58 [09:41<01:36, 13.82s/segment]

Summary generation for segment 50 completed.

Processing segment 51: 3100.00s to 3170.00s
Transcribing audio for segment 51 from 3100.00s to 3170.00s...
Transcription for segment 51 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 51...


Segments Processed:  90%|████████▉ | 52/58 [09:56<01:24, 14.11s/segment]

Summary generation for segment 51 completed.

Processing segment 52: 3170.00s to 3180.00s
Transcribing audio for segment 52 from 3170.00s to 3180.00s...
Transcription for segment 52 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 52...


Segments Processed:  91%|█████████▏| 53/58 [10:08<01:07, 13.50s/segment]

Summary generation for segment 52 completed.

Processing segment 53: 3180.00s to 3240.00s
Transcribing audio for segment 53 from 3180.00s to 3240.00s...
Transcription for segment 53 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 53...


Segments Processed:  93%|█████████▎| 54/58 [10:24<00:56, 14.21s/segment]

Summary generation for segment 53 completed.

Processing segment 54: 3240.00s to 3270.00s
Transcribing audio for segment 54 from 3240.00s to 3270.00s...
Transcription for segment 54 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 54...


Segments Processed:  95%|█████████▍| 55/58 [10:36<00:40, 13.63s/segment]

Summary generation for segment 54 completed.

Processing segment 55: 3270.00s to 3300.00s
Transcribing audio for segment 55 from 3270.00s to 3300.00s...
Transcription for segment 55 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 55...


Segments Processed:  97%|█████████▋| 56/58 [10:48<00:26, 13.05s/segment]

Summary generation for segment 55 completed.

Processing segment 56: 3300.00s to 3330.00s
Transcribing audio for segment 56 from 3300.00s to 3330.00s...
Transcription for segment 56 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 56...


Segments Processed:  98%|█████████▊| 57/58 [11:02<00:13, 13.20s/segment]

Summary generation for segment 56 completed.

Processing segment 57: 3330.00s to 3370.23s
Transcribing audio for segment 57 from 3330.00s to 3370.23s...
Transcription for segment 57 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 57...


Segments Processed: 100%|██████████| 58/58 [11:16<00:00, 11.66s/segment]

Summary generation for segment 57 completed.

Processing completed.





Document saved as /content/outputs/Brand Management 1_ Einleitung.docx
Successfully processed: /content/Brand Management 1_ Einleitung.mp4
Output saved as: /content/outputs/Brand Management 1_ Einleitung.docx

Processing: /content/Basic Finance - Teil 03.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 30.0
Total frames: 57182
Video duration: 1906.07 seconds
Frame interval: 300 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|          | 300/57182 [00:01<03:22, 280.31frame/s]

Slide change detected at 10.00 seconds (frame 300). SSIM: 0.5995


Frames Processed:   5%|▌         | 3000/57182 [00:05<01:32, 588.50frame/s]

Slide change detected at 100.00 seconds (frame 3000). SSIM: 0.7190


Frames Processed:   9%|▉         | 5400/57182 [00:10<01:34, 550.20frame/s]

Slide change detected at 180.00 seconds (frame 5400). SSIM: 0.8795


Frames Processed:  11%|█         | 6300/57182 [00:11<01:34, 536.87frame/s]

Slide change detected at 210.00 seconds (frame 6300). SSIM: 0.8514


Frames Processed:  25%|██▌       | 14400/57182 [00:28<01:46, 401.78frame/s]

Slide change detected at 480.00 seconds (frame 14400). SSIM: 0.8576


Frames Processed:  27%|██▋       | 15300/57182 [00:30<01:30, 461.55frame/s]

Slide change detected at 510.00 seconds (frame 15300). SSIM: 0.8461


Frames Processed:  32%|███▏      | 18300/57182 [00:36<01:19, 486.91frame/s]

Slide change detected at 610.00 seconds (frame 18300). SSIM: 0.7950


Frames Processed:  39%|███▉      | 22200/57182 [00:47<01:31, 384.41frame/s]

Slide change detected at 740.00 seconds (frame 22200). SSIM: 0.8587


Frames Processed:  46%|████▌     | 26100/57182 [00:58<01:27, 356.07frame/s]

Slide change detected at 870.00 seconds (frame 26100). SSIM: 0.8897


Frames Processed:  57%|█████▋    | 32700/57182 [01:16<01:02, 393.54frame/s]

Slide change detected at 1090.00 seconds (frame 32700). SSIM: 0.8410


Frames Processed:  62%|██████▏   | 35400/57182 [01:24<01:02, 346.03frame/s]

Slide change detected at 1180.00 seconds (frame 35400). SSIM: 0.8965


Frames Processed:  62%|██████▏   | 35700/57182 [01:25<01:00, 357.77frame/s]

Slide change detected at 1190.00 seconds (frame 35700). SSIM: 0.8651


Frames Processed:  68%|██████▊   | 39000/57182 [01:33<00:53, 341.65frame/s]

Slide change detected at 1300.00 seconds (frame 39000). SSIM: 0.8510


Frames Processed:  69%|██████▉   | 39600/57182 [01:35<00:47, 367.12frame/s]

Slide change detected at 1320.00 seconds (frame 39600). SSIM: 0.8504


Frames Processed:  70%|██████▉   | 39900/57182 [01:36<00:41, 413.52frame/s]

Slide change detected at 1330.00 seconds (frame 39900). SSIM: 0.8540


Frames Processed:  71%|███████▏  | 40800/57182 [01:37<00:26, 612.48frame/s]

Slide change detected at 1360.00 seconds (frame 40800). SSIM: 0.8563


Frames Processed:  77%|███████▋  | 44100/57182 [01:42<00:17, 748.49frame/s]

Slide change detected at 1470.00 seconds (frame 44100). SSIM: 0.8488


Frames Processed:  78%|███████▊  | 44700/57182 [01:42<00:17, 704.26frame/s]

Slide change detected at 1490.00 seconds (frame 44700). SSIM: 0.8616


Frames Processed:  86%|████████▌ | 49200/57182 [01:51<00:13, 572.20frame/s]

Slide change detected at 1640.00 seconds (frame 49200). SSIM: 0.7475


Frames Processed:  94%|█████████▍| 53700/57182 [01:58<00:05, 622.44frame/s]

Slide change detected at 1790.00 seconds (frame 53700). SSIM: 0.7684


Frames Processed:  99%|█████████▉| 56882/57182 [02:04<00:00, 455.40frame/s]


Adding final slide change at end of video (1906.07 seconds).
Total slide changes detected: 21

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/21 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 10.00s
Transcribing audio for segment 0 from 0.00s to 10.00s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   5%|▍         | 1/21 [00:08<02:48,  8.42s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 10.00s to 100.00s
Transcribing audio for segment 1 from 10.00s to 100.00s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  10%|▉         | 2/21 [00:17<02:50,  8.99s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 100.00s to 180.00s
Transcribing audio for segment 2 from 100.00s to 180.00s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  14%|█▍        | 3/21 [00:30<03:09, 10.50s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 180.00s to 210.00s
Transcribing audio for segment 3 from 180.00s to 210.00s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  19%|█▉        | 4/21 [00:41<03:02, 10.73s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 210.00s to 480.00s
Transcribing audio for segment 4 from 210.00s to 480.00s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  24%|██▍       | 5/21 [01:03<03:58, 14.93s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 480.00s to 510.00s
Transcribing audio for segment 5 from 480.00s to 510.00s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  29%|██▊       | 6/21 [01:15<03:29, 13.96s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 510.00s to 610.00s
Transcribing audio for segment 6 from 510.00s to 610.00s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  33%|███▎      | 7/21 [01:30<03:21, 14.40s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 610.00s to 740.00s
Transcribing audio for segment 7 from 610.00s to 740.00s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  38%|███▊      | 8/21 [01:47<03:15, 15.04s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 740.00s to 870.00s
Transcribing audio for segment 8 from 740.00s to 870.00s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  43%|████▎     | 9/21 [02:12<03:37, 18.08s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 870.00s to 1090.00s
Transcribing audio for segment 9 from 870.00s to 1090.00s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  48%|████▊     | 10/21 [02:41<03:55, 21.43s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 1090.00s to 1180.00s
Transcribing audio for segment 10 from 1090.00s to 1180.00s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  52%|█████▏    | 11/21 [02:52<03:05, 18.52s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 1180.00s to 1190.00s
Transcribing audio for segment 11 from 1180.00s to 1190.00s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  57%|█████▋    | 12/21 [03:00<02:15, 15.07s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 1190.00s to 1300.00s
Transcribing audio for segment 12 from 1190.00s to 1300.00s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  62%|██████▏   | 13/21 [03:22<02:18, 17.32s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 1300.00s to 1320.00s
Transcribing audio for segment 13 from 1300.00s to 1320.00s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  67%|██████▋   | 14/21 [03:31<01:42, 14.62s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 1320.00s to 1330.00s
Transcribing audio for segment 14 from 1320.00s to 1330.00s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  71%|███████▏  | 15/21 [03:44<01:25, 14.31s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 1330.00s to 1360.00s
Transcribing audio for segment 15 from 1330.00s to 1360.00s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  76%|███████▌  | 16/21 [03:54<01:05, 13.04s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 1360.00s to 1470.00s
Transcribing audio for segment 16 from 1360.00s to 1470.00s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  81%|████████  | 17/21 [04:14<01:00, 15.15s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 1470.00s to 1490.00s
Transcribing audio for segment 17 from 1470.00s to 1490.00s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  86%|████████▌ | 18/21 [04:23<00:39, 13.23s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 1490.00s to 1640.00s
Transcribing audio for segment 18 from 1490.00s to 1640.00s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  90%|█████████ | 19/21 [04:41<00:29, 14.52s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 1640.00s to 1790.00s
Transcribing audio for segment 19 from 1640.00s to 1790.00s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  95%|█████████▌| 20/21 [05:05<00:17, 17.37s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 1790.00s to 1906.07s
Transcribing audio for segment 20 from 1790.00s to 1906.07s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed: 100%|██████████| 21/21 [05:22<00:00, 15.36s/segment]

Summary generation for segment 20 completed.

Processing completed.





Document saved as /content/outputs/Basic Finance - Teil 03.docx
Successfully processed: /content/Basic Finance - Teil 03.mp4
Output saved as: /content/outputs/Basic Finance - Teil 03.docx

Processing: /content/Brand Management 6_ Marken-Monitoring.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 12.287284642215411
Total frames: 10044
Video duration: 817.43 seconds
Frame interval: 122 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   2%|▏         | 244/10044 [00:00<00:34, 287.18frame/s]

Slide change detected at 19.86 seconds (frame 244). SSIM: 0.7341


Frames Processed:   6%|▌         | 610/10044 [00:02<00:32, 288.47frame/s]

Slide change detected at 49.64 seconds (frame 610). SSIM: 0.8502


Frames Processed:  11%|█         | 1098/10044 [00:04<00:34, 256.78frame/s]

Slide change detected at 89.36 seconds (frame 1098). SSIM: 0.8201


Frames Processed:  23%|██▎       | 2318/10044 [00:08<00:28, 266.77frame/s]

Slide change detected at 188.65 seconds (frame 2318). SSIM: 0.6953


Frames Processed:  35%|███▌      | 3538/10044 [00:14<00:30, 215.46frame/s]

Slide change detected at 287.94 seconds (frame 3538). SSIM: 0.7176


Frames Processed:  44%|████▎     | 4392/10044 [00:17<00:19, 288.89frame/s]

Slide change detected at 357.44 seconds (frame 4392). SSIM: 0.8075


Frames Processed:  74%|███████▍  | 7442/10044 [00:28<00:09, 267.00frame/s]

Slide change detected at 605.67 seconds (frame 7442). SSIM: 0.7068


Frames Processed:  79%|███████▉  | 7930/10044 [00:30<00:07, 270.47frame/s]

Slide change detected at 645.38 seconds (frame 7930). SSIM: 0.7333


Frames Processed:  83%|████████▎ | 8296/10044 [00:31<00:06, 285.86frame/s]

Slide change detected at 675.17 seconds (frame 8296). SSIM: 0.7764


Frames Processed:  86%|████████▌ | 8662/10044 [00:33<00:05, 274.81frame/s]

Slide change detected at 704.96 seconds (frame 8662). SSIM: 0.7591


Frames Processed:  91%|█████████ | 9150/10044 [00:35<00:03, 260.98frame/s]

Slide change detected at 744.67 seconds (frame 9150). SSIM: 0.5706


Frames Processed:  99%|█████████▉| 9922/10044 [00:38<00:00, 258.85frame/s]


Adding final slide change at end of video (817.43 seconds).
Total slide changes detected: 12

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/12 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 19.86s
Transcribing audio for segment 0 from 0.00s to 19.86s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   8%|▊         | 1/12 [00:07<01:17,  7.07s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 19.86s to 49.64s
Transcribing audio for segment 1 from 19.86s to 49.64s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  17%|█▋        | 2/12 [00:15<01:16,  7.64s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 49.64s to 89.36s
Transcribing audio for segment 2 from 49.64s to 89.36s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  25%|██▌       | 3/12 [00:24<01:17,  8.65s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 89.36s to 188.65s
Transcribing audio for segment 3 from 89.36s to 188.65s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  33%|███▎      | 4/12 [00:37<01:20, 10.12s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 188.65s to 287.94s
Transcribing audio for segment 4 from 188.65s to 287.94s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  42%|████▏     | 5/12 [00:49<01:16, 10.87s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 287.94s to 357.44s
Transcribing audio for segment 5 from 287.94s to 357.44s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  50%|█████     | 6/12 [01:00<01:04, 10.74s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 357.44s to 605.67s
Transcribing audio for segment 6 from 357.44s to 605.67s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  58%|█████▊    | 7/12 [01:21<01:10, 14.13s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 605.67s to 645.38s
Transcribing audio for segment 7 from 605.67s to 645.38s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  67%|██████▋   | 8/12 [01:34<00:55, 13.97s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 645.38s to 675.17s
Transcribing audio for segment 8 from 645.38s to 675.17s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  75%|███████▌  | 9/12 [01:46<00:39, 13.31s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 675.17s to 704.96s
Transcribing audio for segment 9 from 675.17s to 704.96s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  83%|████████▎ | 10/12 [01:57<00:25, 12.63s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 704.96s to 744.67s
Transcribing audio for segment 10 from 704.96s to 744.67s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  92%|█████████▏| 11/12 [02:08<00:11, 11.94s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 744.67s to 817.43s
Transcribing audio for segment 11 from 744.67s to 817.43s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed: 100%|██████████| 12/12 [02:18<00:00, 11.53s/segment]

Summary generation for segment 11 completed.

Processing completed.





Document saved as /content/outputs/Brand Management 6_ Marken-Monitoring.docx
Successfully processed: /content/Brand Management 6_ Marken-Monitoring.mp4
Output saved as: /content/outputs/Brand Management 6_ Marken-Monitoring.docx

Processing: /content/B Video Organizational Design： Teil 1 Begriffe rund um Organisation.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 11.56642436543753
Total frames: 4950
Video duration: 427.96 seconds
Frame interval: 115 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   2%|▏         | 115/4950 [00:00<00:11, 425.48frame/s]

Slide change detected at 9.94 seconds (frame 115). SSIM: 0.4264


Frames Processed:   5%|▍         | 230/4950 [00:00<00:10, 454.54frame/s]

Slide change detected at 19.89 seconds (frame 230). SSIM: 0.4326


Frames Processed:  21%|██        | 1035/4950 [00:02<00:07, 529.61frame/s]

Slide change detected at 89.48 seconds (frame 1035). SSIM: 0.6594


Frames Processed:  42%|████▏     | 2070/4950 [00:03<00:05, 545.84frame/s]

Slide change detected at 178.97 seconds (frame 2070). SSIM: 0.7082


Frames Processed:  51%|█████     | 2530/4950 [00:04<00:04, 550.82frame/s]

Slide change detected at 218.74 seconds (frame 2530). SSIM: 0.8020


Frames Processed:  58%|█████▊    | 2875/4950 [00:05<00:04, 492.10frame/s]

Slide change detected at 248.56 seconds (frame 2875). SSIM: 0.6682


Frames Processed:  74%|███████▍  | 3680/4950 [00:06<00:02, 537.66frame/s]

Slide change detected at 318.16 seconds (frame 3680). SSIM: 0.8824


Frames Processed:  77%|███████▋  | 3795/4950 [00:07<00:02, 539.29frame/s]

Slide change detected at 328.10 seconds (frame 3795). SSIM: 0.6576


Frames Processed:  98%|█████████▊| 4835/4950 [00:09<00:00, 521.95frame/s]

Slide change detected at 427.53 seconds (frame 4945). SSIM: 0.3895
Adding final slide change at end of video (427.96 seconds).
Total slide changes detected: 10

Step 2: Processing slides and audio...



Segments Processed:   0%|          | 0/10 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 9.94s
Transcribing audio for segment 0 from 0.00s to 9.94s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:  10%|█         | 1/10 [00:06<00:57,  6.37s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 9.94s to 19.89s
Transcribing audio for segment 1 from 9.94s to 19.89s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  20%|██        | 2/10 [00:13<00:53,  6.75s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 19.89s to 89.48s
Transcribing audio for segment 2 from 19.89s to 89.48s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  30%|███       | 3/10 [00:23<00:59,  8.44s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 89.48s to 178.97s
Transcribing audio for segment 3 from 89.48s to 178.97s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  40%|████      | 4/10 [00:33<00:54,  9.09s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 178.97s to 218.74s
Transcribing audio for segment 4 from 178.97s to 218.74s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  50%|█████     | 5/10 [00:44<00:47,  9.60s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 218.74s to 248.56s
Transcribing audio for segment 5 from 218.74s to 248.56s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  60%|██████    | 6/10 [00:54<00:38,  9.59s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 248.56s to 318.16s
Transcribing audio for segment 6 from 248.56s to 318.16s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  70%|███████   | 7/10 [01:07<00:32, 10.92s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 318.16s to 328.10s
Transcribing audio for segment 7 from 318.16s to 328.10s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  80%|████████  | 8/10 [01:17<00:21, 10.66s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 328.10s to 427.53s
Transcribing audio for segment 8 from 328.10s to 427.53s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  90%|█████████ | 9/10 [01:31<00:11, 11.47s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 427.53s to 427.96s
Transcribing audio for segment 9 from 427.53s to 427.96s...
Error during transcription of segment 9: Error code: 400 - {'error': {'message': 'Audio file is too short. Minimum audio length is 0.1 seconds.', 'type': 'invalid_request_error', 'param': 'file', 'code': 'audio_too_short'}}
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed: 100%|██████████| 10/10 [01:36<00:00,  9.64s/segment]

Summary generation for segment 9 completed.

Processing completed.





Document saved as /content/outputs/B Video Organizational Design： Teil 1 Begriffe rund um Organisation.docx
Successfully processed: /content/B Video Organizational Design： Teil 1 Begriffe rund um Organisation.mp4
Output saved as: /content/outputs/B Video Organizational Design： Teil 1 Begriffe rund um Organisation.docx

Processing: /content/Fabasoft Management Academy - Basismodul1 - Dynamische Stabilität von Organisationen TEIL 3.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 29.999961818339738
Total frames: 66524
Video duration: 2217.47 seconds
Frame interval: 299 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   3%|▎         | 1794/66524 [00:03<01:53, 568.59frame/s]

Slide change detected at 59.80 seconds (frame 1794). SSIM: 0.7977


Frames Processed:   8%|▊         | 5382/66524 [00:09<01:54, 534.10frame/s]

Slide change detected at 179.40 seconds (frame 5382). SSIM: 0.7977


Frames Processed:  17%|█▋        | 11362/66524 [00:19<01:27, 632.23frame/s]

Slide change detected at 378.73 seconds (frame 11362). SSIM: 0.7982


Frames Processed:  26%|██▌       | 17043/66524 [00:30<01:35, 516.34frame/s]

Slide change detected at 568.10 seconds (frame 17043). SSIM: 0.8245


Frames Processed:  28%|██▊       | 18837/66524 [00:33<01:24, 565.52frame/s]

Slide change detected at 627.90 seconds (frame 18837). SSIM: 0.7837


Frames Processed:  41%|████      | 27209/66524 [00:47<01:02, 629.81frame/s]

Slide change detected at 906.97 seconds (frame 27209). SSIM: 0.6500


Frames Processed:  50%|█████     | 33488/66524 [00:59<01:08, 485.10frame/s]

Slide change detected at 1116.27 seconds (frame 33488). SSIM: 0.6440


Frames Processed:  54%|█████▍    | 36179/66524 [01:05<01:05, 461.70frame/s]

Slide change detected at 1205.97 seconds (frame 36179). SSIM: 0.7895


Frames Processed:  62%|██████▏   | 41262/66524 [01:13<00:42, 595.13frame/s]

Slide change detected at 1375.40 seconds (frame 41262). SSIM: 0.8176


Frames Processed:  68%|██████▊   | 45149/66524 [01:20<00:32, 666.83frame/s]

Slide change detected at 1504.97 seconds (frame 45149). SSIM: 0.8473


Frames Processed:  78%|███████▊  | 52026/66524 [01:32<00:30, 482.18frame/s]

Slide change detected at 1734.20 seconds (frame 52026). SSIM: 0.8404


Frames Processed:  79%|███████▊  | 52325/66524 [01:33<00:31, 452.98frame/s]

Slide change detected at 1744.17 seconds (frame 52325). SSIM: 0.5906


Frames Processed:  80%|████████  | 53222/66524 [01:34<00:25, 520.28frame/s]

Slide change detected at 1774.07 seconds (frame 53222). SSIM: 0.6125


Frames Processed:  90%|█████████ | 60099/66524 [01:47<00:11, 547.29frame/s]

Slide change detected at 2003.30 seconds (frame 60099). SSIM: 0.6280


Frames Processed:  96%|█████████▌| 63986/66524 [01:53<00:03, 698.80frame/s]

Slide change detected at 2132.87 seconds (frame 63986). SSIM: 0.7095


Frames Processed: 100%|█████████▉| 66225/66524 [01:57<00:00, 563.71frame/s]


Adding final slide change at end of video (2217.47 seconds).
Total slide changes detected: 16

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/16 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 59.80s
Transcribing audio for segment 0 from 0.00s to 59.80s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   6%|▋         | 1/16 [00:08<02:02,  8.15s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 59.80s to 179.40s
Transcribing audio for segment 1 from 59.80s to 179.40s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  12%|█▎        | 2/16 [00:21<02:36, 11.18s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 179.40s to 378.73s
Transcribing audio for segment 2 from 179.40s to 378.73s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  19%|█▉        | 3/16 [00:40<03:09, 14.59s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 378.73s to 568.10s
Transcribing audio for segment 3 from 378.73s to 568.10s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  25%|██▌       | 4/16 [00:57<03:07, 15.65s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 568.10s to 627.90s
Transcribing audio for segment 4 from 568.10s to 627.90s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  31%|███▏      | 5/16 [01:10<02:41, 14.70s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 627.90s to 906.97s
Transcribing audio for segment 5 from 627.90s to 906.97s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  38%|███▊      | 6/16 [01:31<02:48, 16.81s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 906.97s to 1116.27s
Transcribing audio for segment 6 from 906.97s to 1116.27s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  44%|████▍     | 7/16 [01:48<02:33, 17.08s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 1116.27s to 1205.97s
Transcribing audio for segment 7 from 1116.27s to 1205.97s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  50%|█████     | 8/16 [02:02<02:06, 15.81s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 1205.97s to 1375.40s
Transcribing audio for segment 8 from 1205.97s to 1375.40s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  56%|█████▋    | 9/16 [02:20<01:56, 16.70s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 1375.40s to 1504.97s
Transcribing audio for segment 9 from 1375.40s to 1504.97s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  62%|██████▎   | 10/16 [02:38<01:42, 17.07s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 1504.97s to 1734.20s
Transcribing audio for segment 10 from 1504.97s to 1734.20s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  69%|██████▉   | 11/16 [02:58<01:29, 17.89s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 1734.20s to 1744.17s
Transcribing audio for segment 11 from 1734.20s to 1744.17s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  75%|███████▌  | 12/16 [03:07<01:00, 15.14s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 1744.17s to 1774.07s
Transcribing audio for segment 12 from 1744.17s to 1774.07s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  81%|████████▏ | 13/16 [03:15<00:39, 13.02s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 1774.07s to 2003.30s
Transcribing audio for segment 13 from 1774.07s to 2003.30s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  88%|████████▊ | 14/16 [03:37<00:31, 15.86s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 2003.30s to 2132.87s
Transcribing audio for segment 14 from 2003.30s to 2132.87s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  94%|█████████▍| 15/16 [03:54<00:16, 16.24s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 2132.87s to 2217.47s
Transcribing audio for segment 15 from 2132.87s to 2217.47s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed: 100%|██████████| 16/16 [04:07<00:00, 15.44s/segment]

Summary generation for segment 15 completed.

Processing completed.





Document saved as /content/outputs/Fabasoft Management Academy - Basismodul1 - Dynamische Stabilität von Organisationen TEIL 3.docx
Successfully processed: /content/Fabasoft Management Academy - Basismodul1 - Dynamische Stabilität von Organisationen TEIL 3.mp4
Output saved as: /content/outputs/Fabasoft Management Academy - Basismodul1 - Dynamische Stabilität von Organisationen TEIL 3.docx

Processing: /content/04 Stakeholder Analyse.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 30.0
Total frames: 49398
Video duration: 1646.60 seconds
Frame interval: 300 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|          | 300/49398 [00:00<02:33, 320.47frame/s]

Slide change detected at 10.00 seconds (frame 300). SSIM: 0.4509


Frames Processed:   1%|          | 600/49398 [00:01<02:31, 322.67frame/s]

Slide change detected at 20.00 seconds (frame 600). SSIM: 0.4955


Frames Processed:   2%|▏         | 900/49398 [00:02<01:58, 408.82frame/s]

Slide change detected at 30.00 seconds (frame 900). SSIM: 0.5723


Frames Processed:   5%|▍         | 2400/49398 [00:05<01:35, 490.68frame/s]

Slide change detected at 80.00 seconds (frame 2400). SSIM: 0.7003


Frames Processed:   8%|▊         | 3900/49398 [00:08<01:50, 411.41frame/s]

Slide change detected at 130.00 seconds (frame 3900). SSIM: 0.8437


Frames Processed:  12%|█▏        | 6000/49398 [00:13<01:34, 460.64frame/s]

Slide change detected at 200.00 seconds (frame 6000). SSIM: 0.7309


Frames Processed:  43%|████▎     | 21300/49398 [00:45<00:53, 521.55frame/s]

Slide change detected at 710.00 seconds (frame 21300). SSIM: 0.6707


Frames Processed:  58%|█████▊    | 28500/49398 [01:00<00:43, 485.53frame/s]

Slide change detected at 950.00 seconds (frame 28500). SSIM: 0.6973


Frames Processed:  66%|██████▌   | 32700/49398 [01:10<00:32, 507.14frame/s]

Slide change detected at 1090.00 seconds (frame 32700). SSIM: 0.6927


Frames Processed:  67%|██████▋   | 33000/49398 [01:10<00:35, 458.04frame/s]

Slide change detected at 1100.00 seconds (frame 33000). SSIM: 0.7324


Frames Processed:  71%|███████   | 35100/49398 [01:15<00:33, 427.92frame/s]

Slide change detected at 1170.00 seconds (frame 35100). SSIM: 0.8506


Frames Processed:  75%|███████▍  | 36900/49398 [01:19<00:25, 495.64frame/s]

Slide change detected at 1230.00 seconds (frame 36900). SSIM: 0.6624


Frames Processed:  81%|████████  | 39900/49398 [01:25<00:18, 517.87frame/s]

Slide change detected at 1330.00 seconds (frame 39900). SSIM: 0.6008


Frames Processed:  93%|█████████▎| 45900/49398 [01:37<00:06, 532.96frame/s]

Slide change detected at 1530.00 seconds (frame 45900). SSIM: 0.7332


Frames Processed:  99%|█████████▉| 49098/49398 [01:45<00:00, 465.60frame/s]

Slide change detected at 1640.00 seconds (frame 49200). SSIM: 0.7335
Adding final slide change at end of video (1646.60 seconds).
Total slide changes detected: 16

Step 2: Processing slides and audio...



Segments Processed:   0%|          | 0/16 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 10.00s
Transcribing audio for segment 0 from 0.00s to 10.00s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   6%|▋         | 1/16 [00:08<02:10,  8.69s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 10.00s to 20.00s
Transcribing audio for segment 1 from 10.00s to 20.00s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:  12%|█▎        | 2/16 [00:14<01:40,  7.18s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 20.00s to 30.00s
Transcribing audio for segment 2 from 20.00s to 30.00s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:  19%|█▉        | 3/16 [00:22<01:35,  7.32s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 30.00s to 80.00s
Transcribing audio for segment 3 from 30.00s to 80.00s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  25%|██▌       | 4/16 [00:32<01:40,  8.39s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 80.00s to 130.00s
Transcribing audio for segment 4 from 80.00s to 130.00s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  31%|███▏      | 5/16 [00:42<01:37,  8.90s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 130.00s to 200.00s
Transcribing audio for segment 5 from 130.00s to 200.00s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  38%|███▊      | 6/16 [00:53<01:35,  9.58s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 200.00s to 710.00s
Transcribing audio for segment 6 from 200.00s to 710.00s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  44%|████▍     | 7/16 [01:27<02:39, 17.72s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 710.00s to 950.00s
Transcribing audio for segment 7 from 710.00s to 950.00s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  50%|█████     | 8/16 [01:48<02:30, 18.76s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 950.00s to 1090.00s
Transcribing audio for segment 8 from 950.00s to 1090.00s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  56%|█████▋    | 9/16 [02:01<01:59, 17.05s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 1090.00s to 1100.00s
Transcribing audio for segment 9 from 1090.00s to 1100.00s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  62%|██████▎   | 10/16 [02:10<01:26, 14.43s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 1100.00s to 1170.00s
Transcribing audio for segment 10 from 1100.00s to 1170.00s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  69%|██████▉   | 11/16 [02:24<01:11, 14.24s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 1170.00s to 1230.00s
Transcribing audio for segment 11 from 1170.00s to 1230.00s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  75%|███████▌  | 12/16 [02:34<00:52, 13.13s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 1230.00s to 1330.00s
Transcribing audio for segment 12 from 1230.00s to 1330.00s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  81%|████████▏ | 13/16 [02:48<00:39, 13.29s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 1330.00s to 1530.00s
Transcribing audio for segment 13 from 1330.00s to 1530.00s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  88%|████████▊ | 14/16 [03:09<00:31, 15.70s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 1530.00s to 1640.00s
Transcribing audio for segment 14 from 1530.00s to 1640.00s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  94%|█████████▍| 15/16 [03:24<00:15, 15.31s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 1640.00s to 1646.60s
Transcribing audio for segment 15 from 1640.00s to 1646.60s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed: 100%|██████████| 16/16 [03:36<00:00, 13.51s/segment]

Summary generation for segment 15 completed.

Processing completed.





Document saved as /content/outputs/04 Stakeholder Analyse.docx
Successfully processed: /content/04 Stakeholder Analyse.mp4
Output saved as: /content/outputs/04 Stakeholder Analyse.docx

Processing: /content/Fabasoft Mgmt Academy _ Basismodul3 _ Sustainable Marketing.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 16.00000477713092
Total frames: 80383
Video duration: 5023.94 seconds
Frame interval: 160 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   0%|          | 160/80383 [00:00<05:45, 232.53frame/s]

Slide change detected at 10.00 seconds (frame 160). SSIM: 0.4963


Frames Processed:   0%|          | 320/80383 [00:01<04:15, 313.03frame/s]

Slide change detected at 20.00 seconds (frame 320). SSIM: 0.8311


Frames Processed:   1%|          | 640/80383 [00:02<04:02, 328.24frame/s]

Slide change detected at 40.00 seconds (frame 640). SSIM: 0.6159


Frames Processed:   1%|          | 960/80383 [00:02<03:52, 341.80frame/s]

Slide change detected at 60.00 seconds (frame 960). SSIM: 0.8703


Frames Processed:   2%|▏         | 1440/80383 [00:04<03:48, 345.28frame/s]

Slide change detected at 90.00 seconds (frame 1440). SSIM: 0.8178


Frames Processed:   3%|▎         | 2400/80383 [00:07<04:22, 296.66frame/s]

Slide change detected at 150.00 seconds (frame 2400). SSIM: 0.8332


Frames Processed:   4%|▎         | 2880/80383 [00:09<04:48, 268.95frame/s]

Slide change detected at 180.00 seconds (frame 2880). SSIM: 0.7914


Frames Processed:   4%|▍         | 3200/80383 [00:10<04:11, 306.35frame/s]

Slide change detected at 200.00 seconds (frame 3200). SSIM: 0.7100


Frames Processed:   5%|▍         | 3680/80383 [00:11<03:44, 341.86frame/s]

Slide change detected at 230.00 seconds (frame 3680). SSIM: 0.7868


Frames Processed:   9%|▉         | 7040/80383 [00:21<03:54, 312.87frame/s]

Slide change detected at 440.00 seconds (frame 7040). SSIM: 0.7802


Frames Processed:  12%|█▏        | 9280/80383 [00:27<03:23, 348.70frame/s]

Slide change detected at 580.00 seconds (frame 9280). SSIM: 0.7966


Frames Processed:  16%|█▌        | 12640/80383 [00:38<03:28, 324.94frame/s]

Slide change detected at 790.00 seconds (frame 12640). SSIM: 0.8794


Frames Processed:  17%|█▋        | 13280/80383 [00:39<03:07, 357.16frame/s]

Slide change detected at 830.00 seconds (frame 13280). SSIM: 0.7539


Frames Processed:  20%|██        | 16320/80383 [00:49<03:56, 271.17frame/s]

Slide change detected at 1020.00 seconds (frame 16320). SSIM: 0.7541


Frames Processed:  23%|██▎       | 18880/80383 [00:56<02:56, 347.60frame/s]

Slide change detected at 1180.00 seconds (frame 18880). SSIM: 0.7572


Frames Processed:  27%|██▋       | 21600/80383 [01:05<03:09, 310.93frame/s]

Slide change detected at 1350.00 seconds (frame 21600). SSIM: 0.7510


Frames Processed:  28%|██▊       | 22720/80383 [01:08<02:46, 346.66frame/s]

Slide change detected at 1420.00 seconds (frame 22720). SSIM: 0.7795


Frames Processed:  32%|███▏      | 25920/80383 [01:17<02:40, 338.79frame/s]

Slide change detected at 1620.00 seconds (frame 25920). SSIM: 0.7745


Frames Processed:  33%|███▎      | 26880/80383 [01:20<02:33, 349.65frame/s]

Slide change detected at 1680.00 seconds (frame 26880). SSIM: 0.8238


Frames Processed:  41%|████▏     | 33280/80383 [01:39<02:15, 346.81frame/s]

Slide change detected at 2080.00 seconds (frame 33280). SSIM: 0.8075


Frames Processed:  44%|████▍     | 35200/80383 [01:45<02:08, 351.73frame/s]

Slide change detected at 2200.00 seconds (frame 35200). SSIM: 0.8450


Frames Processed:  47%|████▋     | 37920/80383 [01:52<02:05, 339.19frame/s]

Slide change detected at 2370.00 seconds (frame 37920). SSIM: 0.7306


Frames Processed:  57%|█████▋    | 45920/80383 [02:16<01:30, 382.39frame/s]

Slide change detected at 2870.00 seconds (frame 45920). SSIM: 0.7749


Frames Processed:  62%|██████▏   | 49600/80383 [02:27<01:25, 359.07frame/s]

Slide change detected at 3100.00 seconds (frame 49600). SSIM: 0.8029


Frames Processed:  65%|██████▌   | 52320/80383 [02:35<01:42, 273.67frame/s]

Slide change detected at 3270.00 seconds (frame 52320). SSIM: 0.7428


Frames Processed:  65%|██████▌   | 52640/80383 [02:36<01:29, 308.57frame/s]

Slide change detected at 3290.00 seconds (frame 52640). SSIM: 0.6877


Frames Processed:  69%|██████▊   | 55200/80383 [02:44<01:12, 349.28frame/s]

Slide change detected at 3450.00 seconds (frame 55200). SSIM: 0.7744


Frames Processed:  71%|███████   | 56960/80383 [02:50<01:11, 327.32frame/s]

Slide change detected at 3560.00 seconds (frame 56960). SSIM: 0.7636


Frames Processed:  71%|███████   | 57120/80383 [02:50<01:14, 312.60frame/s]

Slide change detected at 3570.00 seconds (frame 57120). SSIM: 0.7235


Frames Processed:  73%|███████▎  | 58400/80383 [02:54<01:01, 359.03frame/s]

Slide change detected at 3650.00 seconds (frame 58400). SSIM: 0.8287


Frames Processed:  75%|███████▌  | 60480/80383 [03:00<01:10, 282.23frame/s]

Slide change detected at 3780.00 seconds (frame 60480). SSIM: 0.8240


Frames Processed:  77%|███████▋  | 62080/80383 [03:05<00:52, 350.68frame/s]

Slide change detected at 3880.00 seconds (frame 62080). SSIM: 0.8052


Frames Processed:  81%|████████  | 65120/80383 [03:15<00:54, 279.49frame/s]

Slide change detected at 4070.00 seconds (frame 65120). SSIM: 0.8342


Frames Processed:  85%|████████▍ | 68160/80383 [03:24<00:45, 265.89frame/s]

Slide change detected at 4260.00 seconds (frame 68160). SSIM: 0.7650


Frames Processed:  89%|████████▉ | 71520/80383 [03:35<00:25, 346.88frame/s]

Slide change detected at 4470.00 seconds (frame 71520). SSIM: 0.7491


Frames Processed:  94%|█████████▍| 75520/80383 [03:47<00:13, 362.08frame/s]

Slide change detected at 4720.00 seconds (frame 75520). SSIM: 0.8039


Frames Processed:  97%|█████████▋| 78240/80383 [03:55<00:07, 271.33frame/s]

Slide change detected at 4890.00 seconds (frame 78240). SSIM: 0.8216


Frames Processed:  99%|█████████▉| 79680/80383 [04:00<00:02, 349.69frame/s]

Slide change detected at 4980.00 seconds (frame 79680). SSIM: 0.7184


Frames Processed:  99%|█████████▉| 79840/80383 [04:00<00:01, 354.64frame/s]

Slide change detected at 4990.00 seconds (frame 79840). SSIM: 0.4050


Frames Processed: 100%|█████████▉| 80223/80383 [04:01<00:00, 331.78frame/s]


Adding final slide change at end of video (5023.94 seconds).
Total slide changes detected: 40

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/40 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 10.00s
Transcribing audio for segment 0 from 0.00s to 10.00s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   2%|▎         | 1/40 [00:05<03:35,  5.53s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 10.00s to 20.00s
Transcribing audio for segment 1 from 10.00s to 20.00s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:   5%|▌         | 2/40 [00:18<06:26, 10.18s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 20.00s to 40.00s
Transcribing audio for segment 2 from 20.00s to 40.00s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:   8%|▊         | 3/40 [00:24<04:55,  7.98s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 40.00s to 60.00s
Transcribing audio for segment 3 from 40.00s to 60.00s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:  10%|█         | 4/40 [00:31<04:30,  7.51s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 60.00s to 90.00s
Transcribing audio for segment 4 from 60.00s to 90.00s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:  12%|█▎        | 5/40 [00:38<04:25,  7.59s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 90.00s to 150.00s
Transcribing audio for segment 5 from 90.00s to 150.00s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  15%|█▌        | 6/40 [00:47<04:33,  8.06s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 150.00s to 180.00s
Transcribing audio for segment 6 from 150.00s to 180.00s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  18%|█▊        | 7/40 [00:52<03:51,  7.03s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 180.00s to 200.00s
Transcribing audio for segment 7 from 180.00s to 200.00s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  20%|██        | 8/40 [01:00<03:53,  7.30s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 200.00s to 230.00s
Transcribing audio for segment 8 from 200.00s to 230.00s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  22%|██▎       | 9/40 [01:09<03:59,  7.71s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 230.00s to 440.00s
Transcribing audio for segment 9 from 230.00s to 440.00s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  25%|██▌       | 10/40 [01:24<05:02, 10.09s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 440.00s to 580.00s
Transcribing audio for segment 10 from 440.00s to 580.00s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  28%|██▊       | 11/40 [01:40<05:42, 11.80s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 580.00s to 790.00s
Transcribing audio for segment 11 from 580.00s to 790.00s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  30%|███       | 12/40 [01:55<05:57, 12.78s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 790.00s to 830.00s
Transcribing audio for segment 12 from 790.00s to 830.00s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  32%|███▎      | 13/40 [02:04<05:15, 11.68s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 830.00s to 1020.00s
Transcribing audio for segment 13 from 830.00s to 1020.00s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  35%|███▌      | 14/40 [02:19<05:30, 12.70s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 1020.00s to 1180.00s
Transcribing audio for segment 14 from 1020.00s to 1180.00s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  38%|███▊      | 15/40 [02:34<05:30, 13.24s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 1180.00s to 1350.00s
Transcribing audio for segment 15 from 1180.00s to 1350.00s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  40%|████      | 16/40 [02:47<05:20, 13.35s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 1350.00s to 1420.00s
Transcribing audio for segment 16 from 1350.00s to 1420.00s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  42%|████▎     | 17/40 [02:56<04:37, 12.08s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 1420.00s to 1620.00s
Transcribing audio for segment 17 from 1420.00s to 1620.00s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  45%|████▌     | 18/40 [03:12<04:51, 13.25s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 1620.00s to 1680.00s
Transcribing audio for segment 18 from 1620.00s to 1680.00s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  48%|████▊     | 19/40 [03:21<04:11, 11.97s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 1680.00s to 2080.00s
Transcribing audio for segment 19 from 1680.00s to 2080.00s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  50%|█████     | 20/40 [03:47<05:24, 16.23s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 2080.00s to 2200.00s
Transcribing audio for segment 20 from 2080.00s to 2200.00s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed:  52%|█████▎    | 21/40 [04:00<04:46, 15.08s/segment]

Summary generation for segment 20 completed.

Processing segment 21: 2200.00s to 2370.00s
Transcribing audio for segment 21 from 2200.00s to 2370.00s...
Transcription for segment 21 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 21...


Segments Processed:  55%|█████▌    | 22/40 [04:17<04:43, 15.73s/segment]

Summary generation for segment 21 completed.

Processing segment 22: 2370.00s to 2870.00s
Transcribing audio for segment 22 from 2370.00s to 2870.00s...
Transcription for segment 22 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 22...


Segments Processed:  57%|█████▊    | 23/40 [04:45<05:29, 19.37s/segment]

Summary generation for segment 22 completed.

Processing segment 23: 2870.00s to 3100.00s
Transcribing audio for segment 23 from 2870.00s to 3100.00s...
Transcription for segment 23 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 23...


Segments Processed:  60%|██████    | 24/40 [05:04<05:08, 19.31s/segment]

Summary generation for segment 23 completed.

Processing segment 24: 3100.00s to 3270.00s
Transcribing audio for segment 24 from 3100.00s to 3270.00s...
Transcription for segment 24 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 24...


Segments Processed:  62%|██████▎   | 25/40 [05:18<04:24, 17.60s/segment]

Summary generation for segment 24 completed.

Processing segment 25: 3270.00s to 3290.00s
Transcribing audio for segment 25 from 3270.00s to 3290.00s...
Transcription for segment 25 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 25...


Segments Processed:  65%|██████▌   | 26/40 [05:24<03:19, 14.22s/segment]

Summary generation for segment 25 completed.

Processing segment 26: 3290.00s to 3450.00s
Transcribing audio for segment 26 from 3290.00s to 3450.00s...
Transcription for segment 26 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 26...


Segments Processed:  68%|██████▊   | 27/40 [05:39<03:09, 14.56s/segment]

Summary generation for segment 26 completed.

Processing segment 27: 3450.00s to 3560.00s
Transcribing audio for segment 27 from 3450.00s to 3560.00s...
Transcription for segment 27 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 27...


Segments Processed:  70%|███████   | 28/40 [05:51<02:45, 13.83s/segment]

Summary generation for segment 27 completed.

Processing segment 28: 3560.00s to 3570.00s
Transcribing audio for segment 28 from 3560.00s to 3570.00s...
Transcription for segment 28 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 28...


Segments Processed:  72%|███████▎  | 29/40 [05:59<02:10, 11.82s/segment]

Summary generation for segment 28 completed.

Processing segment 29: 3570.00s to 3650.00s
Transcribing audio for segment 29 from 3570.00s to 3650.00s...
Transcription for segment 29 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 29...


Segments Processed:  75%|███████▌  | 30/40 [06:09<01:53, 11.38s/segment]

Summary generation for segment 29 completed.

Processing segment 30: 3650.00s to 3780.00s
Transcribing audio for segment 30 from 3650.00s to 3780.00s...
Transcription for segment 30 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 30...


Segments Processed:  78%|███████▊  | 31/40 [06:24<01:53, 12.59s/segment]

Summary generation for segment 30 completed.

Processing segment 31: 3780.00s to 3880.00s
Transcribing audio for segment 31 from 3780.00s to 3880.00s...
Transcription for segment 31 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 31...


Segments Processed:  80%|████████  | 32/40 [06:39<01:46, 13.31s/segment]

Summary generation for segment 31 completed.

Processing segment 32: 3880.00s to 4070.00s
Transcribing audio for segment 32 from 3880.00s to 4070.00s...
Transcription for segment 32 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 32...


Segments Processed:  82%|████████▎ | 33/40 [06:56<01:39, 14.28s/segment]

Summary generation for segment 32 completed.

Processing segment 33: 4070.00s to 4260.00s
Transcribing audio for segment 33 from 4070.00s to 4260.00s...
Transcription for segment 33 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 33...


Segments Processed:  85%|████████▌ | 34/40 [07:15<01:34, 15.74s/segment]

Summary generation for segment 33 completed.

Processing segment 34: 4260.00s to 4470.00s
Transcribing audio for segment 34 from 4260.00s to 4470.00s...
Transcription for segment 34 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 34...


Segments Processed:  88%|████████▊ | 35/40 [07:34<01:23, 16.71s/segment]

Summary generation for segment 34 completed.

Processing segment 35: 4470.00s to 4720.00s
Transcribing audio for segment 35 from 4470.00s to 4720.00s...
Transcription for segment 35 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 35...


Segments Processed:  90%|█████████ | 36/40 [07:54<01:10, 17.58s/segment]

Summary generation for segment 35 completed.

Processing segment 36: 4720.00s to 4890.00s
Transcribing audio for segment 36 from 4720.00s to 4890.00s...
Transcription for segment 36 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 36...


Segments Processed:  92%|█████████▎| 37/40 [08:13<00:54, 18.19s/segment]

Summary generation for segment 36 completed.

Processing segment 37: 4890.00s to 4980.00s
Transcribing audio for segment 37 from 4890.00s to 4980.00s...
Transcription for segment 37 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 37...


Segments Processed:  95%|█████████▌| 38/40 [09:36<01:15, 37.52s/segment]

Summary generation for segment 37 completed.

Processing segment 38: 4980.00s to 4990.00s
Transcribing audio for segment 38 from 4980.00s to 4990.00s...
Transcription for segment 38 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 38...


Segments Processed:  98%|█████████▊| 39/40 [09:46<00:29, 29.40s/segment]

Summary generation for segment 38 completed.

Processing segment 39: 4990.00s to 5023.94s
Transcribing audio for segment 39 from 4990.00s to 5023.94s...
Transcription for segment 39 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 39...


Segments Processed: 100%|██████████| 40/40 [09:55<00:00, 14.88s/segment]

Summary generation for segment 39 completed.

Processing completed.





Document saved as /content/outputs/Fabasoft Mgmt Academy _ Basismodul3 _ Sustainable Marketing.docx
Successfully processed: /content/Fabasoft Mgmt Academy _ Basismodul3 _ Sustainable Marketing.mp4
Output saved as: /content/outputs/Fabasoft Mgmt Academy _ Basismodul3 _ Sustainable Marketing.docx

Processing: /content/Video 3_Innovation Management and Tools.mp4
Starting main processing...
Step 1: Detecting slide changes...
Starting slide change detection...
Video FPS: 29.999936920444778
Total frames: 82911
Video duration: 2763.71 seconds
Frame interval: 299 frames (every 10 seconds)
Analyzing frames for slide changes...


Frames Processed:   1%|          | 598/82911 [00:01<03:03, 448.31frame/s]

Slide change detected at 19.93 seconds (frame 598). SSIM: 0.4042


Frames Processed:   1%|          | 897/82911 [00:01<02:52, 476.62frame/s]

Slide change detected at 29.90 seconds (frame 897). SSIM: 0.6217


Frames Processed:   3%|▎         | 2392/82911 [00:04<02:25, 551.71frame/s]

Slide change detected at 79.73 seconds (frame 2392). SSIM: 0.8624


Frames Processed:   4%|▍         | 3289/82911 [00:06<02:38, 503.43frame/s]

Slide change detected at 109.63 seconds (frame 3289). SSIM: 0.8630


Frames Processed:   4%|▍         | 3588/82911 [00:07<03:12, 412.12frame/s]

Slide change detected at 119.60 seconds (frame 3588). SSIM: 0.7789


Frames Processed:   6%|▋         | 5382/82911 [00:10<02:14, 574.37frame/s]

Slide change detected at 179.40 seconds (frame 5382). SSIM: 0.8135


Frames Processed:  11%|█         | 9269/82911 [00:16<01:44, 701.84frame/s]

Slide change detected at 308.97 seconds (frame 9269). SSIM: 0.7914


Frames Processed:  14%|█▎        | 11362/82911 [00:20<02:28, 483.07frame/s]

Slide change detected at 378.73 seconds (frame 11362). SSIM: 0.7709


Frames Processed:  19%|█▉        | 15847/82911 [00:28<02:47, 400.81frame/s]

Slide change detected at 528.23 seconds (frame 15847). SSIM: 0.7029


Frames Processed:  22%|██▏       | 18538/82911 [00:35<02:44, 390.77frame/s]

Slide change detected at 617.93 seconds (frame 18538). SSIM: 0.6889


Frames Processed:  23%|██▎       | 18837/82911 [00:35<02:36, 410.40frame/s]

Slide change detected at 627.90 seconds (frame 18837). SSIM: 0.7379


Frames Processed:  25%|██▍       | 20631/82911 [00:38<01:56, 532.38frame/s]

Slide change detected at 687.70 seconds (frame 20631). SSIM: 0.7325


Frames Processed:  26%|██▌       | 21528/82911 [00:40<01:45, 582.81frame/s]

Slide change detected at 717.60 seconds (frame 21528). SSIM: 0.7565


Frames Processed:  27%|██▋       | 22724/82911 [00:42<01:39, 603.87frame/s]

Slide change detected at 757.47 seconds (frame 22724). SSIM: 0.7167


Frames Processed:  28%|██▊       | 23322/82911 [00:43<01:38, 603.55frame/s]

Slide change detected at 777.40 seconds (frame 23322). SSIM: 0.6538


Frames Processed:  31%|███       | 25415/82911 [00:47<02:12, 434.74frame/s]

Slide change detected at 847.17 seconds (frame 25415). SSIM: 0.8351


Frames Processed:  37%|███▋      | 30498/82911 [00:54<01:06, 786.21frame/s]

Slide change detected at 1016.60 seconds (frame 30498). SSIM: 0.8275


Frames Processed:  38%|███▊      | 31096/82911 [00:56<01:29, 577.52frame/s]

Slide change detected at 1036.54 seconds (frame 31096). SSIM: 0.8003


Frames Processed:  41%|████      | 33787/82911 [01:02<01:49, 448.90frame/s]

Slide change detected at 1126.24 seconds (frame 33787). SSIM: 0.6859


Frames Processed:  44%|████▎     | 36179/82911 [01:06<01:28, 530.22frame/s]

Slide change detected at 1205.97 seconds (frame 36179). SSIM: 0.4792


Frames Processed:  46%|████▌     | 38272/82911 [01:11<01:33, 475.35frame/s]

Slide change detected at 1275.74 seconds (frame 38272). SSIM: 0.5637


Frames Processed:  47%|████▋     | 38571/82911 [01:11<01:38, 448.38frame/s]

Slide change detected at 1285.70 seconds (frame 38571). SSIM: 0.2578


Frames Processed:  47%|████▋     | 38870/82911 [01:13<02:35, 283.47frame/s]

Slide change detected at 1295.67 seconds (frame 38870). SSIM: 0.4607


Frames Processed:  47%|████▋     | 39169/82911 [01:14<02:40, 271.83frame/s]

Slide change detected at 1305.64 seconds (frame 39169). SSIM: 0.4971


Frames Processed:  48%|████▊     | 39468/82911 [01:16<02:50, 255.31frame/s]

Slide change detected at 1315.60 seconds (frame 39468). SSIM: 0.4915


Frames Processed:  48%|████▊     | 39767/82911 [01:16<02:29, 289.44frame/s]

Slide change detected at 1325.57 seconds (frame 39767). SSIM: 0.4285


Frames Processed:  48%|████▊     | 40066/82911 [01:18<02:37, 272.54frame/s]

Slide change detected at 1335.54 seconds (frame 40066). SSIM: 0.4212


Frames Processed:  49%|████▊     | 40365/82911 [01:18<02:19, 305.36frame/s]

Slide change detected at 1345.50 seconds (frame 40365). SSIM: 0.3671


Frames Processed:  49%|████▉     | 40664/82911 [01:20<02:26, 287.90frame/s]

Slide change detected at 1355.47 seconds (frame 40664). SSIM: 0.6138


Frames Processed:  49%|████▉     | 40963/82911 [01:20<02:12, 317.68frame/s]

Slide change detected at 1365.44 seconds (frame 40963). SSIM: 0.6024


Frames Processed:  50%|████▉     | 41262/82911 [01:21<01:59, 349.37frame/s]

Slide change detected at 1375.40 seconds (frame 41262). SSIM: 0.1225


Frames Processed:  51%|█████     | 42159/82911 [01:22<01:17, 523.59frame/s]

Slide change detected at 1405.30 seconds (frame 42159). SSIM: 0.1054


Frames Processed:  52%|█████▏    | 42757/82911 [01:23<01:08, 584.89frame/s]

Slide change detected at 1425.24 seconds (frame 42757). SSIM: 0.7782


Frames Processed:  53%|█████▎    | 44252/82911 [01:26<01:08, 561.84frame/s]

Slide change detected at 1475.07 seconds (frame 44252). SSIM: 0.8556


Frames Processed:  59%|█████▉    | 49036/82911 [01:33<00:54, 620.73frame/s]

Slide change detected at 1634.54 seconds (frame 49036). SSIM: 0.8860


Frames Processed:  60%|█████▉    | 49335/82911 [01:34<01:00, 556.18frame/s]

Slide change detected at 1644.50 seconds (frame 49335). SSIM: 0.7495


Frames Processed:  61%|██████    | 50531/82911 [01:36<00:59, 543.57frame/s]

Slide change detected at 1684.37 seconds (frame 50531). SSIM: 0.5693


Frames Processed:  62%|██████▏   | 51129/82911 [01:37<01:01, 515.81frame/s]

Slide change detected at 1704.30 seconds (frame 51129). SSIM: 0.5603


Frames Processed:  63%|██████▎   | 52325/82911 [01:40<01:18, 392.09frame/s]

Slide change detected at 1744.17 seconds (frame 52325). SSIM: 0.5241


Frames Processed:  67%|██████▋   | 55913/82911 [01:47<00:52, 514.07frame/s]

Slide change detected at 1863.77 seconds (frame 55913). SSIM: 0.8367


Frames Processed:  68%|██████▊   | 56212/82911 [01:48<00:48, 555.08frame/s]

Slide change detected at 1873.74 seconds (frame 56212). SSIM: 0.6955


Frames Processed:  69%|██████▉   | 57109/82911 [01:49<00:45, 566.09frame/s]

Slide change detected at 1903.64 seconds (frame 57109). SSIM: 0.7634


Frames Processed:  70%|███████   | 58305/82911 [01:51<00:42, 573.21frame/s]

Slide change detected at 1943.50 seconds (frame 58305). SSIM: 0.6768


Frames Processed:  71%|███████   | 58903/82911 [01:53<00:48, 496.55frame/s]

Slide change detected at 1963.44 seconds (frame 58903). SSIM: 0.6558


Frames Processed:  74%|███████▍  | 61295/82911 [01:58<00:41, 524.01frame/s]

Slide change detected at 2043.17 seconds (frame 61295). SSIM: 0.7685


Frames Processed:  76%|███████▋  | 63388/82911 [02:01<00:28, 685.55frame/s]

Slide change detected at 2112.94 seconds (frame 63388). SSIM: 0.7957


Frames Processed:  77%|███████▋  | 63986/82911 [02:02<00:29, 652.25frame/s]

Slide change detected at 2132.87 seconds (frame 63986). SSIM: 0.5356


Frames Processed:  79%|███████▉  | 65780/82911 [02:04<00:25, 674.16frame/s]

Slide change detected at 2192.67 seconds (frame 65780). SSIM: 0.8233


Frames Processed:  80%|███████▉  | 66079/82911 [02:05<00:26, 638.57frame/s]

Slide change detected at 2202.64 seconds (frame 66079). SSIM: 0.7191


Frames Processed:  84%|████████▍ | 69966/82911 [02:13<00:27, 469.44frame/s]

Slide change detected at 2332.20 seconds (frame 69966). SSIM: 0.5576


Frames Processed:  88%|████████▊ | 72657/82911 [02:19<00:21, 480.16frame/s]

Slide change detected at 2421.91 seconds (frame 72657). SSIM: 0.5705


Frames Processed:  89%|████████▉ | 74152/82911 [02:23<00:20, 432.64frame/s]

Slide change detected at 2471.74 seconds (frame 74152). SSIM: 0.7666


Frames Processed:  92%|█████████▏| 76245/82911 [02:26<00:11, 605.21frame/s]

Slide change detected at 2541.51 seconds (frame 76245). SSIM: 0.5837


Frames Processed:  93%|█████████▎| 77142/82911 [02:28<00:10, 556.82frame/s]

Slide change detected at 2571.41 seconds (frame 77142). SSIM: 0.5782


Frames Processed:  96%|█████████▌| 79235/82911 [02:31<00:05, 656.56frame/s]

Slide change detected at 2641.17 seconds (frame 79235). SSIM: 0.7181


Frames Processed:  98%|█████████▊| 81328/82911 [02:35<00:03, 493.45frame/s]

Slide change detected at 2710.94 seconds (frame 81328). SSIM: 0.8487


Frames Processed: 100%|█████████▉| 82612/82911 [02:37<00:00, 523.17frame/s]


Adding final slide change at end of video (2763.71 seconds).
Total slide changes detected: 57

Step 2: Processing slides and audio...


Segments Processed:   0%|          | 0/57 [00:00<?, ?segment/s]


Processing segment 0: 0.00s to 19.93s
Transcribing audio for segment 0 from 0.00s to 19.93s...
Transcription for segment 0 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 0...


Segments Processed:   2%|▏         | 1/57 [00:07<06:46,  7.25s/segment]

Summary generation for segment 0 completed.

Processing segment 1: 19.93s to 29.90s
Transcribing audio for segment 1 from 19.93s to 29.90s...
Transcription for segment 1 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 1...


Segments Processed:   4%|▎         | 2/57 [00:14<06:44,  7.35s/segment]

Summary generation for segment 1 completed.

Processing segment 2: 29.90s to 79.73s
Transcribing audio for segment 2 from 29.90s to 79.73s...
Transcription for segment 2 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 2...


Segments Processed:   5%|▌         | 3/57 [00:23<07:10,  7.97s/segment]

Summary generation for segment 2 completed.

Processing segment 3: 79.73s to 109.63s
Transcribing audio for segment 3 from 79.73s to 109.63s...
Transcription for segment 3 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 3...


Segments Processed:   7%|▋         | 4/57 [00:31<07:15,  8.22s/segment]

Summary generation for segment 3 completed.

Processing segment 4: 109.63s to 119.60s
Transcribing audio for segment 4 from 109.63s to 119.60s...
Transcription for segment 4 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 4...


Segments Processed:   9%|▉         | 5/57 [00:40<07:16,  8.39s/segment]

Summary generation for segment 4 completed.

Processing segment 5: 119.60s to 179.40s
Transcribing audio for segment 5 from 119.60s to 179.40s...
Transcription for segment 5 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 5...


Segments Processed:  11%|█         | 6/57 [00:50<07:32,  8.87s/segment]

Summary generation for segment 5 completed.

Processing segment 6: 179.40s to 308.97s
Transcribing audio for segment 6 from 179.40s to 308.97s...
Transcription for segment 6 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 6...


Segments Processed:  12%|█▏        | 7/57 [01:19<12:47, 15.36s/segment]

Summary generation for segment 6 completed.

Processing segment 7: 308.97s to 378.73s
Transcribing audio for segment 7 from 308.97s to 378.73s...
Transcription for segment 7 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 7...


Segments Processed:  14%|█▍        | 8/57 [01:31<11:39, 14.27s/segment]

Summary generation for segment 7 completed.

Processing segment 8: 378.73s to 528.23s
Transcribing audio for segment 8 from 378.73s to 528.23s...
Transcription for segment 8 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 8...


Segments Processed:  16%|█▌        | 9/57 [01:47<11:57, 14.95s/segment]

Summary generation for segment 8 completed.

Processing segment 9: 528.23s to 617.93s
Transcribing audio for segment 9 from 528.23s to 617.93s...
Transcription for segment 9 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 9...


Segments Processed:  18%|█▊        | 10/57 [02:01<11:33, 14.77s/segment]

Summary generation for segment 9 completed.

Processing segment 10: 617.93s to 627.90s
Transcribing audio for segment 10 from 617.93s to 627.90s...
Transcription for segment 10 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 10...


Segments Processed:  19%|█▉        | 11/57 [02:08<09:25, 12.29s/segment]

Summary generation for segment 10 completed.

Processing segment 11: 627.90s to 687.70s
Transcribing audio for segment 11 from 627.90s to 687.70s...
Transcription for segment 11 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 11...


Segments Processed:  21%|██        | 12/57 [02:20<09:08, 12.18s/segment]

Summary generation for segment 11 completed.

Processing segment 12: 687.70s to 717.60s
Transcribing audio for segment 12 from 687.70s to 717.60s...
Transcription for segment 12 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 12...


Segments Processed:  23%|██▎       | 13/57 [02:29<08:18, 11.34s/segment]

Summary generation for segment 12 completed.

Processing segment 13: 717.60s to 757.47s
Transcribing audio for segment 13 from 717.60s to 757.47s...
Transcription for segment 13 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 13...


Segments Processed:  25%|██▍       | 14/57 [02:38<07:26, 10.39s/segment]

Summary generation for segment 13 completed.

Processing segment 14: 757.47s to 777.40s
Transcribing audio for segment 14 from 757.47s to 777.40s...
Transcription for segment 14 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 14...


Segments Processed:  26%|██▋       | 15/57 [02:51<07:58, 11.39s/segment]

Summary generation for segment 14 completed.

Processing segment 15: 777.40s to 847.17s
Transcribing audio for segment 15 from 777.40s to 847.17s...
Transcription for segment 15 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 15...


Segments Processed:  28%|██▊       | 16/57 [03:06<08:23, 12.28s/segment]

Summary generation for segment 15 completed.

Processing segment 16: 847.17s to 1016.60s
Transcribing audio for segment 16 from 847.17s to 1016.60s...
Transcription for segment 16 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 16...


Segments Processed:  30%|██▉       | 17/57 [03:23<09:17, 13.93s/segment]

Summary generation for segment 16 completed.

Processing segment 17: 1016.60s to 1036.54s
Transcribing audio for segment 17 from 1016.60s to 1036.54s...
Transcription for segment 17 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 17...


Segments Processed:  32%|███▏      | 18/57 [03:31<07:52, 12.11s/segment]

Summary generation for segment 17 completed.

Processing segment 18: 1036.54s to 1126.24s
Transcribing audio for segment 18 from 1036.54s to 1126.24s...
Transcription for segment 18 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 18...


Segments Processed:  33%|███▎      | 19/57 [03:48<08:29, 13.42s/segment]

Summary generation for segment 18 completed.

Processing segment 19: 1126.24s to 1205.97s
Transcribing audio for segment 19 from 1126.24s to 1205.97s...
Transcription for segment 19 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 19...


Segments Processed:  35%|███▌      | 20/57 [04:01<08:17, 13.44s/segment]

Summary generation for segment 19 completed.

Processing segment 20: 1205.97s to 1275.74s
Transcribing audio for segment 20 from 1205.97s to 1275.74s...
Transcription for segment 20 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 20...


Segments Processed:  37%|███▋      | 21/57 [04:23<09:36, 16.01s/segment]

Summary generation for segment 20 completed.

Processing segment 21: 1275.74s to 1285.70s
Transcribing audio for segment 21 from 1275.74s to 1285.70s...
Transcription for segment 21 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 21...


Segments Processed:  39%|███▊      | 22/57 [04:32<08:07, 13.92s/segment]

Summary generation for segment 21 completed.

Processing segment 22: 1285.70s to 1295.67s
Transcribing audio for segment 22 from 1285.70s to 1295.67s...
Transcription for segment 22 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 22...


Segments Processed:  40%|████      | 23/57 [04:41<06:55, 12.23s/segment]

Summary generation for segment 22 completed.

Processing segment 23: 1295.67s to 1305.64s
Transcribing audio for segment 23 from 1295.67s to 1305.64s...
Transcription for segment 23 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 23...


Segments Processed:  42%|████▏     | 24/57 [04:48<05:52, 10.68s/segment]

Summary generation for segment 23 completed.

Processing segment 24: 1305.64s to 1315.60s
Transcribing audio for segment 24 from 1305.64s to 1315.60s...
Transcription for segment 24 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 24...


Segments Processed:  44%|████▍     | 25/57 [04:55<05:13,  9.79s/segment]

Summary generation for segment 24 completed.

Processing segment 25: 1315.60s to 1325.57s
Transcribing audio for segment 25 from 1315.60s to 1325.57s...
Transcription for segment 25 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 25...


Segments Processed:  46%|████▌     | 26/57 [05:06<05:06,  9.89s/segment]

Summary generation for segment 25 completed.

Processing segment 26: 1325.57s to 1335.54s
Transcribing audio for segment 26 from 1325.57s to 1335.54s...
Transcription for segment 26 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 26...


Segments Processed:  47%|████▋     | 27/57 [05:15<04:56,  9.90s/segment]

Summary generation for segment 26 completed.

Processing segment 27: 1335.54s to 1345.50s
Transcribing audio for segment 27 from 1335.54s to 1345.50s...
Transcription for segment 27 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 27...


Segments Processed:  49%|████▉     | 28/57 [05:33<05:52, 12.17s/segment]

Summary generation for segment 27 completed.

Processing segment 28: 1345.50s to 1355.47s
Transcribing audio for segment 28 from 1345.50s to 1355.47s...
Transcription for segment 28 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 28...


Segments Processed:  51%|█████     | 29/57 [05:41<05:05, 10.92s/segment]

Summary generation for segment 28 completed.

Processing segment 29: 1355.47s to 1365.44s
Transcribing audio for segment 29 from 1355.47s to 1365.44s...
Transcription for segment 29 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 29...


Segments Processed:  53%|█████▎    | 30/57 [05:50<04:37, 10.28s/segment]

Summary generation for segment 29 completed.

Processing segment 30: 1365.44s to 1375.40s
Transcribing audio for segment 30 from 1365.44s to 1375.40s...
Transcription for segment 30 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 30...


Segments Processed:  54%|█████▍    | 31/57 [06:00<04:24, 10.19s/segment]

Summary generation for segment 30 completed.

Processing segment 31: 1375.40s to 1405.30s
Transcribing audio for segment 31 from 1375.40s to 1405.30s...
Transcription for segment 31 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 31...


Segments Processed:  56%|█████▌    | 32/57 [06:07<03:51,  9.27s/segment]

Summary generation for segment 31 completed.

Processing segment 32: 1405.30s to 1425.24s
Transcribing audio for segment 32 from 1405.30s to 1425.24s...
Transcription for segment 32 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 32...


Segments Processed:  58%|█████▊    | 33/57 [06:19<04:00, 10.04s/segment]

Summary generation for segment 32 completed.

Processing segment 33: 1425.24s to 1475.07s
Transcribing audio for segment 33 from 1425.24s to 1475.07s...
Transcription for segment 33 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 33...


Segments Processed:  60%|█████▉    | 34/57 [06:29<03:55, 10.24s/segment]

Summary generation for segment 33 completed.

Processing segment 34: 1475.07s to 1634.54s
Transcribing audio for segment 34 from 1475.07s to 1634.54s...
Transcription for segment 34 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 34...


Segments Processed:  61%|██████▏   | 35/57 [06:46<04:25, 12.05s/segment]

Summary generation for segment 34 completed.

Processing segment 35: 1634.54s to 1644.50s
Transcribing audio for segment 35 from 1634.54s to 1644.50s...
Transcription for segment 35 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 35...


Segments Processed:  63%|██████▎   | 36/57 [06:54<03:50, 10.97s/segment]

Summary generation for segment 35 completed.

Processing segment 36: 1644.50s to 1684.37s
Transcribing audio for segment 36 from 1644.50s to 1684.37s...
Transcription for segment 36 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 36...


Segments Processed:  65%|██████▍   | 37/57 [07:04<03:32, 10.65s/segment]

Summary generation for segment 36 completed.

Processing segment 37: 1684.37s to 1704.30s
Transcribing audio for segment 37 from 1684.37s to 1704.30s...
Transcription for segment 37 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 37...


Segments Processed:  67%|██████▋   | 38/57 [07:15<03:22, 10.68s/segment]

Summary generation for segment 37 completed.

Processing segment 38: 1704.30s to 1744.17s
Transcribing audio for segment 38 from 1704.30s to 1744.17s...
Transcription for segment 38 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 38...


Segments Processed:  68%|██████▊   | 39/57 [07:24<03:06, 10.38s/segment]

Summary generation for segment 38 completed.

Processing segment 39: 1744.17s to 1863.77s
Transcribing audio for segment 39 from 1744.17s to 1863.77s...
Transcription for segment 39 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 39...


Segments Processed:  70%|███████   | 40/57 [07:40<03:24, 12.03s/segment]

Summary generation for segment 39 completed.

Processing segment 40: 1863.77s to 1873.74s
Transcribing audio for segment 40 from 1863.77s to 1873.74s...
Transcription for segment 40 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 40...


Segments Processed:  72%|███████▏  | 41/57 [07:51<03:04, 11.54s/segment]

Summary generation for segment 40 completed.

Processing segment 41: 1873.74s to 1903.64s
Transcribing audio for segment 41 from 1873.74s to 1903.64s...
Transcription for segment 41 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 41...


Segments Processed:  74%|███████▎  | 42/57 [08:02<02:51, 11.42s/segment]

Summary generation for segment 41 completed.

Processing segment 42: 1903.64s to 1943.50s
Transcribing audio for segment 42 from 1903.64s to 1943.50s...
Transcription for segment 42 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 42...


Segments Processed:  75%|███████▌  | 43/57 [08:15<02:47, 11.94s/segment]

Summary generation for segment 42 completed.

Processing segment 43: 1943.50s to 1963.44s
Transcribing audio for segment 43 from 1943.50s to 1963.44s...
Transcription for segment 43 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 43...


Segments Processed:  77%|███████▋  | 44/57 [08:24<02:22, 10.97s/segment]

Summary generation for segment 43 completed.

Processing segment 44: 1963.44s to 2043.17s
Transcribing audio for segment 44 from 1963.44s to 2043.17s...
Transcription for segment 44 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 44...


Segments Processed:  79%|███████▉  | 45/57 [08:37<02:19, 11.64s/segment]

Summary generation for segment 44 completed.

Processing segment 45: 2043.17s to 2112.94s
Transcribing audio for segment 45 from 2043.17s to 2112.94s...
Transcription for segment 45 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 45...


Segments Processed:  81%|████████  | 46/57 [08:51<02:15, 12.28s/segment]

Summary generation for segment 45 completed.

Processing segment 46: 2112.94s to 2132.87s
Transcribing audio for segment 46 from 2112.94s to 2132.87s...
Transcription for segment 46 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 46...


Segments Processed:  82%|████████▏ | 47/57 [09:01<01:56, 11.65s/segment]

Summary generation for segment 46 completed.

Processing segment 47: 2132.87s to 2192.67s
Transcribing audio for segment 47 from 2132.87s to 2192.67s...
Transcription for segment 47 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 47...


Segments Processed:  84%|████████▍ | 48/57 [09:21<02:06, 14.10s/segment]

Summary generation for segment 47 completed.

Processing segment 48: 2192.67s to 2202.64s
Transcribing audio for segment 48 from 2192.67s to 2202.64s...
Transcription for segment 48 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 48...


Segments Processed:  86%|████████▌ | 49/57 [09:30<01:42, 12.80s/segment]

Summary generation for segment 48 completed.

Processing segment 49: 2202.64s to 2332.20s
Transcribing audio for segment 49 from 2202.64s to 2332.20s...
Transcription for segment 49 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 49...


Segments Processed:  88%|████████▊ | 50/57 [09:48<01:38, 14.11s/segment]

Summary generation for segment 49 completed.

Processing segment 50: 2332.20s to 2421.91s
Transcribing audio for segment 50 from 2332.20s to 2421.91s...
Transcription for segment 50 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 50...


Segments Processed:  89%|████████▉ | 51/57 [10:00<01:22, 13.71s/segment]

Summary generation for segment 50 completed.

Processing segment 51: 2421.91s to 2471.74s
Transcribing audio for segment 51 from 2421.91s to 2471.74s...
Transcription for segment 51 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 51...


Segments Processed:  91%|█████████ | 52/57 [10:11<01:04, 12.93s/segment]

Summary generation for segment 51 completed.

Processing segment 52: 2471.74s to 2541.51s
Transcribing audio for segment 52 from 2471.74s to 2541.51s...
Transcription for segment 52 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 52...


Segments Processed:  93%|█████████▎| 53/57 [10:26<00:53, 13.29s/segment]

Summary generation for segment 52 completed.

Processing segment 53: 2541.51s to 2571.41s
Transcribing audio for segment 53 from 2541.51s to 2571.41s...
Transcription for segment 53 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 53...


Segments Processed:  95%|█████████▍| 54/57 [10:35<00:36, 12.12s/segment]

Summary generation for segment 53 completed.

Processing segment 54: 2571.41s to 2641.17s
Transcribing audio for segment 54 from 2571.41s to 2641.17s...
Transcription for segment 54 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 54...


Segments Processed:  96%|█████████▋| 55/57 [10:50<00:25, 12.88s/segment]

Summary generation for segment 54 completed.

Processing segment 55: 2641.17s to 2710.94s
Transcribing audio for segment 55 from 2641.17s to 2710.94s...
Transcription for segment 55 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 55...


Segments Processed:  98%|█████████▊| 56/57 [11:03<00:13, 13.14s/segment]

Summary generation for segment 55 completed.

Processing segment 56: 2710.94s to 2763.71s
Transcribing audio for segment 56 from 2710.94s to 2763.71s...
Transcription for segment 56 completed.
Encoding slide image to base64...
Image encoding completed.
Generating summary for segment 56...


Segments Processed: 100%|██████████| 57/57 [11:16<00:00, 11.86s/segment]

Summary generation for segment 56 completed.

Processing completed.





Document saved as /content/outputs/Video 3_Innovation Management and Tools.docx
Successfully processed: /content/Video 3_Innovation Management and Tools.mp4
Output saved as: /content/outputs/Video 3_Innovation Management and Tools.docx

Processing complete!


In [None]:
import shutil
import os

# Replace these paths with your folder and output file paths
folder_to_compress = "/content/outputs"
output_zip = "/content/words"

# Ensure the parent directory of the output exists
os.makedirs(os.path.dirname(output_zip), exist_ok=True)

# Compress the folder
shutil.make_archive(output_zip, 'zip', folder_to_compress)

print(f"Folder '{folder_to_compress}' has been compressed to '{output_zip}.zip'")


Folder '/content/outputs' has been compressed to '/content/words.zip'
Folder '/content/outputs' has been compressed to '/content/words.zip'


In [None]:
import shutil
import os

def move_file(source_path, destination_path):
    """
    Move a file from source to destination path.
    Creates destination directory if it doesn't exist.
    """
    # Create destination directory if needed
    os.makedirs(os.path.dirname(destination_path), exist_ok=True)

    # Move the file
    shutil.move(source_path, destination_path)

In [None]:
move_file('/content/words.zip', '/content/drive/MyDrive/file.zip')