## Import Packages

In [None]:
!pip install git+https://github.com/openai/whisper.git
!pip install yt-dlp
!pip install torchaudio librosa pydub ffmpeg-python panns-inference

In [5]:
!pip install -q kokoro>=0.9.2 soundfile
!apt-get -qq -y install espeak-ng > /dev/null 2>&1

In [None]:
!pip install moviepy langchain-google-genai langchain-core

## Mount Google Drive and extract audio

In [3]:
from google.colab import drive
from pydub import AudioSegment
import os

def process_gdrive_audio(input_file_path, output_filename="audio.wav"):
    # Check if the input file exists
    if not os.path.exists(input_file_path):
        raise FileNotFoundError(f"The file {input_file_path} does not exist in your Google Drive.")

    # Load the file and extract audio using pydub
    try:
        audio = AudioSegment.from_file(input_file_path)
        # Convert to mono and set frame rate to 16kHz
        audio = audio.set_channels(1).set_frame_rate(16000)
        # Export as WAV
        audio.export(output_filename, format="wav")
        print(f"Audio exported to {output_filename}")
    except Exception as e:
        print(f"Error processing the file: {e}")
        raise

    return output_filename

# Mount Google Drive
drive.mount('/content/drive')

# Specify the path to your file in Google Drive
# Example: If your file is in "My Drive/videos/myvideo.mp4", use:
input_file_path = '/content/drive/My Drive/videos/panda.mp4'  # Replace with your file's path
audio_path = process_gdrive_audio(input_file_path)

Mounted at /content/drive


## Extract transcript with timestamps

In [None]:
import whisper

model = whisper.load_model("large")
result = model.transcribe(audio_path, verbose=True)

# Print transcript with timestamps
for seg in result['segments']:
    print(f"[{seg['start']:.2f}s - {seg['end']:.2f}s]: {seg['text']}")

100%|█████████████████████████████████████| 2.88G/2.88G [01:07<00:00, 45.8MiB/s]


Detecting language using up to the first 30 seconds. Use `--language` to specify the language
Detected language: English
[00:00.000 --> 00:12.840]  Get all the metal you can find!
[00:12.840 --> 00:17.880]  Rob the metal!
[00:17.880 --> 00:29.600]  Help! Help! Help! Help!
[00:29.600 --> 00:39.640]  That's everything! Let's get out of here!
[00:39.640 --> 00:40.640]  Wings of Justice!
[00:40.640 --> 00:41.640]  Pull the trigger!
[00:41.640 --> 00:42.640]  Help! Help! Help!
[00:42.640 --> 00:43.640]  Help! Help!
[00:43.640 --> 00:44.640]  Help! Help!
[00:44.640 --> 00:45.640]  Help! Help!
[00:45.640 --> 00:46.640]  Help! Help!
[00:46.640 --> 00:47.640]  Help! Help!
[00:47.640 --> 00:47.720]  Help! Help! Help!
[00:47.880 --> 01:10.140]  돌
[01:10.140 --> 01:12.440]  So, um...
[01:12.440 --> 01:16.500]  Wait!
[01:16.500 --> 01:17.640]  Go get the monster!
[01:47.640 --> 01:49.640]  Thanks, Mantis.
[01:49.640 --> 01:51.640]  Ooh.
[01:51.640 --> 01:53.640]  Ooh.
[01:53.640 --> 01:55.640]  Uh.

## Save transcript as json

In [None]:
import json

output = {
    "transcript": result['segments'],
}

with open("film_analysis_output.json", "w") as f:
    json.dump(output, f, indent=2)

print("✅ Analysis completed and saved as JSON.")

✅ Analysis completed and saved as JSON.


## Generate the trailer video

In [None]:
import json
import os
from typing import List, Dict
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from moviepy.editor import VideoFileClip, concatenate_videoclips
from google.colab import userdata
import math

# Input video file path
input_file_path = '/content/drive/My Drive/videos/panda.mp4'
output_trailer_path = '/content/drive/My Drive/videos/panda_trailer.mp4'

# Configuration for Google Gemini API
if not os.getenv("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')

# Initialize the Gemini Flash 2.0 model
model = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.7)

def load_transcript(file_path: str) -> List[Dict]:
    """Load the entire transcript JSON, extract start, end, text."""
    try:
        with open(file_path, "r") as f:
            data = json.load(f)
            # Handle case where transcript is wrapped in a dict
            if isinstance(data, dict) and "transcript" in data:
                data = data["transcript"]
            # Ensure the loaded data is a list
            if not isinstance(data, list):
                raise ValueError(f"Transcript data in {file_path} is not a list. Got type: {type(data)}")
            # Extract only start, end, text
            cleaned_data = []
            for item in data:
                if not isinstance(item, dict) or not all(key in item for key in ["start", "end", "text"]):
                    print(f"Skipping invalid segment: {item}")
                    continue  # Skip invalid entries
                segment = {
                    "start": item["start"],
                    "end": item["end"],
                    "text": item["text"]
                }
                cleaned_data.append(segment)
            if not cleaned_data:
                raise ValueError("No valid transcript entries found with start, end, text keys")
            print(f"Total segments loaded: {len(cleaned_data)}")
            return cleaned_data
    except Exception as e:
        print(f"Error loading transcript: {e}")
        return []

def distribute_segments(transcript_data: List[Dict], num_requests: int = 4) -> List[List[Dict]]:
    """Distribute transcript segments evenly across 4 API requests."""
    if not transcript_data:
        print("Warning: Transcript data is empty")
        return []
    try:
        total_segments = len(transcript_data)
        segments_per_request = math.ceil(total_segments / num_requests)
        distributed_segments = [transcript_data[i:i + segments_per_request] for i in range(0, total_segments, segments_per_request)]
        # Ensure exactly 4 requests, padding with empty lists if needed
        while len(distributed_segments) < num_requests:
            distributed_segments.append([])
        print(f"Distributed {total_segments} segments across {num_requests} API requests: {[len(group) for group in distributed_segments]} segments per request")
        return distributed_segments
    except Exception as e:
        print(f"Error distributing segments: {e}")
        return []

def generate_trailer_chunk(chunk: List[Dict], request_count: int) -> List[Dict]:
    """Generate a trailer chunk from a group of transcript segments in one API call."""
    if not chunk:
        return []

    ai_prompt = """
    You are a professional video editor and trailer creator. I will give you a transcript chunk of a video in JSON format, where each entry contains:
    - "start": Start timestamp in seconds
    - "end": End timestamp
    - "text": Spoken text

    Your job is to select the most **dramatic, emotional, action-packed, or suspenseful** lines to create a trailer that covers the entire video’s narrative arc.

    Rules:
    - Choose 1 to 5 impactful lines per chunk to ensure comprehensive coverage
    - Maintain original timestamps
    - Only use content from the transcript (do not create new text)
    - Prioritize scenes with action verbs, yelling, dramatic phrases (e.g., "Run!", "Help!", "That’s everything!") to represent the video’s key moments
    - Ensure selected segments span the chunk’s timeline to cover beginning, middle, and end
    - Return the result as a JSON list like this:

    [
      {{
        "start": 17.88,
        "end": 29.6,
        "text": "Help! Help! Help! Help!"
      }}
    ]

    Here is the transcript chunk:

    {transcript}
    """
    # Format the transcript chunk as a string
    transcript_str = json.dumps(chunk, indent=2)

    # Create prompt template
    prompt_template = ChatPromptTemplate.from_template(ai_prompt)

    # Create the chain
    chain = prompt_template | model | JsonOutputParser()

    try:
        # Invoke the chain
        trailer_data = chain.invoke({"transcript": transcript_str})
        # Validate the returned trailer data
        if not isinstance(trailer_data, list):
            print(f"Error: Trailer chunk is not a list. Got: {trailer_data}")
            return []
        for segment in trailer_data:
            if not isinstance(segment, dict) or not all(key in segment for key in ["start", "end", "text"]):
                print(f"Error: Invalid trailer segment: {segment}")
                return []
        print(f"API request {request_count} processed with {len(chunk)} segments")
        return trailer_data
    except Exception as e:
        print(f"Error generating trailer chunk: {e}")
        print(f"Trailer data (if available): {trailer_data if 'trailer_data' in locals() else 'None'}")
        return []

def generate_trailer(transcript_data: List[Dict]) -> List[Dict]:
    """Generate a trailer by processing transcript in 4 API requests."""
    # Distribute segments across 4 requests
    distributed_segments = distribute_segments(transcript_data, num_requests=4)
    if not distributed_segments:
        return []

    # Process each group of segments in one API call
    trailer_segments = []
    for request_count, chunk in enumerate(distributed_segments, 1):
        if chunk:
            print(f"Processing API request {request_count} with {len(chunk)} segment(s)")
            chunk_trailer = generate_trailer_chunk(chunk, request_count)
            trailer_segments.extend(chunk_trailer)

    # Sort by start time
    trailer_segments = sorted(trailer_segments, key=lambda x: x["start"])
    total_duration = sum(segment["end"] - segment["start"] for segment in trailer_segments)
    print(f"Total trailer duration: {total_duration:.2f} seconds")
    return trailer_segments

def create_trailer_video(trailer_segments: List[Dict], input_video: str, output_path: str):
    """Create the trailer video by concatenating clips using moviepy."""
    try:
        # Load the input video
        video = VideoFileClip(input_video)
        clips = []

        # Extract clips based on trailer segments
        for segment in trailer_segments:
            start = segment["start"]
            end = segment["end"]
            clip = video.subclip(start, end)
            clips.append(clip)

        # Concatenate clips
        final_trailer = concatenate_videoclips(clips, method="compose")

        # Write the output video
        final_trailer.write_videofile(output_path, codec="libx264", audio_codec="aac")
        print(f"Trailer video saved to {output_path}")

        # Close clips to free memory
        for clip in clips:
            clip.close()
        video.close()
        final_trailer.close()

    except Exception as e:
        print(f"Error creating trailer video: {e}")

def main():
    # Check if input video exists
    if not os.path.exists(input_file_path):
        print(f"Error: {input_file_path} not found")
        return

    # Load transcript
    transcript_file = "/content/film_analysis_output.json"
    if not os.path.exists(transcript_file):
        print(f"Error: {transcript_file} not found")
        return

    transcript_data = load_transcript(transcript_file)
    if not transcript_data:
        print("Error: No valid transcript data loaded")
        return

    # Debug: Print the first few entries to verify structure
    print("Transcript data sample:", json.dumps(transcript_data[:2], indent=2))

    # Generate trailer segments
    trailer = generate_trailer(transcript_data)

    # Output the trailer segments
    if trailer:
        print("Generated Trailer Segments:")
        print(json.dumps(trailer, indent=2))

        # Save trailer segments to a file
        with open("trailer.json", "w") as f:
            json.dump(trailer, f, indent=2)
        print("Trailer segments saved to trailer.json")

        # Create the trailer video
        create_trailer_video(trailer, input_file_path, output_trailer_path)
    else:
        print("Failed to generate trailer")

if __name__ == "__main__":
    main()

Total segments loaded: 458
Transcript data sample: [
  {
    "start": 0.0,
    "end": 12.84,
    "text": " Get all the metal you can find!"
  },
  {
    "start": 12.84,
    "end": 17.88,
    "text": " Rob the metal!"
  }
]
Distributed 458 segments across 4 API requests: [115, 115, 115, 113] segments per request
Processing API request 1 with 115 segment(s)
API request 1 processed with 115 segments
Processing API request 2 with 115 segment(s)
API request 2 processed with 115 segments
Processing API request 3 with 115 segment(s)
API request 3 processed with 115 segments
Processing API request 4 with 113 segment(s)
API request 4 processed with 113 segments
Total trailer duration: 258.58 seconds
Generated Trailer Segments:
[
  {
    "start": 0.0,
    "end": 12.84,
    "text": " Get all the metal you can find!"
  },
  {
    "start": 17.88,
    "end": 29.6,
    "text": " Help! Help! Help! Help!"
  },
  {
    "start": 29.6,
    "end": 39.64,
    "text": " That's everything! Let's get out of he


t:  26%|██▌       | 390/1529 [17:39<00:18, 60.02it/s, now=None]

t:  26%|██▌       | 390/1529 [17:39<00:18, 60.02it/s, now=None]
t:  56%|█████▋    | 905/1603 [04:37<00:22, 30.74it/s, now=None][A

Moviepy - Building video /content/drive/My Drive/videos/panda_trailer.mp4.
MoviePy - Writing audio in panda_trailerTEMP_MPY_wvf_snd.mp4




chunk:   0%|          | 0/5702 [00:00<?, ?it/s, now=None][A[A

chunk:   2%|▏         | 90/5702 [00:00<00:06, 897.53it/s, now=None][A[A

chunk:   3%|▎         | 180/5702 [00:00<00:14, 369.16it/s, now=None][A[A

chunk:   4%|▍         | 233/5702 [00:00<00:14, 368.12it/s, now=None][A[A

chunk:   5%|▍         | 279/5702 [00:00<00:14, 370.95it/s, now=None][A[A

chunk:   6%|▌         | 322/5702 [00:00<00:14, 371.26it/s, now=None][A[A

chunk:   6%|▋         | 363/5702 [00:00<00:14, 356.41it/s, now=None][A[A

chunk:   7%|▋         | 401/5702 [00:01<00:14, 355.35it/s, now=None][A[A

chunk:   8%|▊         | 448/5702 [00:01<00:13, 385.15it/s, now=None][A[A

chunk:   9%|▊         | 496/5702 [00:01<00:12, 410.40it/s, now=None][A[A

chunk:   9%|▉         | 539/5702 [00:01<00:12, 413.26it/s, now=None][A[A

chunk:  10%|█         | 587/5702 [00:01<00:11, 431.36it/s, now=None][A[A

chunk:  11%|█         | 632/5702 [00:01<00:11, 433.60it/s, now=None][A[A

chunk:  12%|█▏        

MoviePy - Done.
Moviepy - Writing video /content/drive/My Drive/videos/panda_trailer.mp4





t:   0%|          | 0/6200 [00:00<?, ?it/s, now=None][A[A

t:   0%|          | 4/6200 [00:00<02:36, 39.63it/s, now=None][A[A

t:   0%|          | 30/6200 [00:00<00:36, 168.27it/s, now=None][A[A

t:   1%|          | 57/6200 [00:00<00:29, 211.60it/s, now=None][A[A

t:   1%|▏         | 79/6200 [00:00<00:33, 182.11it/s, now=None][A[A

t:   2%|▏         | 98/6200 [00:00<00:55, 110.38it/s, now=None][A[A

t:   2%|▏         | 113/6200 [00:01<01:09, 87.31it/s, now=None][A[A

t:   2%|▏         | 125/6200 [00:01<01:15, 80.31it/s, now=None][A[A

t:   2%|▏         | 135/6200 [00:01<01:19, 76.57it/s, now=None][A[A

t:   2%|▏         | 144/6200 [00:01<01:17, 78.28it/s, now=None][A[A

t:   2%|▏         | 153/6200 [00:01<01:16, 79.19it/s, now=None][A[A

t:   3%|▎         | 162/6200 [00:01<01:16, 79.35it/s, now=None][A[A

t:   3%|▎         | 171/6200 [00:01<01:14, 80.72it/s, now=None][A[A

t:   3%|▎         | 180/6200 [00:01<01:15, 79.54it/s, now=None][A[A

t:   3%|▎       

Moviepy - Done !
Moviepy - video ready /content/drive/My Drive/videos/panda_trailer.mp4
Trailer video saved to /content/drive/My Drive/videos/panda_trailer.mp4


## Generate the summary video with voiceover speaker

In [None]:
import json
import os
from typing import List, Dict
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_videoclips, concatenate_audioclips
from google.colab import userdata
import math
import soundfile as sf
from kokoro import KPipeline
import torch
import numpy as np
from textwrap import wrap
import re


# Input and output file paths
input_file_path = '/content/drive/My Drive/videos/panda.mp4'
output_summary_path = '/content/drive/My Drive/videos/panda_summary.mp4'
output_audio_path = '/content/drive/My Drive/videos/panda_summary_audio.wav'
transcript_output_path = '/content/drive/My Drive/videos/panda_summary_transcript.txt'

# Configuration for Google Gemini API
if not os.getenv("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')

# Initialize the Gemini Flash 2.0 model
model = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.7)

# Initialize Kokoro-82M pipeline
pipeline = KPipeline(lang_code='a')

def load_transcript(file_path: str) -> List[Dict]:
    """Load the entire transcript JSON, extract start, end, text."""
    try:
        with open(file_path, "r") as f:
            data = json.load(f)
            if isinstance(data, dict) and "transcript" in data:
                data = data["transcript"]
            if not isinstance(data, list):
                raise ValueError(f"Transcript data in {file_path} is not a list. Got type: {type(data)}")
            cleaned_data = []
            for item in data:
                if not isinstance(item, dict) or not all(key in item for key in ["start", "end", "text"]):
                    print(f"Skipping invalid segment: {item}")
                    continue
                segment = {
                    "start": item["start"],
                    "end": item["end"],
                    "text": item["text"]
                }
                cleaned_data.append(segment)
            if not cleaned_data:
                raise ValueError("No valid transcript entries found with start, end, text keys")
            print(f"Total segments loaded: {len(cleaned_data)}")
            return cleaned_data
    except Exception as e:
        print(f"Error loading transcript: {e}")
        return []

def distribute_segments(transcript_data: List[Dict], num_requests: int = 4) -> List[List[Dict]]:
    """Distribute transcript segments evenly across 4 API requests."""
    if not transcript_data:
        print("Warning: Transcript data is empty")
        return []
    try:
        total_segments = len(transcript_data)
        segments_per_request = math.ceil(total_segments / num_requests)
        distributed_segments = [transcript_data[i:i + segments_per_request] for i in range(0, total_segments, segments_per_request)]
        while len(distributed_segments) < num_requests:
            distributed_segments.append([])
        print(f"Distributed {total_segments} segments across {num_requests} API requests: {[len(group) for group in distributed_segments]} segments per request")
        return distributed_segments
    except Exception as e:
        print(f"Error distributing segments: {e}")
        return []

def generate_summary_chunk(chunk: List[Dict], request_count: int) -> List[Dict]:
    """Generate a summary chunk from a group of transcript segments in one API call."""
    if not chunk:
        return []

    ai_prompt = """
    You are a professional video summarizer. I will provide a transcript chunk of a video in JSON format, where each entry contains:
    - "start": Start timestamp in seconds
    - "end": End timestamp
    - "text": Spoken text

    Your job is to select key segments that comprehensively represent the main narrative, themes, or critical moments of the video to create a concise summary that spans the entire video timeline.

    Rules:
    - Choose 1 to 3 segments per chunk that capture essential plot points, themes, or significant dialogue
    - Ensure selected segments collectively cover the beginning, middle, and end of the chunk’s timeline to represent the full narrative arc
    - Maintain original timestamps
    - Only use content from the transcript (do not create new text)
    - Prioritize segments that convey the video’s core message, key events, or character development
    - Return the result as a JSON list like this:

    [
      {{
        "start": 17.88,
        "end": 29.6,
        "text": "This is a critical moment!"
      }}
    ]

    Here is the transcript chunk:

    {transcript}
    """
    transcript_str = json.dumps(chunk, indent=2)
    prompt_template = ChatPromptTemplate.from_template(ai_prompt)
    chain = prompt_template | model | JsonOutputParser()

    try:
        summary_data = chain.invoke({"transcript": transcript_str})
        if not isinstance(summary_data, list):
            print(f"Error: Summary chunk is not a list. Got: {summary_data}")
            return []
        for segment in summary_data:
            if not isinstance(segment, dict) or not all(key in segment for key in ["start", "end", "text"]):
                print(f"Error: Invalid summary segment: {segment}")
                return []
        print(f"API request {request_count} processed with {len(chunk)} segments")
        return summary_data
    except Exception as e:
        print(f"Error generating summary chunk: {e}")
        return []

def generate_text_summary(summary_segments: List[Dict]) -> str:
    """Generate a concise text summary from selected segments."""
    if not summary_segments:
        return ""

    ai_prompt = """
    You are a professional video summarizer. I will provide a list of selected video transcript segments in JSON format, each containing:
    - "start": Start timestamp in seconds
    - "end": End timestamp
    - "text": Spoken text

    Your job is to create a concise, coherent summary (100-150 words) that captures the main narrative, themes, or key moments of the entire video. Use the provided segments as the basis, but rephrase and connect them into a natural, professional narrative that spans the full timeline of the video. Do not invent new content beyond what is implied by the segments.

    Return the summary as a plain text string.

    Here are the selected segments:

    {segments}
    """
    segments_str = json.dumps(summary_segments, indent=2)
    prompt_template = ChatPromptTemplate.from_template(ai_prompt)
    chain = prompt_template | model | StrOutputParser()

    try:
        summary_text = chain.invoke({"segments": segments_str})
        print(f"Generated text summary: {summary_text[:100]}...")
        return summary_text
    except Exception as e:
        print(f"Error generating text summary: {e}")
        return ""

def save_transcript(summary_text: str, output_path: str):
    """Save the text summary to a file."""
    try:
        with open(output_path, 'w') as f:
            f.write(summary_text)
        print(f"Transcript saved to {output_path}")
    except Exception as e:
        print(f"Error saving transcript: {e}")

def generate_summary_audio(summary_text: str, output_audio_path: str, target_duration: float) -> float:
    """Generate audio in chunks to match target video duration using Kokoro-82M."""
    if not summary_text:
        print("Error: No summary text provided for audio generation")
        return 0.0

    try:
        # Split text into sentences
        sentences = re.split(r'(?<=[.!?])\s+', summary_text.strip())
        if not sentences:
            print("Error: No sentences found in summary text")
            return 0.0

        # Estimate duration per sentence (assuming ~2-3 seconds per sentence)
        est_duration_per_sentence = 2.5  # Average speaking time per sentence
        num_sentences = len(sentences)
        est_total_duration = num_sentences * est_duration_per_sentence

        # Calculate number of chunks needed to approximate target duration
        num_chunks = max(1, math.ceil(target_duration / est_total_duration))
        sentences_per_chunk = math.ceil(num_sentences / num_chunks) if num_chunks > 0 else num_sentences

        audio_chunks = []
        total_duration = 0.0

        # Generate audio for each chunk
        for i in range(0, num_sentences, sentences_per_chunk):
            chunk_text = " ".join(sentences[i:i + sentences_per_chunk])
            if not chunk_text:
                continue
            generator = pipeline(chunk_text, voice='af_heart')
            for _, (_, _, audio) in enumerate(generator):
                audio_chunks.append(audio)
                chunk_duration = len(audio) / 24000
                total_duration += chunk_duration
                break

        if not audio_chunks:
            print("Error: No audio chunks generated by Kokoro-82M")
            return 0.0

        # Concatenate audio chunks
        concatenated_audio = np.concatenate(audio_chunks)
        sf.write(output_audio_path, concatenated_audio, 24000)
        print(f"Summary audio saved to {output_audio_path}, duration: {total_duration:.2f} seconds")

        # If still shorter than target, pad with silence
        if total_duration < target_duration:
            with sf.SoundFile(output_audio_path, 'r+') as f:
                silence_samples = int((target_duration - total_duration) * 24000)
                silence = np.zeros(silence_samples)
                f.seek(0, sf.SEEK_END)
                f.write(silence)
            total_duration = target_duration
            print(f"Audio padded with silence to match target duration: {total_duration:.2f} seconds")

        return total_duration
    except Exception as e:
        print(f"Error generating summary audio with Kokoro-82M: {e}")
        return 0.0

def create_summary_video(summary_segments: List[Dict], summary_audio_path: str, input_video: str, output_path: str):
    """Create a summary video with synchronized audio."""
    try:
        video = VideoFileClip(input_video)
        clips = []

        # Extract clips based on summary segments
        for segment in summary_segments:
            start = segment["start"]
            end = segment["end"]
            clip = video.subclip(start, end)
            clips.append(clip)

        # Concatenate clips
        final_video = concatenate_videoclips(clips, method="compose")

        # Load and set audio
        if os.path.exists(summary_audio_path):
            audio = AudioFileClip(summary_audio_path)
            final_video = final_video.set_audio(audio)

        # Write the output video
        final_video.write_videofile(output_path, codec="libx264", audio_codec="aac")
        print(f"Summary video saved to {output_path}")

        # Close clips to free memory
        for clip in clips:
            clip.close()
        video.close()
        final_video.close()
        if 'audio' in locals():
            audio.close()

    except Exception as e:
        print(f"Error creating summary video: {e}")

def main():
    # Check if input video exists
    if not os.path.exists(input_file_path):
        print(f"Error: {input_file_path} not found")
        return

    # Load transcript
    transcript_file = "/content/film_analysis_output.json"
    if not os.path.exists(transcript_file):
        print(f"Error: {transcript_file} not found")
        return

    transcript_data = load_transcript(transcript_file)
    if not transcript_data:
        print("Error: No valid transcript data loaded")
        return

    # Debug: Print the first few entries
    print("Transcript data sample:", json.dumps(transcript_data[:2], indent=2))

    # Generate summary segments
    distributed_segments = distribute_segments(transcript_data, num_requests=4)
    summary_segments = []
    for request_count, chunk in enumerate(distributed_segments, 1):
        if chunk:
            print(f"Processing API request {request_count} with {len(chunk)} segment(s)")
            chunk_summary = generate_summary_chunk(chunk, request_count)
            summary_segments.extend(chunk_summary)

    # Sort by start time
    summary_segments = sorted(summary_segments, key=lambda x: x["start"])
    total_duration = sum(segment["end"] - segment["start"] for segment in summary_segments)
    print(f"Total summary duration: {total_duration:.2f} seconds")

    if summary_segments:
        print("Generated Summary Segments:")
        print(json.dumps(summary_segments, indent=2))

        # Save summary segments to a file
        with open("summary.json", "w") as f:
            json.dump(summary_segments, f, indent=2)
        print("Summary segments saved to summary.json")

        # Generate text summary
        summary_text = generate_text_summary(summary_segments)
        if summary_text:
            print("Text Summary:")
            print(summary_text)

            # Save transcript
            save_transcript(summary_text, transcript_output_path)

            # Generate summary audio targeting video duration
            audio_duration = generate_summary_audio(summary_text, output_audio_path, total_duration)

            # Create the summary video
            create_summary_video(summary_segments, output_audio_path, input_file_path, output_summary_path)
        else:
            print("Failed to generate text summary")
    else:
        print("Failed to generate summary")

if __name__ == "__main__":
    main()