In [None]:
import base64
import os

import cv2
import openai
from IPython.display import display, Image, Audio

In [None]:
# Decode every frame of the source video and keep each one as a
# base64-encoded JPEG string in `base64Frames`.
video = cv2.VideoCapture("wearable.mp4")
if not video.isOpened():
    # Without this check an unreadable path silently produces zero frames
    # and the failure only surfaces later, in the API request.
    video.release()
    raise IOError('Could not open video file "wearable.mp4"')

base64Frames = []
try:
    while video.isOpened():
        success, frame = video.read()
        if not success:
            # read() reports failure once no further frame can be returned.
            break
        encoded, buffer = cv2.imencode(".jpg", frame)
        if not encoded:
            # imencode signals failure through its first return value.
            raise RuntimeError(
                f"JPEG encoding failed for frame {len(base64Frames)}"
            )
        base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
finally:
    # Release the capture handle even if reading or encoding raised.
    video.release()

print(len(base64Frames), "frames read.")

In [None]:
# Ask the vision model for a narration script. The message content is the
# instruction text followed by every 10th captured frame.
PROMPT_MESSAGES = [
    {
        "role": "user",
        "content": [
            "These are frames of a video. Create a short voiceover script in the style of a super excited sports narrator who is narrating his favorite match. Except he's narrating the point of view of a human doing mundane activities but he is still super excited.",
            *(
                {"image": frame_b64, "resize": 768}
                for frame_b64 in base64Frames[0::10]
            ),
        ],
    },
]
params = {
    "model": "gpt-4-vision-preview",
    "messages": PROMPT_MESSAGES,
    # Read the key from the environment so no credential is stored in the
    # notebook; a missing variable fails fast with a KeyError.
    "api_key": os.environ["OPENAI_API_KEY"],
    "headers": {"Openai-Version": "2020-11-07"},
    "max_tokens": 500,
}

# `result` is consumed by the following cell, which shortens the script.
result = openai.ChatCompletion.create(**params)
print(result.choices[0].message.content)

In [None]:
# Ask the text model to condense the previously generated script.
# NOTE: this cell reads `result` and then overwrites it, so re-running it
# without re-running the previous cell shortens the already-shortened text.
PROMPT_MESSAGES = [
    {
        "role": "user",
        "content": f"Shorten this script so it can be read in about 30 seconds: {result.choices[0].message.content}",
    }
]
params = {
    "model": "gpt-4",
    "messages": PROMPT_MESSAGES,
    # Read the key from the environment so no credential is stored in the
    # notebook; a missing variable fails fast with a KeyError.
    "api_key": os.environ["OPENAI_API_KEY"],
    "headers": {"Openai-Version": "2020-11-07"},
    "max_tokens": 500,
}

result = openai.ChatCompletion.create(**params)
print(result.choices[0].message.content)

In [None]:
from elevenlabs import generate, play, set_api_key
# Turn the shortened script into speech and play it back.
# The ElevenLabs key is read from the environment so no credential is stored
# in the notebook; a missing variable fails fast with a KeyError.
set_api_key(os.environ["ELEVEN_API_KEY"])
audio = generate(
    text=result.choices[0].message.content,
    voice="Oliver",
    model="eleven_multilingual_v2",
)

# `audio` is also written to disk by the following cell for muxing.
play(audio)

In [None]:
import subprocess
import tempfile
import os

# Write the narration to a temporary MP3 so ffmpeg can read it as a second
# input. delete=False keeps the file on disk after the with-block closes it;
# it is removed explicitly once ffmpeg has finished.
with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as audio_file:
    audio_file.write(audio)
    audio_filename = audio_file.name

video_filename = 'wearable.mp4'
output_filename = 'output_with_audio.mp4'

ffmpeg_command = [
    'ffmpeg',
    '-y',  # overwrite the output file without prompting
    '-i', video_filename,
    '-i', audio_filename,
    # Select streams explicitly: video from the first input, audio from the
    # narration. Without -map, ffmpeg's automatic selection may pick an audio
    # track that already exists in the source video instead.
    '-map', '0:v:0',
    '-map', '1:a:0',
    '-c:v', 'copy',  # keep the video stream as-is, no re-encode
    '-c:a', 'aac',
    '-strict', 'experimental',
    output_filename
]

try:
    # check=True surfaces a non-zero ffmpeg exit status as CalledProcessError
    # instead of silently leaving a missing or broken output file.
    subprocess.run(ffmpeg_command, check=True)
finally:
    # Remove the temporary MP3 even when ffmpeg is missing or fails.
    os.remove(audio_filename)