# AI视频解说

## 1.环境配置

In [None]:
# Install the openai, python-dotenv and pydub packages, plus ffmpeg via apt.
!pip install openai python-dotenv pydub
!apt-get -qq -y install ffmpeg

import openai,os

# Mount Google Drive at /content/drive so the paths used below resolve.
from google.colab import drive
drive.mount('/content/drive')

# Load environment variables from the .env file stored on the mounted Drive.
# The return value of load_dotenv is deliberately discarded.
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv('/content/drive/MyDrive/Colab/.env')

# Indexing os.environ raises KeyError if OPENAI_API_KEY is still unset here.
openai.api_key  = os.environ['OPENAI_API_KEY']

In [25]:
from IPython.display import display, Image, Audio

import cv2
import base64
import time
import openai
import os
import requests

## 2.视频拆帧
- 将视频上传到 Google Drive 并获取其路径
- 用 OpenCV 库读取视频，并将其拆分为 base64 编码的序列帧

请注意：若输出中显示的帧数量大于 0，则表示拆帧成功。

In [None]:
# Read every frame of the source video and keep each one as a
# base64-encoded JPEG string in `base64Frames`.
video = cv2.VideoCapture("/content/drive/MyDrive/openai/video/cat_1.mp4")

base64Frames = []
try:
    # If the capture could not be opened, isOpened() is False, the loop body
    # never runs and the list stays empty (reported as "0 frames read.").
    while video.isOpened():
        success, frame = video.read()
        if not success:
            # read() reported no frame: stop reading.
            break
        encoded, buffer = cv2.imencode(".jpg", frame)
        if not encoded:
            # Skip a frame that failed to encode instead of storing a
            # meaningless payload that later cells would try to display/send.
            continue
        base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
finally:
    # Release the capture handle even if reading or encoding raised.
    video.release()

print(len(base64Frames), "frames read.")

## 3.预处理和显示帧

In [None]:
# Replay the extracted frames in the notebook: a single display slot is
# created once and then overwritten with each frame in turn.
display_handle = display(None, display_id=True)
for img in base64Frames:
    # Turn the base64 text back into the raw JPEG bytes.
    jpeg_bytes = base64.b64decode(img.encode("utf-8"))
    display_handle.update(Image(data=jpeg_bytes))
    # Pause 25 ms between frames.
    time.sleep(0.025)

## 4.OpenAI接口操作

In [27]:
from openai import OpenAI
# Client used by the cells below for chat completions and text-to-speech.
# It is constructed with no arguments; presumably it picks up the API key
# from the OPENAI_API_KEY environment variable -- confirm against the
# openai library documentation.
client = OpenAI()

### 4.1 生成视频描述

In [None]:
# Ask the vision model for an upload description of the video.
# Only every 20th extracted frame is attached, each wrapped in the
# {"image", "resize"} payload dict that accompanies the text prompt.
description_frames = [
    {"image": frame_b64, "resize": 768} for frame_b64 in base64Frames[0::20]
]

PROMPT_MESSAGES = [
    {
        "role": "user",
        "content": [
            "这些是我要上传的视频的帧。 生成引人注目的描述，我可以将其与视频一起上传。",
        ]
        + description_frames,
    },
]

# NOTE(review): confirm that this model name is still available on the account.
params = dict(
    model="gpt-4-vision-preview",
    messages=PROMPT_MESSAGES,
    max_tokens=200,
)

result = client.chat.completions.create(**params)
# Print the text of the first returned choice.
print(result.choices[0].message.content)

### 4.2 生成视频旁白

In [None]:
# Ask the vision model for a short voice-over script for the video.
# Only every 20th extracted frame is attached, each wrapped in the
# {"image", "resize"} payload dict that accompanies the text prompt.
narration_frames = [
    {"image": frame_b64, "resize": 768} for frame_b64 in base64Frames[0::20]
]

PROMPT_MESSAGES = [
    {
        "role": "user",
        "content": [
            "这些是视频的帧。 按照大卫·阿滕伯勒的风格创建一个简短的配音脚本。 仅包含旁白。字数控制在50字左右",
        ]
        + narration_frames,
    },
]

# NOTE(review): confirm that this model name is still available on the account.
params = dict(
    model="gpt-4-vision-preview",
    messages=PROMPT_MESSAGES,
    max_tokens=500,
)

# `result` is read by the text-to-speech cell that follows.
result = client.chat.completions.create(**params)
print(result.choices[0].message.content)

### 4.3 用TTS生成语音

In [None]:
# Text to speak: the content of the first choice in `result`, i.e. whatever
# the most recently executed chat-completion cell produced.
narration_text = result.choices[0].message.content

response = client.audio.speech.create(
    input=narration_text,
    voice="onyx",
    model="tts-1",
)

# Last expression of the cell, so the notebook shows the Audio object
# built from the returned audio bytes.
Audio(response.content)

### 4.4 保存mp3文件用于视频合成

In [38]:
# Destination of the synthesized narration on the mounted Drive.
file_path = '/content/drive/MyDrive/openai/audio/cat_1.mp3'

# Create the parent folder if it does not exist yet.
audio_dir = os.path.dirname(file_path)
os.makedirs(audio_dir, exist_ok=True)

# Write the raw audio bytes from the TTS response to the MP3 file.
with open(file_path, mode='wb') as mp3_out:
    mp3_out.write(response.content)