In [3]:
#import necessary libraries

from IPython.display import display, Image, Audio
import cv2  # We're using OpenCV to read video, to install !pip install opencv-python
import base64
import time
from openai import OpenAI
import os
from dotenv import load_dotenv  # Import dotenv to load environment variables
import requests

# Load environment variables from .env file
load_dotenv()

# Use environment variable for API key
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [10]:
#capture vid frames
video = cv2.VideoCapture("data/bike_vid.mp4")

base64Frames = []
while video.isOpened():
    success, frame = video.read()
    if not success:
        break
    _, buffer = cv2.imencode(".jpg", frame)
    base64Frames.append(base64.b64encode(buffer).decode("utf-8"))

video.release()
print(len(base64Frames), "frames read.")

414 frames read.


In [13]:
#prompt GPT to summarize the video
PROMPT_MESSAGES = [
    {
        "role": "user",
        "content": [
            "These are frames from a video that I want to upload. Generate a concise summary that I can upload along with the video.",
            *map(lambda x: {"image": x, "resize": 768}, base64Frames[0::50]),
        ],
    },
]
params = {
    "model": "gpt-4o",
    "messages": PROMPT_MESSAGES,
    "max_tokens": 200,
}

result = client.chat.completions.create(**params)
print(result.choices[0].message.content)

"Enjoy a serene stroll along a picturesque urban riverside. Watch as people bike, jog, and relax against a backdrop of beautiful historic architecture and lush greenery. A peaceful escape in the heart of the city!"


In [33]:
#prompt GPT to create quiz questions for the video
PROMPT_MESSAGES = [
    {
        "role": "user",
        "content": [
            "These are frames from a video that I want to upload. Generate several quiz questions (multiple choice, true/false, and short answer) based on a summary of the video.",
            *map(lambda x: {"image": x, "resize": 768}, base64Frames[0::50]),
        ],
    },
]
params = {
    "model": "gpt-4o",
    "messages": PROMPT_MESSAGES,
    "max_tokens": 200,
}

result = client.chat.completions.create(**params)
print(result.choices[0].message.content)

Certainly! Here are quiz questions based on a summary of your video using the provided frames:

**Multiple Choice Questions:**

1. What mode of transportation is featured most prominently in the video frames?
   - A) Walking
   - B) Running
   - C) Cycling
   - D) Driving

2. What type of building can be seen in the background of the frames?
   - A) A modern glass office building
   - B) A historical stone building
   - C) A wooden house
   - D) A factory

3. What sport, besides cycling, is shown being performed in the video?
   - A) Swimming
   - B) Football
   - C) Running
   - D) Basketball

4. What is the cyclist wearing on their head?
   - A) A cap
   - B) A helmet
   - C) A hat
   - D) Goggles

**True/False Questions:**

1


In [34]:
#convert quiz questions to TTS 

response = requests.post(
    "https://api.openai.com/v1/audio/speech",
    headers={
        "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
    },
    json={
        "model": "tts-1-1106",
        "input": result.choices[0].message.content,
        "voice": "onyx",
    },
)

audio = b""
for chunk in response.iter_content(chunk_size=1024 * 1024):
    audio += chunk
Audio(audio)