In [11]:
!pip install -U -q twelvelabs azure-search-documents azure-identity python-dotenv

In [12]:
import json
import numpy as np
import os
import requests
from azure.core.credentials import AzureKeyCredential
from azure.identity import DefaultAzureCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    AzureOpenAIModelName,
    AzureOpenAIVectorizer,
    AzureOpenAIVectorizerParameters,
    BinaryQuantizationCompression,
    HnswAlgorithmConfiguration,
    HnswParameters,
    ScalarQuantizationCompression,
    ScalarQuantizationParameters,
    SearchField,
    SearchFieldDataType,
    SearchIndex,
    SearchableField,
    SimpleField,
    VectorSearch,
    VectorSearchAlgorithmKind,
    VectorSearchProfile,
)
from azure.search.documents.models import VectorizableTextQuery, VectorizedQuery
from dotenv import load_dotenv

In [13]:
# Load environment variables from .env file
load_dotenv()

# Set up OpenAI client based on environment variables
AZURE_OPENAI_ENDPOINT: str = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_KEY: str = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME: str = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME")
AZURE_OPENAI_API_VERSION: str = "2023-05-15"

# Set up Search Service client based on environment variables
SEARCH_SERVICE_ENDPOINT = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
SEARCH_SERVICE_API_KEY = os.getenv("AZURE_SEARCH_ADMIN_KEY")

INDEX_NAME = "twleve-labs-video-search"

# Authentication method flag
use_aad_for_search = False  # Set based on your authentication method

# Choose the correct credential based on your authentication method
credential = (
    DefaultAzureCredential()
    if use_aad_for_search
    else AzureKeyCredential(SEARCH_SERVICE_API_KEY)
)

# Initialize the SearchIndexClient for creating indexes
index_client = SearchIndexClient(
    endpoint=SEARCH_SERVICE_ENDPOINT, credential=credential
)

# Initialize the SearchClient
search_client = SearchClient(endpoint=SEARCH_SERVICE_ENDPOINT, credential=credential, index_name=INDEX_NAME)

In [14]:
from twelvelabs import TwelveLabs
import os

# Retrieve the API key from environment variables
TWELVE_LABS_API_KEY = os.getenv('TWELVE_LABS_API_KEY')

# Initialize the Twelve Labs client
twelvelabs_client = TwelveLabs(api_key=TWELVE_LABS_API_KEY)

In [4]:
from twelvelabs import TwelveLabs
from twelvelabs.models.embed import EmbeddingsTask


def generate_embedding(video_url):
    """
    Generate embeddings for a given video URL using the Twelve Labs API.

    This function creates an embedding task for the specified video URL using
    the Marengo-retrieval-2.6 engine. It monitors the task progress and waits
    for completion. Once done, it retrieves the task result and extracts the
    embeddings along with their associated metadata.

    Args:
        video_url (str): The URL of the video to generate embeddings for.

    Returns:
        tuple: A tuple containing two elements:
            1. list: A list of dictionaries, where each dictionary contains:
                - 'embedding': The embedding vector as a list of floats.
                - 'start_offset_sec': The start time of the segment in seconds.
                - 'end_offset_sec': The end time of the segment in seconds.
                - 'embedding_scope': The scope of the embedding (e.g., 'shot', 'scene').
            2. EmbeddingsTaskResult: The complete task result object from Twelve Labs API.

    Raises:
        Any exceptions raised by the Twelve Labs API during task creation,
        execution, or retrieval.
    """
    # Create an embedding task
    task = twelvelabs_client.embed.task.create(
        engine_name="Marengo-retrieval-2.6", video_url=video_url
    )
    print(
        f"Created task: id={task.id} engine_name={task.engine_name} status={task.status}"
    )

    # Define a callback function to monitor task progress
    def on_task_update(task: EmbeddingsTask):
        print(f"  Status={task.status}")

    # Wait for the task to complete
    status = task.wait_for_done(sleep_interval=2, callback=on_task_update)
    print(f"Embedding done: {status}")

    # Retrieve the task result
    task_result = twelvelabs_client.embed.task.retrieve(task.id)

    # Extract and return the embeddings
    embeddings = []
    for v in task_result.video_embeddings:
        embeddings.append(
            {
                "embedding": v.embedding.float,
                "start_offset_sec": v.start_offset_sec,
                "end_offset_sec": v.end_offset_sec,
                "embedding_scope": v.embedding_scope,
            }
        )

    return embeddings, task_result

In [33]:
# Define the list of video URLs
video_url = [
    "https://static.videezy.com/system/resources/previews/000/004/947/original/Rainy_Road_4K_Living_Background.mp4",
    "https://static.videezy.com/system/resources/previews/000/048/876/original/pattayaSM03.mp4",
]


# Example usage
for video in video_url:
    embeddings, task_result = generate_embedding(video)

    # Add the video_url to each embedding dictionary
    for emb in embeddings:
        emb["video_url"] = video

    print(f"Generated {len(embeddings)} embeddings for the video {video}")
    for i, emb in enumerate(embeddings):
        print(f"Embedding {i+1}:")
        print(f"  Scope: {emb['embedding_scope']}")
        print(
            f"  Time range: {emb['start_offset_sec']} - {emb['end_offset_sec']} seconds"
        )
        print(f"  Embedding vector (first 5 values): {emb['embedding'][:5]}")
        print(f"  Video URL: {emb['video_url']}")
    print()

BadRequestError: Error code: 400 - {'code': 'video_resolution_too_low', 'message': 'The resolution of the video is too low. Please use a video with resolution between 360p(480x360) and 2160p(3840x2160). Current resolution is 354p.'}