# TwelveLabs / OpenSearch Demonstration

Code for the Medium blog post, [Multi-Vector Semantic Search: Advanced Video Search with TwelveLabs and Amazon OpenSearch](https://garystafford.medium.com/multi-vector-semantic-search-advanced-video-search-with-twelve-labs-and-amazon-opensearch-7b81ba52c373). The post shows how TwelveLabs AI models and Amazon OpenSearch Serverless enable multi-vector semantic and hybrid search for video content.

**Prerequisites**

See README file for prerequisites.

Videos must meet the TwelveLabs [requirements](https://docs.twelvelabs.io/docs/get-started/quickstart/create-embeddings#prerequisites):

- Video resolution: Must be at least 360x360 and must not exceed 3840x2160.
- Aspect ratio: Must be one of 1:1, 4:3, 4:5, 5:4, 16:9, 9:16, or 17:9.
- Video and audio formats: Your video files must be encoded in the video and audio formats listed on the FFmpeg Formats Documentation page.
- Duration: Must be between 4 seconds and 2 hours (7,200s).
- File size: Must not exceed 2 GB.

**Workflow Diagram**

![Architecture](./twelve_labs_bedrock.png)


## Install Required Python Packages


In [None]:
%pip install pip -Uq
%pip install python-dotenv twelvelabs boto3 opensearch-py -Uq
%pip install matplotlib Pillow scikit-learn plotly nbformat pandas -Uq

### Restart Kernel

If this is the first time you are installing the packages, restart your Jupyter Notebook's kernel before continuing.


In [None]:
# Test that the Twelve Labs package is installed
%pip show twelvelabs

## Load Environment Variables

There are several ways to load your sensitive environment variables. The package, `python-dotenv`, reads key-value pairs from a plain text `.env` file and can set them as environment variables. We are using the `.env` file to store our sensitive variables.


In [None]:
import os
from dotenv import load_dotenv

# Read the key-value pairs from the local .env file into the process environment
load_dotenv()  # Loads variables from .env file

# AWS region and credentials used to sign requests to Amazon OpenSearch Serverless.
# os.getenv returns None for any variable that is not set.
AWS_REGION = os.getenv("AWS_REGION")
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
AWS_SESSION_TOKEN = os.getenv("AWS_SESSION_TOKEN")
# API key for the TwelveLabs client
TL_API_KEY = os.getenv("TL_API_KEY")
# Host name of the OpenSearch endpoint (used as the client's "host" value)
OPENSEARCH_ENDPOINT = os.getenv("OPENSEARCH_ENDPOINT")

### Constants


In [None]:
# Set the TwelveLabs and OpenSearch index names to the same value
INDEX_NAME = "commercials-index"

# Set the local directories for videos, intermediate files, and OpenSearch documents
VIDEO_DIRECTORY = "videos/commercials"
OUTPUT_DIRECTORY = "output/commercials"
DOCUMENT_DIRECTORY = "documents/commercials"

## Create TwelveLabs Index


### TwelveLabs Client


In [None]:
import json

from twelvelabs import TwelveLabs
from twelvelabs.models import Video
from twelvelabs.exceptions import NotFoundError

tl_client = TwelveLabs(api_key=TL_API_KEY)

### Create TwelveLabs Index


In [None]:
def create_index(index_name: str) -> str:
    """Return the ID of the TwelveLabs index named `index_name`, creating it when absent.

    Args:
        index_name (str): The name of the index to look up or create.

    Returns:
        str: The ID of the existing or newly created index.
    """
    # Ask TwelveLabs for at most one index matching the requested name
    matches = tl_client.index.list(
        name=index_name,
        sort_option="asc",
        page_limit=1,
    )

    # Reuse the first match, if there is one
    for existing in matches or ():
        print(f"Index '{existing.name}' already exists.")
        return existing.id

    # Nothing matched: create the index with both models enabled for visual and audio
    print(f"Creating index '{index_name}'...")
    created_index = tl_client.index.create(
        name=index_name,
        models=[
            {"name": model_name, "options": ["visual", "audio"]}
            for model_name in ("marengo2.7", "pegasus1.2")
        ],
        addons=["thumbnail"],
    )
    return created_index.id


# Look up (or create) the TwelveLabs index and keep its ID for the cells that follow
tl_index_id = create_index(INDEX_NAME)
print(f"Index ID: {tl_index_id}")

## Upload Videos to Index


In [None]:
def upload_video(tl_index_id: str, video_path: str) -> None:
    """Submit one local video file to a TwelveLabs index as an upload task.

    Any failure is reported on stdout instead of being raised, so a batch
    upload carries on past a file that cannot be submitted.

    Args:
        tl_index_id (str): The ID of the TwelveLabs index.
        video_path (str): The path to the video file to upload.

    Returns:
        None
    """
    try:
        upload_task = tl_client.task.create(index_id=tl_index_id, file=video_path)
        print(f"Task id={upload_task.id}")
        print(f"Video '{video_path}' uploaded successfully!")
    except Exception as error:
        print(f"Failed to upload video '{video_path}': {error}")


# Upload every .mp4 file found directly inside the video directory
if os.path.exists(VIDEO_DIRECTORY):
    for video in os.listdir(VIDEO_DIRECTORY):
        if not video.endswith(".mp4"):
            continue
        video_path = os.path.join(VIDEO_DIRECTORY, video)
        upload_video(tl_index_id, video_path)
else:
    print(f"Video directory '{VIDEO_DIRECTORY}' does not exist, skipping upload.")

## Retrieve Embeddings and Analyses from TwelveLabs Index


### Retrieve List of Video IDs from Index


In [None]:
def get_video_ids_from_index(tl_index_id: str, page_limit: int = 50) -> list:
    """Retrieve the unique video IDs from the specified TwelveLabs index.

    Args:
        tl_index_id (str): The ID of the index to query.
        page_limit (int): Page size passed to the TwelveLabs list call.
            NOTE(review): presumably only the first page is returned, so an
            index holding more videos than this needs a larger value — confirm.

    Returns:
        list: De-duplicated video IDs, in the order the API returned them.

    Raises:
        NotFoundError: If the listing comes back empty.
    """
    videos = tl_client.index.video.list(
        index_id=tl_index_id,
        page_limit=page_limit,
    )
    if not videos:
        raise NotFoundError(f"No videos found in index {tl_index_id}")

    # dict.fromkeys de-duplicates while keeping first-seen order; a set would
    # reorder the IDs differently on every run
    video_ids = list(dict.fromkeys(video.id for video in videos))
    print(f"Total count of videos in index {tl_index_id}: {len(video_ids)}")
    return video_ids

### Bulk Retrieve Embeddings from Index


In [None]:
def save_embeddings_to_json(video: Video, output_path: str) -> None:
    """Save a video's details and embeddings to a JSON file if it doesn't already exist.

    The file is named `<video id>_embeddings.json` inside `output_path`.

    Args:
        video (Video): The video object containing embedding details.
        output_path (str): The directory where the JSON file will be saved.

    Returns:
        None
    """
    # The filename is derived from the video ID
    output_filename = f"{output_path}/{video.id}_embeddings.json"

    # Check for an existing file first so the video is not serialized needlessly
    if os.path.exists(output_filename):
        print(f"Embeddings already exist for video ID {video.id}. Skipping...")
        return

    print(f"Saving embeddings for video ID {video.id} to {output_filename}")

    # Round-trip through JSON to turn the model into plain dicts and lists,
    # then store the ID under an explicit "video_id" key
    video_data = json.loads(video.model_dump_json())
    video_data["video_id"] = video.id

    with open(output_filename, "w") as json_file:
        json.dump(video_data, json_file, indent=4)
    print(f"Embeddings saved to {output_filename}")


# Fetch the IDs of all videos currently in the index
video_ids = get_video_ids_from_index(tl_index_id)

# For each video, pull its visual-text and audio embeddings and write them to JSON
if os.path.exists(OUTPUT_DIRECTORY):
    for video_id in video_ids:
        print(f"Processing video ID: {video_id}")
        video = tl_client.index.video.retrieve(
            index_id=tl_index_id,
            id=video_id,
            embedding_option=["visual-text", "audio"],
        )
        save_embeddings_to_json(video, OUTPUT_DIRECTORY)
else:
    print(f"Output directory '{OUTPUT_DIRECTORY}' does not exist, skipping retrieval.")

### Bulk Create Analyses from Videos in Index


In [None]:
def summarize_video(tl_index_id: str, video_id: str, output_path: str) -> None:
    """Analyze a video with TwelveLabs and save the results to a JSON file if it doesn't already exist.

    The summary, chapters, highlights, open-ended analysis, and gist are
    combined and written to `<video_id>_analysis.json` inside `output_path`.
    The file is written only after every request has succeeded.

    Args:
        tl_index_id (str): The ID of the index where the video is stored.
        video_id (str): The ID of the video to summarize.
        output_path (str): The directory where the JSON file will be saved.

    Returns:
        None
    """
    # exist_ok=True creates the directory when needed without failing if it
    # already exists (or appears between a check and the creation)
    os.makedirs(output_path, exist_ok=True)

    # Check if the analysis already exists
    filename = f"{output_path}/{video_id}_analysis.json"
    if os.path.exists(filename):
        print(f"Analysis already exists for video ID {video_id}. Skipping...")
        return
    print(f"Analyzing video ID: {video_id}")

    # Get the video summary
    res_summary = tl_client.summarize(
        video_id=video_id,
        prompt="Summarize the video in a concise manner.",
        temperature=0.4,
        type="summary",
    )

    # Get the chapters of the video
    res_chapters = tl_client.summarize(
        video_id=video_id,
        prompt="List the chapters of the video.",
        temperature=0.4,
        type="chapter",
    )

    # Get the highlights of the video
    res_highlights = tl_client.summarize(
        video_id=video_id,
        prompt="List the highlights of the video.",
        temperature=0.4,
        type="highlight",
    )

    # Get open-ended text analysis of the video
    res_analyze = tl_client.analyze(
        video_id=video_id,
        prompt="Describe what is happening in the video.",
        temperature=0.4,
    )

    # Get the gist of the video
    res_gist = tl_client.gist(video_id=video_id, types=["title", "topic", "hashtag"])

    # Combined responses
    analyses = {
        "gist": res_gist.model_dump(),
        "video_id": video_id,
        "index_id": tl_index_id,
        "summary": res_summary.summary,
        "analysis": res_analyze.data,
        "chapters": res_chapters.chapters.model_dump(),
        "highlights": res_highlights.highlights.model_dump(),
    }

    # Save to file
    with open(filename, "w") as f:
        json.dump(analyses, f)


# Fetch the IDs of all videos currently in the index
video_ids = get_video_ids_from_index(tl_index_id)

# Generate and save an analysis file for every video that does not have one yet
if os.path.exists(OUTPUT_DIRECTORY):
    for video_id in video_ids:
        print(f"Processing video ID: {video_id}")
        summarize_video(tl_index_id, video_id, OUTPUT_DIRECTORY)
else:
    print(f"Output directory '{OUTPUT_DIRECTORY}' does not exist, skipping analysis.")

### Merge Embeddings and Analyses


In [None]:
def extract_video_ids(output_path: str) -> list:
    """Collect the video IDs encoded in the analysis filenames of a directory.

    Args:
        output_path (str): Directory containing the analysis JSON files

    Returns:
        list: The part of each `*_analysis.json` filename that precedes the suffix
    """
    suffix = "_analysis.json"
    return [
        entry.partition(suffix)[0]  # keep everything before the suffix
        for entry in os.listdir(output_path)
        if entry.endswith(suffix)
    ]


# Rebuild the list of video IDs from the analysis files already on disk
if os.path.exists(OUTPUT_DIRECTORY):
    video_ids = extract_video_ids(OUTPUT_DIRECTORY)
    print(f"Found {len(video_ids)} video IDs: {video_ids[:5]}")
else:
    print(f"Output directory '{OUTPUT_DIRECTORY}' does not exist, skipping extraction.")

In [None]:
def combine_segments_to_documents(
    output_path: str, document_path: str, video_ids: list
) -> None:
    """Combine embeddings and analyses into single documents and save them to a local directory.

    For each video ID, `<id>_embeddings.json` and `<id>_analysis.json` are read
    from `output_path`, merged, and written to `<id>_document.json` in
    `document_path`. Videos that already have a document, or that are missing
    either input file, are skipped with a message so that one incomplete video
    does not abort the rest of the batch.

    Args:
        output_path (str): Directory containing the analysis and embeddings JSON files
        document_path (str): Directory to save the combined document files
        video_ids (list): List of video IDs to process

    Returns:
        None
    """
    for video_id in video_ids:
        filename = f"{document_path}/{video_id}_document.json"
        # Check if the document already exists
        if os.path.exists(filename):
            print(f"Document already exists for video ID {video_id}. Skipping...")
            continue

        # Both inputs are required; skip this video rather than failing the batch
        embeddings_file = f"{output_path}/{video_id}_embeddings.json"
        analysis_file = f"{output_path}/{video_id}_analysis.json"
        missing = [
            path
            for path in (embeddings_file, analysis_file)
            if not os.path.exists(path)
        ]
        if missing:
            print(
                f"Missing input file(s) for video ID {video_id}: {missing}. Skipping..."
            )
            continue

        print(f"Processing video ID: {video_id}")
        with open(embeddings_file, "r") as f:
            embeddings = json.load(f)

        with open(analysis_file, "r") as f:
            analyses = json.load(f)

        # Merge the two documents; embeddings values win on duplicate keys
        document = {**analyses, **embeddings}

        # Remove unneeded keys
        document["gist"].pop("id", None)
        document["gist"].pop("usage", None)

        # Lift the segments to the top level and drop the "embedding" wrapper
        segments = document.pop("embedding")["video_embedding"]["segments"]

        # Rename each segment's vector field to "segment_embedding"
        for segment in segments:
            segment["segment_embedding"] = segment.pop("embeddings_float")
        document["segments"] = segments

        with open(filename, "w") as f:
            json.dump(document, f, indent=4)


# Build the combined documents only when both working directories are present
if os.path.exists(DOCUMENT_DIRECTORY) and os.path.exists(OUTPUT_DIRECTORY):
    combine_segments_to_documents(OUTPUT_DIRECTORY, DOCUMENT_DIRECTORY, video_ids)
elif not os.path.exists(DOCUMENT_DIRECTORY):
    print(
        f"Document directory '{DOCUMENT_DIRECTORY}' does not exist, skipping document creation."
    )
else:
    print(
        f"Output directory '{OUTPUT_DIRECTORY}' does not exist, skipping document creation."
    )

## OpenSearch


### Load Required Packages


In [None]:
import boto3

from opensearchpy import (
    AWSV4SignerAuth,
    NotFoundError,
    OpenSearch,
    RequestsHttpConnection,
)

### Option #1: Amazon OpenSearch Client


In [None]:
# Create OpenSearch client for Amazon OpenSearch Serverless
# https://opensearch.org/docs/latest/clients/python-low-level/#connecting-to-amazon-opensearch-serverless

# Service name passed to the SigV4 signer for Amazon OpenSearch Serverless
service = "aoss"
# Resolve AWS credentials from the values loaded from the environment earlier
credentials = boto3.Session(
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    aws_session_token=AWS_SESSION_TOKEN,
    region_name=AWS_REGION,
).get_credentials()
# Signs each request with the credentials for the given region and service
auth = AWSV4SignerAuth(credentials, AWS_REGION, service)

# HTTPS on port 443 with certificate verification enabled
os_client = OpenSearch(
    hosts=[{"host": OPENSEARCH_ENDPOINT, "port": 443}],
    http_auth=auth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
    pool_maxsize=20,
)

# Bare expression: the notebook displays the client as the cell output
os_client

### Option #2: OpenSearch Client Running in Docker

Recommended for local development and debugging purposes only as an alternative to Amazon OpenSearch Serverless.


In [None]:
import warnings

# Suppress security warnings related to unverified HTTPS requests and SSL connections
warnings.filterwarnings("ignore", message="Unverified HTTPS request")
warnings.filterwarnings(
    "ignore", message="Connecting to https://localhost:9200 using SSL"
)

# Credentials can be overridden through the environment (for example the .env
# file loaded earlier) instead of being fixed in the notebook; the defaults
# are the values that were previously hard-coded here.
os_client = OpenSearch(
    hosts=[{"host": OPENSEARCH_ENDPOINT, "port": 9200}],
    http_auth=(
        os.getenv("OPENSEARCH_USERNAME", "admin"),
        os.getenv("OPENSEARCH_PASSWORD", "OpenSearch123"),
    ),
    use_ssl=True,
    verify_certs=False,  # certificate is not verified: local development only
)

# Bare expression: the notebook displays the client as the cell output
os_client

### Create New OpenSearch Vector Index


#### Optionally: Delete Existing Index


In [None]:
def delete_index(os_client, os_index: str) -> None:
    """Remove an OpenSearch index when it is present, reporting the outcome on stdout.

    Args:
        os_client (OpenSearch): The OpenSearch client instance.
        os_index (str): The name of the index to delete.

    Returns:
        None
    """
    if os_client.indices.exists(index=os_index):
        os_client.indices.delete(index=os_index)
        print(f"Index '{os_index}' deleted successfully.")
        return

    # Nothing to delete
    print(f"Index '{os_index}' does not exist.")


# Delete the OpenSearch index for video embeddings
# Optional and destructive: skip this cell to keep the existing index and its documents
delete_index(os_client, INDEX_NAME)

#### Create New Index


In [None]:
# https://docs.opensearch.org/docs/latest/vector-search/specialized-operations/nested-search-knn/


def create_index(
    os_client,
    os_index: str,
    dimension: int = 1024,
    space_type: str = "cosinesimil",
) -> None:
    """Create an OpenSearch k-NN index with a nested `segments` vector field.

    Does nothing except print a message when the index already exists.

    NOTE: this function has the same name as the TwelveLabs `create_index(index_name)`
    defined earlier in the notebook, so running this cell replaces that function
    in the kernel.

    Args:
        os_client (OpenSearch): The OpenSearch client instance.
        os_index (str): The name of the index to create.
        dimension (int): Dimension of the `segment_embedding` vectors.
        space_type (str): Distance function for the k-NN method. Use "l2" for
            Amazon OpenSearch Serverless.

    Returns:
        None
    """
    # Check once, up front, whether the index already exists
    if os_client.indices.exists(index=os_index):
        print(f"Index '{os_index}' already exists.")
        return

    index_body = {
        "settings": {
            "index": {
                "knn": True,
                "number_of_shards": 2,
            }
        },
        "mappings": {
            "properties": {
                # One nested entry per video segment, each with its own vector
                "segments": {
                    "type": "nested",
                    "properties": {
                        "segment_embedding": {
                            "type": "knn_vector",
                            "dimension": dimension,
                            "method": {
                                "engine": "faiss",
                                "name": "hnsw",
                                "space_type": space_type,
                            },
                        }
                    },
                }
            }
        },
    }

    os_client.indices.create(index=os_index, body=index_body)
    print(f"Index '{os_index}' created successfully.")


# Create the OpenSearch index for video embeddings
# (only prints a message if an index with this name already exists)
create_index(os_client, INDEX_NAME)

#### Retrieve Information About OpenSearch Index


In [None]:
# Print the settings and mappings of the index, or a message if it cannot be retrieved
try:
    response = os_client.indices.get(index=INDEX_NAME)
    print(json.dumps(response, indent=4))
except NotFoundError as ex:
    print(f"Index not found: {ex}")
except Exception as ex:
    # Not every exception has an `.error` attribute; fall back to the exception itself
    print(getattr(ex, "error", ex))

### Bulk Index OpenSearch Documents


In [None]:
def load_and_index_documents(os_index: str, document_path: str) -> int:
    """Load documents from JSON files in the specified directory and bulk-index them in OpenSearch.

    Every `*_document.json` file in `document_path` is sent in one bulk request
    using the module-level `os_client`.

    Args:
        os_index (str): The name of the OpenSearch index to write to.
        document_path (str): Directory containing the document JSON files

    Returns:
        int: The number of documents accepted by OpenSearch; 0 when there was
            nothing to index or the bulk request itself failed.
    """
    # json.dumps keeps the action line valid JSON whatever the index name contains
    action_line = json.dumps({"create": {"_index": os_index}})

    # Collect action/document line pairs and join once at the end
    lines = []
    doc_count = 0
    for file in os.listdir(document_path):
        if file.endswith("_document.json"):
            with open(os.path.join(document_path, file), "r") as f:
                document = json.load(f)
            lines.append(action_line)
            lines.append(json.dumps(document))
            doc_count += 1

    # An empty bulk body is rejected by OpenSearch, so stop early
    if not doc_count:
        print(f"No '*_document.json' files found in '{document_path}'.")
        return 0

    # The bulk format is newline-delimited and must end with a newline
    payload = "\n".join(lines) + "\n"

    try:
        response = os_client.bulk(
            index=os_index,
            body=payload,
        )
    except Exception as ex:
        print(f"Error indexing documents: {ex}")
        return 0

    print(json.dumps(response, indent=4))

    # A bulk request can succeed overall while individual items are rejected
    if response.get("errors"):
        failed = sum(
            1
            for item in response.get("items", [])
            if any(result.get("error") for result in item.values())
        )
        print(f"Warning: {failed} of {doc_count} documents were rejected.")
        return doc_count - failed

    return doc_count


# Bulk-index the combined documents and remember how many were submitted
if os.path.exists(DOCUMENT_DIRECTORY):
    row_count = load_and_index_documents(INDEX_NAME, DOCUMENT_DIRECTORY)
    print(f"Total rows to index: {row_count}")
else:
    print(
        f"Document directory '{DOCUMENT_DIRECTORY}' does not exist, skipping indexing."
    )

In [None]:
from time import monotonic, sleep

# Wait for Amazon OpenSearch Serverless indexing to complete and refresh (~60s).
# Poll until at least `row_count` documents are visible, but give up after a
# deadline so a count that never reaches the target cannot hang the notebook.
poll_interval_sec = 10
deadline = monotonic() + 300

response = os_client.count(index=INDEX_NAME)
while response["count"] < row_count and monotonic() < deadline:
    print(f"Current indexed documents: {response['count']}")
    sleep(poll_interval_sec)
    response = os_client.count(index=INDEX_NAME)

if response["count"] >= row_count:
    print(f"Indexing completed. Total indexed documents: {response['count']}")
else:
    print(
        f"Timed out waiting for indexing: {response['count']} of {row_count} documents indexed."
    )

## Query the Amazon OpenSearch Index


### Convert User Text Query to Embedding


In [None]:
def get_text_embedding_from_query(query: str) -> list:
    """Convert a text query to an embedding using TwelveLabs.

    Args:
        query (str): The text query to convert.

    Returns:
        list: The embedding vector of the first returned segment.

    Raises:
        ValueError: If the response has no text embedding or no segments.
    """
    response = tl_client.embed.create(
        model_name="Marengo-retrieval-2.7",
        text_truncate="start",
        text=query,
    )

    # Treat a missing embedding and a missing/empty segment list the same way,
    # so indexing the first segment below cannot fail
    text_embedding = response.text_embedding
    if text_embedding is None or not text_embedding.segments:
        raise ValueError("Failed to retrieve embedding from the response.")

    return text_embedding.segments[0].embeddings_float


# Other sample queries to try: "who charges me less but gives me more",
# "boom boom boom to the baseline", "switch to a turbo tax live expert"
query = "elderly drivers"
text_embedding = get_text_embedding_from_query(query)
print(f"Embedding: {text_embedding[:5]}...")  # Print first 5 elements for brevity

# Optionally save the text embedding to a JSON file for later use
with open("text_embedding.json", "w") as f:
    json.dump(text_embedding, f, indent=4)

### Convert Sample Image to Embedding


In [None]:
def get_image_embedding_from_query(image_file: str) -> list:
    """Convert an image file to an embedding using TwelveLabs.

    Args:
        image_file (str): The image to embed; passed unchanged as the
            `image_file` argument of the TwelveLabs embed call.

    Returns:
        list: The embedding vector of the first returned segment.

    Raises:
        ValueError: If the response has no image embedding or no segments.
    """
    response = tl_client.embed.create(
        model_name="Marengo-retrieval-2.7",
        image_file=image_file,
    )

    # Treat a missing embedding and a missing/empty segment list the same way,
    # so indexing the first segment below cannot fail
    image_embedding = response.image_embedding
    if image_embedding is None or not image_embedding.segments:
        raise ValueError("Failed to retrieve embedding from the response.")

    return image_embedding.segments[0].embeddings_float


# Embed the sample image so it can be used as a query vector
image_embedding = get_image_embedding_from_query(
    "sample_images/nike_dream_crazier_clip.png"
)
print(f"Embedding: {image_embedding[:5]}...")  # Print first 5 elements for brevity

# Optionally save the image embedding to a JSON file for later use
with open("image_embedding.json", "w") as f:
    json.dump(image_embedding, f, indent=4)

In [None]:
# Optional: Reload the embeddings from JSON files instead of calling the API repeatedly.
# This can be useful for offline use or debugging and testing purposes.

# Use context managers so the file handles are closed after loading
with open("text_embedding.json", "r") as f:
    text_embedding = json.load(f)

with open("image_embedding.json", "r") as f:
    image_embedding = json.load(f)

### Nested k-NN Semantic Search (Approximate k-NN Search (ANN))


In [None]:
# Reference: https://docs.opensearch.org/docs/latest/vector-search/vector-search-techniques/approximate-knn/#get-started-with-approximate-k-nn


def semantic_search(os_index: str, embedding: list, k: int = 6) -> dict:
    """Run a nested approximate k-NN search against the OpenSearch index.

    Uses the module-level `os_client`. The stored segment vectors are excluded
    from the returned `_source`.

    Args:
        os_index (str): The name of the OpenSearch index.
        embedding (list): The embedding vector to use for the query.
        k (int): Number of nearest neighbors to request; also used as the
            maximum number of hits returned.

    Returns:
        dict: The search response from OpenSearch, or an empty dict if the
            query failed (the error is printed).
    """
    query = {
        "query": {
            "nested": {
                "path": "segments",
                "query": {
                    "knn": {
                        "segments.segment_embedding": {
                            "vector": embedding,
                            "k": k,
                        }
                    }
                },
            }
        },
        "size": k,
        "_source": {"excludes": ["segments.segment_embedding"]},
    }

    try:
        return os_client.search(body=query, index=os_index)
    except Exception as ex:
        print(f"Error querying index: {ex}")
        return {}


# Query the index with the embedding
search_results_1 = semantic_search(INDEX_NAME, text_embedding)

# semantic_search returns {} when the query fails, so fall back to an empty
# hit list instead of raising KeyError
for hit in search_results_1.get("hits", {}).get("hits", []):
    print(f"Video ID: {hit['_source']['video_id']}")
    print(f"Title: {hit['_source']['gist']['title']}")
    print(f"Score: {hit['_score']}")
    print(f"Duration: {hit['_source']['system_metadata']['duration']:.2f} seconds")
    print("\r")

### Nested k-NN Semantic Search with Filters


In [None]:
# Reference: https://docs.opensearch.org/docs/latest/vector-search/filter-search-knn/efficient-knn-filtering/


def semantic_search_with_filter(
    os_index: str,
    embedding: list,
    k: int = 6,
    min_duration: float = 20,
    max_duration: float = 60,
) -> dict:
    """Run a nested k-NN search restricted to videos within a duration range.

    The filter is applied to the video-level `system_metadata.duration` field
    (inclusive bounds), not to individual segments. Uses the module-level
    `os_client`.

    Args:
        os_index (str): The name of the OpenSearch index.
        embedding (list): The embedding vector to use for the query.
        k (int): Number of nearest neighbors to request; also used as the
            maximum number of hits returned.
        min_duration (float): Minimum video duration, in seconds.
        max_duration (float): Maximum video duration, in seconds.

    Returns:
        dict: The search response from OpenSearch, or an empty dict if the
            query failed (the error is printed).
    """
    duration_filter = {
        "bool": {
            "must": [
                {
                    "range": {
                        "system_metadata.duration": {
                            "gte": min_duration,
                            "lte": max_duration,
                        }
                    }
                },
            ]
        }
    }

    query = {
        "query": {
            "nested": {
                "path": "segments",
                "query": {
                    "knn": {
                        "segments.segment_embedding": {
                            "vector": embedding,
                            "k": k,
                            "filter": duration_filter,
                        }
                    }
                },
            }
        },
        "size": k,
        "_source": {"excludes": ["segments.segment_embedding"]},
    }

    try:
        return os_client.search(body=query, index=os_index)
    except Exception as ex:
        print(f"Error querying index: {ex}")
        return {}


# Query the index with the embedding
search_results_2 = semantic_search_with_filter(INDEX_NAME, text_embedding)

# The search function returns {} when the query fails, so fall back to an
# empty hit list instead of raising KeyError
for hit in search_results_2.get("hits", {}).get("hits", []):
    print(f"Video ID: {hit['_source']['video_id']}")
    print(f"Title: {hit['_source']['gist']['title']}")
    print(f"Score: {hit['_score']}")
    print(f"Duration: {hit['_source']['system_metadata']['duration']:.2f} seconds")
    print("\r")

### Nested k-NN Semantic Search with Inner Hits

Include information about the matching nested fields in the response.


In [None]:
# Reference: https://docs.opensearch.org/docs/latest/vector-search/specialized-operations/nested-search-knn/#inner-hits


def semantic_search_inner_hits(os_index: str, embedding: list, k: int = 6) -> dict:
    """Run a nested k-NN search and return details of the matching segment per video.

    `inner_hits` is requested with the segment's start offset, end offset, and
    embedding option instead of its `_source`. Uses the module-level `os_client`.

    Args:
        os_index (str): The name of the OpenSearch index.
        embedding (list): The embedding vector to use for the query.
        k (int): Number of nearest neighbors to request; also used as the
            maximum number of hits returned.

    Returns:
        dict: The search response from OpenSearch, or an empty dict if the
            query failed (the error is printed).
    """
    query = {
        "query": {
            "nested": {
                "path": "segments",
                "query": {
                    "knn": {
                        "segments.segment_embedding": {
                            "vector": embedding,
                            "k": k,
                        }
                    }
                },
                "inner_hits": {
                    "_source": False,
                    "fields": [
                        "segments.start_offset_sec",
                        "segments.end_offset_sec",
                        "segments.embedding_option",
                    ],
                },
            }
        },
        "size": k,
        "_source": {"excludes": ["segments.segment_embedding"]},
    }

    try:
        return os_client.search(body=query, index=os_index)
    except Exception as ex:
        print(f"Error querying index: {ex}")
        return {}


# Query the index with the embedding
search_results_3 = semantic_search_inner_hits(INDEX_NAME, text_embedding)

# The search function returns {} when the query fails, so fall back to an
# empty hit list instead of raising KeyError
for hit in search_results_3.get("hits", {}).get("hits", []):
    print(f"Video ID: {hit['_source']['video_id']}")
    print(f"Title: {hit['_source']['gist']['title']}")
    print(f"Score: {hit['_score']}")
    print(f"Duration: {hit['_source']['system_metadata']['duration']:.2f} seconds")
    print("Matching Segment:")
    for segment in hit["inner_hits"]["segments"]["hits"]["hits"]:
        print(f"  Segment: {segment['_nested']['offset']}")
        print(f"    Score: {segment['_score']}")
        print(
            f"    Embedding type: {segment['fields']['segments.embedding_option'][0]}"
        )
        print(f"    Start: {segment['fields']['segments.start_offset_sec'][0]} seconds")
        print(f"    End: {segment['fields']['segments.end_offset_sec'][0]} seconds")
    print("\r")

### Nested k-NN Semantic Search with all Nested Hits

Use `expand_nested_docs` to retrieve the scores for all nested field documents within each parent document. By default, only the highest-scoring nested document is considered when you query nested fields.

_Note that as of 2025-06-28, although Amazon OpenSearch Serverless claims that it supports version 2.19, the `expand_nested_docs` parameter is not available (error: `Error querying index: RequestError(400, 'x_content_parse_exception', '[1:12885] [knn] unknown field [expand_nested_docs]')`). The search below was performed in OpenSearch using Docker._


In [None]:
# Reference: https://docs.opensearch.org/docs/latest/vector-search/specialized-operations/nested-search-knn/#retrieving-all-nested-hits


def semantic_search_all_inner_hits(os_index: str, embedding: list, k: int = 6) -> dict:
    """Run a nested k-NN search that scores all nested segments of each video.

    `expand_nested_docs` is enabled so every nested segment is scored, the
    parent score is the maximum segment score, and up to three inner hits are
    returned per video. Uses the module-level `os_client`.

    Args:
        os_index (str): The name of the OpenSearch index.
        embedding (list): The embedding vector to use for the query.
        k (int): Number of nearest neighbors to request; also used as the
            maximum number of hits returned.

    Returns:
        dict: The search response from OpenSearch, or an empty dict if the
            query failed (the error is printed).
    """
    query = {
        "query": {
            "nested": {
                "path": "segments",
                "query": {
                    "knn": {
                        "segments.segment_embedding": {
                            "vector": embedding,
                            "k": k,
                            "expand_nested_docs": True,
                            "rescore": True,
                        }
                    }
                },
                "inner_hits": {
                    "_source": False,
                    "fields": [
                        "segments.start_offset_sec",
                        "segments.end_offset_sec",
                        "segments.embedding_option",
                        # NOTE(review): this requests the raw vector for each
                        # inner hit, which the printing code after this
                        # function never reads — confirm it is needed
                        "segments.segment_embedding",
                    ],
                    "size": 3,
                },
                "score_mode": "max",
            }
        },
        "size": k,
        "_source": {"excludes": ["segments.segment_embedding"]},
    }

    try:
        return os_client.search(body=query, index=os_index)
    except Exception as ex:
        print(f"Error querying index: {ex}")
        return {}


# Query the index with the embedding
search_results_4 = semantic_search_all_inner_hits(INDEX_NAME, text_embedding)

# The search function returns {} when the query fails, so fall back to an
# empty hit list instead of raising KeyError
for hit in search_results_4.get("hits", {}).get("hits", []):
    print(f"Video ID: {hit['_source']['video_id']}")
    print(f"Title: {hit['_source']['gist']['title']}")
    print(f"Score: {hit['_score']}")
    print(f"Duration: {hit['_source']['system_metadata']['duration']:.2f} seconds")
    print("Matching Segment(s):")
    for segment in hit["inner_hits"]["segments"]["hits"]["hits"]:
        print(f"  Segment: {segment['_nested']['offset']}")
        print(f"    Score: {segment['_score']}")
        print(
            f"    Embedding type: {segment['fields']['segments.embedding_option'][0]}"
        )
        print(f"    Start: {segment['fields']['segments.start_offset_sec'][0]} seconds")
        print(f"    End: {segment['fields']['segments.end_offset_sec'][0]} seconds")
    print("\r")

### Nested k-NN Semantic Search with all Nested Hits, with Filtering on Nested Fields


In [None]:
# Reference: https://docs.opensearch.org/docs/latest/vector-search/specialized-operations/nested-search-knn/#retrieving-all-nested-hits


def semantic_search_all_inner_hits(os_index: str, embedding: list) -> dict:
    """Run a nested k-NN search limited to audio segments, returning matching segments as inner hits.

    The k-NN clause carries a term filter on `segments.embedding_option` with
    the value "audio", and sets `expand_nested_docs` and `rescore`.

    Args:
        os_index (str): The Amazon OpenSearch index to search.
        embedding (list): The query vector matched against `segments.segment_embedding`.

    Returns:
        dict: The search response from OpenSearch, or an empty dict if the
            request raised (the error is printed).
    """
    # Parameters of the k-NN clause on the nested vector field
    knn_params = {
        "vector": embedding,
        "k": 3,
        "expand_nested_docs": True,
        "filter": {"term": {"segments.embedding_option": "audio"}},
        "rescore": True,
    }

    # Per-segment fields returned with each inner hit (the vector is omitted)
    segment_fields = [
        "segments.start_offset_sec",
        "segments.end_offset_sec",
        "segments.embedding_option",
    ]

    nested_clause = {
        "path": "segments",
        "query": {"knn": {"segments.segment_embedding": knn_params}},
        "inner_hits": {"_source": False, "fields": segment_fields},
        "score_mode": "max",
    }

    request_body = {
        "query": {"nested": nested_clause},
        "size": 3,
        "_source": {"excludes": ["segments.segment_embedding"]},
    }

    try:
        return os_client.search(body=request_body, index=os_index)
    except Exception as ex:
        print(f"Error querying index: {ex}")
        return {}


# Query the index with the embedding
search_results_5 = semantic_search_all_inner_hits(INDEX_NAME, text_embedding)

# The search helper returns {} when the request fails, so default to an empty
# hit list instead of raising KeyError on the missing "hits" key.
for hit in search_results_5.get("hits", {}).get("hits", []):
    print(f"Video ID: {hit['_source']['video_id']}")
    print(f"Title: {hit['_source']['gist']['title']}")
    print(f"Score: {hit['_score']}")
    print(f"Duration: {hit['_source']['system_metadata']['duration']:.2f} seconds")
    print("Matching Segment(s):")
    # Each inner hit is one nested segment returned for this video.
    for segment in hit["inner_hits"]["segments"]["hits"]["hits"]:
        print(f"  Segment: {segment['_nested']['offset']}")
        print(f"    Score: {segment['_score']}")
        print(
            f"    Embedding type: {segment['fields']['segments.embedding_option'][0]}"
        )
        print(f"    Start: {segment['fields']['segments.start_offset_sec'][0]} seconds")
        print(f"    End: {segment['fields']['segments.end_offset_sec'][0]} seconds")
    print("\r")

### Radial Search

Radial search returns all points in the vector space that lie within a specified maximum distance of the query point, or that meet a minimum score threshold relative to it. The distance used here is squared Euclidean distance.


In [None]:
# Reference: https://docs.opensearch.org/docs/latest/vector-search/specialized-operations/radial-search-knn/


def radial_search(os_index: str, embedding: list) -> dict:
    """Run a nested radial k-NN search that matches segments within `max_distance` of the query vector.

    Args:
        os_index (str): The Amazon OpenSearch index to search.
        embedding (list): The query vector matched against `segments.segment_embedding`.

    Returns:
        dict: The search response from OpenSearch, or an empty dict if the
            request raised (the error is printed).
    """
    # Radial search: a distance bound is given in place of `k`
    knn_clause = {
        "knn": {
            "segments.segment_embedding": {
                "vector": embedding,
                "max_distance": 1,
            }
        }
    }

    request_body = {
        "query": {"nested": {"path": "segments", "query": knn_clause}},
        "size": 6,
        "_source": {"excludes": ["segments.segment_embedding"]},
    }

    try:
        return os_client.search(body=request_body, index=os_index)
    except Exception as ex:
        print(f"Error querying index: {ex}")
        return {}


# Query the index with the embedding using the radial search defined in this
# cell (the original call went to `semantic_search`, so `radial_search` was
# never exercised).
search_results_6 = radial_search(INDEX_NAME, text_embedding)

# radial_search returns {} when the request fails, so default to an empty hit
# list instead of raising KeyError on the missing "hits" key.
for hit in search_results_6.get("hits", {}).get("hits", []):
    print(f"Video ID: {hit['_source']['video_id']}")
    print(f"Title: {hit['_source']['gist']['title']}")
    print(f"Score: {hit['_score']}")
    print(f"Duration: {hit['_source']['system_metadata']['duration']:.2f} seconds")
    print("\r")

## Displaying Previews of Search Results


### Visual Grid of Results


In [None]:
from matplotlib import pyplot as plt
from PIL import Image
from urllib import request
import io


def load_image_from_url(url: str) -> Image.Image:
    """Download the bytes at a URL and open them as a PIL image.

    Args:
        url (str): The URL of the image to load.

    Returns:
        PIL.Image.Image: The opened image, or None if the download or the
            decode raised (the error is printed).
    """
    try:
        # Read the full payload first; the image is opened from an in-memory copy
        with request.urlopen(url) as resp:
            raw_bytes = resp.read()
        return Image.open(io.BytesIO(raw_bytes))
    except Exception as e:
        print(f"Error loading video thumbnail from URL: {e}")
        return None


index = 1
rows = 3
columns = 3

fig = plt.figure(figsize=(10, 7))
# DPI is a property of the whole figure, so set it once rather than per hit
fig.set_dpi(300)

for hit in search_results_1["hits"]["hits"]:
    # The grid has rows * columns cells; add_subplot raises for any index beyond that
    if index > rows * columns:
        break

    image_url = hit["_source"]["hls"]["thumbnail_urls"][0]
    image = load_image_from_url(image_url)
    # load_image_from_url returns None on failure; skip the hit rather than
    # passing None to imshow
    if image is None:
        continue

    fig.add_subplot(rows, columns, index)
    plt.axis("off")
    plt.imshow(image)
    plt.title(
        f'Video: {hit["_source"]["system_metadata"]["filename"][0:40]}\nScore: {hit["_score"]}',
        fontdict=dict(family="Arial", size=8),
        color="black",
    )
    index += 1

### 2D/3D Visualizations Using t-SNE

t-SNE (t-distributed Stochastic Neighbor Embedding) is a popular technique for reducing high-dimensional data, such as embeddings, to 2 or 3 dimensions for visualization or further analysis.


In [None]:
def semantic_search_t_sne(os_index: str, embedding: list) -> dict:
    """Run a nested k-NN search whose hits keep their full `_source`, for use in the t-SNE plots.

    Unlike the other search helpers in this notebook, no `_source` excludes
    are set, so the request does not strip `segments.segment_embedding`.

    Args:
        os_index (str): The Amazon OpenSearch index to search.
        embedding (list): The query vector matched against `segments.segment_embedding`.

    Returns:
        dict: The search response from OpenSearch including embeddings, or an
            empty dict if the request raised (the error is printed).
    """
    knn_clause = {
        "knn": {
            "segments.segment_embedding": {
                "vector": embedding,
                "k": 9,
            }
        }
    }

    # Per-segment fields returned with each inner hit
    inner_hits = {
        "_source": False,
        "fields": [
            "segments.start_offset_sec",
            "segments.end_offset_sec",
            "segments.embedding_option",
        ],
    }

    request_body = {
        "query": {
            "nested": {
                "path": "segments",
                "query": knn_clause,
                "inner_hits": inner_hits,
            }
        },
        "size": 9,
    }

    try:
        return os_client.search(body=request_body, index=os_index)
    except Exception as ex:
        print(f"Error querying index: {ex}")
        return {}


# Run the t-SNE search for the text embedding; hits keep their segment embeddings
search_results_7 = semantic_search_t_sne(
    os_index=INDEX_NAME,
    embedding=text_embedding,
)

In [None]:
# Build [embedding, filename] pairs from the search results.
# NOTE: the embedding is read from the first entry of each hit's
# `_source.segments` list.
results = [
    [
        hit["_source"]["segments"][0]["segment_embedding"],
        hit["_source"]["system_metadata"]["filename"],
    ]
    for hit in search_results_7["hits"]["hits"]
]

# Add the query embedding last so it is projected alongside the results
results += [[text_embedding, "User query"]]

#### 2D Visualization Using t-SNE


In [None]:
from sklearn.manifold import TSNE
import numpy as np

# Configure t-SNE for a 2-component projection
tsne = TSNE(n_components=2, random_state=42, perplexity=5)

# Stack the embeddings into one array and project them
embeddings = tsne.fit_transform(np.array([res[0] for res in results]))

# Pair every projected point with its label from `results`: [x, y, name]
vis_dims_2d = [
    [point[0], point[1], res[1]] for point, res in zip(embeddings, results)
]
print(vis_dims_2d[0])

In [None]:
import plotly.graph_objs as go
import numpy as np

fig = go.Figure()

# Search results: every entry except the last, which is the user query.
# One trace per point so each video gets its own legend entry.
for point in vis_dims_2d[:-1]:
    fig.add_trace(
        go.Scatter(
            x=np.array([point[0]]),
            y=np.array([point[1]]),
            mode="markers",
            marker=dict(
                size=15,
                colorscale="Viridis",
                opacity=1.0,
                symbol="circle",
            ),
            name=point[2][0:25],
        )
    )

# User query: resolved outside the loop so it is defined even when there are
# no search results to iterate over.
query_point = vis_dims_2d[-1]
x = np.array([query_point[0]])
y = np.array([query_point[1]])

fig.add_trace(
    go.Scatter(
        x=x,
        y=y,
        mode="text+markers",
        marker=dict(
            size=15,
            color="black",
            colorscale="Viridis",
            opacity=1.0,
            symbol="square",
        ),
        name=query_point[2][0:25],
        text=query_point[2],
        textposition="bottom center",
        showlegend=False,
    )
)

fig.update_layout(
    autosize=True,
    width=900,
    height=600,
    font=dict(size=12, color="black", family="Arial, sans-serif"),
    title="Commercials Search Results using t-SNE",
    margin=dict(l=30, r=30, b=30, t=60, pad=10),
    xaxis=dict(title="x"),
    yaxis=dict(title="y"),
    legend=dict(title="   Search Results"),
)
fig.show()

#### 3D Visualization Using t-SNE


In [None]:
from sklearn.manifold import TSNE
import numpy as np

# Configure t-SNE for a 3-component projection
tsne = TSNE(n_components=3, random_state=42, perplexity=5)

# Stack the embeddings into one array and project them
embeddings = tsne.fit_transform(np.array([res[0] for res in results]))

# Pair every projected point with its label from `results`: [x, y, z, name]
vis_dims_3d = [
    [point[0], point[1], point[2], res[1]]
    for point, res in zip(embeddings, results)
]
print(vis_dims_3d[0])

In [None]:
import plotly.graph_objs as go
import numpy as np

fig = go.Figure()

# Results: every entry except the last, which is the user query.
# One trace per point so each video gets its own legend entry.
for point in vis_dims_3d[:-1]:
    fig.add_trace(
        go.Scatter3d(
            x=np.array([point[0]]),
            y=np.array([point[1]]),
            z=np.array([point[2]]),
            mode="markers",
            marker=dict(size=7, colorscale="Viridis", opacity=1.0, symbol="circle"),
            name=point[3][0:25],
            text=point[3][0:25],
            textposition="top center",
        )
    )

# User query: resolved outside the loop so it is defined even when there are
# no search results to iterate over.
query_point = vis_dims_3d[-1]
x = np.array([query_point[0]])
y = np.array([query_point[1]])
z = np.array([query_point[2]])

fig.add_trace(
    go.Scatter3d(
        x=x,
        y=y,
        z=z,
        mode="markers",
        marker=dict(
            size=7, color="black", colorscale="Viridis", opacity=1.0, symbol="square"
        ),
        name=query_point[3],
        text=query_point[3],
        textposition="top center",
        showlegend=False,
    )
)

# Initial camera position; also the starting point for the rotation frames
x_eye = -1.25
y_eye = 1.5
z_eye = 0.5

fig.update_layout(
    width=900,
    height=600,
    font=dict(size=12, color="black", family="Arial, sans-serif"),
    title="Commercials Search Results using t-SNE",
    margin=dict(l=30, r=30, b=30, t=50, pad=10),
    # NOTE(review): the axis titles are permuted relative to the plotted data
    # (x data is on the axis titled "z", y on "x", z on "y") - confirm this
    # labeling is intentional.
    scene=dict(
        xaxis=dict(title="z"),
        yaxis=dict(title="x"),
        zaxis=dict(title="y"),
    ),
    scene_camera_eye=dict(x=x_eye, y=y_eye, z=z_eye),
    # "Play" button that runs the figure's animation frames
    updatemenus=[
        dict(
            type="buttons",
            showactive=True,
            y=0.9,
            x=0.9,
            xanchor="left",
            yanchor="bottom",
            pad=dict(t=10, r=10),
            buttons=[
                dict(
                    label="Play",
                    method="animate",
                    args=[
                        None,
                        dict(
                            frame=dict(duration=15, redraw=True),
                            transition=dict(duration=1),
                            fromcurrent=True,
                            mode="immediate",
                        ),
                    ],
                )
            ],
        )
    ],
    legend=dict(
        title="   Search Results",
    ),
)


def rotate_z(x, y, z, theta):
    """Rotate the point (x, y, z) about the z-axis by `theta` radians.

    The x/y pair is treated as the complex number x + iy and multiplied by
    e^(i*theta); z is passed through unchanged.

    Returns:
        tuple: The rotated (x, y, z).
    """
    rotated = np.exp(1j * theta) * (x + 1j * y)
    return np.real(rotated), np.imag(rotated), z


# Camera eye positions for the animation: the starting eye rotated about the
# z-axis by -t radians, for t from 0 up to (not including) 10 in steps of 0.01
camera_eyes = (
    rotate_z(x_eye, y_eye, z_eye, -t) for t in np.arange(0, 10, 0.01)
)

# One frame per camera position
frames = [
    go.Frame(layout=dict(scene_camera_eye=dict(x=xe, y=ye, z=ze)))
    for xe, ye, ze in camera_eyes
]
fig.frames = frames

fig.show()

### Extracting a List of Segments from Video Search Results

Reorder the search results as a list of segments as opposed to a list of videos.


In [None]:
# Reference: https://docs.opensearch.org/docs/latest/vector-search/specialized-operations/nested-search-knn/#retrieving-all-nested-hits


def semantic_search_all_inner_hits(os_index: str, embedding: list) -> dict:
    """Run a nested k-NN search returning up to 50 videos, each with up to 50 inner-hit segments and their vectors.

    Each inner hit carries the segment offsets, embedding option, and
    `segments.segment_embedding`. No `_source` excludes are set in this
    variant.

    Args:
        os_index (str): The Amazon OpenSearch index to search.
        embedding (list): The query vector matched against `segments.segment_embedding`.

    Returns:
        dict: The search response from OpenSearch, or an empty dict if the
            request raised (the error is printed).
    """
    # Upper bound shared by k, the inner-hit count, and the result size
    max_results = 50

    knn_params = {
        "vector": embedding,
        "k": max_results,
        "expand_nested_docs": True,
        "rescore": True,
    }

    # Per-segment fields returned with each inner hit, including the vector
    inner_hits = {
        "_source": False,
        "fields": [
            "segments.start_offset_sec",
            "segments.end_offset_sec",
            "segments.embedding_option",
            "segments.segment_embedding",
        ],
        "size": max_results,
    }

    request_body = {
        "query": {
            "nested": {
                "path": "segments",
                "query": {"knn": {"segments.segment_embedding": knn_params}},
                "inner_hits": inner_hits,
                "score_mode": "max",
            }
        },
        "size": max_results,
    }

    try:
        return os_client.search(body=request_body, index=os_index)
    except Exception as ex:
        print(f"Error querying index: {ex}")
        return {}


# Query the index with the embedding
search_results_8 = semantic_search_all_inner_hits(INDEX_NAME, text_embedding)

# The search helper returns {} when the request fails, so default to an empty
# hit list instead of raising KeyError on the missing "hits" key.
for hit in search_results_8.get("hits", {}).get("hits", []):
    print(f"Video ID: {hit['_source']['video_id']}")
    print(f"Title: {hit['_source']['gist']['title']}")
    print(f"Score: {hit['_score']}")
    print(f"Duration: {hit['_source']['system_metadata']['duration']:.2f} seconds")
    print("Matching Segment(s):")
    # Each inner hit is one nested segment returned for this video.
    for segment in hit["inner_hits"]["segments"]["hits"]["hits"]:
        print(f"  Segment: {segment['_nested']['offset']}")
        print(f"    Score: {segment['_score']}")
        print(
            f"    Embedding type: {segment['fields']['segments.embedding_option'][0]}"
        )
        print(f"    Start: {segment['fields']['segments.start_offset_sec'][0]} seconds")
        print(f"    End: {segment['fields']['segments.end_offset_sec'][0]} seconds")
    print("\r")

In [None]:
def extract_segments_sorted_by_score(results: dict) -> list:
    """Flatten the inner-hit segments of a search response into one list sorted by score.

    Args:
        results (dict): The search results from the OpenSearch query. Each hit
            must carry `inner_hits.segments` whose entries expose the
            `segments.*` fields read below. An empty dict, or a response
            without hits, is accepted and yields an empty list.

    Returns:
        list: One dict per segment with the keys `title`, `filename`,
            `offset`, `_score`, `embedding_option`, `start_offset_sec`,
            `end_offset_sec` (both offsets rounded to 2 decimals) and
            `embedding`, sorted by `_score` in descending order.
    """
    segments = []

    # A failed search is represented as {}, so treat a missing "hits" key as
    # "no results" instead of raising KeyError.
    for hit in results.get("hits", {}).get("hits", []):
        for segment in hit["inner_hits"]["segments"]["hits"]["hits"]:
            fields = segment["fields"]
            segments.append(
                {
                    # Video-level metadata, repeated on every segment
                    "title": hit["_source"]["gist"]["title"],
                    "filename": hit["_source"]["system_metadata"]["filename"],
                    "offset": segment["_nested"]["offset"],
                    "_score": segment["_score"],
                    # Scalar field values arrive wrapped in a list; take the first entry
                    "embedding_option": fields["segments.embedding_option"][0],
                    "start_offset_sec": round(
                        fields["segments.start_offset_sec"][0], 2
                    ),
                    "end_offset_sec": round(fields["segments.end_offset_sec"][0], 2),
                    # The vector field is kept exactly as returned
                    "embedding": fields["segments.segment_embedding"],
                }
            )

    # Stable sort: segments with equal scores keep their response order
    segments.sort(key=lambda item: item["_score"], reverse=True)
    return segments

#### Display Top Video Segment from Search Results


In [None]:
from IPython.display import HTML

segments = extract_segments_sorted_by_score(search_results_8)

# The list is sorted by score in descending order, so the first entry is the top segment
top_segment = segments[0]
video_file, segment_start, segment_end = (
    top_segment[key] for key in ("filename", "start_offset_sec", "end_offset_sec")
)

# The `#t=start,end` fragment on the source URL carries the segment's time range
segment_html = f"""
    <h2>Segment Details</h2>
    <p>Filename: {video_file}</p>
    <p>Segment start: {segment_start} seconds</p>
    <p>Segment end: {segment_end} seconds</p>
    <video width="600" height="auto" controls>
        <source src="videos//commercials//{video_file}#t={segment_start},{segment_end}" type="video/mp4">
    </video>    
"""

# Last expression in the cell, so the notebook renders it
HTML(segment_html)

#### Display All Segments in 2D Scatter Plot Using PCA

Principal Component Analysis (PCA) is a dimensionality reduction technique used to simplify complex datasets by transforming the original variables into a new set of uncorrelated variables called principal components.


In [None]:
from sklearn.decomposition import PCA
import plotly.express as px

# Pull the score-sorted segments and collect their embedding vectors
segments = extract_segments_sorted_by_score(search_results_8)
embeddings = [segment["embedding"] for segment in segments]

# Fit PCA on the embeddings and project them onto 2 components for plotting
pca = PCA(n_components=2)
vis_dims_2d = pca.fit_transform(embeddings)
print(f"Reduced dimensions shape (2d): {vis_dims_2d.shape}")

# Replace each segment's original embedding with its 2D projection
segments = [
    {**segment, "embedding": vis_dim}
    for segment, vis_dim in zip(segments, vis_dims_2d)
]

# One point per segment, colored by source filename
fig = px.scatter(
    segments,
    x=[segment["embedding"][0] for segment in segments],
    y=[segment["embedding"][1] for segment in segments],
    color=[segment["filename"] for segment in segments],
    hover_name=[segment["filename"] for segment in segments],
    hover_data=["embedding_option", "start_offset_sec", "end_offset_sec", "offset"],
    title="All Commercial Segments using PCA",
    labels={"x": "PCA Dimension 1", "y": "PCA Dimension 2"},
    width=900,
    height=600,
    opacity=0.75,
)

# Anchor the axes to each other at a 1:1 ratio so distances are not distorted
fig.update_xaxes(scaleanchor="y", scaleratio=1, dtick=0.25)
fig.update_yaxes(scaleanchor="x", scaleratio=1, dtick=0.25)

fig.layout.legend = {
    "title_text": "Embedding Type",
    "font": {"size": 10, "family": "Arial, sans-serif"},
}
fig.layout.title.font = {"size": 16, "family": "Arial, sans-serif"}
# The legend is configured above but hidden
fig.layout.showlegend = False

fig.show()

#### Display All Segments in 2D Scatter Plot Using t-SNE

t-SNE (t-distributed Stochastic Neighbor Embedding) is a popular technique for reducing high-dimensional data, such as embeddings, to 2 or 3 dimensions for visualization or further analysis.


In [None]:
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
import numpy as np
import plotly.express as px

# Extract segments and their embeddings from the search results
segments = extract_segments_sorted_by_score(search_results_8)
embeddings = np.array([segment["embedding"] for segment in segments])
n_samples = len(embeddings)

# Initialize t-SNE (n_components=2 for 2D reduction).
# t-SNE requires perplexity < n_samples. 30 is scikit-learn's default, so the
# clamp only changes behavior for small result sets that would otherwise raise.
tsne = TSNE(
    n_components=2,
    random_state=42,
    perplexity=min(30, max(1, n_samples - 1)),
)

# Fit and transform the embeddings
vis_dims_2d = tsne.fit_transform(embeddings)

# Find clusters using KMeans.
# KMeans raises when n_clusters exceeds the number of samples, so cap the
# requested 70 clusters at the number of segments actually returned.
kmeans = KMeans(n_clusters=min(70, n_samples), random_state=0).fit(vis_dims_2d)
labels = kmeans.labels_

# Create a new list of segments with the 2D embeddings
segments = [
    {**segment, "embedding": vis_dim}
    for segment, vis_dim in zip(segments, vis_dims_2d)
]

# One point per segment, colored by the first 10 characters of its filename
fig = px.scatter(
    segments,
    x=[segment["embedding"][0] for segment in segments],
    y=[segment["embedding"][1] for segment in segments],
    color=[segment["filename"][0:10] for segment in segments],
    hover_name=[segment["filename"] for segment in segments],
    hover_data=["embedding_option", "start_offset_sec", "end_offset_sec", "offset"],
    title="All Commercial Segments using t-SNE",
    labels={"x": "t-SNE Dimension 1", "y": "t-SNE Dimension 2"},
    width=900,
    height=600,
    opacity=0.75,
)

# Anchor the axes to each other at a 1:1 ratio so the cluster circles stay round
fig.layout.xaxis.scaleanchor = "y"
fig.layout.yaxis.scaleanchor = "x"
fig.layout.xaxis.scaleratio = 1
fig.layout.yaxis.scaleratio = 1
fig.layout.xaxis.dtick = 5
fig.layout.yaxis.dtick = 5
fig.layout.legend = dict(
    title_text="Commercial",
    font=dict(size=10, family="Arial, sans-serif"),
)
fig.layout.title.font = dict(
    size=16,
    family="Arial, sans-serif",
)
fig.layout.showlegend = False


# Add circles around clusters
for cluster in np.unique(labels):
    cluster_points = vis_dims_2d[labels == cluster]
    center = cluster_points.mean(axis=0)
    # Radius reaches the cluster member farthest from the centroid
    radius = np.linalg.norm(cluster_points - center, axis=1).max()
    # Define circle bounds (Plotly circles use bounding box)
    x0, y0 = center - radius
    x1, y1 = center + radius
    fig.add_shape(
        type="circle",
        xref="x",
        yref="y",
        x0=x0,
        y0=y0,
        x1=x1,
        y1=y1,
        opacity=0.2,
        line=dict(
            color="black",
            width=1,
            dash="dot",  # Optional: change to "solid", "dash", etc. for different styles
        ),
        fillcolor="rgba(0,0,0,0)",
    )

fig.show()