# LanceDB Debugging and Exploration

In [1]:
import sys
from pathlib import Path

# Resolve the project root so the project-local packages imported in the next cell can be found.
# Assumes the notebook sits exactly one directory below the root — adjust if it is nested differently.
project_root = Path.cwd().parent
project_root_str = str(project_root)

# Guard the append: without it, every re-run of this cell adds another duplicate sys.path entry.
if project_root_str not in sys.path:
    sys.path.append(project_root_str)

In [2]:
import lancedb
from lancedb.table import Table
from lancedb.pydantic import pydantic_to_schema

from models.data_models import VideoData, VideoSegmentData
from models.lancedb_pydantic_models import VideoModel, VideoSegmentModel
from mm_vector_stores.multimodal_lancedb import MultiModalLanceDB
from mm_emeddings.bridgetower_embeddings import BridgeTowerEmbeddings
from utils.logger import logger

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Instantiate the BridgeTower embedding wrapper with its default arguments.
# It is handed to the vector store in the next cell and used later to embed the test query.
embedder = BridgeTowerEmbeddings()

In [4]:
# Filesystem location of the LanceDB database this notebook inspects.
LANCEDB_URI = "/home/zalasyu/Documents/projects/multimodal_chatbot/data/multimodal_lancedb"

# Construct the vector-store wrapper around that database, using the embedder created earlier.
db = MultiModalLanceDB(embedding=embedder, uri=LANCEDB_URI)

In [5]:
# Sanity check: look up the "Videos" table and let the cell display its repr.
# NOTE(review): the following cell performs the same lookup and keeps the handle,
# so this cell only confirms the table can be opened.
db.get_table("Videos")

LanceTable(connection=LanceDBConnection(/home/zalasyu/Documents/projects/multimodal_chatbot/data/multimodal_lancedb), name="Videos")

In [6]:
# Fetch handles for both tables in a single statement; the lookups run in the listed order.
videos_table, video_segments_table = (
    db.get_table(table_name) for table_name in ("Videos", "VideoSegments")
)

In [7]:
# Preview the first rows of the Videos table as a DataFrame.
# head() is applied to the LanceDB table first (it returns a pyarrow.Table), so only the
# previewed rows are converted to pandas rather than materializing the whole table.
videos_table.head().to_pandas()

Unnamed: 0,id,video_url,title,description,summary_abstractive,summary_extractive,language,video_path,audio_path,transcript_path_vtt,transcript_path_text,transcribed,description_path
0,LAzKGkTIKpg,https://www.youtube.com/watch?v=LAzKGkTIKpg,LITERATURE - Voltaire,"Voltaire was one of the wisest, funniest and c...",,François-Marie Arouet was born in 1694 . he de...,en,/home/zalasyu/Documents/projects/multimodal_ch...,/home/zalasyu/Documents/projects/multimodal_ch...,data/raw/transcripts/YouTube/LITERATURE_-_Volt...,data/raw/transcripts/YouTube/LITERATURE_-_Volt...,True,


In [8]:
# Preview the VideoSegments table. Per the recorded output, head() returns a pyarrow.Table,
# so the cell shows the schema followed by the first rows (ids 0-4).
video_segments_table.head()

pyarrow.Table
id: int64 not null
parent_video_id: string not null
parent_video_path: string not null
parent_audio_path: string not null
parent_vtt_path: string not null
video_segment_path: string not null
video_segment_transcript_path: string not null
frame_path: string not null
transcript: string not null
enriched_transcript: string not null
duration_ms: double not null
start_ms: double not null
mid_ms: double not null
end_ms: double not null
embeddings: fixed_size_list<item: float>[1536] not null
  child 0, item: float
----
id: [[0,1,2,3,4]]
parent_video_id: [["LAzKGkTIKpg","LAzKGkTIKpg","LAzKGkTIKpg","LAzKGkTIKpg","LAzKGkTIKpg"]]
parent_video_path: [["/home/zalasyu/Documents/projects/multimodal_chatbot/data/raw/videos/YouTube/LITERATURE_-_Voltaire_LAzKGkTIKpg.mp4","/home/zalasyu/Documents/projects/multimodal_chatbot/data/raw/videos/YouTube/LITERATURE_-_Voltaire_LAzKGkTIKpg.mp4","/home/zalasyu/Documents/projects/multimodal_chatbot/data/raw/videos/YouTube/LITERATURE_-_Voltaire_LAzKGkT

In [9]:
# Materialize the VideoSegments table as a pandas DataFrame for ad-hoc inspection.
# NOTE(review): this converts every row, including the 1536-wide embeddings column;
# presumably fine for a small debug table — confirm the table size before reusing on real data.
df = video_segments_table.to_pandas()

In [10]:
# Show the stored embedding vector of the row labelled 0 (single label-based lookup).
df.loc[0, "embeddings"]

array([ 0.00195367,  0.03847323, -0.00078782, ..., -0.02516887,
        0.03434976, -0.02883583], dtype=float32)

## Query Testing

In [11]:
# Natural-language test question about the indexed video.
query = "Where did Voltaire move to?"
# Embed the question with the same embedder that was given to the vector store.
# NOTE(review): query_embedding is not used by any later cell visible here (the retriever
# is invoked with the raw query string) — possibly a leftover, or used further down.
query_embedding = embedder.embed_query(query)

In [12]:
# Build a similarity-search retriever on top of the vector store.
# k=1 presumably limits the result to the single closest match.
retriever_search_kwargs = {"k": 1}
retriever = db.as_retriever(
    search_kwargs=retriever_search_kwargs,
    search_type="similarity",
)

In [13]:
# Run the retriever on the raw query string.
# NOTE(review): the recorded output for this cell is
#   KeyError: 'Field "metadata" does not exist in schema'
# The VideoSegments schema printed earlier has no "metadata" column, so the retriever
# presumably expects one — confirm against the MultiModalLanceDB implementation and the
# table schema before relying on this cell.
results = retriever.invoke(query)

KeyError: 'Field "metadata" does not exist in schema'