In [None]:
import os
import pickle
import sys

import faiss
import numpy as np

# The project root has to be on sys.path *before* the project-local import
# below. With the insert placed after the import, a fresh kernel started
# outside the project root fails on `clip_embedding`, and the cell only works
# on a second run, once the path is already present.
sys.path.insert(0, "/Users/abali/github projects/semantic-video-retrieval")

from clip_embedding import embed_frame, embed_text

In [None]:


from extract_chunks_updated import extract_chunks

# Folder that holds the input videos, and the output folder handed to extract_chunks.
video_folder = "/Users/abali/github projects/semantic-video-retrieval/data/videos"
frames_output_folder = "/Users/abali/github projects/semantic-video-retrieval/data/frames"
# Index/metadata locations are not needed for extraction, so they stay disabled here.
# index_path = "/Users/abali/github projects/semantic-video-retrieval/embeddings/faiss_index"
# metadata_path = "/Users/abali/github projects/semantic-video-retrieval/embeddings/metadata"

# Run extract_chunks on every .mp4 file and collect everything it returns.
all_metadata = []
# sorted() makes the processing order reproducible (os.listdir order is arbitrary),
# matching how the chunk folders are walked when embedding.
for fname in sorted(os.listdir(video_folder)):
    # Compare the extension case-insensitively so files like "CLIP.MP4" are not skipped.
    if fname.lower().endswith(".mp4"):
        video_path = os.path.join(video_folder, fname)
        print(f"📦 Processing {fname}...")
        chunks = extract_chunks(video_path, frames_output_folder, chunk_duration=15, fps=1)
        all_metadata.extend(chunks)



In [None]:


def get_chunk_embeddings(chunk_folder_root):
    """
    Build one averaged embedding per chunk directory.

    Every sub-directory of ``chunk_folder_root`` is treated as a chunk. Each
    image file inside it (.jpg/.jpeg/.png, matched case-insensitively) is passed
    to ``embed_frame``; results that are ``None`` or contain NaN are discarded.
    The remaining embeddings are averaged along axis 0 and cast to float32.

    Returns a list of ``{"chunk_id": <directory name>, "embedding": <mean>}``
    dicts in sorted directory order. Chunks without a usable frame are reported
    on stdout and left out of the result.
    """
    image_suffixes = (".jpg", ".jpeg", ".png")
    results = []

    for chunk_id in sorted(os.listdir(chunk_folder_root)):
        chunk_dir = os.path.join(chunk_folder_root, chunk_id)
        if os.path.isdir(chunk_dir):
            print(f"🔍 Processing {chunk_id}...")

            # Visit frames in sorted file-name order, images only.
            image_names = [
                name
                for name in sorted(os.listdir(chunk_dir))
                if name.lower().endswith(image_suffixes)
            ]

            vectors = []
            for image_name in image_names:
                vector = embed_frame(os.path.join(chunk_dir, image_name))
                # Skip frames that could not be embedded or produced NaN values.
                if vector is None or np.isnan(vector).any():
                    continue
                vectors.append(vector)

            if not vectors:
                print(f"⚠️ No valid frames found for {chunk_id}")
            else:
                averaged = np.mean(vectors, axis=0).astype("float32")
                results.append({"chunk_id": chunk_id, "embedding": averaged})
                print(f"✅ Embedded: {chunk_id} ({len(vectors)} frames)")

    return results


In [None]:
# Embed every chunk directory found under data/chunks.
# NOTE(review): the extraction cell was given data/frames as its output folder,
# while this reads data/chunks — confirm extract_chunks actually writes here.
chunk_embeddings = get_chunk_embeddings(
    '/Users/abali/github projects/semantic-video-retrieval/data/chunks'
)

In [None]:
# Cell output: the repr of the whole list of {chunk_id, embedding} dicts, for a quick look.
chunk_embeddings

In [None]:
import os
import pickle
import numpy as np
import faiss

def store_embeddings_to_faiss(embedding_data, index_path, metadata_path):
    """
    Persist chunk embeddings as a FAISS index plus a pickled metadata list.

    Args:
        embedding_data: list of dicts, each with a "chunk_id" and an "embedding"
            entry. Embeddings are stacked with np.vstack, so they must all have
            the same width.
        index_path: file the FAISS index is written to.
        metadata_path: file the pickled list of {"chunk_id": ...} dicts is
            written to. Entry i of that list belongs to vector i of the index.

    Returns:
        None. Nothing is written when embedding_data is empty.
    """
    if not embedding_data:
        print("⚠️ No embeddings to store.")
        return

    # os.path.dirname() is "" for a bare file name and os.makedirs("") raises
    # FileNotFoundError, so only create directories that are actually named.
    for target_path in (index_path, metadata_path):
        parent_dir = os.path.dirname(target_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    embeddings = [item["embedding"] for item in embedding_data]
    metadata = [{"chunk_id": item["chunk_id"]} for item in embedding_data]

    # One row per chunk; the index is built and stored as float32.
    arr = np.vstack(embeddings).astype("float32")
    print(f"📐 FAISS index shape: {arr.shape}")

    index = faiss.IndexFlatL2(arr.shape[1])
    index.add(arr)

    faiss.write_index(index, index_path)
    with open(metadata_path, "wb") as f:
        pickle.dump(metadata, f)

    print(f"✅ Stored {len(arr)} vectors in FAISS and metadata")


In [None]:
# Write the FAISS index and the matching chunk metadata to the embeddings folder.
store_embeddings_to_faiss(
    chunk_embeddings,
    '/Users/abali/github projects/semantic-video-retrieval/embeddings/faiss_index/video_chunks.index',
    '/Users/abali/github projects/semantic-video-retrieval/embeddings/metadata/chunk_metadata.pkl',
)

In [None]:
def search_top_chunks(query, index_path, metadata_path, k=5):
    """
    Return the metadata of the chunks closest to a text query.

    Args:
        query: text that is embedded with embed_text.
        index_path: FAISS index file to load.
        metadata_path: pickled metadata list; entry i describes vector i.
        k: number of neighbours requested from the index.

    Returns:
        List of metadata entries in the order the index returned them. It can
        hold fewer than k entries when the index has fewer than k vectors.
    """
    index = faiss.read_index(index_path)

    # NOTE: pickle.load can execute arbitrary code from the file; only point
    # this at metadata files produced by this project.
    with open(metadata_path, "rb") as f:
        metadata = pickle.load(f)

    # The index is searched with a single float32 row vector.
    query_embedding = embed_text(query).astype("float32").reshape(1, -1)

    _, neighbor_ids = index.search(query_embedding, k)

    # FAISS fills unused result slots with -1. A bare `i < len(metadata)` check
    # lets -1 through, and metadata[-1] would silently return the last chunk,
    # so the lower bound has to be checked as well.
    return [metadata[i] for i in neighbor_ids[0] if 0 <= i < len(metadata)]

In [None]:
# Look up the single closest chunk for the text query.
results = search_top_chunks(
    query="moving cars",
    index_path="/Users/abali/github projects/semantic-video-retrieval/embeddings/faiss_index/video_chunks.index",
    metadata_path="/Users/abali/github projects/semantic-video-retrieval/embeddings/metadata/chunk_metadata.pkl",
    k=1,
)

# List the id of every chunk that came back.
for r in results:
    print(f"🎬 Chunk: {r['chunk_id']}")


In [None]:
import os
import subprocess

# Open the source video of every search hit in the system's default player.
for r in results:
    chunk_id = r['chunk_id']

    # Going by the example id "sample1_chunk0", a chunk id is the video name
    # followed by "_chunk<N>". Strip only that trailing part so video names that
    # contain underscores (e.g. "my_clip_chunk0" -> "my_clip.mp4") are kept whole.
    # Ids without the suffix keep the old rule: the text before the first underscore.
    if "_chunk" in chunk_id:
        video_stem = chunk_id.rsplit("_chunk", 1)[0]
    else:
        video_stem = chunk_id.split('_')[0]
    video_name = video_stem + '.mp4'

    # Construct the full video path
    video_path = os.path.join("/Users/abali/github projects/semantic-video-retrieval/data/videos", video_name)

    print(f"🎬 Chunk: {chunk_id}")

    # Ensure the video exists
    if os.path.exists(video_path):
        print(f"▶️ Opening video {video_name}...")

        # `open` is the macOS launcher: it hands the whole file to the default
        # player and returns right away. It is not available on other platforms.
        subprocess.Popen(["open", video_path])
    else:
        print(f"⚠️ Video not found: {video_path}")
