# Setup

In [None]:
# Notebook parameters.
# metadata_path: path to the metadata JSON; when left falsy, a later cell
# falls back to `final_metadata.json` in the parent of the working directory.
metadata_path = None
# Host and port of the Elasticsearch node the notebook connects to.
es_host = 'localhost'
es_port = 9200
# Name of the index that is (re)created and populated further down.
index_name = 'video_frames'

In [None]:
! pip install elasticsearch

In [None]:
import os
import json
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

In [None]:
# Resolve the default metadata location relative to the notebook's working
# directory: the file is expected one level up from where the notebook runs.
dir_path = os.getcwd()
parent_dir_path = os.path.dirname(dir_path)

if not metadata_path:
    # os.path.join uses the platform separator and does not produce a doubled
    # separator when the parent directory is the filesystem root.
    metadata_path = os.path.join(parent_dir_path, 'final_metadata.json')

# Load Data

In [None]:
def load_metadata(file_path):
    """Read a JSON file and return the decoded object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON is UTF-8 by specification; being explicit keeps non-ASCII text
    # intact on platforms whose default locale encoding is not UTF-8.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Parse the metadata file once; the indexing cell iterates over this object
# with `.items()`, treating each key as a frame id.
metadata = load_metadata(metadata_path)

# Connect to Elasticsearch

In [None]:
# Connect with an explicit node URL. elasticsearch-py 8.x rejects host dicts
# that lack a "scheme" key, while a URL string is accepted by both 7.x and 8.x.
# Plain HTTP matches what the scheme-less host dict meant on 7.x clients.
es = Elasticsearch(f"http://{es_host}:{es_port}")
# es.info() performs a request, so this line doubles as a connectivity check.
print(f"Connected to Elasticsearch: {es.info()}")

# Create Index

In [None]:
# Mapping for the per-frame "keyframe" sub-object: four integer fields
# followed by the timestamp and the two path fields.
_keyframe_properties = {
    **{
        name: {"type": "integer"}
        for name in ("shot_index", "frame_index", "shot_start", "shot_end")
    },
    "timestamp": {"type": "float"},
    "video_path": {"type": "keyword"},
    "frame_path": {"type": "keyword"},
}

# Mapping for the "detection" sub-object; both members are free-form objects.
_detection_properties = {
    "objects": {"type": "object"},
    "counts": {"type": "object"},
}

# Full index definition sent to Elasticsearch.
index_body = {
    "mappings": {
        "properties": {
            "frame_id": {"type": "keyword"},
            "keyframe": {"properties": _keyframe_properties},
            "detection": {"properties": _detection_properties},
            "ocr": {"type": "text"},
            "tags": {"type": "keyword"},
        }
    }
}

# Start from a clean slate: an index left over from an earlier run is dropped
# before the new one is created.
if es.indices.exists(index=index_name):
    es.indices.delete(index=index_name)

es.indices.create(index=index_name, body=index_body)
print(f"Index '{index_name}' created")

# Index Data

In [None]:
def gen_docs(source=None, index=None):
    """Yield one bulk-index action per frame.

    Args:
        source: Mapping of frame id to that frame's data dict. When omitted,
            the notebook-level ``metadata`` is used.
        index: Name of the target index. When omitted, the notebook-level
            ``index_name`` is used.

    Yields:
        Action dicts with ``_index``, ``_id`` and ``_source`` keys. Missing
        sections fall back to empty defaults: ``{}`` for ``keyframe`` and
        ``detection``, ``""`` for ``ocr``, ``[]`` for ``tags``.
    """
    # `is None` (not truthiness) so an explicitly passed empty mapping is
    # honoured instead of silently falling back to the global.
    frames = metadata if source is None else source
    target_index = index_name if index is None else index

    for frame_id, frame_data in frames.items():
        yield {
            "_index": target_index,
            # The frame id doubles as the document id, so re-indexing the same
            # frame overwrites the document rather than duplicating it.
            "_id": frame_id,
            "_source": {
                "frame_id": frame_id,
                "keyframe": frame_data.get("keyframe", {}),
                "detection": frame_data.get("detection", {}),
                "ocr": frame_data.get("ocr", ""),
                "tags": frame_data.get("tags", []),
            },
        }

# Collect per-document failures instead of aborting on the first one: with the
# helper's default raise_on_error=True a rejected document raises before the
# summary is printed, so the "Failed" figure could never be anything but empty.
# Transport-level problems still raise.
success, failed = bulk(es, gen_docs(), raise_on_error=False)
print(f"Indexed {success} documents. Failed: {len(failed)}")
if failed:
    # Show a small sample so the cause is visible without flooding the output.
    print(f"First errors: {failed[:5]}")

# Test Search

In [None]:
def search(query, field):
    """Run a ``match`` query against a single field of the frame index.

    Args:
        query: Value to match.
        field: Name of the document field to match against.

    Returns:
        The ``hits.hits`` list from the Elasticsearch response. No ``size`` is
        sent, so the server's default page size applies.
    """
    match_clause = {field: query}
    response = es.search(
        index=index_name,
        body={"query": {"match": match_clause}},
    )
    return response['hits']['hits']

# Smoke test: full-text match on the OCR field.
ocr_results = search(query="example ocr text", field="ocr")
print(f"OCR search results: {len(ocr_results)}")

# Smoke test: match on a nested detection field.
object_results = search(query="person", field="detection.objects.person")
print(f"Object detection search results: {len(object_results)}")

# Smoke test: match on the tags field.
tag_results = search(query="example_tag", field="tags")
print(f"Tag search results: {len(tag_results)}")