In [None]:
#pip install opensearch-py sentence-transformers tqdm

In [None]:
from opensearchpy import OpenSearch
from sentence_transformers import SentenceTransformer

# ----------------------------
# CONFIG
# ----------------------------
# Endpoint handed to the OpenSearch client constructor below.
OPENSEARCH_URL = "http://localhost:9200"
# Index that vector_search() queries.
INDEX_NAME = "movies_vector"
# Name of the field targeted by the knn query in vector_search().
VECTOR_FIELD = "plot_vector"
# Default for vector_search()'s `k`, which is used both as the knn "k"
# and as the response "size".
TOP_K = 50

# ----------------------------
# Init OpenSearch client
# ----------------------------
# Module-level client shared by every search call in this cell.
client = OpenSearch(OPENSEARCH_URL)

# ----------------------------
# Load embedding model
# ----------------------------
# Used to embed the query text at search time.
# NOTE(review): presumably the vectors stored in the index were produced
# with this same model -- confirm against the indexing code.
model = SentenceTransformer("all-MiniLM-L6-v2")

# ----------------------------
# Function: vector search
# ----------------------------
def vector_search(query_text, k=TOP_K):
    """Run a k-NN search on ``VECTOR_FIELD`` for the given text.

    The text is embedded with the module-level ``model`` and the resulting
    vector is sent as a ``knn`` query to ``INDEX_NAME`` through the
    module-level ``client``.

    Args:
        query_text: Text to embed and search for.
        k: Number of nearest neighbours to request; the same value is used
            as the response ``size``. Defaults to ``TOP_K``.

    Returns:
        The value stored under ``response["hits"]["hits"]``.
    """
    # 1. Generate query vector (converted to a plain list for the request body)
    query_vector = model.encode(query_text).tolist()

    # 2. Build OpenSearch query
    body = {
        "size": k,
        "query": {
            "knn": {
                VECTOR_FIELD: {
                    "vector": query_vector,
                    "k": k
                }
            }
        }
    }

    # 3. Execute search
    response = client.search(
        index=INDEX_NAME,
        body=body
    )

    return response["hits"]["hits"]

# ----------------------------
# Example usage
# ----------------------------
if __name__ == "__main__":
    # Interactive demo: prompt for a query and print each hit.
    query = input("Enter search query: ")

    results = vector_search(query)

    print("\nTop results:\n")
    for hit in results:
        source = hit["_source"]
        # A hit may have no "plot" value; fall back to an empty string so
        # the preview slice below cannot fail on None.
        plot = source.get("plot") or ""
        print(f"Score: {hit['_score']:.4f}")
        print(f"Title: {source.get('title')}")
        print(f"Plot: {plot[:120]}...")

        print("-" * 50)


In [None]:
from opensearchpy import OpenSearch
from sentence_transformers import SentenceTransformer

# ----------------------------
# CONFIG
# ----------------------------
# Endpoint handed to the OpenSearch client constructor below.
OPENSEARCH_URL = "http://localhost:9200"
# Index that hybrid_search() queries.
INDEX_NAME = "movies_vector"

# Vector fields targeted by the two knn clauses in hybrid_search().
TITLE_VECTOR_FIELD = "title_vector"
PLOT_VECTOR_FIELD = "plot_vector"

# Default for hybrid_search()'s `size`, which is used both as the response
# "size" and as each knn clause's "k".
TOP_K = 10

# ----------------------------
# Init clients
# ----------------------------
# Module-level client and embedding model shared by hybrid_search().
# NOTE(review): presumably the indexed vectors were produced with this same
# model -- confirm against the indexing code.
client = OpenSearch(OPENSEARCH_URL)
model = SentenceTransformer("all-MiniLM-L6-v2")

# ----------------------------
# Hybrid search function
# ----------------------------
def hybrid_search(
    query_text,
    genre=None,
    year_range=None,
    rating_range=None,
    duration_range=None,
    likes_range=None,
    size=TOP_K
):
    """Search ``INDEX_NAME`` with a text query plus two k-NN clauses.

    A ``bool`` query is built whose ``should`` list holds a ``multi_match``
    over title/plot/actors/directors and one ``knn`` clause each for
    ``TITLE_VECTOR_FIELD`` (boost 3.0) and ``PLOT_VECTOR_FIELD``
    (boost 1.5); at least one of them must match. Any supplied filters go
    into the ``filter`` list.

    Args:
        query_text: Text used for the ``multi_match`` clause and embedded
            with the module-level ``model`` for both knn clauses.
        genre: If truthy, adds a ``term`` filter on ``genres.keyword``.
        year_range: If truthy, passed as the ``range`` body for ``year``
            (e.g. ``{"gte": 2000}``).
        rating_range: If truthy, ``range`` body for ``rating``.
        duration_range: If truthy, ``range`` body for ``duration``.
        likes_range: If truthy, ``range`` body for the ``like`` field.
        size: Response ``size`` and the ``k`` of each knn clause.
            Defaults to ``TOP_K``.

    Returns:
        The value stored under ``response["hits"]["hits"]``.
    """
    # One embedding is shared by the title and plot knn clauses.
    embedding = model.encode(query_text).tolist()

    # Optional filters: genre first, then the range filters in a fixed order.
    filter_clauses = []
    if genre:
        filter_clauses.append({"term": {"genres.keyword": genre}})

    # NOTE(review): the likes filter targets a field named "like"
    # (singular) -- confirm against the index mapping.
    range_inputs = (
        ("year", year_range),
        ("rating", rating_range),
        ("duration", duration_range),
        ("like", likes_range),
    )
    filter_clauses.extend(
        {"range": {field: bounds}}
        for field, bounds in range_inputs
        if bounds
    )

    # Full-text clause with per-field weights.
    text_clause = {
        "multi_match": {
            "query": query_text,
            "fields": ["title^3", "plot^1.5", "actors", "directors"],
            "type": "best_fields",
        }
    }

    # Vector clauses; the title match is weighted higher than the plot match.
    title_clause = {
        "knn": {
            TITLE_VECTOR_FIELD: {
                "vector": embedding,
                "k": size,
                "boost": 3.0,
            }
        }
    }
    plot_clause = {
        "knn": {
            PLOT_VECTOR_FIELD: {
                "vector": embedding,
                "k": size,
                "boost": 1.5,
            }
        }
    }

    search_body = {
        "size": size,
        "query": {
            "bool": {
                "filter": filter_clauses,
                "should": [text_clause, title_clause, plot_clause],
                "minimum_should_match": 1,
            }
        },
    }

    result = client.search(index=INDEX_NAME, body=search_body)
    return result["hits"]["hits"]

# ----------------------------
# Example usage
# ----------------------------
if __name__ == "__main__":
    # Demo query. hybrid_search() also accepts optional filters, e.g.:
    #   genre="Sci-Fi", year_range={"gte": 2000}, rating_range={"gte": 7.0},
    #   duration_range={"lte": 180}, likes_range={"gte": 1000}
    results = hybrid_search(
        query_text=" tell me a star wars movie rated 7 plus",
    )

    for hit in results:
        src = hit["_source"]
        # A hit may have no "plot" value; fall back to an empty string so
        # the preview slice below cannot fail on None.
        plot = src.get("plot") or ""
        print("=" * 60)
        print(f"Score   : {hit['_score']:.3f}")
        print(f"Title   : {src.get('title')}")
        print(f"Year    : {src.get('year')}")
        print(f"Rating  : {src.get('rating')}")
        print(f"Genres  : {src.get('genres')}")
        print(f"Plot    : {plot[:150]}...")


In [None]:
from opensearchpy import OpenSearch
from sentence_transformers import SentenceTransformer

# ----------------------------
# CONFIG
# ----------------------------
# These values repeat the configuration of the preceding cell; running this
# cell rebinds the same module-level names.
# Endpoint handed to the OpenSearch client constructor below.
OPENSEARCH_URL = "http://localhost:9200"
# Index that hybrid_search() queries.
INDEX_NAME = "movies_vector"

# Vector fields targeted by the two knn clauses in hybrid_search().
TITLE_VECTOR_FIELD = "title_vector"
PLOT_VECTOR_FIELD = "plot_vector"

# Default for hybrid_search()'s `size`, which is used both as the response
# "size" and as each knn clause's "k".
TOP_K = 10

# ----------------------------
# Init clients
# ----------------------------
# Module-level client and embedding model shared by hybrid_search().
client = OpenSearch(OPENSEARCH_URL)
model = SentenceTransformer("all-MiniLM-L6-v2")

# ----------------------------
# Hybrid search function
# ----------------------------
def hybrid_search(
    query_text,
    genre=None,
    year_range=None,
    rating_range=None,
    duration_range=None,
    likes_range=None,
    size=TOP_K
):
    """Search ``INDEX_NAME`` with a text query plus two k-NN clauses.

    A ``bool`` query is built whose ``should`` list holds a ``multi_match``
    over title/plot/actors/directors and one ``knn`` clause each for
    ``TITLE_VECTOR_FIELD`` (boost 3.0) and ``PLOT_VECTOR_FIELD``
    (boost 1.5); at least one of them must match. Any supplied filters go
    into the ``filter`` list.

    Args:
        query_text: Text used for the ``multi_match`` clause and embedded
            with the module-level ``model`` for both knn clauses.
        genre: If truthy, adds a ``term`` filter on ``genres.keyword``.
        year_range: If truthy, passed as the ``range`` body for ``year``
            (e.g. ``{"gte": 2000}``).
        rating_range: If truthy, ``range`` body for ``rating``.
        duration_range: If truthy, ``range`` body for ``duration``.
        likes_range: If truthy, ``range`` body for the ``like`` field.
        size: Response ``size`` and the ``k`` of each knn clause.
            Defaults to ``TOP_K``.

    Returns:
        The value stored under ``response["hits"]["hits"]``.
    """
    # ---- Generate query vector ----
    # The same embedding is compared against both the title and plot vectors.
    query_vector = model.encode(query_text).tolist()

    # ---- Filters ----
    filters = []

    if genre:
        filters.append({"term": {"genres.keyword": genre}})

    if year_range:
        filters.append({"range": {"year": year_range}})

    if rating_range:
        filters.append({"range": {"rating": rating_range}})

    if duration_range:
        filters.append({"range": {"duration": duration_range}})

    if likes_range:
        # NOTE(review): the field is named "like" (singular) -- confirm
        # against the index mapping.
        filters.append({"range": {"like": likes_range}})

    # ---- Hybrid query ----
    body = {
        "size": size,
        "query": {
            "bool": {
                "filter": filters,
                "should": [
                    # -------- Full-text match ----------
                    {
                        "multi_match": {
                            "query": query_text,
                            "fields": [
                                "title^3",
                                "plot^1.5",
                                "actors",
                                "directors"
                            ],
                            "type": "best_fields"
                        }
                    },

                    # -------- Title vector ----------
                    {
                        "knn": {
                            TITLE_VECTOR_FIELD: {
                                "vector": query_vector,
                                "k": size,
                                "boost": 3.0
                            }
                        }
                    },

                    # -------- Plot vector ----------
                    {
                        "knn": {
                            PLOT_VECTOR_FIELD: {
                                "vector": query_vector,
                                "k": size,
                                "boost": 1.5
                            }
                        }
                    }
                ],
                "minimum_should_match": 1
            }
        }
    }

    response = client.search(
        index=INDEX_NAME,
        body=body
    )

    return response["hits"]["hits"]

# ----------------------------
# Example usage
# ----------------------------
if __name__ == "__main__":
    # Demo query. hybrid_search() also accepts optional filters, e.g.:
    #   genre="Sci-Fi", year_range={"gte": 2000}, rating_range={"gte": 7.0},
    #   duration_range={"lte": 180}, likes_range={"gte": 1000}
    results = hybrid_search(query_text="suspense")

    for hit in results:
        src = hit["_source"]
        # Separator first, then one field per line (full plot, no truncation).
        print("=" * 60)
        print(
            f"Score   : {hit['_score']:.3f}",
            f"Title   : {src.get('title')}",
            f"Year    : {src.get('year')}",
            f"Rating  : {src.get('rating')}",
            f"Genres  : {src.get('genres')}",
            f"Plot    : {src.get('plot')}",
            sep="\n",
        )
