In [None]:
#pip install opensearch-py sentence-transformers openai


In [None]:
#pip install --upgrade openai


In [79]:
from opensearchpy import OpenSearch
from sentence_transformers import SentenceTransformer

# ----------------------------
# Settings
# ----------------------------
# Endpoint of the OpenSearch node and the index that search requests target.
OPENSEARCH_URL: str = "http://localhost:9200"
INDEX_NAME: str = "movies_vector"

# Field names intended for kNN clauses on the title and plot vectors.
TITLE_VECTOR_FIELD: str = "title_vector"
PLOT_VECTOR_FIELD: str = "plot_vector"

# Number of hits requested per search.
TOP_K: int = 5


# ----------------------------
# Shared clients
# ----------------------------
# Search client pointed at OPENSEARCH_URL.
client = OpenSearch(hosts=OPENSEARCH_URL)
# Sentence-embedding model, loaded once and shared by every query.
model = SentenceTransformer("all-MiniLM-L6-v2")

# ----------------------------
# Retrieval
# ----------------------------
def retrieve_movies(query_text, filters=None, top_k=None, use_knn=False):
    """Search INDEX_NAME for movies matching ``query_text``.

    The request is a ``bool`` query whose ``should`` list always contains a
    ``multi_match`` over genres (boosted x3), title, plot, actors and
    directors, with ``minimum_should_match`` set to 1.

    Args:
        query_text: Free-text query; used for the lexical match and, when
            ``use_knn`` is true, as the text that gets embedded.
        filters: Clause(s) placed verbatim in ``bool.filter``. ``None`` or any
            other falsy value becomes an empty list.
        top_k: Number of hits to request. ``None`` (default) uses ``TOP_K``.
        use_knn: When true, embed ``query_text`` with ``model`` and add one
            ``knn`` clause per vector field (title boost 1.0, plot boost 4.5).
            Off by default, which matches the previous behaviour where these
            clauses were commented out.
            NOTE(review): assumes both fields are mapped as knn vectors of
            the model's dimension -- confirm against the index mapping.

    Returns:
        The list found at ``["hits"]["hits"]`` of the search response.
    """
    size = TOP_K if top_k is None else top_k
    filter_clauses = filters or []

    should_clauses = [
        {
            "multi_match": {
                "query": query_text,
                "fields": [
                    "genres^3",
                    "title^1",
                    "plot^1",
                    "actors",
                    "directors",
                ],
            }
        }
    ]

    if use_knn:
        # Embed only when the vector is actually sent; previously the model
        # ran on every call even though nothing consumed its output.
        query_vector = model.encode(query_text).tolist()
        for field, boost in ((TITLE_VECTOR_FIELD, 1.0), (PLOT_VECTOR_FIELD, 4.5)):
            should_clauses.append(
                {
                    "knn": {
                        field: {
                            "vector": query_vector,
                            "k": size,
                            "boost": boost,
                        }
                    }
                }
            )

    body = {
        "size": size,
        "query": {
            "bool": {
                "filter": filter_clauses,
                "should": should_clauses,
                "minimum_should_match": 1,
            }
        },
    }

    res = client.search(index=INDEX_NAME, body=body)
    return res["hits"]["hits"]

# ----------------------------
# Context builder
# ----------------------------
def build_context(hits):
    """Render search hits as one text blob for the LLM prompt.

    Each hit's ``_source`` is formatted into a Title/Year/Genres/Rating/Plot
    block (missing keys render as ``None``). Every block is echoed to stdout
    as it is built, and the whitespace-stripped blocks are joined with a
    ``---`` separator line.

    Args:
        hits: Iterable of hit dicts, each carrying a ``_source`` mapping.

    Returns:
        The joined context string; empty when ``hits`` is empty.
    """
    template = (
        "\n"
        "Title: {title}\n"
        "Year: {year}\n"
        "Genres: {genres}\n"
        "Rating: {rating}\n"
        "Plot: {plot}\n"
    )
    rendered = []

    for hit in hits:
        doc = hit["_source"]
        text = template.format(
            title=doc.get('title'),
            year=doc.get('year'),
            genres=doc.get('genres'),
            rating=doc.get('rating'),
            plot=doc.get('plot'),
        )
        # Echo the raw (unstripped) block; only the stripped form is kept.
        print(text)
        rendered.append(text.strip())

    separator = "\n\n---\n\n"
    return separator.join(rendered)



In [80]:
# ----------------------------
# LLM Answering
# ----------------------------
import os
from openai import OpenAI

# Read the key from the environment so it is never stored in the notebook.
# The earlier `openai.api_key = ...` assignment raised NameError on a fresh
# kernel: only the `OpenAI` class is imported, so the module name `openai`
# is never bound.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Client used by answer_with_rag_openai.
client_llm = OpenAI(api_key=OPENAI_API_KEY)

# <- toggle here. NOTE(review): no active code reads this flag; the
# dispatcher that would use it is commented out just below.
USE_OLLAMA = True

# def answer_with_rag_llm(question, context):
#     if USE_OLLAMA:
#         return answer_with_rag_ollama(question, context)
#     else:
#         return answer_with_rag_openai(question, context)


def answer_with_rag_openai(question, context):
    """Answer ``question`` with the OpenAI chat model, grounded in ``context``.

    Sends a two-message conversation through ``client_llm``: a system message
    restricting the model to the supplied information, and a user message made
    of the context followed by the question.

    Args:
        question: The user's question text.
        context: Retrieved context text placed ahead of the question.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    system_message = {
        "role": "system",
        "content": "You are a movie expert assistant.Answer the question ONLY using the information provided below.If the answer is not present, say I don't have enough information.",
    }
    user_message = {
        "role": "user",
        "content": context + "\n\nQuestion:\n" + question,
    }

    completion = client_llm.chat.completions.create(
        model="gpt-4o-mini",
        messages=[system_message, user_message],
        temperature=0.3,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content
        


In [83]:
# ----------------------------
# Full RAG pipeline
# ----------------------------
def rag_query(question, filters=None):
    """Run retrieval, context building and LLM answering for one question.

    Args:
        question: The user's question; used both as the search query and as
            the question put to the LLM.
        filters: Optional filter clause(s) forwarded unchanged to
            ``retrieve_movies``. Hard constraints (for example a minimum
            rating) have to be applied here, because the text query alone
            does not restrict which documents are retrieved. Defaults to
            ``None`` (no filtering), which matches the previous behaviour.

    Returns:
        A ``(answer, hits)`` tuple: the LLM's answer and the raw search hits
        that were used to build its context.
    """
    hits = retrieve_movies(question, filters=filters)
    context = build_context(hits)
    answer = answer_with_rag_openai(question, context)

    return answer, hits

# ----------------------------
# Example usage
# ----------------------------
if __name__ == "__main__":
    # Swap in the line below to ask interactively instead:
    # question = input("Ask a movie question: ")
    question = "tell me star wars movies that are rated 7 plus"
    answer, sources = rag_query(question)

    # Headings previously contained mis-decoded bytes in place of the emoji.
    print("\n🧠 Answer:\n")
    print(answer)

    print("\n📚 Sources:\n")
    for s in sources:
        # .get() mirrors build_context: a hit without a title should not
        # abort the listing with a KeyError.
        print("-", s["_source"].get("title"))



Title: I Have a Bad Feeling About This
Year: 2018
Genres: Adventure, Parody, Sci-Fi
Rating: 4.9
Plot:  The plot is based on the Star Wars franchise.


Title: Mother Didn't Tell Me
Year: 1950
Genres: Slapstick
Rating: 4.2
Plot:  A woman learns how to deal with the difficulties of being a doctor's wife, as well as an interfering mother-in-law.


Title: Hitchhiking
Year: 2018
Genres: Slapstick, Crime Drama, Romance
Rating: 5.0
Plot: 1. The film is about a man who is trying to commit suicide. He is sitting on the edge of a bridge, ready to jump. He is waiting for someone to stop him. He is waiting for someone to talk to him. He is waiting for someone to give him a reason to live. He is waiting for someone to tell him that he is not alone. He is waiting for someone to tell him that he is loved. He is waiting for someone to tell him that he is worth something. He is waiting for someone to tell him that he is not a failure. He is waiting for someone to tell him that he is


Title: Untitled D

In [82]:
# import requests

# def answer_with_rag_ollama(question, context):
#     payload = {
#         "model": "phi3",
#         "prompt": f"Context:\n{context}\n\nQuestion:\n{question}",
#         "stream": False
#     }

#     res = requests.post(
#         "http://localhost:11434/api/generate",
#         json=payload
#     )

#     return res.json()["response"]


In [81]:
# question= "Explain what RAG is in one sentence"

# payload = {
#         "model": "phi3",
#         "prompt": f"Context:\n""\n\nQuestion:\n{question}",
#         "stream": False
#     }

# res = requests.post(
#        "http://localhost:11434/api/generate",
#        json=payload
#    )
# print(res)
