In [14]:
from langchain_ollama import OllamaLLM, OllamaEmbeddings
from langchain_community.document_loaders import CSVLoader
from langchain.chains.query_constructor.base import AttributeInfo, StructuredQueryOutputParser, get_query_constructor_prompt
from langchain_core.documents import Document
from langchain_community.vectorstores import Chroma
from langchain import hub
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain.retrievers.self_query.chroma import ChromaTranslator
from langchain.retrievers.self_query.base import SelfQueryRetriever

In [3]:
llm = OllamaLLM(model = "llama3.2")
embedding = OllamaEmbeddings(model = "nomic-embed-text")

In [4]:
loader = CSVLoader(file_path="/home/bishwayansaha99/langchain/docs/movie.csv")
docs = loader.load()

In [5]:
docs = [
    Document(
        page_content="A bunch of scientists bring back dinosaurs and mayhem breaks loose",
        metadata={"year": 1993, "rating": 7.7, "genre": "science fiction"},
    ),
    Document(
        page_content="Leo DiCaprio gets lost in a dream within a dream within a dream within a ...",
        metadata={"year": 2010, "director": "Christopher Nolan", "rating": 8.2},
    ),
    Document(
        page_content="A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea",
        metadata={"year": 2006, "director": "Satoshi Kon", "rating": 8.6},
    ),
    Document(
        page_content="A bunch of normal-sized women are supremely wholesome and some men pine after them",
        metadata={"year": 2019, "director": "Greta Gerwig", "rating": 8.3},
    ),
    Document(
        page_content="Toys come alive and have a blast doing so",
        metadata={"year": 1995, "genre": "animated"},
    ),
    Document(
        page_content="Three men walk into the Zone, three men walk out of the Zone",
        metadata={
            "year": 1979,
            "director": "Andrei Tarkovsky",
            "genre": "thriller",
            "rating": 9.9,
        },
    ),
]

In [6]:
vector_store = Chroma.from_documents(documents=docs, embedding=embedding)

question1 = "What are some 90's animated movies?"

print(vector_store.similarity_search(question1))

[Document(metadata={'genre': 'animated', 'year': 1995}, page_content='Toys come alive and have a blast doing so'), Document(metadata={'genre': 'science fiction', 'rating': 7.7, 'year': 1993}, page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose'), Document(metadata={'director': 'Christopher Nolan', 'rating': 8.2, 'year': 2010}, page_content='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...'), Document(metadata={'director': 'Satoshi Kon', 'rating': 8.6, 'year': 2006}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea')]


In [7]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})

prompt_template = hub.pull("rlm/rag-prompt")

chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt_template
    | llm
    | StrOutputParser()
)



In [8]:
chain.invoke(question1)

'I don\'t know the answer to your question about 90\'s animated movies. However, I do know that a document from 1995 mentions "Toys come alive and have a blast doing so", which suggests it might be an animated movie, but without more information, I couldn\'t confirm it. Another possible candidate is the 1993 science fiction film with dinosaurs, as its genre includes animation, but again, I\'d need more context to be sure.'

In [9]:
metadata_field_info = [
    AttributeInfo(name = "genre", description = "The genre of the movie. Can be one of these ['animated', 'science fiction', 'thriller', 'horror', 'comedy']", type="string"),
    AttributeInfo(name = "director", description= "Name of the dorector of the movie", type="string"),
    AttributeInfo(name = "year", description= "The year, when the movie was released", type = "string"),
    AttributeInfo(name = "rating", description="Rating of the movie, the value ies between 0 to 10", type = 'float')
]

docuent_content_description = "Give a brief summary of a movie in not more then 3 lines"

prompt = get_query_constructor_prompt(
    docuent_content_description, metadata_field_info
)
prompt

FewShotPromptTemplate(input_variables=['query'], input_types={}, partial_variables={}, examples=[{'i': 1, 'data_source': '```json\n{{\n    "content": "Lyrics of a song",\n    "attributes": {{\n        "artist": {{\n            "type": "string",\n            "description": "Name of the song artist"\n        }},\n        "length": {{\n            "type": "integer",\n            "description": "Length of the song in seconds"\n        }},\n        "genre": {{\n            "type": "string",\n            "description": "The song genre, one of "pop", "rock" or "rap""\n        }}\n    }}\n}}\n```', 'user_query': 'What are songs by Taylor Swift or Katy Perry about teenage romance under 3 minutes long in the dance pop genre', 'structured_request': '```json\n{{\n    "query": "teenager love",\n    "filter": "and(or(eq(\\"artist\\", \\"Taylor Swift\\"), eq(\\"artist\\", \\"Katy Perry\\")), lt(\\"length\\", 180), eq(\\"genre\\", \\"pop\\"))"\n}}\n```'}, {'i': 2, 'data_source': '```json\n{{\n    "con

In [12]:
output_parser = StructuredQueryOutputParser.from_components()
query_constructor= prompt | llm | output_parser

query_constructor.invoke({"query" : question1})

StructuredQuery(query='teenager love', filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Operation(operator=<Operator.OR: 'or'>, arguments=[Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='artist', value='Taylor Swift'), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='artist', value='Katy Perry')]), Comparison(comparator=<Comparator.LT: 'lt'>, attribute='length', value=180), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='genre', value='pop')]), limit=None)

In [15]:
self_query_retriever = SelfQueryRetriever(
    query_constructor = query_constructor,
    vectorstore= vector_store,
    structured_query_translator= ChromaTranslator()
)

self_query_chain =  (
    {"context": self_query_retriever, "question": RunnablePassthrough()}
    | prompt_template
    | llm
    | StrOutputParser()
)

In [17]:
self_query_chain.invoke(question1)

'I can provide a list of popular 90\'s animated movies. Some notable ones include "The Lion King" (1994), "Beauty and the Beast" (1991), and "Toy Story" (1995). Additionally, films like "A Bug\'s Life" (1998) and "Mulan" (1998) were also released during this time period.'