# Advanced RAG: Self-querying

### Before doing anything

### Make sure Ollama is running in the background, with these models pulled

- [ollama pull nomic-embed-text](https://ollama.com/library/nomic-embed-text)
- [ollama run qwen2](https://ollama.com/library/qwen2)

### Install these packages

- `!pip install ollama`
- `!pip install langchain`
- `!pip install --upgrade --quiet lark langchain-chroma`
- `!pip install langchain_community`

________________________________________________________________________________

# Prepare Documents

In [1]:
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_community.embeddings import OllamaEmbeddings

# You can also build documents with the various document loaders (e.g. PDF, HTML, JSON, etc.).
# See the available loaders here: https://python.langchain.com/v0.1/docs/modules/data_connection/document_loaders/


# Toy corpus: each entry pairs a one-line movie summary (the text that gets
# embedded) with a metadata dict. Not every movie carries every field.
docs = [
    Document(page_content=summary, metadata=attributes)
    for summary, attributes in (
        (
            "A bunch of scientists bring back dinosaurs and mayhem breaks loose",
            {"year": 1993, "rating": 7.7, "genre": "science fiction"},
        ),
        (
            "Leo DiCaprio gets lost in a dream within a dream within a dream within a ...",
            {"year": 2010, "director": "Christopher Nolan", "rating": 8.2},
        ),
        (
            "A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea",
            {"year": 2006, "director": "Satoshi Kon", "rating": 8.6},
        ),
        (
            "A bunch of normal-sized women are supremely wholesome and some men pine after them",
            {"year": 2019, "director": "Greta Gerwig", "rating": 8.3},
        ),
        (
            "Toys come alive and have a blast doing so",
            {"year": 1995, "genre": "animated"},
        ),
        (
            "Three men walk into the Zone, three men walk out of the Zone",
            {
                "year": 1979,
                "director": "Andrei Tarkovsky",
                "genre": "thriller",
                "rating": 9.9,
            },
        ),
    )
]

# Embed the documents with Ollama's "nomic-embed-text" model and store them
# in a Chroma vector store.
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=OllamaEmbeddings(model="nomic-embed-text"),
)

# Creating our self-querying retriever

In [8]:
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever


# Describes the metadata fields that can be filtered on: (name, description,
# type). When adapting this notebook to your own data, replace these with
# your own fields (e.g. "email", "purchase_history").
metadata_field_info = [
    AttributeInfo(name=field, description=meaning, type=kind)
    for field, meaning, kind in (
        (
            "genre",
            "The genre of the movie. One of ['science fiction', 'comedy', 'drama', 'thriller', 'romance', 'action', 'animated']",
            "string",
        ),
        ("year", "The year the movie was released", "integer"),
        ("director", "The name of the movie director", "string"),
        ("rating", "A 1-10 rating for the movie", "float"),
    )
]



In [9]:
from langchain_community.chat_models import ChatOllama

# Short description of what each document's page_content holds.
document_content_description = "Brief summary of a movie"

# Local qwen2 chat model served by Ollama, with temperature pinned to 0.
llm = ChatOllama(model="qwen2", temperature=0)

# Wire the LLM, the vector store and the two descriptions together into a
# self-querying retriever.
retriever = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vectorstore,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
)


# Query

In [10]:
# This example only specifies a filter: the question names no topic to
# search for, just a constraint on the "rating" metadata field. The output
# shown below contains exactly the two documents rated above 8.5.
retriever.invoke("I want to watch a movie rated higher than 8.5")

[Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'director': 'Satoshi Kon', 'rating': 8.6, 'year': 2006}),
 Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'director': 'Andrei Tarkovsky', 'genre': 'thriller', 'rating': 9.9, 'year': 1979})]

In [11]:
# This example only specifies a query: the question names a topic ("toys")
# but no metadata constraint. The output shown below is consistent with no
# filter being applied -- the toy movie comes first, followed by unrelated
# documents of various genres and ratings.
retriever.invoke("is there a movie about toys")

[Document(page_content='Toys come alive and have a blast doing so', metadata={'genre': 'animated', 'year': 1995}),
 Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'genre': 'science fiction', 'rating': 7.7, 'year': 1993}),
 Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'director': 'Satoshi Kon', 'rating': 8.6, 'year': 2006}),
 Document(page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them', metadata={'director': 'Greta Gerwig', 'rating': 8.3, 'year': 2019})]