# Self Querying Process

本节内容主要是为了尝试理解Self Query的执行过程

In [10]:

from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.retrievers.self_query.chroma import ChromaTranslator
from langchain.chains.query_constructor.base import AttributeInfo, StructuredQueryOutputParser, get_query_constructor_prompt
from langchain_core.documents import Document
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain_core.output_parsers import JsonOutputParser

from chat_model_client import get_model
from pprint import  pprint

## 1. 组织数据

需要声明文档数据结构，细化到每一个属性的名称、数据类型、描述，并解释文档主体内容。

In [5]:
documents = [
    Document(
        page_content="A bunch of scientists bring back dinosaurs and mayhem breaks loose",
        metadata={
            "year": 1993,
            "director": "Steven Spielberg",
            "rating": 7.7,
            "genre": "science fiction",
        }
    ),
    Document(
        page_content="Leo DiCaprio gets lost in a dream within a dream within a dream within a ...",
        metadata={
            "year": 2010,
            "director": "Christopher Nolan",
            "rating": 8.2,
            "genre": "action",
        }
    ),
    Document(
        page_content="A psychologist / detective gets lost in a series of dreams within some dreams",
        metadata={
            "year": 1964,
            "director": "Andrei Tarkovsky",
            "rating": 8.1,
            "genre": "thriller",
        }
    ),
    Document(
        page_content="A bunch of normal-sized women are supremely wholesome and very very noble for your ...",
        metadata={
            "year": 2015,
            "director": "Dennis Dugan",
            "rating": 7.4,
            "genre": "comedy",
        }
    ),
    Document(
        page_content="This film is just boring",
        metadata={
            "year": 1988,
            "director": "Ryan Fleck",
            "rating": 6.8,
            "genre": "horror",
        }
    )
]

metadata_field_info = [
    AttributeInfo(
        name="genre",
        description="The genre of the movie, one of ['science fiction', 'comedy', 'drama', thriller', 'romance', 'action', animated']",
        type="string",
    ),
    AttributeInfo(
        name="year",
        description="The year when the movie was released",
        type="integer",
    ),
    AttributeInfo(
        name="director",
        description="The name of the movie director",
        type="string",
    ),
    AttributeInfo(
        name="rating",
        description="A 1-10 rating for the movie",
        type="float",
    ),
]

document_content_description="Brief summary of a movie"

## 2.构建查询语句

利用LLM能力，将自然语言查询转换为结构化的查询；

In [26]:
prompt = get_query_constructor_prompt(
    document_content_description,
    metadata_field_info
)
llm = get_model('openai')

query_constructor = prompt | llm | StructuredQueryOutputParser.from_components()

query = "Has Dennis Dugan directed any movies about women"

# pprint(prompt.invoke(query))
# output = query_constructor.invoke(query)
# pprint(JsonOutputParser().invoke(output))

query_constructor.invoke(query)


  llm = get_model('openai')


StructuredQuery(query='women', filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='director', value='Dennis Dugan'), limit=None)

## 3. 构建SelfQueryRetriever

In [28]:

retriever = SelfQueryRetriever(
    query_constructor=query_constructor,
    vectorstore=Chroma.from_documents(documents, OllamaEmbeddings(model = "llama2-chinese")),
    structured_query_translator=ChromaTranslator()
)

retriever.invoke(query)


[Document(metadata={'director': 'Dennis Dugan', 'genre': 'comedy', 'rating': 7.4, 'year': 2015}, page_content='A bunch of normal-sized women are supremely wholesome and very very noble for your ...'),
 Document(metadata={'director': 'Dennis Dugan', 'genre': 'comedy', 'rating': 7.4, 'year': 2015}, page_content='A bunch of normal-sized women are supremely wholesome and very very noble for your ...'),
 Document(metadata={'director': 'Dennis Dugan', 'genre': 'comedy', 'rating': 7.4, 'year': 2015}, page_content='A bunch of normal-sized women are supremely wholesome and very very noble for your ...'),
 Document(metadata={'director': 'Dennis Dugan', 'genre': 'comedy', 'rating': 7.4, 'year': 2015}, page_content='A bunch of normal-sized women are supremely wholesome and very very noble for your ...')]