# Self-Querying Retrieval

In [None]:
# LangChain itself plus the community integrations package (the Chroma wrapper is imported from it below).
%pip install -qU langchain
%pip install -qU langchain-community
# OpenAI bindings for LangChain (ChatOpenAI and OpenAIEmbeddings are imported from it below).
%pip install -qU langchain_openai
# Chroma vector database backend used by the Chroma vector store wrapper.
%pip install chromadb
# Parser library; the LangChain self-query documentation lists it as a requirement of the retriever.
%pip install lark


### Import needed libraries

In [None]:
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import ChatOpenAI

### Config

In [8]:
# Settings shared by the cells below.
language_model_name = "gpt-3.5-turbo-0125"  # chat model handed to ChatOpenAI later on
collection_name = "books"  # name of the Chroma collection that stores the documents

# Embedding client built with the library defaults.
# NOTE(review): presumably picks up the OpenAI API key from the environment - confirm.
embedding_model = OpenAIEmbeddings()

## Load documents

In [9]:
# Sample corpus: one Document per book.
# - page_content: a one-sentence summary of the book's themes.
# - metadata: structured fields (name, author, first_published, genre, origin, rating).
docs = [
    Document(
        page_content="Portrays Elizabeth Bennet's growth in discerning true character over appearances, set against Regency England's social mores.",
        metadata={
            "name": "Pride and Prejudice",
            "author": "Jane Austen",
            "first_published": 1813,
            "genre": "Romance",
            "origin": "England",
            "rating": 4.5,
        },
    ),
    Document(
        page_content="Resists a totalitarian regime under constant surveillance, showcasing the perils of absolute power and the spirit of rebellion.",
        metadata={
            "name": "1984",
            "author": "George Orwell",
            "first_published": 1949,
            "genre": "Dystopian",
            "origin": "England",
            "rating": 4.7,
        },
    ),
    Document(
        page_content="Exposes racial injustices in the South through Scout Finch, whose father defends a wrongly accused black man, challenging societal prejudices.",
        metadata={
            "name": "To Kill a Mockingbird",
            "author": "Harper Lee",
            "first_published": 1960,
            "genre": "Southern Gothic",
            "origin": "United States",
            "rating": 4.8,
        },
    ),
    Document(
        page_content="Reveals the Jazz Age's allure and despair through Gatsby's doomed love, critiquing the American Dream's corruption.",
        metadata={
            "name": "The Great Gatsby",
            "author": "F. Scott Fitzgerald",
            "first_published": 1925,
            "genre": "Tragedy",
            "origin": "United States",
            "rating": 4.6,
        },
    ),
    Document(
        page_content="Follows Frodo Baggins on a quest to destroy a powerful ring, weaving a tale of bravery, friendship, and darkness in Middle-earth.",
        metadata={
            "name": "The Lord of the Rings",
            "author": "J.R.R. Tolkien",
            "first_published": 1954,
            "genre": "Fantasy",
            "origin": "England",
            "rating": 4.9,
        },
    ),
    Document(
        page_content="Confronts the haunting legacies of slavery through Sethe, a former slave tormented by her past, exploring themes of family and freedom.",
        metadata={
            "name": "Beloved",
            "author": "Toni Morrison",
            "first_published": 1987,
            "genre": "Historical Fiction",
            "origin": "United States",
            "rating": 4.7,
        },
    ),
    Document(
        page_content="Captures teenage angst and alienation through Holden Caulfield's cynical view of adult hypocrisy and the pains of growing up.",
        metadata={
            "name": "The Catcher in the Rye",
            "author": "J.D. Salinger",
            "first_published": 1951,
            "genre": "Coming-of-Age",
            "origin": "United States",
            "rating": 4.5,
        },
    ),
    Document(
        page_content="Examines the fallout of unchecked scientific ambition through Victor Frankenstein's creation of life, highlighting the ethical limits of science.",
        metadata={
            "name": "Frankenstein",
            "author": "Mary Shelley",
            "first_published": 1818,
            "genre": "Science Fiction",
            "origin": "England",
            "rating": 4.6,
        },
    ),
    Document(
        page_content="Depicts a future where technological progress has stunted humanity, questioning the cost of happiness and freedom.",
        metadata={
            "name": "Brave New World",
            "author": "Aldous Huxley",
            "first_published": 1932,
            "genre": "Science Fiction",
            "origin": "England",
            "rating": 4.7,
        },
    ),
    Document(
        page_content="Takes Arthur Dent on a ludicrous space journey, poking fun at life's absurdities and the universe's vast mysteries and the number 42.",
        metadata={
            "name": "The Hitchhiker's Guide to the Galaxy",
            "author": "Douglas Adams",
            "first_published": 1979,
            "genre": "Science Fiction",
            "origin": "England",
            "rating": 4.5,
        },
    ),
    Document(
        page_content="Delves into Raskolnikov's psyche after he murders for a 'noble' cause, probing the depths of guilt, morality, and redemption in bleak Russia.",
        metadata={
            "name": "Crime and Punishment",
            "author": "Fyodor Dostoevsky",
            "first_published": 1866,
            "genre": "Psychological Fiction",
            "origin": "Russia",
            "rating": 4.3,
        },
    ),
]

# Embed the documents and store them in a Chroma collection persisted on disk.
#
# Each book's title is used as a stable document id. Without explicit ids the
# store generates fresh random ones on every call, so each re-run of this cell
# would append another copy of every document to the persisted "./chroma_db"
# collection and the retriever would start returning duplicates. With fixed
# ids a re-run writes to the same entries instead of adding new ones.
# NOTE: copies written by earlier runs (under random ids) stay in the
# collection until the ./chroma_db directory is removed.
vectorstore = Chroma.from_documents(
    docs,
    embedding_model,
    ids=[doc.metadata["name"] for doc in docs],  # titles are unique within `docs`
    persist_directory="./chroma_db",
    collection_name=collection_name,
)

## Self Querying Retriever Definition

For more information, see the [LangChain self-query retriever documentation](https://python.langchain.com/docs/modules/data_connection/retrievers/self_query/).

In [10]:
# Schema of the metadata attached to every book document, one row per field:
# (field name, human-readable description, value type). Each row becomes an
# AttributeInfo, and the list is passed to SelfQueryRetriever.from_llm below.
metadata_field_info = [
    AttributeInfo(name=field, description=summary, type=kind)
    for field, summary, kind in (
        ("name", "The name of the book", "string"),
        ("author", "The name of the author of the book", "string"),
        ("first_published", "The year of the first publishing of the book", "integer"),
        ("genre", "Genre or genres of the book", "string"),
        ("origin", "The country of origin of the book", "string"),
        ("rating", "The rating of the book on a scale of 1 to 5", "float"),
    )
]

# Plain-language summary of what each document's page_content holds; also
# passed to SelfQueryRetriever.from_llm below.
document_content_description = (
    "A description of the books content, themes, characters and setting."
)

# Chat model for the retriever; temperature 0 keeps its output as repeatable as possible.
llm = ChatOpenAI(model_name=language_model_name, temperature=0)

# Combine the chat model, the vector store, the content description and the
# metadata schema into a self-querying retriever.
retriever = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vectorstore,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
)


### Enable debug output to inspect prompts (optional)

In [11]:
# Imported in this cell rather than with the other imports because the cell is optional.
from langchain.globals import set_debug

# Switch on LangChain's global debug flag.
# NOTE(review): presumably makes later chain/LLM calls print their prompts and
# intermediate steps, as the heading above suggests - confirm; skip this cell
# (or pass False) for quieter output.
set_debug(True)

## Testing