In [None]:
!pip install langchain langchain-community chromadb faiss-cpu -q
!pip install google-generativeai -q
!pip install langchain-google-genai -q
!pip install -U lark -q
!pip install -U google-genai -q

In [2]:
from langchain_core.documents import Document
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import Chroma
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo, StructuredQueryOutputParser, get_query_constructor_prompt
from langchain_google_genai.chat_models import ChatGoogleGenerativeAI
from langchain.prompts import ChatPromptTemplate
from google import genai
import warnings
warnings.filterwarnings("ignore")

In [8]:
# Prepare docs
docs_raw = [
    {"content": "The Eiffel Tower is located in Paris.", "metadata": {"location": "Paris", "category": "monument"}},
    {"content": "The Great Wall of China is one of the Seven Wonders.", "metadata": {"location": "China", "category": "monument"}},
    {"content": "Mount Everest is the tallest mountain in the world.", "metadata": {"location": "Nepal", "category": "mountain"}},
    {"content": "The Louvre Museum houses many famous artworks.", "metadata": {"location": "Paris", "category": "museum"}},
    {"content": "The Colosseum is a historic site in Rome.", "metadata": {"location": "Rome", "category": "monument"}},
    {"content": "Taj Mahal is an ivory-white marble mausoleum in India.", "metadata": {"location": "India", "category": "monument"}},
    {"content": "The British Museum is located in London.", "metadata": {"location": "London", "category": "museum"}},
    {"content": "The Alps are a major mountain range in Europe.", "metadata": {"location": "Europe", "category": "mountain"}},
    {"content": "The Grand Canyon is located in Arizona.", "metadata": {"location": "Arizona", "category": "canyon"}},
    {"content": "The Statue of Liberty is in New York.", "metadata": {"location": "New York", "category": "monument"}},
    {"content": "Big Ben is a famous clock tower in London.", "metadata": {"location": "London", "category": "monument"}},
    {"content": "Machu Picchu is an ancient city in Peru.", "metadata": {"location": "Peru", "category": "historic"}},
    {"content": "Sagrada Familia is a basilica in Barcelona.", "metadata": {"location": "Barcelona", "category": "religious"}},
    {"content": "Stonehenge is a prehistoric monument in England.", "metadata": {"location": "England", "category": "monument"}},
    {"content": "The Acropolis of Athens is an ancient citadel.", "metadata": {"location": "Athens", "category": "historic"}},
    {"content": "The Sacred Valley is a stunning region near Cusco, Peru, known for its Incan archaeological sites.", "metadata": {"location": "Peru", "category": "historic"}},
    {"content": "Christ the Redeemer is a statue in Rio de Janeiro.", "metadata": {"location": "Rio", "category": "monument"}},
    {"content": "The Forbidden City is a palace complex in Beijing.", "metadata": {"location": "Beijing", "category": "historic"}},
    {"content": "The Sydney Opera House is a multi-venue performing arts centre.", "metadata": {"location": "Sydney", "category": "cultural"}},
    {"content": "Burj Khalifa is the tallest building in the world.", "metadata": {"location": "Dubai", "category": "building"}},
    {"content": "Niagara Falls is a group of waterfalls between Canada and USA.", "metadata": {"location": "Canada", "category": "waterfall"}}
]

documents = [Document(page_content=d["content"], metadata=d["metadata"]) for d in docs_raw]

# Define Embedding Model
hf_embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-large-en")

# Store documents in VectorDB
vectorstore = Chroma.from_documents(documents, hf_embeddings)

In [4]:
import os
os.environ["GOOGLE_API_KEY"] = ""

In [5]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

In [9]:
metadata_field_info = [
    AttributeInfo(
        name="location",
        description="Geographical location of the place described in the document",
        type="string"
    ),
    AttributeInfo(
        name="category",
        description="Type or classification of the place, such as monument, museum, mountain, etc.",
        type="string"
    ),
]

# Define Query Construction Prompt
prompt = get_query_constructor_prompt("Descriptions of famous places", metadata_field_info)

# Define Output Parser
output_parser = StructuredQueryOutputParser.from_components()

# Create Query Constructor
query_constructor = prompt | llm | output_parser

# Create Self-Query Retriever
retriever = SelfQueryRetriever(
    query_constructor=query_constructor,
    vectorstore=vectorstore,
    search_kwargs={"k": 10},
    metadata_field_info=metadata_field_info
)

In [11]:
query = "Show me monuments in Peru"
retrieved_docs = retriever.invoke(query)

# Print Retrieved Documents
for doc in retrieved_docs:
    print(f"Location: {doc.page_content} | Metadata: {doc.metadata}")

Location: Machu Picchu is an ancient city in Peru. | Metadata: {'location': 'Peru', 'category': 'historic'}
Location: Machu Picchu is an ancient city in Peru. | Metadata: {'category': 'historic', 'location': 'Peru'}
Location: The Sacred Valley is a stunning region near Cusco, Peru, known for its Incan archaeological sites. | Metadata: {'category': 'historic', 'location': 'Peru'}


In [12]:
query = "Show me monuments in Peru"

basic_retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
basic_results = basic_retriever.get_relevant_documents(query)

print("\n=== BASIC VECTOR RETRIEVER RESULTS ===")
for doc in basic_results:
    print(f"- {doc.page_content} | {doc.metadata}")



=== BASIC VECTOR RETRIEVER RESULTS ===
- Machu Picchu is an ancient city in Peru. | {'category': 'historic', 'location': 'Peru'}
- Machu Picchu is an ancient city in Peru. | {'category': 'historic', 'location': 'Peru'}
- The Sacred Valley is a stunning region near Cusco, Peru, known for its Incan archaeological sites. | {'location': 'Peru', 'category': 'historic'}
- The Great Wall of China is one of the Seven Wonders. | {'category': 'monument', 'location': 'China'}
- The Great Wall of China is one of the Seven Wonders. | {'category': 'monument', 'location': 'China'}
- The Colosseum is a historic site in Rome. | {'location': 'Rome', 'category': 'monument'}
- The Colosseum is a historic site in Rome. | {'location': 'Rome', 'category': 'monument'}
- Stonehenge is a prehistoric monument in England. | {'category': 'monument', 'location': 'England'}
- Stonehenge is a prehistoric monument in England. | {'location': 'England', 'category': 'monument'}
- Sagrada Familia is a basilica in Barcelo