In [3]:
%%capture
# Install the notebook's dependencies. `%pip` (rather than `!pip`) guarantees the
# packages land in the environment of the kernel that is running this notebook.
# NOTE(review): versions are unpinned; pin them once a known-good set is confirmed.
%pip -q install langchain huggingface_hub langchain-google-genai
%pip install langchain_experimental langchain_core google-generativeai google-ai-generativelanguage
%pip install openai google-search-results tiktoken chromadb lark
%pip install "langchain[docarray]"
# The output of this command is swallowed by %%capture above; run it in a
# separate cell if the installed versions need to be inspected.
%pip show langchain langchain-core

*Provide your Google API key here. Never commit a real key to a shared notebook.*

In [7]:
import os
from getpass import getpass

# Credentials must never be hardcoded in a notebook: they end up in version
# control and in every shared copy. Reuse a key that is already exported in the
# environment; otherwise prompt for it without echoing it to the output.
# NOTE(review): any key that was previously committed in this cell should be
# treated as leaked and rotated.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Google API key: ")

In [8]:
import os
import google.generativeai as genai
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
from langchain_google_genai.chat_models import ChatGoogleGenerativeAI
from langchain.vectorstores import Chroma
from langchain.schema import Document

# Configure the google-generativeai SDK with the key stored in the environment.
# Passing an empty string would leave the SDK client without a usable key.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

# Embedding model used to vectorise the car descriptions.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

*Each car model is described by a short text plus metadata; the documents are then embedded and stored in a Chroma vector store.*

In [9]:
# Metadata keys attached to every car document, in the order the values are
# listed in the catalog below.
metadata_keys = ("name", "year", "rating", "brand", "type", "country")

# One entry per car: (free-text description, metadata values).
car_catalog = [
    (
        "Sleek, aerodynamic design with powerful engine performance",
        ("Tesla Model S", 2023, 95, "Tesla", "electric", "USA"),
    ),
    (
        "Luxurious interior with advanced safety features and smooth ride",
        ("Mercedes-Benz S-Class", 2024, 97, "Mercedes-Benz", "sedan", "Germany"),
    ),
    (
        "Robust off-road capability with rugged exterior and spacious interior",
        ("Jeep Wrangler", 2022, 90, "Jeep", "SUV", "USA"),
    ),
    (
        "Sporty design with nimble handling and impressive acceleration",
        ("Porsche 911", 2024, 96, "Porsche", "sports car", "Germany"),
    ),
    (
        "Efficient hybrid technology with comfortable ride and ample cargo space",
        ("Toyota Prius", 2023, 92, "Toyota", "hybrid", "Japan"),
    ),
    (
        "Aggressive styling with high-performance engine and cutting-edge technology",
        ("Ford Mustang", 2022, 94, "Ford", "muscle car", "USA"),
    ),
    (
        "Compact city car with excellent fuel efficiency and agile maneuverability",
        ("Honda Civic", 2024, 91, "Honda", "compact car", "Japan"),
    ),
    (
        "Versatile family vehicle with spacious cabin and intuitive infotainment system",
        ("Toyota Highlander", 2023, 93, "Toyota", "crossover", "Japan"),
    ),
    (
        "Premium luxury sedan with refined interior and smooth, quiet ride",
        ("BMW 7 Series", 2024, 95, "BMW", "luxury sedan", "Germany"),
    ),
]

# Turn every catalog entry into a Document whose metadata pairs each key with
# the value at the same position.
docs = [
    Document(page_content=description, metadata=dict(zip(metadata_keys, values)))
    for description, values in car_catalog
]

# Embed the documents and index them in a Chroma vector store.
vectorstore = Chroma.from_documents(docs, embeddings)


*Defines metadata for car attributes: brand, name, type, year, country, rating*

In [10]:
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

# (attribute name, description, declared type) for every metadata field the
# self-query retriever is told about.
attribute_specs = (
    ("brand", "The brand or manufacturer of the car", "string or list[string]"),
    ("name", "The name of the car model", "string or list[string]"),
    (
        "type",
        "The type or category of the car (e.g., sedan, SUV, sports car)",
        "string or list[string]",
    ),
    ("year", "The year the car was manufactured", "integer"),
    ("country", "The country where the car was produced", "string"),
    ("rating", "The overall rating of the car (0-100)", "integer"),
)

metadata_field_info = [
    AttributeInfo(name=attr_name, description=attr_description, type=attr_type)
    for attr_name, attr_description, attr_type in attribute_specs
]

# Short summary of what each document's page_content holds.
document_content_description = "Brief description of the car"


*Integrates language model with retriever for car attribute retrieval*

In [22]:
# The LLM's job here is to translate a question into a structured query
# (filter + search string). Use temperature=0 so that translation is as
# deterministic as possible and the notebook's results are reproducible.
llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0)

# Self-query retriever: the LLM derives metadata filters from the question using
# the field descriptions, then the vector store runs the filtered search.
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
    verbose=True,
)

In [12]:
# `invoke` is the supported retriever entry point; `get_relevant_documents` is deprecated.
retriever.invoke("""What is the brand of the car with the name "Ford Mustang"?""")

[Document(page_content='Aggressive styling with high-performance engine and cutting-edge technology', metadata={'brand': 'Ford', 'country': 'USA', 'name': 'Ford Mustang', 'rating': 94, 'type': 'muscle car', 'year': 2022})]

In [14]:
# `invoke` is the supported retriever entry point; `get_relevant_documents` is deprecated.
retriever.invoke("Name a hybrid car")

[Document(page_content='Efficient hybrid technology with comfortable ride and ample cargo space', metadata={'brand': 'Toyota', 'country': 'Japan', 'name': 'Toyota Prius', 'rating': 92, 'type': 'hybrid', 'year': 2023}),
 Document(page_content='Luxurious interior with advanced safety features and smooth ride', metadata={'brand': 'Mercedes-Benz', 'country': 'Germany', 'name': 'Mercedes-Benz S-Class', 'rating': 97, 'type': 'sedan', 'year': 2024}),
 Document(page_content='Aggressive styling with high-performance engine and cutting-edge technology', metadata={'brand': 'Ford', 'country': 'USA', 'name': 'Ford Mustang', 'rating': 94, 'type': 'muscle car', 'year': 2022}),
 Document(page_content='Sporty design with nimble handling and impressive acceleration', metadata={'brand': 'Porsche', 'country': 'Germany', 'name': 'Porsche 911', 'rating': 96, 'type': 'sports car', 'year': 2024})]

In [16]:
# `invoke` is the supported retriever entry point; `get_relevant_documents` is deprecated.
retriever.invoke("What type of car is the 'Jeep Wrangler'")

[Document(page_content='Robust off-road capability with rugged exterior and spacious interior', metadata={'brand': 'Jeep', 'country': 'USA', 'name': 'Jeep Wrangler', 'rating': 90, 'type': 'SUV', 'year': 2022}),
 Document(page_content='Aggressive styling with high-performance engine and cutting-edge technology', metadata={'brand': 'Ford', 'country': 'USA', 'name': 'Ford Mustang', 'rating': 94, 'type': 'muscle car', 'year': 2022}),
 Document(page_content='Sporty design with nimble handling and impressive acceleration', metadata={'brand': 'Porsche', 'country': 'Germany', 'name': 'Porsche 911', 'rating': 96, 'type': 'sports car', 'year': 2024}),
 Document(page_content='Luxurious interior with advanced safety features and smooth ride', metadata={'brand': 'Mercedes-Benz', 'country': 'Germany', 'name': 'Mercedes-Benz S-Class', 'rating': 97, 'type': 'sedan', 'year': 2024})]

In [17]:
# `invoke` is the supported retriever entry point; `get_relevant_documents` is deprecated.
retriever.invoke("Which country is associated with the Mercedes-Benz S-Class?")

[Document(page_content='Luxurious interior with advanced safety features and smooth ride', metadata={'brand': 'Mercedes-Benz', 'country': 'Germany', 'name': 'Mercedes-Benz S-Class', 'rating': 97, 'type': 'sedan', 'year': 2024}),
 Document(page_content='Aggressive styling with high-performance engine and cutting-edge technology', metadata={'brand': 'Ford', 'country': 'USA', 'name': 'Ford Mustang', 'rating': 94, 'type': 'muscle car', 'year': 2022}),
 Document(page_content='Premium luxury sedan with refined interior and smooth, quiet ride', metadata={'brand': 'BMW', 'country': 'Germany', 'name': 'BMW 7 Series', 'rating': 95, 'type': 'luxury sedan', 'year': 2024}),
 Document(page_content='Sporty design with nimble handling and impressive acceleration', metadata={'brand': 'Porsche', 'country': 'Germany', 'name': 'Porsche 911', 'rating': 96, 'type': 'sports car', 'year': 2024})]

## **Using Filter K=1**
*Here, k is set to 1, so the retriever returns at most one document per query by default. With `enable_limit=True`, a query can also request a specific number of results.*

In [18]:
# Rebuild the retriever so that it returns a single document per query.
# The result cap has to go through `search_kwargs`, which the retriever forwards
# to the vector store search; a bare `k=1` keyword is not a retriever field and
# would have no effect. `enable_limit=True` additionally lets the query itself
# ask for a specific number of results.
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
    enable_limit=True,
    search_kwargs={"k": 1},
    verbose=True,
)


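*Without a limit, the vector store's default number of results applies: the unfiltered queries above returned 4 documents each.*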

In [21]:
# `invoke` is the supported retriever entry point; `get_relevant_documents` is deprecated.
retriever.invoke("Which country is associated with the Mercedes-Benz S-Class?")

[Document(page_content='Luxurious interior with advanced safety features and smooth ride', metadata={'brand': 'Mercedes-Benz', 'country': 'Germany', 'name': 'Mercedes-Benz S-Class', 'rating': 97, 'type': 'sedan', 'year': 2024})]