In [None]:
!pip install google-genai

In [1]:
import chromadb

In [2]:
# Open the on-disk Chroma database stored two directories above this notebook.
client = chromadb.PersistentClient(path="../../chroma_db")

In [3]:
# List the collections available in this Chroma database.
client.list_collections()

[Collection(name=toyota_specs), Collection(name=migration_test)]

In [4]:
from dotenv import load_dotenv

In [5]:
# Read environment variables from the project-level .env file
# (presumably where the Google API key lives - confirm).
load_dotenv(dotenv_path="../../.env")

True

In [6]:
from chromadb.utils.embedding_functions import GoogleGenaiEmbeddingFunction

In [7]:
# Chroma embedding function backed by Google's text-embedding-004 model.
google_ef = GoogleGenaiEmbeddingFunction(model_name="models/text-embedding-004")

In [8]:
# Bind the Google embedding function to the collection handle. Without it,
# Chroma falls back to its default embedding model whenever it has to embed
# text itself (e.g. `query(query_texts=...)`), and those vectors would not be
# comparable with the ones stored in this collection (presumably produced by
# text-embedding-004, since querying with a `google_ef` vector works).
# NOTE(review): assumes the collection was not persisted with a different,
# conflicting embedding function - confirm on first run.
toyota_specs = client.get_collection(
    name="toyota_specs",
    embedding_function=google_ef,
)

In [9]:
# Number of records stored in the collection.
toyota_specs.count()

32

In [10]:
# Sample question used for the raw Chroma queries in the following cells.
query_text = "What is the fuel efficiency of the Camry hybrid?"

In [11]:
# Embed the question. The embedding function is called with a list of texts,
# so the single resulting vector is taken from index 0.
query_vector = google_ef([query_text])[0]

In [12]:
# Nearest-neighbour lookup using the precomputed query vector; ask for 5 hits.
results = toyota_specs.query(query_embeddings=[query_vector], n_results=5)

In [13]:
# Inspect the raw response returned by the collection query.
results

{'ids': [['731620ad-755c-41c4-9d7c-0be592babfd5',
   'e842a974-65cc-4ad5-93e6-277c79a4e2a7',
   '954feb78-8952-434b-ae1f-6361b3638f04',
   'acf64b7a-ec9f-4688-a830-e29b66339f59',
   '9a35aeae-c001-4605-a790-53889c2659e3']],
 'embeddings': None,
 'documents': [['Key Features\nSafety\nToyota Safety Sense (TSS) 2.5+, which includes:\nPre-Collision System with Pedestrian Detection\nLane Departure Alert with Steering Assist\nAdaptive Cruise Control\nRoad Sign Assist and Lane Tracing Assist\nComfort and Technology\n7-inch or optional 9-inch touchscreen infotainment system\nApple CarPlay, Android Auto, and Amazon Alexa compatibility\nOptional panoramic sunroof and leather-trimmed seats\nHeads-up display and wireless charging\nDesign\nSleek, aerodynamic styling with LED headlights\nSporty trims like SE and XSE with an available two-tone roof\nFuel Efﬁciency\nGasoline Engine: ~28 MPG (city) / ~39 MPG (highway)\nHybrid Powertrain: ~51 MPG (city) / ~53 MPG (highway)\nCamry’s hybrid option stands 

In [None]:
# Summarise each hit of the raw Chroma response: distance, id and model name.
# The response holds one inner list per query, so index 0 selects the hits for
# the single question asked. The chunk text itself is left out of the summary.
print(query_text)
print("-"*100)
for hit_id, distance, hit_metadata in zip(
    results["ids"][0], results["distances"][0], results["metadatas"][0]
):
    print(f"Distance: {distance}")
    print(f"ID: {hit_id}")
    print(f"Model: {hit_metadata['model_name']}")
    print("-"*100)


In [None]:
# Same lookup, but let Chroma embed the raw text itself. This uses the
# embedding function attached to the `toyota_specs` handle, which must match
# the model that produced the stored vectors.
results = toyota_specs.query(query_texts=[query_text], n_results=5)

In [None]:
# Sample user questions used to exercise retrieval through the LangChain
# vector store. The first entry repeats the question used for the raw Chroma
# queries; the last one asks for an action rather than a spec (presumably an
# out-of-scope probe - confirm).
queries = [
    "What is the fuel efficiency of the Camry hybrid?",
    "Which sedan is better for first-time buyers?",
    "Is AWD available on the Camry?",
    "What is the starting price of the Corolla?",
    "I need a fuel-efficient car for city driving, what do you recommend?",
    "Can you schedule a test drive for me?"
]

In [None]:
# Fully qualified embedding model id ("models/<name>"): the form used in the
# langchain-google-genai documentation and the same id that is passed to
# GoogleGenaiEmbeddingFunction earlier in this notebook, so both retrieval
# paths embed queries with the identical model.
EMBEDDING_MODEL_NAME = "models/text-embedding-004"

In [None]:
from dotenv import load_dotenv

In [None]:
# Load the project-level .env file so the LangChain section of the notebook
# has its environment variables available.
load_dotenv(dotenv_path="../../.env")

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [None]:
# LangChain embedding client for the model named in EMBEDDING_MODEL_NAME.
# It is handed to the Chroma vector store as its embedding function.
embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL_NAME)

In [None]:
from langchain_chroma import Chroma

In [None]:
# LangChain wrapper around the persisted "toyota_specs" collection, pointing
# at the same on-disk database directory as the raw chromadb client.
# `embeddings` should be the same model that produced the stored vectors,
# otherwise similarity scores are not meaningful.
vector_store = Chroma(
    collection_name="toyota_specs",
    embedding_function=embeddings,
    persist_directory="../../chroma_db"
)

In [None]:
# Show the question that is about to be searched.
queries[0]

In [None]:
# Fetch the 5 most similar chunks for the first sample question.
# Note: this rebinds `results`, which earlier held the raw Chroma response
# dict, to the list returned by the LangChain vector store.
results = vector_store.similarity_search(query=queries[0], k=5)

In [None]:
# Inspect the list returned by similarity_search.
results

In [None]:
# Take the first result for a closer look at its fields.
document = results[0]

In [None]:
# Identifier of the first result.
document.id

In [None]:
# Full metadata mapping attached to the first result.
document.metadata

In [None]:
# The 'source' metadata entry (presumably the originating file - confirm).
document.metadata['source']

In [None]:
# The 'model_name' metadata entry (presumably the car model the chunk describes - confirm).
document.metadata['model_name']

In [None]:
# Text of the first result.
document.page_content

In [None]:
# Print every retrieved chunk in full, followed by a one-line summary of its
# source / page / chunk_index / model_name metadata.
print(queries[0], "-"*100, sep="\n")
for idx, document in enumerate(results, start=1):
    metadata = document.metadata
    print(f"Document {idx}:", "-"*100, document.page_content, "-"*100, sep="\n")
    print(f"{metadata['source']} - {metadata['page']} - {metadata['chunk_index']} - {metadata['model_name']}")
    print("-"*100)



In [None]:
# Show the question used for the scored search that follows.
queries[0]

In [None]:
# Same search as before, but each hit is returned together with its score.
results_with_scores = vector_store.similarity_search_with_score(query=queries[0], k=5)

In [None]:
# Inspect the scored results.
results_with_scores

In [None]:
# Take the first scored result; it is unpacked elsewhere as a (document, score) pair.
document_with_score = results_with_scores[0]

In [None]:
# First element of the pair: the retrieved document.
document_with_score[0]

In [None]:
# Metadata of the document in the first scored result.
document_with_score[0].metadata

In [None]:
# Text of the document in the first scored result.
document_with_score[0].page_content

In [None]:
# Second element of the pair: the score
# (presumably a distance, lower = closer, for Chroma - confirm).
document_with_score[1]

In [None]:
# One line per scored hit: score, source and the full chunk text.
print(queries[0], "-"*100, sep="\n")
for doc, score in results_with_scores:
    print(
        f"Score: {score} | Source: {doc.metadata['source']} | Content: {doc.page_content}",
        "-"*100,
        sep="\n",
    )

In [None]:
def get_toyota_specs(query: str, k: int = 5, preview_chars: int = 200) -> None:
    """Print the top-``k`` chunks the notebook's ``vector_store`` returns for ``query``.

    Each hit is shown as a truncated preview of its text followed by a
    one-line summary of its metadata.

    Args:
        query: Text passed to ``vector_store.similarity_search``.
        k: Number of results to request. Defaults to 5, the value that was
            previously hard-coded.
        preview_chars: Maximum number of characters of each chunk's
            ``page_content`` to print. Defaults to 200, the value that was
            previously hard-coded.

    Returns:
        None. Everything is written to standard output.

    Raises:
        KeyError: If a hit's metadata lacks ``source``, ``page``,
            ``chunk_index`` or ``model_name``.
    """
    separator = "-" * 100
    # `vector_store` is the notebook-level LangChain store, resolved at call time.
    results = vector_store.similarity_search(query, k=k)
    print(query)
    print(separator)
    for rank, result in enumerate(results, start=1):
        metadata = result.metadata
        print(f"Result {rank}:")
        print(separator)
        print(result.page_content[:preview_chars])
        print(separator)
        print(f"{metadata['source']} - {metadata['page']} - {metadata['chunk_index']} - {metadata['model_name']}")
        print(separator)


In [None]:
def get_toyota_specs_with_scores(query: str, k: int = 5, preview_chars: int = 200) -> None:
    """Print the top-``k`` scored hits the notebook's ``vector_store`` returns for ``query``.

    Each hit is printed on one line with its score, its ``source`` metadata
    entry and a truncated preview of its text.

    Args:
        query: Text passed to ``vector_store.similarity_search_with_score``.
        k: Number of results to request. Defaults to 5, the value that was
            previously hard-coded.
        preview_chars: Maximum number of characters of each chunk's
            ``page_content`` to print. Defaults to 200, the value that was
            previously hard-coded.

    Returns:
        None. Everything is written to standard output.

    Raises:
        KeyError: If a hit's metadata lacks ``source``.
    """
    separator = "-" * 100
    # `vector_store` is the notebook-level LangChain store, resolved at call time.
    # NOTE(review): the score is presumably a distance (lower = closer) for
    # Chroma - confirm before interpreting it.
    scored_hits = vector_store.similarity_search_with_score(query, k=k)
    print(query)
    print(separator)
    for doc, score in scored_hits:
        print(f"Score: {score} | Source: {doc.metadata['source']} | Content: {doc.page_content[:preview_chars]}")
        print(separator)

In [None]:
# Show the second sample question.
queries[1]

In [None]:
# Print the retrieved chunks for the second sample question.
get_toyota_specs(queries[1])

In [None]:
# Same question, this time with the score of each hit.
get_toyota_specs_with_scores(queries[1])

In [None]:
# Show the last sample question (the test-drive request).
queries[5]

In [None]:
# Print the retrieved chunks for the test-drive request.
get_toyota_specs(queries[5])

In [None]:
# Same request, this time with the score of each hit.
get_toyota_specs_with_scores(queries[5])