In [1]:
import os
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.tools.retriever import create_retriever_tool
from langchain.prompts import SystemMessagePromptTemplate, MessagesPlaceholder, HumanMessagePromptTemplate, PromptTemplate

# Load the OpenAI API key from a local text file so it is not hardcoded in the notebook.
# Surrounding whitespace is stripped: text files usually end with a newline, which
# would otherwise become part of the key handed to the OpenAI client.
with open('./openai_api_key.txt') as f:
    openai_api_key = f.read().strip()

# Create Embedding LLM (OpenAI)

In [2]:
# Embedding client, authenticated with the API key read from the key file.
embedding_llm = OpenAIEmbeddings(
    openai_api_key=openai_api_key,
)

# Load Databases

In [3]:
# Open the "planetbucks" collection stored under ./chroma, using
# embedding_llm as the embedding function for the vector store.
db = Chroma(
    collection_name="planetbucks",
    persist_directory='./chroma',
    embedding_function=embedding_llm,
)

# Using Chroma vector database
https://api.python.langchain.com/en/latest/vectorstores/langchain_community.vectorstores.chroma.Chroma.html

## Similarity search

In [4]:
# Similarity search with relevance scores; k is not passed, so the
# library's default number of results is returned.
query = "opening hours"
result = db.similarity_search_with_relevance_scores(
    query,
)
print(len(result))

4


In [5]:
# Inspect the search result: a list of (Document, relevance score) pairs.
result

[(Document(page_content='Shop Name: PlanetBucks\nWhat we sell: coffee drinks and coffee bean\n\nOperating time:\nMonday to Friday 7 am - 8 pm\nSaturday, Sunday, National Holiday 9 am - 4 pm\n\nAddress: ชั้น G ห้าง Emquartier สุขุมวิท กรุงเทพฯ\nBranch 1: Icon Siam Mall, Third Floor, South Wing, Bangkok, Thailand\nBranch 2: 103th Floor, Empire State Building, NYC\n\nPayment: cash, credit card, square, bitcoin', metadata={'source': '/Users/pisek.kul/Documents/case-done-codes/rag-demo-langchain/docs/general_info.txt'}),
  0.7053741924241201),
 (Document(page_content='\ufeffTable of Drink Menu,,\nCoffee Drink,Small Size Price ($),Large Size Price ($)\nEspresso,2.00,3.00\nAmericano,2.50,3.50\nCappuccino,3.00,4.50\nLatte,3.50,5.00\nMocha,4.00,5.50\n,,\nTable of Add-on items or change,,\nIce,0.50,\nOat milk,1.00,\nSoy milk,0.75,', metadata={'source': '/Users/pisek.kul/Documents/case-done-codes/rag-demo-langchain/docs/menu_price.csv'}),
  0.643032074177587),
 (Document(page_content='\ufeffTopic

In [6]:
# Same query, but cap the number of returned results with k=2.
result = db.similarity_search_with_relevance_scores(
    query,
    k=2,
)
print(len(result))

2


In [7]:
# Inspect the two (Document, relevance score) pairs returned with k=2.
result

[(Document(page_content='Shop Name: PlanetBucks\nWhat we sell: coffee drinks and coffee bean\n\nOperating time:\nMonday to Friday 7 am - 8 pm\nSaturday, Sunday, National Holiday 9 am - 4 pm\n\nAddress: ชั้น G ห้าง Emquartier สุขุมวิท กรุงเทพฯ\nBranch 1: Icon Siam Mall, Third Floor, South Wing, Bangkok, Thailand\nBranch 2: 103th Floor, Empire State Building, NYC\n\nPayment: cash, credit card, square, bitcoin', metadata={'source': '/Users/pisek.kul/Documents/case-done-codes/rag-demo-langchain/docs/general_info.txt'}),
  0.7053741924241201),
 (Document(page_content='\ufeffTable of Drink Menu,,\nCoffee Drink,Small Size Price ($),Large Size Price ($)\nEspresso,2.00,3.00\nAmericano,2.50,3.50\nCappuccino,3.00,4.50\nLatte,3.50,5.00\nMocha,4.00,5.50\n,,\nTable of Add-on items or change,,\nIce,0.50,\nOat milk,1.00,\nSoy milk,0.75,', metadata={'source': '/Users/pisek.kul/Documents/case-done-codes/rag-demo-langchain/docs/menu_price.csv'}),
  0.643032074177587)]

## MMR (Maximal Marginal Relevance)
- MMR iteratively selects documents that are relevant to the query while being dissimilar to the documents already selected.
- This can improve retrieval quality by reducing redundancy among the returned documents.

In [8]:
# MMR search for the same query, returning k=2 documents.
# fetch_k is not passed here, so the library's default candidate-pool size applies.
db.max_marginal_relevance_search(query, k=2)

Number of requested results 20 is greater than number of elements in index 7, updating n_results = 7


[Document(page_content='Shop Name: PlanetBucks\nWhat we sell: coffee drinks and coffee bean\n\nOperating time:\nMonday to Friday 7 am - 8 pm\nSaturday, Sunday, National Holiday 9 am - 4 pm\n\nAddress: ชั้น G ห้าง Emquartier สุขุมวิท กรุงเทพฯ\nBranch 1: Icon Siam Mall, Third Floor, South Wing, Bangkok, Thailand\nBranch 2: 103th Floor, Empire State Building, NYC\n\nPayment: cash, credit card, square, bitcoin', metadata={'source': '/Users/pisek.kul/Documents/case-done-codes/rag-demo-langchain/docs/general_info.txt'}),
 Document(page_content="Yirgacheffe Coffee Beans - The Essence of Ethiopia\n\nOrigin: Sourced from the esteemed Yirgacheffe region within Sidamo, Ethiopia, where coffee cultivation is an art passed down through generations.\nAltitude: Cultivated at 1,700 to 2,200 meters, the ideal range for developing its signature floral and citrus notes.\nTaste Profile: A delightful floral bouquet with a citrusy twist, featuring lemon and bergamot, and accented by jasmine and honey. Exhibi

# Retriever

In [9]:
# Expose the vector store as a retriever that uses MMR search:
# fetch_k=5 candidates are considered and k=2 documents are returned.
retriever = db.as_retriever(
    search_type='mmr',
    search_kwargs={
        'k': 2,
        'fetch_k': 5,
    },
)

In [10]:
# Run the retriever on the query; the result is a list of Document objects.
retriever.invoke(query)

[Document(page_content='Shop Name: PlanetBucks\nWhat we sell: coffee drinks and coffee bean\n\nOperating time:\nMonday to Friday 7 am - 8 pm\nSaturday, Sunday, National Holiday 9 am - 4 pm\n\nAddress: ชั้น G ห้าง Emquartier สุขุมวิท กรุงเทพฯ\nBranch 1: Icon Siam Mall, Third Floor, South Wing, Bangkok, Thailand\nBranch 2: 103th Floor, Empire State Building, NYC\n\nPayment: cash, credit card, square, bitcoin', metadata={'source': '/Users/pisek.kul/Documents/case-done-codes/rag-demo-langchain/docs/general_info.txt'}),
 Document(page_content='\ufeffTopic,Geisha,Yirgacheffe,Blue Mountain,Sumatra Mandheling\nOrigin,"Hacienda La Esmeralda, Panama","Yirgacheffe region, Ethiopia","Blue Mountains, Jamaica","Mandheling, Sumatra, Indonesia"\nAltitude (meters),"1,500-2,000","1,700-2,200","910-1,700","1,000-1,500"\nFlavor Profile,"Jasmine, bergamot, honey, peaches, berries","Floral, lemon, bergamot, jasmine, honey","Sweet herbs, nuts, chocolate","Dark chocolate, cedar, tropical fruit"\nAcidity,Brigh