In [3]:
pip show pandas

Name: pandas
Version: 2.2.3
Summary: Powerful data structures for data analysis, time series, and statistics
Home-page: https://pandas.pydata.org
Author: 
Author-email: The Pandas Development Team <pandas-dev@python.org>
License: BSD 3-Clause License

Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
All rights reserved.

Copyright (c) 2011-2023, Open source contributors.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
  contributors may be u

In [None]:
import os
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from google import genai
from google.genai.types import EmbedContentConfig

# Gemini client configuration. The API key is taken from the GEMINI_API_KEY
# environment variable so that it never has to be written into the notebook.
api_key = os.getenv("GEMINI_API_KEY")
if api_key:
    client = genai.Client(api_key=api_key)
else:
    raise ValueError("Please set the GEMINI_API_KEY environment variable")

# Name of the embedding model used for both documents and queries.
embed_model = "text-embedding-004"

# Mock catalogue of Bangkok restaurants used as the retrieval corpus.
# Every entry has the same six keys, in this order:
#   name, description, price_range, address, cuisine, popular_dishes
restaurants = [
    dict(
        name="La Dotta",
        description="Authentic Italian restaurant specializing in handmade pasta dishes using imported ingredients from Italy. Located in Thonglor district of Bangkok.",
        price_range="$$",
        address="161/6 Thonglor Soi 9, Bangkok 10110",
        cuisine="Italian",
        popular_dishes=["Truffle Tagliatelle", "Cacio e Pepe", "Seafood Linguine"],
    ),
    dict(
        name="Peppina",
        description="Neapolitan pizza restaurant with wood-fired ovens imported from Italy. Features traditional Italian dishes and a wide selection of Italian wines.",
        price_range="$$",
        address="27/1 Sukhumvit Soi 33, Bangkok 10110",
        cuisine="Italian, Pizza",
        popular_dishes=["Margherita Pizza", "Burrata", "Tiramisu"],
    ),
    dict(
        name="L'Oliva",
        description="High-end Italian dining with focus on Northern Italian cuisine. Offers homemade pasta, risotto, and seafood specialties in the heart of Sukhumvit.",
        price_range="$$$",
        address="4 Sukhumvit Soi 36, Bangkok 10110",
        cuisine="Italian, Fine Dining",
        popular_dishes=["Risotto ai Funghi", "Osso Buco", "Branzino"],
    ),
    dict(
        name="Appia",
        description="Roman-inspired trattoria serving hearty Italian comfort food including porchetta and homemade pasta in a rustic setting in Sukhumvit Soi 31.",
        price_range="$$$",
        address="20/4 Sukhumvit Soi 31, Bangkok 10110",
        cuisine="Italian, Roman",
        popular_dishes=["Porchetta", "Carbonara", "Saltimbocca"],
    ),
    dict(
        name="Pizza Massilia",
        description="Upscale Italian restaurant specializing in gourmet pizzas with premium imported ingredients and authentic Italian recipes with a modern twist.",
        price_range="$$$",
        address="Sukhumvit Soi 49, Bangkok 10110",
        cuisine="Italian, Pizza",
        popular_dishes=["Truffle Pizza", "Parma Pizza", "Seafood Pizza"],
    ),
    dict(
        name="Gianni's",
        description="Classic Italian restaurant in Sukhumvit offering traditional dishes from various regions of Italy, with an extensive wine collection.",
        price_range="$$$",
        address="34/1 Sukhumvit Soi 23, Bangkok 10110",
        cuisine="Italian",
        popular_dishes=["Lasagna", "Veal Milanese", "Panna Cotta"],
    ),
    dict(
        name="Som Tam Nua",
        description="Popular Thai restaurant specializing in Northeastern Thai cuisine, especially spicy papaya salad and grilled chicken. Located in Siam Square.",
        price_range="$",
        address="392/14 Siam Square Soi 5, Bangkok 10330",
        cuisine="Thai, Isaan",
        popular_dishes=["Som Tam", "Gai Yang", "Larb Moo"],
    ),
    dict(
        name="Pad Thai Ekkamai",
        description="Local favorite serving authentic pad thai and other traditional Thai noodle dishes in the trendy Ekkamai neighborhood.",
        price_range="$",
        address="337/5 Ekkamai Soi 2, Bangkok 10110",
        cuisine="Thai",
        popular_dishes=["Pad Thai", "Pad See Ew", "Guay Teow"],
    ),
    dict(
        name="Isaan Der",
        description="Northeastern Thai cuisine featuring grilled meats, sticky rice, and spicy salads served in a casual atmosphere near Asok.",
        price_range="$",
        address="5/8 Sukhumvit Soi 20, Bangkok 10110",
        cuisine="Thai, Isaan",
        popular_dishes=["Nam Tok Moo", "Som Tam", "Moo Ping"],
    ),
    dict(
        name="Gaggan",
        description="Progressive Indian restaurant offering innovative tasting menus with modern techniques while maintaining authentic flavors. Located in Lumpini.",
        price_range="$$$$",
        address="68/1 Soi Langsuan, Ploenchit Road, Bangkok 10330",
        cuisine="Indian, Molecular Gastronomy",
        popular_dishes=["Yogurt Explosion", "Charcoal", "Pork Vindaloo"],
    ),
]

def get_embedding(text, task_type=None, output_dimensionality=768):
    """Embed a single piece of text with the Gemini embedding model.

    Args:
        text: Non-empty string to embed.
        task_type: Embedding task type forwarded to the API, e.g.
            "RETRIEVAL_QUERY" for user queries or "RETRIEVAL_DOCUMENT" for
            corpus entries. When None, the legacy heuristic is used for
            backward compatibility: texts shorter than 100 characters are
            treated as queries, everything else as documents. Prefer passing
            it explicitly, because length is not a reliable signal (a long
            question is still a query).
        output_dimensionality: Requested size of the embedding vector.

    Returns:
        numpy array built from the values of the first returned embedding.

    Raises:
        ValueError: If `text` is not a string or contains only whitespace.
    """
    if not isinstance(text, str) or not text.strip():
        raise ValueError("text must be a non-empty string")

    if task_type is None:
        task_type = "RETRIEVAL_QUERY" if len(text) < 100 else "RETRIEVAL_DOCUMENT"

    response = client.models.embed_content(
        model=embed_model,
        contents=[text],
        config=EmbedContentConfig(
            task_type=task_type,
            output_dimensionality=output_dimensionality,
        ),
    )
    return np.array(response.embeddings[0].values)

def create_restaurant_embeddings():
    """Embed every entry of `restaurants` as a retrieval document.

    Each restaurant is embedded as "<name>: <description>" with one API call
    per entry, so this is the slow step of the notebook.

    Returns:
        List of embedding arrays, in the same order as `restaurants`.
    """
    print("Generating restaurant embeddings... (this may take a while the first time)")

    total = len(restaurants)
    restaurant_embeddings = []
    for position, restaurant in enumerate(restaurants, start=1):
        print(f"  Embedding restaurant {position}/{total}: {restaurant['name']}")
        text = f"{restaurant['name']}: {restaurant['description']}"
        # Corpus entries are always documents, regardless of their length.
        restaurant_embeddings.append(get_embedding(text, task_type="RETRIEVAL_DOCUMENT"))

    print("Embedding generation complete!\n")
    return restaurant_embeddings

def search_restaurants(query, restaurant_embeddings, top_k=5):
    """Rank restaurants by cosine similarity between `query` and their embeddings.

    Args:
        query: User query text.
        restaurant_embeddings: Sequence of embedding vectors where position i
            belongs to `restaurants[i]` (as returned by
            `create_restaurant_embeddings`).
        top_k: Maximum number of results to return.

    Returns:
        List of dicts {"restaurant": <restaurant dict>, "similarity": <score>},
        sorted by similarity, highest first, truncated to `top_k` entries.

    Raises:
        ValueError: If the query is empty, or if there are more embeddings
            than restaurants (the two can no longer be matched by position).
    """
    print(f"Generating query embedding for: '{query}'")

    # The user input is always a query, however long it is.
    query_embedding = get_embedding(query, task_type="RETRIEVAL_QUERY")

    print("Calculating similarities...")

    if len(restaurant_embeddings) == 0:
        return []
    if len(restaurant_embeddings) > len(restaurants):
        raise ValueError(
            f"Got {len(restaurant_embeddings)} embeddings for {len(restaurants)} restaurants; "
            "regenerate the embeddings with create_restaurant_embeddings()"
        )

    # One vectorised call: row 0 holds the similarity of the query to every restaurant.
    scores = cosine_similarity([query_embedding], restaurant_embeddings)[0]

    similarities = [
        {"restaurant": restaurant, "similarity": score}
        for restaurant, score in zip(restaurants, scores)
    ]

    # Sort by similarity (highest first); sorted() is stable, so ties keep catalogue order.
    sorted_results = sorted(similarities, key=lambda x: x["similarity"], reverse=True)

    return sorted_results[:top_k]



## INGESTION

In [9]:
# Embedding: text -> vector
# e.g. "Hello" -> [v1, v2, v3, ..., v768]

# 10 restaurants (text) -> 10 vectors, each with 768 dimensions.
# Asserted instead of evaluated and discarded: a cell only displays its last expression.
assert np.shape(restaurant_embeddings) == (len(restaurants), 768)
restaurant_embeddings[2].shape

(768,)

In [12]:
# Displays every embedding vector in full, so the output is very long;
# slice it (e.g. restaurant_embeddings[0][:5]) for a short preview.
restaurant_embeddings

[array([ 3.42046700e-02, -3.66739220e-02,  1.76429100e-03, -4.26758600e-02,
         1.73505560e-02, -5.93082050e-02, -8.16143100e-03, -3.50233100e-02,
         5.45525660e-02,  6.25833800e-02,  1.23849460e-02,  4.16562630e-02,
         1.67566600e-02,  1.24959150e-02, -8.47450300e-03, -3.19203960e-02,
         1.43453390e-03,  7.31722640e-02, -7.53072100e-02,  2.96198150e-02,
         9.14957150e-02, -7.13950260e-04,  6.78331200e-02, -4.66819670e-02,
        -7.83777400e-04,  9.59893200e-03, -3.08511100e-03, -3.09209430e-02,
        -4.10021900e-02, -2.10226100e-02,  2.57470700e-02,  7.49240700e-02,
        -3.48964370e-02, -3.37579620e-02, -1.96136240e-02,  5.09174830e-04,
        -4.83462260e-03, -1.75169700e-02,  1.98918770e-02, -6.61060100e-02,
        -1.04637360e-02,  6.51097300e-02,  1.16587510e-02,  4.03935720e-02,
        -6.28644750e-02, -2.60530450e-02, -1.53108560e-02,  3.50546540e-02,
         1.96576800e-02,  3.02127700e-02, -2.71799110e-03,  2.90595960e-02,
        -5.4

In [5]:
# Ingestion step: embed every restaurant once and keep the vectors in memory.
restaurant_embeddings = create_restaurant_embeddings()


Generating restaurant embeddings... (this may take a while the first time)
  Embedding restaurant 1/10: La Dotta
  Embedding restaurant 2/10: Peppina
  Embedding restaurant 3/10: L'Oliva
  Embedding restaurant 4/10: Appia
  Embedding restaurant 5/10: Pizza Massilia
  Embedding restaurant 6/10: Gianni's
  Embedding restaurant 7/10: Som Tam Nua
  Embedding restaurant 8/10: Pad Thai Ekkamai
  Embedding restaurant 9/10: Isaan Der
  Embedding restaurant 10/10: Gaggan
Embedding generation complete!



In [None]:
# In a real system these vectors would be stored in a vector database or index
# (e.g. ChromaDB, Milvus, FAISS, Elasticsearch) rather than in a Python list.
# Preview the first values of one vector instead of dumping every array.
restaurant_embeddings[0][:5]

# Semantic Search

In [10]:
# NOTE(review): `query` is only assigned in the following cell, so on a fresh
# kernel this cell raises NameError.
query

NameError: name 'query' is not defined

In [11]:
# Semantic search demo: embed a mixed English/Thai user query, rank the
# restaurants against it and print the best matches with their scores.
query = "indian food หน่อย"
results = search_restaurants(query, restaurant_embeddings)

print(f"\nTop {len(results)} matching restaurants:")
for rank, hit in enumerate(results, start=1):
    matched = hit["restaurant"]
    score = hit["similarity"]
    print(f"\n{rank}. {matched['name']} (Relevance: {score:.4f})")
    print(f"   {matched['description']}")

Generating query embedding for: 'indian food หน่อย'
Calculating similarities...

Top 5 matching restaurants:

1. Gaggan (Relevance: 0.5599)
   Progressive Indian restaurant offering innovative tasting menus with modern techniques while maintaining authentic flavors. Located in Lumpini.

2. Isaan Der (Relevance: 0.4828)
   Northeastern Thai cuisine featuring grilled meats, sticky rice, and spicy salads served in a casual atmosphere near Asok.

3. Som Tam Nua (Relevance: 0.4493)
   Popular Thai restaurant specializing in Northeastern Thai cuisine, especially spicy papaya salad and grilled chicken. Located in Siam Square.

4. Pad Thai Ekkamai (Relevance: 0.4449)
   Local favorite serving authentic pad thai and other traditional Thai noodle dishes in the trendy Ekkamai neighborhood.

5. Pizza Massilia (Relevance: 0.4339)
   Upscale Italian restaurant specializing in gourmet pizzas with premium imported ingredients and authentic Italian recipes with a modern twist.


In [15]:
def _format_context(context):
    """Render retrieved context as plain text suitable for the prompt."""
    if isinstance(context, str):
        return context
    if not isinstance(context, (list, tuple)):
        return str(context)

    fields = (
        ("Description", "description"),
        ("Cuisine", "cuisine"),
        ("Price range", "price_range"),
        ("Address", "address"),
        ("Popular dishes", "popular_dishes"),
    )
    entries = []
    for rank, item in enumerate(context, start=1):
        restaurant = item.get("restaurant") if isinstance(item, dict) else None
        if not isinstance(restaurant, dict):
            # Unknown item shape: fall back to its string form.
            entries.append(f"{rank}. {item}")
            continue
        lines = [f"{rank}. {restaurant.get('name', 'Unknown')}"]
        for label, key in fields:
            value = restaurant.get(key)
            if not value:
                continue
            if isinstance(value, (list, tuple)):
                value = ", ".join(str(v) for v in value)
            lines.append(f"   {label}: {value}")
        entries.append("\n".join(lines))
    return "\n\n".join(entries)

def generate_response(query, context, model='gemini-2.0-flash'):
    """Generate an answer to `query` with Gemini, grounded in `context`.

    This is the "augmented generation" half of RAG: the retrieved restaurant
    information is placed in the prompt and the model is told to answer only
    from it.

    Args:
        query: The user's question.
        context: Retrieved information. A string is used as-is. A list of
            search results ({"restaurant": {...}, "similarity": ...}) is
            rendered as a readable numbered list, so that raw Python reprs
            (such as np.float64(...)) do not end up in the prompt.
        model: Name of the generation model to call.

    Returns:
        The `text` attribute of the model response.
    """
    context_text = _format_context(context)

    prompt = f"""
You are a helpful restaurant recommendation assistant for Bangkok.
Use the provided restaurant information to answer the user's query.
Only recommend restaurants from the information provided.
If the query asks for something not in the provided information, politely indicate 
that you don't have that specific information but suggest the closest alternatives.

USER QUERY: {query}

RESTAURANT INFORMATION:
{context_text}

Please provide a helpful response that directly answers the user's query based on the restaurant information above.
Include specific details about the restaurants where relevant, such as popular dishes, location, and price range.
"""

    response = client.models.generate_content(
        model=model,
        contents=prompt,
    )

    return response.text

# RAG

In [16]:
# User query (Thai/English mix, roughly: "Which restaurants have indian food?")
query = "ร้านอาหารอะไรมี indian food?"
# RAG pipeline: vector search for matching restaurants, then generate an answer from them

def retrieval_augmented_generation(query, embeddings=None, top_k=5):
    """Answer `query` with retrieval-augmented generation (RAG).

    Args:
        query: The user's question.
        embeddings: Restaurant embeddings to search. Defaults to the
            notebook-level `restaurant_embeddings` when None.
        top_k: Number of retrieved restaurants passed to the model as context.

    Returns:
        Tuple (response, restaurant_results): the generated answer and the
        search results it was grounded on.
    """
    if embeddings is None:
        # Backward-compatible default: fall back to the notebook-level variable.
        embeddings = restaurant_embeddings

    # 1-2. Retrieval: embed the query and find the closest restaurants
    restaurant_results = search_restaurants(query, embeddings, top_k=top_k)

    # 3. Augmentation and generation
    print("Generating response...")
    response = generate_response(query, restaurant_results)
    return response, restaurant_results

# Run the pipeline for the query above, then print the generated answer
# followed by the raw retrieval results it was grounded on.
response, restaurant_results = retrieval_augmented_generation(query)
print("GEMINI RESPONSE: ", response)
print("Search Results", restaurant_results)

Generating query embedding for: 'ร้านอาหารอะไรมี indian food?'
Calculating similarities...
Generating response...
GEMINI RESPONSE:  ร้านอาหารที่มีอาหารอินเดียคือ Gaggan ครับ เป็นร้านอาหารอินเดียแบบ Progressive ที่นำเสนอ tasting menu ที่สร้างสรรค์ด้วยเทคนิคที่ทันสมัย แต่ยังคงรสชาติแบบดั้งเดิมไว้ ร้านตั้งอยู่ในย่านลุมพินี ราคาอยู่ในช่วง $$$$ เมนูแนะนำคือ Yogurt Explosion, Charcoal และ Pork Vindaloo ครับ

Search Results [{'restaurant': {'name': 'Gaggan', 'description': 'Progressive Indian restaurant offering innovative tasting menus with modern techniques while maintaining authentic flavors. Located in Lumpini.', 'price_range': '$$$$', 'address': '68/1 Soi Langsuan, Ploenchit Road, Bangkok 10330', 'cuisine': 'Indian, Molecular Gastronomy', 'popular_dishes': ['Yogurt Explosion', 'Charcoal', 'Pork Vindaloo']}, 'similarity': np.float64(0.567949704344598)}, {'restaurant': {'name': 'Isaan Der', 'description': 'Northeastern Thai cuisine featuring grilled meats, sticky rice, and spicy salads ser

In [None]:
# Exercise: think of this as a chatbot that is grounded in a set of documents. Choose a dataset you like and experiment with it,
# e.g. do Q&A with your chatbot grounded in your own data.
