# 01 Embedding and Vector Store


In [1]:
import chromadb
from sentence_transformers import SentenceTransformer
import json
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Import documents
with open('data/scraped_offers.json', 'r', encoding='utf-8') as f:
    offers_document = json.load(f)

with open('data/program.json', 'r', encoding='utf-8') as f:
    program_document = json.load(f)



documents = [offers_document, program_document]


In [3]:
# Intiailze Sentence-Transformer model for embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')

# Create ChromaDB Client
chroma_client = chromadb.Client()

# Create a collection
collection = chroma_client.create_collection("winzerfest_knowledge_base")

In [4]:
# Ingest and embed documents
documents_to_add = [doc['content'] for doc in documents]
metadata = [{"source": doc['source']} for doc in documents]
ids = [f"doc_{i}" for i in range(len(documents_to_add))]

embeddings = model.encode(documents_to_add).tolist()

collection.add(
    documents=documents_to_add,
    embeddings=embeddings,
    metadatas=metadata,
    ids=ids
)

In [5]:
embeddings

[[-0.0027422525454312563,
  0.0015746186254546046,
  -0.002433980815112591,
  -0.10296400636434555,
  -0.03779926896095276,
  0.08825765550136566,
  0.01616535149514675,
  0.07990739494562149,
  0.013908357359468937,
  -0.01137692853808403,
  0.02115793339908123,
  -0.05596385523676872,
  0.0102284736931324,
  -0.06610066443681717,
  0.045756254345178604,
  0.002587226452305913,
  0.01633976772427559,
  0.026081601157784462,
  -0.0059870341792702675,
  -0.027741743251681328,
  -0.05436966195702553,
  -0.03965122997760773,
  -0.003339423332363367,
  0.032805513590574265,
  -0.009046432562172413,
  0.07593391090631485,
  -0.08780477941036224,
  0.06652767211198807,
  0.01623084396123886,
  -0.11867792159318924,
  -0.08469489961862564,
  0.10905313491821289,
  0.06167208403348923,
  -0.05787833407521248,
  0.02530154213309288,
  0.07578383386135101,
  -0.06346364319324493,
  0.0014143133303150535,
  -0.033111270517110825,
  0.08357656747102737,
  -0.018579022958874702,
  0.069100484251976

# 02 Integration with OpenAI Agent

In [6]:
def retrieve_context(user_query, n_results=3):
    """
    Finds the most relevant documents in the event knowledge base.
    """
    # Embed user query
    query_embedding = model.encode([user_query]).tolist()

    # Perform similarity search
    results = collection.query(
        query_embeddings=query_embedding,
        n_results=n_results
    )

    # Return relevant documents as single string
    context_str = "\n\n".join(results["documents"][0])

    return context_str


## Configure Agent 

In [7]:
load_dotenv(override=True)
google_api_key = os.getenv("GEMINI_API_KEY")
GEMINI_BASE_URL = os.getenv("GEMINI_BASE_URL")
gemini = OpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)

In [8]:
system_prompt = f"You are acting as a local guide for the Besigheimer Winzerfest. You are answering questions on the programm of the festival. \
    You are given context that you can use to answer questions. \
    Be friendly and engaging. If you don't know the answer, say so."


In [11]:
def chat(message, history):
    # Retrieve relevant context
    context = retrieve_context(message)

    # Constructing the final prompt
    final_prompt = f"{system_prompt} \
        Context: {context} \
        User question: {message}"

    messages = [{"role": "system", "content": system_prompt}]+ history + [{"role": "user", "content": final_prompt}]

    # get response
    response = gemini.chat.completions.create(
        model="gemini-2.5-flash-preview-05-20",
        messages=messages,
        temperature=0.7,
    )
    return response.choices[0].message.content
    

In [10]:
gr.ChatInterface(chat, type="messages").launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




Context: 01
Genusskonzept GmbH & Co.KG
A Freitag, B Samstag, C Sonntag, D Montag
A Freitag, B Samstag, C Sonntag, D Montag
Auf dem Kelterplatz

Angebot
Imbissgerichte, schwäbisch

---NEW ITEM---02
Genusskonzept GmbH & Co.KG
A Freitag, B Samstag, C Sonntag, D Montag, Gewölbekeller, Musik
A Freitag, B Samstag, C Sonntag, D Montag, Gewölbekeller, Musik
FASSKELLER
Angebot
Getränke
Musik
Bands und DJ

---NEW ITEM---03
Chorgemeinschaft Besigheim
C Sonntag
C Sonntag
Vereinszimmer Alte Kelter
Angebot
Kaffee + Kuchen von 14 bis 17 Uhr

---NEW ITEM---04
TSV Ottmarsheim, EK Besigheim Handball e.V., RSV Besigheim, HHC Walheim-Besigheim, Spvgg Besigheim, Chorgemeinschaft Besigheim
A Freitag, B Samstag, Bühne, C Sonntag, D Montag, Musik, Weinprobierstand
A Freitag, B Samstag, Bühne, C Sonntag, D Montag, Musik, Weinprobierstand
Weindorf unterm Schirm
Kelterplatz
Angebot
6 Weinstände mit unterschiedlichen Weinen der Felsengartenkellerei + Champagner aus Ay
Musik
Fr: Musikverein Stadtkapelle Besigheim,