In [1]:
from fastembed import TextEmbedding
from qdrant_client import QdrantClient, models

In [2]:
qd_client = QdrantClient("http://localhost:6333")

In [3]:
EMBEDDING_DIMENSIONALITY = 512
model_handle = "jinaai/jina-embeddings-v2-small-en"

In [4]:
# Q1 
documents: list[str] = [
    "I just discovered the course. Can I join now?"
]
model = TextEmbedding(model_name=model_handle)
embeddings = list(model.embed(documents))

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/367 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

onnx/model.onnx:   0%|          | 0.00/130M [00:00<?, ?B/s]

In [11]:
# Q5 enbedding with BAAI
query="I just discovered the course. Can I join now?"
model_handle = "BAAI/bge-small-en"
model = TextEmbedding(model_name=model_handle)
embeddings = list(model.embed(query))
embeddings[0].shape





Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/701 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

model_optimized.onnx:   0%|          | 0.00/133M [00:00<?, ?B/s]

(384,)

In [12]:
import requests 

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
docs_response = requests.get(docs_url)
documents_raw = docs_response.json()


In [32]:
print("Type of documents_raw:", type(documents_raw))
print("Length of documents_raw:", len(documents_raw))
print("\nFields in first document:")
for key in documents_raw[0].keys():
    print(f"- {key}")
print("\nFields in second document:")
for key in documents_raw[0]['documents'][0].keys():
    print(f"- {key}")

Type of documents_raw: <class 'list'>
Length of documents_raw: 3

Fields in first document:
- course
- documents

Fields in second document:
- text
- section
- question
- course


In [34]:
documents_raw[0]['documents'][0]

{'text': "The purpose of this document is to capture frequently asked technical questions\nThe exact day and hour of the course will be 15th Jan 2024 at 17h00. The course will start with the first  “Office Hours'' live.1\nSubscribe to course public Google Calendar (it works from Desktop only).\nRegister before the course starts using this link.\nJoin the course Telegram channel with announcements.\nDon’t forget to register in DataTalks.Club's Slack and join the channel.",
 'section': 'General course-related questions',
 'question': 'Course - When will the course start?',
 'course': 'data-engineering-zoomcamp'}

In [35]:


documents = []

for course in documents_raw:
    course_name = course['course']
    # if course_name != 'machine-learning-zoomcamp':
    #     continue
    #  documents = list(map(lambda  x: course_name+' '+x['question']+' '+x['text'], course['documents']))

    for doc in course['documents']:
      
        documents.append(doc)

In [37]:
documents[0]

{'text': "The purpose of this document is to capture frequently asked technical questions\nThe exact day and hour of the course will be 15th Jan 2024 at 17h00. The course will start with the first  “Office Hours'' live.1\nSubscribe to course public Google Calendar (it works from Desktop only).\nRegister before the course starts using this link.\nJoin the course Telegram channel with announcements.\nDon’t forget to register in DataTalks.Club's Slack and join the channel.",
 'section': 'General course-related questions',
 'question': 'Course - When will the course start?',
 'course': 'data-engineering-zoomcamp'}

In [39]:
collection_name = "zoomcamp-faq"
EMBEDDING_DIMENSIONALITY=384

# Check if collection exists and delete it
try:
    qd_client.get_collection(collection_name=collection_name)
    qd_client.delete_collection(collection_name=collection_name)
except:
    pass

# Create new collection
qd_client.create_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        size=EMBEDDING_DIMENSIONALITY,
        distance=models.Distance.COSINE
    )
)

True

In [40]:
qd_client.create_payload_index(
    collection_name=collection_name,
    field_name="course",
    field_schema="keyword"
)

UpdateResult(operation_id=1, status=<UpdateStatus.COMPLETED: 'completed'>)

In [41]:
points = []

for i, doc in enumerate(documents):
    text = doc['question'] + ' ' + doc['text']
    vector = models.Document(text=text, model=model_handle)
    point = models.PointStruct(
        id=i,
        vector=vector,
        payload=doc
    )
    points.append(point)

In [43]:
points[0]

PointStruct(id=0, vector=Document(text="Course - When will the course start? The purpose of this document is to capture frequently asked technical questions\nThe exact day and hour of the course will be 15th Jan 2024 at 17h00. The course will start with the first  “Office Hours'' live.1\nSubscribe to course public Google Calendar (it works from Desktop only).\nRegister before the course starts using this link.\nJoin the course Telegram channel with announcements.\nDon’t forget to register in DataTalks.Club's Slack and join the channel.", model='BAAI/bge-small-en', options=None), payload={'text': "The purpose of this document is to capture frequently asked technical questions\nThe exact day and hour of the course will be 15th Jan 2024 at 17h00. The course will start with the first  “Office Hours'' live.1\nSubscribe to course public Google Calendar (it works from Desktop only).\nRegister before the course starts using this link.\nJoin the course Telegram channel with announcements.\nDon’

In [42]:
qd_client.upsert(
    collection_name=collection_name,
    points=points
)

UpdateResult(operation_id=2, status=<UpdateStatus.COMPLETED: 'completed'>)

In [44]:
course = 'machine-learning-zoomcamp'
query_points = qd_client.query_points(
    collection_name=collection_name,
    query=models.Document(
        text=query,
        model=model_handle 
    ),
    query_filter=models.Filter( 
        must=[
            models.FieldCondition(
                key="course",
                match=models.MatchValue(value=course)
            )
        ]
    ),
    limit=5,
    with_payload=True
)

results = []

for point in query_points.points:
    results.append(point.payload)

In [49]:
# Q6 score of the first point
print(query_points.points[0].payload.get('text'))

Yes, you can. You won’t be able to submit some of the homeworks, but you can still take part in the course.
In order to get a certificate, you need to submit 2 out of 3 course projects and review 3 peers’ Projects by the deadline. It means that if you join the course at the end of November and manage to work on two projects, you will still be eligible for a certificate.


In [96]:
def vector_search(query,course=None):
    print('vector_search is used')
    
    query_params = {
        'collection_name': collection_name,
        'query': models.Document(
            text=query,
            model=model_handle 
        ),
        'limit': 5,
        'with_payload': True
    }
    
    if course is not None:
        query_params['query_filter'] = models.Filter(
            must=[
                models.FieldCondition(
                    key="course",
                    match=models.MatchValue(value=course)
                )
            ]
        )
    
    query_points = qd_client.query_points(**query_params)
    
    results = []
    
    for point in query_points.points:
        results.append(point.payload)
    
    return results

In [59]:
vector_search('how many questions are in the FAQ?')

vector_search is used


[{'text': 'Here',
  'section': 'Module 4: Deployment',
  'question': 'Where is the FAQ for Prefect questions?',
  'course': 'mlops-zoomcamp'},
 {'text': 'This is most likely that you interchanged the first step of the multiplication\nYou used  instead of\nAdded by Emmanuel Ikpesu',
  'section': '1. Introduction to Machine Learning',
  'question': 'Question 7: FINAL MULTIPLICATION not having 5 column',
  'course': 'machine-learning-zoomcamp'},
 {'text': 'Ans: about 7GB free for all the containers to be provisioned and then the psql still needs to run and ingest the taxi data, so maybe 10gb in total?',
  'section': 'Workshop 2 - RisingWave',
  'question': 'Setup - Qn: How much free disk space should we have? [source]',
  'course': 'data-engineering-zoomcamp'},
 {'text': 'TODO',
  'section': '10. Kubernetes and TensorFlow Serving',
  'question': 'How to get started with Week 10?',
  'course': 'machine-learning-zoomcamp'},
 {'text': 'TODO',
  'section': '8. Neural Networks and Deep Learnin

In [61]:
from dotenv import load_dotenv
from openai import OpenAI
load_dotenv()  # defaults to .env in the same directory

client = OpenAI()

response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "tell me a joke"}]
)
print(response.choices[0].message.content)

Why did the scarecrow win an award? 

Because he was outstanding in his field!


In [62]:
!wget https://raw.githubusercontent.com/alexeygrigorev/rag-agents-workshop/refs/heads/main/chat_assistant.py

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


--2025-07-03 14:18:28--  https://raw.githubusercontent.com/alexeygrigorev/rag-agents-workshop/refs/heads/main/chat_assistant.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 2606:50c0:8002::154, 2606:50c0:8003::154, 2606:50c0:8000::154, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|2606:50c0:8002::154|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3485 (3.4K) [text/plain]
Saving to: ‘chat_assistant.py’


2025-07-03 14:18:28 (7.62 MB/s) - ‘chat_assistant.py’ saved [3485/3485]



In [74]:
developer_prompt = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.

Use FAQ if your own knowledge is not sufficient to answer the question.

At the end of each response, ask the user a follow up question based on your answer.
""".strip()

In [89]:
search_tool = {
    "type": "function",
    "function": {
        "name": "vector_search",
        "description": "Search for documents in the knowledge base",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "The search query"
                }
            },
            "required": ["query"]
        }
    }
}

tools = [search_tool]

chat_messages = [
    {"role": "developer", "content": developer_prompt},
    {"role": "user", "content": query}
]

response = client.chat.completions.create(
    model='gpt-4o-mini',
    messages=chat_messages,
    tools=tools
)
print(response.choices[0].message.content)

None


In [98]:
import importlib
import chat_assistant
importlib.reload(chat_assistant)

tools = chat_assistant.Tools()


In [99]:
tools.add_tool(vector_search, search_tool)

tools.get_tools()


[{'type': 'function',
  'function': {'name': 'vector_search',
   'description': 'Search for documents in the knowledge base',
   'parameters': {'type': 'object',
    'properties': {'query': {'type': 'string',
      'description': 'The search query'}},
    'required': ['query']}}}]

In [100]:

developer_prompt = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.

Use FAQ if your own knowledge is not sufficient to answer the question.

At the end of each response, ask the user a follow up question based on your answer.
""".strip()

chat_interface = chat_assistant.ChatInterface()

chat = chat_assistant.ChatAssistant(
    tools=tools,
    developer_prompt=developer_prompt,
    chat_interface=chat_interface,
    client=client
)
chat.run()

vector_search is used


vector_search is used


vector_search is used


Chat ended.
