In [3]:
# ollama pull llama3.2
import json
from pprint import pprint

from ollama import chat
from ollama import ChatResponse

from qdrant_client import QdrantClient, models
from fastembed import TextEmbedding

from tqdm.auto import tqdm


# Settings shared by the indexing and search cells below.
# Vector size the collection is created with; it has to match the output size of MODEL_HANDLE.
EMBEDDING_DIMENSIONALITY = 512
# Embedding model used for both the stored documents and the search queries.
MODEL_HANDLE = "jinaai/jina-embeddings-v2-small-en"
# Name of the Qdrant collection that holds the FAQ vectors.
COLLECTION_NAME = "rag-vector-search"


```
docker pull qdrant/qdrant

docker run -p 6333:6333 -p 6334:6334 \
   -v "$(pwd)/qdrant_storage:/qdrant/storage:z" \
   qdrant/qdrant
```

In [5]:
def prepare_document(document_path: str) -> list:
    """Load the course FAQ JSON and flatten it into a single list of documents.

    The file is expected to hold a list of objects, each with a "course" name
    and a "documents" list. Every returned document is a copy of the original
    entry with the owning course name stored under the "course" key.

    Args:
        document_path: Path to the JSON file.

    Returns:
        A flat list of document dicts, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If a course entry lacks "course" or "documents".
    """
    # Read as UTF-8 explicitly: the FAQ text contains non-ASCII characters
    # (e.g. the ’ apostrophe), and the platform default encoding is not UTF-8
    # everywhere.
    with open(document_path, "r", encoding="utf-8") as f_in:
        raw_documents = json.load(f_in)

    return [
        {**doc, "course": course_dict["course"]}
        for course_dict in raw_documents
        for doc in course_dict["documents"]
    ]

In [9]:
# Load the FAQ entries of every course as one flat list of dicts.
documents = prepare_document("../01_introduction/documents.json")

# Peek at one entry to see which fields each document carries.
documents[100]

{'text': 'In this section of the course, the 5432 port of pgsql is mapped to your computer’s 5432 port. Which means you can access the postgres database via pgcli directly from your computer.\nSo No, you don’t need to run it inside another container. Your local system will do.',
 'section': 'Module 1: Docker and Terraform',
 'question': 'PGCLI - INKhould we run pgcli inside another docker container?',
 'course': 'data-engineering-zoomcamp'}

In [2]:
# Client for the Qdrant server on port 6333, the port published by the docker command above.
qd_client = QdrantClient("http://localhost:6333") #connecting to local Qdrant instance

In [4]:
# Create the collection only when it is missing. Qdrant keeps its data in the
# mounted docker volume, so after a kernel restart the collection is still there
# and an unconditional create_collection call would fail on the existing name.
if not qd_client.collection_exists(collection_name=COLLECTION_NAME):
    qd_client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=models.VectorParams(
            size=EMBEDDING_DIMENSIONALITY,  # Dimensionality of the vectors
            distance=models.Distance.COSINE,  # Distance metric for similarity search
        ),
    )

True

In [10]:
# Build one point per FAQ entry. The vector is passed as a models.Document, so the
# question + answer text is embedded with MODEL_HANDLE (locally, via FastEmbed)
# instead of being precomputed in this cell.
# The index is named `point_id` rather than `id` so the builtin id() is not
# shadowed for the rest of the notebook.
points = [
    models.PointStruct(
        id=point_id,
        vector=models.Document(
            text=doc["question"] + " " + doc["text"],
            model=MODEL_HANDLE,
        ),
        payload=doc,
    )
    for point_id, doc in enumerate(documents)
]

# Point ids are the list positions, so re-running this cell overwrites the same
# points instead of adding duplicates.
qd_client.upsert(
    collection_name=COLLECTION_NAME,
    points=points
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [11]:
# Index the "course" payload field; vector_search filters on this field for
# every query.
qd_client.create_payload_index(
    collection_name=COLLECTION_NAME,
    field_name="course",
    field_schema="keyword" # exact matching on string metadata fields
)

UpdateResult(operation_id=2, status=<UpdateStatus.COMPLETED: 'completed'>)

In [24]:
def vector_search(query, course="mlops-zoomcamp", limit=5):
    """Return the payloads of the closest stored documents for one course.

    Args:
        query: Free-text question; it is embedded with MODEL_HANDLE.
        course: Only points whose "course" payload equals this value are searched.
        limit: Maximum number of matches to return.

    Returns:
        A list of payload dicts, in the order Qdrant returned the points.
    """
    embedded_query = models.Document(text=query, model=MODEL_HANDLE)

    # Restrict the search to a single course via an exact match on the payload.
    course_filter = models.Filter(
        must=[
            models.FieldCondition(
                key="course",
                match=models.MatchValue(value=course),
            )
        ]
    )

    response = qd_client.query_points(
        collection_name=COLLECTION_NAME,
        query=embedded_query,
        query_filter=course_filter,
        limit=limit,
        with_payload=True,  # the payload is what callers consume
    )

    return [hit.payload for hit in response.points]

In [13]:
def llm(question, results):
    """Ask the local llama3.2 model to answer `question` from the retrieved docs.

    Args:
        question: The user's question, inserted into the prompt as-is.
        results: Iterable of dicts with "section", "question" and "text" keys;
            each one is rendered into the CONTEXT part of the prompt.

    Returns:
        The text content of the model's reply.
    """
    # Spelled out line by line so the template's whitespace (two-space indents,
    # trailing spaces) stays exactly as it was authored.
    prompt_template = (
        "\n"
        "  you're a course teaching assistant. answer the QUESTION based on the context below. \n"
        "  use only the facts in the context to answer the question. \n"
        "  if you don't know the answer, say \"I don't know\".\n"
        "\n"
        "  QUESTION: {question}\n"
        "\n"
        "  CONTEXT: {context}\n"
        "  "
    )

    # One "section / question / text" entry per retrieved document.
    context = "".join(
        f"\nsection: {doc['section']}\n"
        f"question: {doc['question']}\n"
        f"text: {doc['text']}\n\n"
        for doc in results
    )

    prompt = prompt_template.format(question=question, context=context).strip()

    user_message = {
        "role": "user",
        "content": prompt,
    }
    response: ChatResponse = chat(model="llama3.2", messages=[user_message])

    return response.message.content

In [14]:
def rag(question: str,
        num_results: int = 5,
        course: str = "data-engineering-zoomcamp"
        ) -> str:
    """Answer `question` by retrieving course FAQ entries and prompting the LLM.

    Args:
        question: The user's question.
        num_results: How many documents to retrieve as context.
        course: Course whose FAQ entries are searched.

    Returns:
        The LLM's answer, or "I don't know" when retrieval finds nothing.
    """
    hits = vector_search(question, course=course, limit=num_results)

    # Without any context there is nothing for the model to ground on.
    if not hits:
        return "I don't know"

    return llm(question, hits)

In [16]:
# Sample user question reused by the retrieval and RAG cells below.
question = "the course already started, can I still join?"

In [25]:
# Retrieval only: no course is passed, so vector_search uses its default ("mlops-zoomcamp").
results = vector_search(question)

In [None]:
# Pretty-print the retrieved payloads. pprint is imported with the rest of the
# imports in the first cell, so the notebook's dependencies live in one place.
pprint(results)

[{'course': 'mlops-zoomcamp',
  'question': 'What’s the difference between the 2023 and 2022 course?',
  'section': '+-General course questions',
  'text': 'The difference is the Orchestration and Monitoring modules. Those '
          'videos will be re-recorded. The rest should mostly be the same.\n'
          'Also all of the homeworks will be changed for the 2023 cohort.'},
 {'course': 'mlops-zoomcamp',
  'question': 'Is the AWS free tier enough for doing this course?',
  'section': 'Module 1: Introduction',
  'text': 'For many parts - yes. Some things like kinesis are not in AWS free '
          'tier, but you can do it locally with localstack.'},
 {'course': 'mlops-zoomcamp',
  'question': 'Will there be a 2024 Cohort? When will the 2024 cohort start?',
  'section': '+-General course questions',
  'text': 'Yes, it will start in May 2024'},
 {'course': 'mlops-zoomcamp',
  'question': 'Are we free to choose our own topics for the final project?',
  'section': '+-General course quest

In [28]:
# End-to-end run: retrieval uses rag's default course ("data-engineering-zoomcamp"),
# which differs from the vector_search default used in the retrieval-only cell above.
rag(question)

'Based on the context, I\'d say that yes, you can still join the course after the start date has passed. The text states: "Yes, even if you don\'t register, you\'re still eligible to submit the homeworks." This implies that joining the course at any time is possible, as long as you complete the assignments and meet the project deadlines.'