In [15]:
from pymilvus import MilvusClient

mv_client = MilvusClient("course_info.db")

In [16]:
if mv_client.has_collection(collection_name="course_info"):
    mv_client.drop_collection(collection_name="course_info")
mv_client.create_collection(
    collection_name="course_info",
    dimension=768,  # The vectors we will use in this demo has 768 dimensions
)

In [17]:
import json 
with open('document.json') as f_in: 
    docs_raw = json.load(f_in)
documents = []
for course_dict in docs_raw : 
    for doc in course_dict['documents'] : 
        doc['course'] = course_dict['course']
        documents.append(doc)

In [18]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("all-mpnet-base-v2")



In [19]:
#created the dense vector using the pre-trained model
operations = []
for i, doc in enumerate(documents):
    doc["id"] = i
    doc["vector"] = model.encode(doc["text"]).tolist()
    operations.append(doc)

In [20]:
print("Data has", len(documents), "entities, each with fields: ", documents[0].keys())
print("Vector dim:", len(documents[0]["vector"]))

Data has 948 entities, each with fields:  dict_keys(['text', 'section', 'question', 'course', 'id', 'vector'])
Vector dim: 768


In [21]:
res = mv_client.insert(collection_name="course_info", data=documents)

print(res)

{'insert_count': 948, 'ids': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 

In [36]:
def elastic_search(query) : 
    query_vectors = [model.encode(query)]
    res = mv_client.search(
        collection_name="course_info",  
        data=query_vectors,  
        filter="course == 'data-engineering-zoomcamp'",
        limit=5,  
        output_fields=["text", "question", "section", "course"],  
    )
    return res[0]

In [42]:
def build_prompt(query, search_result) : 
    prompt_template = """
        You're a course teaching assistant. Answer the QUESTION based on the the CONTEXT. 
        Use only the facts from the CONTEXT when answering the QUESTION. 
        If the CONTEXT doesn't contain the answer, output NONE 

        QUESTION: {question}

        CONTEXT: 
        {context}
        """.strip()
    context = ""

    for doc in search_result: 
        entity = doc['entity']
        context = context + f"section: {entity['section']}\nquestion: {entity['question']}\nanswer: {entity['text']}\n\n"
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt
    

In [44]:
from openai import OpenAI

client = OpenAI(
    base_url='http://localhost:11434/v1/',
    api_key='ollama',)
def llm(prompt):
    response = client.chat.completions.create(
        model='phi3',
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [45]:
def rag(query): 
    results = elastic_search(query)
    prompt = build_prompt(query, results)
    answer = llm(prompt)
    return answer

In [48]:
print(rag('I just discovered the course. Can i still join ?'))

Based on the CONTEXT provided:

It appears that since you're just now discovering or registering for this Data Engineering Bootcamp hosted by Coursera through DATAtalkSquare, there is still time to join if registration hasn't been processed yet as courses typically have a period during which they remain open. The context does not explicitly state whether it’s still possible to enroll after certain deadlines or policies are in place; therefore from this information alone I would say YES, you could likely register even though the course has started since registration usually opens before the start date of 15th Jan 2024 at 17h00 as per Question: "Course - When will the course start?" Here's your answer assuming a general policy for educational institutions allowing prospective students to still enroll until official class materials or sessions begin.

As there is no specific mention of deadlines, policies on joining after starting in other sections of questions within these provided conte