In [1]:
from openai import OpenAI

In [2]:
# Point the OpenAI client at a server on localhost:11434 instead of the hosted API.
# NOTE(review): the 'ollama' key looks like a placeholder rather than a real secret — confirm.
client = OpenAI(base_url="http://localhost:11434/v1/", api_key="ollama")

In [3]:
# Baseline: ask the "phi3" model the question directly, without any retrieved context.
response = client.chat.completions.create(
    messages=[{"role": "user", "content": "Is it too late to join the course?"}],
    model="phi3",
)

In [4]:
# Show the message text of the first choice in the response.
print(response.choices[0].message.content)

 To determine if it's not too late to join a particular course, you would need to consider several factors:

1. **Course Enrollment Deadlines**: Check the specific dates by which enrollments are closed for that course. If today is before this deadline, then it isn't too late yet.

2. **Online vs. On-campus Courses**: Online courses typically have more flexible enrollment periods compared to on-campus courses because they don't require physical attendance during a specific term or semester.

3. **Backup Options**: If the course is fully booked, see if there are alternative sections available for the same content or an equivalent course that fits your schedule and learning goals.

4. **University Policies**: Some institutions might have provisions such as late enrollment policies, where they allow students to join a course at certain periods throughout its duration. However, this could affect tuition fees and scheduling options.

5. **Discuss with the Instructor or Administrator**: Conta

In [4]:
#!wget https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py

In [5]:
import minsearch

In [6]:
import json

In [7]:
# Load the raw FAQ dump. The encoding is passed explicitly because the text
# contains non-ASCII characters (e.g. curly quotes) and the platform default
# encoding is not UTF-8 everywhere. Assumes the file is UTF-8 encoded.
with open('documents.json', 'rt', encoding='utf-8') as f_in:
    docs_raw = json.load(f_in)

In [8]:
# Flatten the per-course FAQ lists into a single list of documents, tagging
# each document with the course it came from. New dicts are built so the
# entries inside `docs_raw` are not modified by this cell.
documents = [
    {**doc, 'course': course_dict['course']}
    for course_dict in docs_raw
    for doc in course_dict['documents']
]

In [9]:
# Spot-check one flattened record (index 500 is an arbitrary pick).
documents[500]

{'text': 'Q: “In lesson 2.8 why is y_pred different from y? After all, we trained X_train to get the weights that when multiplied by X_train should give exactly y, or?”\nA: linear regression is a pretty simple model, it neither can nor should fit 100% (nor any other model, as this would be the sign of overfitting). This picture might illustrate some intuition behind this, imagine X is a single feature:\nAs our model is linear, how would you draw a line to fit all the "dots"?\nYou could "fit" all the "dots" on this pic using something like scipy.optimize.curve_fit (non-linear least squares) if you wanted to, but imagine how it would perform on previously unseen data.\nAdded by Andrii Larkin',
 'section': '2. Machine Learning for Regression',
 'question': 'Why linear regression doesn’t provide a “perfect” fit?',
 'course': 'machine-learning-zoomcamp'}

In [10]:
from minsearch import Index

# "question", "text" and "section" are registered as text fields; "course" is
# registered as a keyword field and is used for filtering in the search calls below.
index = Index(
    keyword_fields=['course'],
    text_fields=['question', 'text', 'section'],
)

In [11]:
# Fit the index on the flattened documents; the call's return value is shown as the cell output.
index.fit(documents)

<minsearch.Index at 0x781cb01aa8f0>

In [12]:
# Example user question used for the ad-hoc search and prompt cells below.
q = 'The course has already started; can I still enroll?'

In [13]:
# Per-field boost factors passed to the search: 3.0 for "question", 0.5 for "section".
boost = {"question": 3.0, "section": 0.5}

# Top five matches for `q`, restricted to the machine-learning-zoomcamp course.
results = index.search(
    num_results=5,
    boost_dict=boost,
    filter_dict={"course": "machine-learning-zoomcamp"},
    query=q,
)

In [14]:
prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database. 
Use only the facts from the CONTEXT when answering the QUESTION.
If the CONTEXT doesn't contain the answer, output NONE

QUESTION: {question}

CONTEXT: {context}
""".strip()

# One "section / question / answer" entry per retrieved document, each followed
# by a blank line. The label is "section: " (the earlier "section: : " was a typo).
context = "".join(
    f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
    for doc in results
)

# The final strip removes the trailing blank line left by the last entry.
prompt = prompt_template.format(question=q, context=context).strip()

In [15]:
# Inspect the fully rendered prompt before sending it to the model.
print(prompt)

You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database. 
Use only the facts from the CONTEXT when answering the QUESTION.
If the CONTEXT doesn't contain the answer, output NONE

QUESTION: The course has already started; can I still enroll?

CONTEXT: section: : General course-related questions
question: The course has already started. Can I still join it?
answer: Yes, you can. You won’t be able to submit some of the homeworks, but you can still take part in the course.
In order to get a certificate, you need to submit 2 out of 3 course projects and review 3 peers’ Projects by the deadline. It means that if you join the course at the end of November and manage to work on two projects, you will still be eligible for a certificate.

section: : General course-related questions
question: I don't know math. Can I take the course?
answer: Yes! We'll cover some linear algebra in the course, but in general, there will be very few formulas, mostly code.

In [16]:
def search(query, course='data-engineering-zoomcamp', num_results=5):
    """Search the in-memory `index` for FAQ entries matching `query`.

    Args:
        query: Free-text user question.
        course: Value the "course" field must match. Defaults to
            'data-engineering-zoomcamp', the value that was previously hard-coded.
        num_results: Maximum number of results requested from the index.

    Returns:
        Whatever `index.search` returns for these arguments.
    """
    # Per-field boost factors: 3.0 for "question", 0.5 for "section".
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
    )

In [17]:
def build_prompt(query, search_results):
    """Render the LLM prompt for `query` from the retrieved FAQ entries.

    Args:
        query: The user's question, inserted after "QUESTION:".
        search_results: Iterable of dicts, each providing the keys
            'section', 'question' and 'text'.

    Returns:
        The prompt string with leading and trailing whitespace stripped. With
        no search results the prompt ends with "CONTEXT:".

    Raises:
        KeyError: If a search result lacks one of the required keys.
    """
    prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database. 
Use only the facts from the CONTEXT when answering the QUESTION.
If the CONTEXT doesn't contain the answer, output NONE

QUESTION: {question}

CONTEXT: {context}
""".strip()

    # One "section / question / answer" entry per document, each followed by a
    # blank line. The label is "section: " (the earlier "section: : " was a typo).
    context = "".join(
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
        for doc in search_results
    )

    # The final strip removes the trailing blank line left by the last entry.
    return prompt_template.format(question=query, context=context).strip()

In [18]:
def llm(prompt, model="phi3"):
    """Send `prompt` as a single user message and return the model's reply text.

    Args:
        prompt: Full prompt text to send.
        model: Model name passed to the chat completions endpoint. Defaults to
            "phi3", the value that was previously hard-coded.

    Returns:
        The message content of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )

    return response.choices[0].message.content

In [55]:
query = "How do I run Kafka?"

# NOTE(review): this cell's execution count (55) is out of sequence with the
# surrounding cells, and `rag` is defined again further down on top of
# `elastic_search`; that later definition shadows this one.
def rag(query):
    """Answer `query` via search -> build_prompt -> llm and return the reply."""
    return llm(build_prompt(query, search(query)))

In [56]:
# `search` filters on 'data-engineering-zoomcamp', so the context for this answer comes from that course.
rag('The course has already started, can I still enroll?')

' Yes, even if you don\'t register before the start date, you\'re still eligible to submit the homeworks. Be aware that there will be deadlines for turning in the final projects.\n\n(Note: The provided answer does not explicitly state "enroll" after the course has started, but it implies a possibility of late submission, which might be considered as an analogous way of answering the question about enrollment post start.)'

In [19]:
from elasticsearch import Elasticsearch

In [20]:
# Client for an Elasticsearch node expected at localhost:9200.
es_client = Elasticsearch('http://localhost:9200')

In [21]:
# Connectivity check: displays the server's name and version details.
es_client.info()

ObjectApiResponse({'name': '6e57ca44bcbd', 'cluster_name': 'docker-cluster', 'cluster_uuid': '6w1Eq5HJTS2uqs76wVL7Sg', 'version': {'number': '8.4.3', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '42f05b9372a9a4a470db3b52817899b99a76ee73', 'build_date': '2022-10-04T07:17:24.662462378Z', 'build_snapshot': False, 'lucene_version': '9.3.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

In [22]:
# Index definition: a single shard with no replicas. The three free-text fields
# are mapped as "text"; "course" is mapped as "keyword" and is used in a term
# filter by `elastic_search`.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            "course": {"type": "keyword"}
        }
    }
}

index_name = "course-questions"

# Drop any index left over from a previous run so this cell can be re-executed:
# creating an index that already exists raises an error, and re-running the
# indexing loop against an old index would presumably add every document again.
# `ignore_unavailable=True` makes the delete a no-op on a fresh cluster.
es_client.indices.delete(index=index_name, ignore_unavailable=True)

es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'course-questions'})

In [23]:
from tqdm.auto import tqdm

In [24]:
# Index the documents one request at a time, with a progress bar.
for doc in tqdm(documents):
    es_client.index(document=doc, index=index_name)

  0%|          | 0/948 [00:00<?, ?it/s]

In [25]:
# Question used for the Elasticsearch-backed RAG call below.
query = "I just discovered the course. Can I still join?"

In [26]:
def elastic_search(query, course="data-engineering-zoomcamp", num_results=5):
    """Search the Elasticsearch index `index_name` for FAQ entries matching `query`.

    Args:
        query: Free-text user question.
        course: Exact value the "course" field must have. Defaults to
            "data-engineering-zoomcamp", the value that was previously hard-coded.
        num_results: Maximum number of hits to request ("size"). Defaults to 5.

    Returns:
        A list with the `_source` document of each hit, in the order returned.
    """
    search_query = {
        "size": num_results,
        "query": {
            "bool": {
                # Match across the three text fields; "question^3" boosts
                # matches in the question field.
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields"
                    }
                },
                # Restrict hits to a single course.
                "filter": {
                    "term": {
                        "course": course
                    }
                }
            }
        }
    }

    response = es_client.search(index=index_name, body=search_query)

    return [hit['_source'] for hit in response['hits']['hits']]

In [27]:
def rag(query):
    """Answer `query` using Elasticsearch retrieval.

    Replaces the earlier `rag` that retrieved with `search`; prompt building
    and the LLM call are the same helpers as before.
    """
    retrieved_docs = elastic_search(query)
    rendered_prompt = build_prompt(query, retrieved_docs)
    return llm(rendered_prompt)

In [28]:
# Run the Elasticsearch-backed pipeline on the question defined above.
rag(query)

' Yes, you can still join the course even after the start date. The materials will be kept available for self-paced study after the course finishes as well.\n\nsection: : General course-related questions\nquestion: Course - Can I follow the course after it finishes?\nanswer: Yes, we will keep all the materials after the course finishes, so you can follow the course at your own pace after it finishes...\n\noutput: Yes'