In [1]:
from openai import OpenAI
import os
import json
from elasticsearch import Elasticsearch
es_client = Elasticsearch('http://localhost:9200')
es_client.info()

ObjectApiResponse({'name': '682b603f9a13', 'cluster_name': 'docker-cluster', 'cluster_uuid': 't2IA-dk1T3SC8XOp5J4WGA', 'version': {'number': '8.4.3', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '42f05b9372a9a4a470db3b52817899b99a76ee73', 'build_date': '2022-10-04T07:17:24.662462378Z', 'build_snapshot': False, 'lucene_version': '9.3.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

In [8]:
client = OpenAI(base_url= 'http://localhost:11434/v1/',
               api_key="ollama"
               )

In [3]:
with open('documents.json', 'r') as f:
    doc_raws = json.load(f)

documents = []
for course_dict in doc_raws:
    for doc in course_dict['documents']:
        doc['course'] = course_dict['course']
        documents.append(doc)

In [4]:
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            "course": {"type": "keyword"} 
        }
    }
}
index_name = "course-question"
es_client.indices.create(index = index_name, body = index_settings)
from tqdm.auto import tqdm
for doc in tqdm(documents):
    es_client.index(index = index_name, document = doc)

BadRequestError: BadRequestError(400, 'resource_already_exists_exception', 'index [course-question/dn1RaoDnQ5ugTHCR0h6M7g] already exists')

In [5]:
def search(query):
    boost = {'question': 3.0, 'section': 0.5}
    result  = index.search(query = query,
                 boost_dict = boost,
                 num_results = 5
                )
    return result

def elastic_search (query):
    search_query = {
        "size": 5,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields"
                    }
                },
                "filter": {
                    "term": {
                        "course": "data-engineering-zoomcamp"
                    }
                }
            }
        }
    }
    es_result = es_client.search(index = index_name, body=search_query)
    es_result_docs = []
    for hit in es_result['hits']['hits']:
        es_result_docs.append(hit['_source'])


    return es_result_docs
    
def build_prompt(query, search_result):

    context = ""
    for doc in search_result:
        context =context + f"section :{doc['section']} \nquestion:{doc['question']}\nanswer:{doc['text']}\n\n"
    
    prompt_template = """
    You are a course teaching assistant. Answer the given QUESTION based on the CONTEXT from the FAQ database.
    Use only the facts from the CONTEXT when answering the QUESTION.
    
    QUESTION: {question}\n
    CONTEXT: {context}
    """.strip()
    prompt = prompt_template.format(question = query, 
                                    context = context).strip()
    return prompt

def llm (prompt):
    response = client.chat.completions.create(
        model= 'phi3',
        messages = [{'role':'user', 'content':prompt}])
    return response.choices[0].message.content

In [6]:
def rag(query):
    results = elastic_search(query)#search(query)
    prompt = build_prompt(query, results)
    answer = llm(prompt)
    return answer

In [10]:
query = "i just discovered about the course, but can i still join it ?"
rag(query)

' According to our Frequently Asked Questions (FAQ) database, here is a relevant answer regarding joining courses at your convenience: \n\n"Yes, you may be able to join late for some of our classes if we have openings. However, this largely depends on the availability and size of these specific sessions. We suggest that potential students register early as seats are limited and might fill quickly."'