In [1]:
# Download the course FAQ documents and fit a toy in-memory search index over them
import requests 
import minsearch

# Download the FAQ dump: a JSON list with one entry per course, each holding
# that course's list of FAQ documents.
docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
# A timeout keeps the cell from hanging forever if the host is unreachable.
docs_response = requests.get(docs_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to JSON-decode an error page.
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten to one dict per FAQ entry, tagging each with the course it belongs to.
# Building new dicts leaves `documents_raw` untouched, so the cell can be re-run
# safely and no loop variables leak into the notebook namespace.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

# Free-text fields are searched by relevance; `course` is used for exact-match filtering.
index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

index.fit(documents)

<minsearch.Index at 0x794ff348f310>

In [2]:
from openai import OpenAI

# Point the OpenAI client at a server on localhost:11434 (the port published by
# the Ollama container in the setup notes of this notebook) instead of the
# default OpenAI endpoint.
# NOTE(review): 'ollama' looks like a placeholder key rather than a real secret — confirm.
client = OpenAI(api_key='ollama', base_url='http://localhost:11434/v1/')

In [5]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt from the user's question and retrieved FAQ entries.

    Args:
        query: The user's question; inserted verbatim after ``QUESTION:``.
        search_results: Iterable of dicts, each providing the keys
            ``section``, ``question`` and ``text``.

    Returns:
        The filled-in prompt, with leading and trailing whitespace stripped.
    """
    # Spelled out line by line so the exact whitespace of the template
    # (including the space after "CONTEXT:") is visible.
    instructions = (
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\n"
        "Use only the facts from the CONTEXT when answering the QUESTION.\n"
        "\n"
        "QUESTION: {question}\n"
        "\n"
        "CONTEXT: \n"
        "{context}"
    )

    # One formatted entry per retrieved document, each followed by a blank line.
    entries = [
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
        for doc in search_results
    ]

    filled = instructions.format(question=query, context="".join(entries))
    return filled.strip()
    
def llm(prompt, model='phi3'):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text to send to the chat-completions endpoint.
        model: Name of the model to query. Defaults to ``'phi3'``, the model
            pulled in this notebook's setup notes, so existing calls are
            unaffected.

    Returns:
        The ``content`` of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )

    return response.choices[0].message.content

def search(query, course='data-engineering-zoomcamp', num_results=5):
    """Query the in-memory index for FAQ entries relevant to ``query``.

    Args:
        query: Free-text search string.
        course: Only entries whose ``course`` field equals this value are
            returned. Defaults to the course the notebook originally hard-coded.
        num_results: Maximum number of entries to return (default 5).

    Returns:
        Whatever ``index.search`` returns for these arguments.
    """
    # Weight matches in the question more heavily than matches in the section title.
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results
    )


def rag(query):
    """Answer ``query`` by retrieving FAQ entries and passing them to the LLM.

    Retrieval goes through ``search`` (the toy search engine); the hits are
    turned into a prompt by ``build_prompt`` and sent to the model via ``llm``.

    Returns:
        The value returned by ``llm`` for the built prompt.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved))

In [8]:
# Sample question, reused by later cells; the last expression displays the
# answer produced by the currently defined `rag`.
query = "I just discovered the course. Can I still join it?"
rag(query)

' Yes, you can still join the course even after its start date. The materials will be kept available for you to follow at your own pace and continue preparing with homeworks and working on final projects. You can also subscribe to the Google Calendar event and register before the course starts as it is scheduled to begin on 15th Jan 2 Points:\n\n- Subscription link not provided in the context, hence cannot provide a specific URL here. However, you are directed to subscribe to an associated Google Calendar by visiting their respective page for desktop users.\n\n- To register before the course starts, use the given link (not explicitly stated but mentioned as "this link" in the FAQ).'

### Option 1 

For Linux
```bash
curl -fsSL https://ollama.com/install.sh | sh

ollama start
ollama pull phi3
ollama run phi3

```
### Option 2

Run the Ollama server in a Docker container:
```bash
docker run -it \
    -v ollama:/root/.ollama \
    -p 11434:11434 \
    --name ollama \
    ollama/ollama

```
Then pull the model from inside the running container:

```bash
docker exec -it ollama bash
ollama pull phi3
```

### Option 3

```bash
docker-compose up

docker exec -it ollama bash
ollama pull phi3

```


## With Elasticsearch

In [9]:
from elasticsearch import Elasticsearch
from tqdm.auto import tqdm

In [10]:
# Client for the Elasticsearch server at localhost:9200; used by the cells below.
es_client = Elasticsearch("http://localhost:9200")

In [11]:
# Index definition: the free-text FAQ fields are mapped as `text`, while
# `course` is a `keyword` so it can be matched exactly by a term filter.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            "course": {"type": "keyword"}
        }
    }
}

index_name = "course-questions"

# Drop any index left over from a previous run so this cell can be re-executed:
# creating an index that already exists raises an error. `ignore_unavailable`
# makes the delete a no-op on the first run, when there is nothing to remove.
# The indexing cell below must be re-run afterwards to repopulate the index.
es_client.indices.delete(index=index_name, ignore_unavailable=True)
es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'course-questions'})

In [12]:
# Index every FAQ entry, one request per document, with a progress bar.
# Using the position in `documents` as an explicit id makes this cell
# idempotent: re-running it overwrites the same entries instead of adding
# duplicates under freshly generated ids.
for doc_id, doc in enumerate(tqdm(documents)):
    es_client.index(index=index_name, id=str(doc_id), document=doc)

  0%|          | 0/948 [00:00<?, ?it/s]

In [13]:
def elastic_search(query, course="data-engineering-zoomcamp", size=5):
    """Retrieve FAQ entries matching ``query`` from Elasticsearch.

    Args:
        query: Free-text search string.
        course: Only documents whose ``course`` field equals this value are
            returned. Defaults to the course the notebook originally hard-coded.
        size: Maximum number of hits to return (default 5).

    Returns:
        A list with the ``_source`` dict of each hit, in the order
        Elasticsearch returned them.
    """
    search_query = {
        "size": size,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        # `^3` boosts matches in the question over the other fields.
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields"
                    }
                },
                "filter": {
                    "term": {
                        "course": course
                    }
                }
            }
        }
    }

    response = es_client.search(index=index_name, body=search_query)

    # Keep only the stored documents, dropping the per-hit metadata around them.
    return [hit['_source'] for hit in response['hits']['hits']]


# Sanity check: display the hits for the sample question defined earlier.
elastic_search(query)

[{'text': "Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.",
  'section': 'General course-related questions',
  'question': 'Course - Can I still join the course after the start date?',
  'course': 'data-engineering-zoomcamp'},
 {'text': 'Yes, we will keep all the materials after the course finishes, so you can follow the course at your own pace after it finishes.\nYou can also continue looking at the homeworks and continue preparing for the next cohort. I guess you can also start working on your final capstone project.',
  'section': 'General course-related questions',
  'question': 'Course - Can I follow the course after it finishes?',
  'course': 'data-engineering-zoomcamp'},
 {'text': 'You can start by installing and setting up all the dependencies and requirements:\nGoogle cloud account\nGoogle Cloud SDK\nPython 3 (insta

In [14]:
def rag(query):
    """Answer ``query`` using Elasticsearch for retrieval.

    Note: this redefines the ``rag`` declared earlier in the notebook, replacing
    the toy ``search`` retriever with ``elastic_search``. Prompt construction
    (``build_prompt``) and generation (``llm``) are unchanged.

    Returns:
        The value returned by ``llm`` for the built prompt.
    """
    retrieved = elastic_search(query)
    return llm(build_prompt(query, retrieved))

# Re-ask the sample question, now with Elasticsearch doing the retrieval.
rag(query)

' Yes, you can still join the course even if you discover it after its start date. You will have access to all the materials after the course finishes, so you can follow along at your own pace. Additionally, you are eligible to submit homework assignments and work on final capstone projects even without registering initially.\n\nHowever, please keep in mind that there will be deadlines for turning in final project submissions, and it is recommended not to leave everything until the last minute.'