# Simple RAG pipeline using [Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct)
- Note: Inference will be quite slow without a GPU.

In [13]:
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from elasticsearch import Elasticsearch
from tqdm.auto import tqdm

## Build the Elasticsearch index

In [12]:
# Elasticsearch must already be running locally (see 01-practice-rag-intro for how to start it)
es_client = Elasticsearch("http://localhost:9200")
cluster_info = es_client.info()
print(cluster_info)


{'name': 'b5f0476ddbbb', 'cluster_name': 'docker-cluster', 'cluster_uuid': 'h9uyOt7hRT2KBBtjRNlCWg', 'version': {'number': '8.4.3', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '42f05b9372a9a4a470db3b52817899b99a76ee73', 'build_date': '2022-10-04T07:17:24.662462378Z', 'build_snapshot': False, 'lucene_version': '9.3.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}


In [14]:
# Build the index: load the FAQ file, flatten it, (re)create the index, then index every document.
index_name = "course-questions"

# Load the raw documents. The encoding is given explicitly because the FAQ text
# contains non-ASCII characters (e.g. curly quotes) and the platform default may not be UTF-8.
with open('data/documents.json', 'rt', encoding='utf-8') as f:
    docs_raw = json.load(f)

# Create the documents database (list of dict): one flat record per FAQ entry,
# tagged with the course it belongs to. Copies are made so docs_raw stays untouched
# and re-running the cell always starts from the same input.
documents = [
    {**doc, 'course': course_dict['course']}
    for course_dict in docs_raw
    for doc in course_dict['documents']
]

# Index settings: free-text fields are analysed as "text"; "course" is a
# "keyword" so it can be used in an exact-match term filter.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "question": {"type": "text"},
            "section": {"type": "text"},
            "course": {"type": "keyword"}
        }
    }
}

# Drop an index left over from a previous run: creating an index that already
# exists raises an error, and re-indexing into it would duplicate every document.
if es_client.indices.exists(index=index_name):
    es_client.indices.delete(index=index_name)
es_client.indices.create(index=index_name, body=index_settings)

# Index the documents one by one, with a progress bar.
for doc in tqdm(documents):
    es_client.index(index=index_name, document=doc)

# Make the freshly indexed documents visible to searches issued right after this cell.
es_client.indices.refresh(index=index_name)

  0%|          | 0/948 [00:00<?, ?it/s]

In [16]:
# function to search the index
def elastic_search(query, course="data-engineering-zoomcamp", num_results=5):
    """Retrieve the FAQ documents that best match ``query``.

    Runs a ``multi_match`` (``best_fields``) query over the ``question``,
    ``text`` and ``section`` fields, with ``question`` boosted by 3, and
    restricts the hits to a single course with a ``term`` filter.

    Args:
        query: Free-text search string.
        course: Value the ``course`` field must equal. Defaults to
            ``"data-engineering-zoomcamp"``.
        num_results: Maximum number of hits to request. Defaults to 5.

    Returns:
        A list with the ``_source`` dict of every hit, in the order returned
        by Elasticsearch.
    """
    search_query = {
        "size": num_results,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields",
                    }
                },
                "filter": {
                    "term": {
                        "course": course,
                    }
                },
            }
        },
    }
    response = es_client.search(index=index_name, body=search_query)

    # Keep only the stored documents; scores and other hit metadata are dropped.
    return [hit['_source'] for hit in response['hits']['hits']]

# Try the retriever on a sample question; as the cell's last expression, the returned list is displayed.
elastic_search("How do I get the certificate?")

[{'text': "No, you can only get a certificate if you finish the course with a “live” cohort. We don't award certificates for the self-paced mode. The reason is you need to peer-review capstone(s) after submitting a project. You can only peer-review projects at the time the course is running.",
  'section': 'General course-related questions',
  'question': 'Certificate - Can I follow the course in a self-paced mode and get a certificate?',
  'course': 'data-engineering-zoomcamp'},
 {'text': "All mage files are in your /home/src/folder where you saved your credentials.json so you should be able to access them locally. You will see a folder for ‘Pipelines’,  'data loaders', 'data transformers' & 'data exporters' - inside these will be the .py or .sql files for the blocks you created in your pipeline.\nRight click & ‘download’ the pipeline itself to your local machine (which gives you metadata, pycache and other files)\nAs above, download each .py/.sql file that corresponds to each block y

## Load the model

In [17]:
# Seed PyTorch's random number generator with a fixed value (42) before the model is loaded and run.
torch.random.manual_seed(42)

<torch._C.Generator at 0x10cc1f6d0>

In [20]:
model_name = "microsoft/Phi-3.5-mini-instruct"

# Pick the best available device instead of assuming Apple Silicon ("mps"):
# CUDA GPU first, then Apple's Metal backend, otherwise fall back to the CPU (slow).
if torch.cuda.is_available():
    device = "cuda"
elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=device,
    torch_dtype='auto',
    # NOTE(review): trust_remote_code=True allows Python code shipped in the model
    # repository to run locally -- confirm it is still required for this model
    # and pin a revision if it is.
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [22]:
# Baseline: ask the model directly, without any retrieved context.
messages = [
    {"role": "user", "content": "The course has already started, can I still join?"}]

# Text-generation pipeline wrapping the already-loaded model and tokenizer.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Sampling is disabled, so no temperature is passed: it would be ignored and
# only trigger a warning about an unused sampling flag.
generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False, # if true, the model will return the full text of the input prompt
    "do_sample": False,
}

output = pipe(messages, **generation_args)
print(output[0]['generated_text'])



 Whether you can join a course that has already started depends on several factors, including the policies of the institution or organization offering the course, the nature of the course itself, and the specific circumstances. Here are some considerations:

1. **Institutional Policy**: Some courses have strict enrollment policies, while others may allow late enrollment under certain conditions. Check with the course coordinator or the institution's registrar to understand their policy.

2. **Course Capacity**: If the course has limited seats and is fully booked, it may not be possible to join. However, if there are still openings, you might be able to enroll.

3. **Waitlist**: In some cases, you can join a waitlist for a course that is already full. If a spot opens up, you may be able to take the course.

4. **Special Circumstances**: Some institutions may make exceptions for students who have extenuating circumstances, such as illness or family emergencies.

5. **Online Courses**: If

In [23]:
# define functions to build the prompt
def build_prompt(query, search_results):
    """Render the LLM prompt for ``query`` from the retrieved FAQ entries.

    Args:
        query: The user's question; substituted for ``{question}``.
        search_results: Iterable of dicts, each providing the keys
            ``'section'``, ``'question'`` and ``'text'``.

    Returns:
        The filled-in prompt with leading and trailing whitespace removed.
    """
    template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

    # One "section / question / answer" entry per retrieved document,
    # each followed by a blank line.
    entries = [
        f"section: {hit['section']}\nquestion: {hit['question']}\nanswer: {hit['text']}\n\n"
        for hit in search_results
    ]

    filled = template.format(question=query, context="".join(entries))
    return filled.strip()


def llm(prompt, max_new_tokens=500):
    """Send ``prompt`` to the text-generation pipeline as a single user message.

    Generation runs with sampling disabled. No temperature is passed because
    it is a sampling-only setting and would be ignored with a warning.

    Args:
        prompt: Full prompt text, sent as the content of one ``user`` message.
        max_new_tokens: Upper bound on the number of generated tokens.
            Defaults to 500.

    Returns:
        The generated text only (the prompt is not echoed back), with
        surrounding whitespace stripped.
    """
    messages = [
        {"role": "user", "content": prompt},
    ]

    generation_args = {
        "max_new_tokens": max_new_tokens,
        "return_full_text": False,
        "do_sample": False,
    }

    output = pipe(messages, **generation_args)
    return output[0]['generated_text'].strip()

# updated rag(query)
def rag_pipeline(query):
    """Answer ``query`` end to end: search, build the prompt, ask the model.

    The documents returned by ``elastic_search(query)`` are passed with the
    query to ``build_prompt``; the resulting prompt goes to ``llm`` and its
    return value is returned unchanged.
    """
    retrieved_docs = elastic_search(query)
    return llm(build_prompt(query, retrieved_docs))

# Run the full RAG pipeline on a sample question; as the cell's last expression, the answer string is displayed.
rag_pipeline("The course has already started, can I still join?")



"Based on the provided context, you can still join the course even after the start date. The context mentions that you can submit homework even if you don't register, but be mindful of the deadlines for final projects. There is no mention of restrictions on joining after the course has started, so it implies that you are still eligible to join."