## Creating ElasticSearch Index

In [3]:
from elasticsearch import Elasticsearch

In [4]:
es_client = Elasticsearch('http://localhost:9200')

In [5]:
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "category": {"type": "text"},
            "situation_en": {"type": "text"},
            "situation_kr": {"type": "text"},
            "question_en": {"type": "text"},
            "question_kr": {"type": "text"},
            "answer_en": {"type": "text"},
            "answer_kr": {"type": "text"},
        }
    }
}

index_name = "learning-english"

es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'learning-english'})

## Load documents

In [7]:
import json
from tqdm import tqdm

with open('../situations.json', 'rt') as f_in:
    docs_raw = json.load(f_in)

documents = []

for doc in tqdm(docs_raw):
    documents.append(doc)

100%|██████████| 129/129 [00:00<00:00, 1332672.95it/s]


In [8]:
documents[0]

{'category': 'Restaurant',
 'situation_en': 'You are at a restaurant and your food is taking too long.',
 'situation_kr': '당신은 식당에 있는데 음식이 너무 오래 걸리고 있습니다.',
 'question_en': 'Why is my food taking so long?',
 'question_kr': '내 음식이 왜 이렇게 오래 걸리나요?',
 'answer_en': 'The kitchen is busy, but your food will be ready soon.',
 'answer_kr': '주방이 바쁘지만, 곧 음식이 준비될 것입니다.'}

In [9]:
for doc in tqdm(documents):
    es_client.index(index=index_name, document=doc)

100%|██████████| 129/129 [00:00<00:00, 271.06it/s]


In [10]:
query = 'how do I order decaf coffee?'

In [50]:
def elastic_search(query):
    search_query = {
        "size": 5,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ['category', 'situation_en^3', 'situation_kr^3', 'question_en', 'question_kr', 'answer_en', 'answer_kr'],
                        "type": "best_fields"
                    }
                }
            }
        }
    }

    response = es_client.search(index=index_name, body=search_query)
    
    result_docs = []
    
    for hit in response['hits']['hits']:
        result_docs.append([hit['_source'], hit['_score']])
    
    return result_docs

In [54]:
elastic_search('how to order a book?')

[[{'category': 'Bookstore',
   'situation_en': 'You are at a bookstore and want to know if they can order a book for you.',
   'situation_kr': '당신은 서점에 있으며 책을 주문할 수 있는지 알고 싶습니다.',
   'question_en': 'Can you order this book for me?',
   'question_kr': '이 책을 주문해 주시겠어요?',
   'answer_en': 'Yes, we can order it and it should arrive within a week.',
   'answer_kr': '네, 주문할 수 있으며 일주일 내로 도착할 예정입니다.'},
  24.009493],
 [{'category': 'Library',
   'situation_en': 'You are at a library and need help finding a book.',
   'situation_kr': '당신은 도서관에 있으며 책을 찾는 데 도움이 필요합니다.',
   'question_en': 'Can you help me find this book?',
   'question_kr': '이 책을 찾는 데 도와주시겠어요?',
   'answer_en': "Sure, it's located in the fiction section on the second floor.",
   'answer_kr': '물론이죠, 이 책은 2층 소설 코너에 있습니다.'},
  13.687327],
 [{'category': 'Flight',
   'situation_en': 'You are trying to book a flight and need to know the baggage policy.',
   'situation_kr': '당신은 항공편을 예약하려고 하며 수하물 정책을 알고 싶습니다.',
   'question_en': 'What is 

## Retrival

In [57]:
def build_prompt(query, search_results):
    prompt_template = """
You are an English teacher who is helping students learn English expressions for different real-life situations. 
Based on the provided CONTEXT provide an appropriate response in English and Korean to the user's QUESTION.
Use only the facts from the CONTEXT to answer the QUESTION. Do not add any additional information.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()

    context = "\n".join(
        f"Category: {doc[0]['category']}\n"
        f"Situation English: {doc[0]['situation_en']}\n"
        f"Situation Korean: {doc[0]['situation_kr']}\n"
        f"Question English: {doc[0]['question_en']}\n"
        f"Question Korean: {doc[0]['question_kr']}\n"
        f"Answer English: {doc[0]['answer_en']}\n"
        f"Answer Korean: {doc[0]['answer_kr']}\n"
        for doc in search_results
    )
    
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [58]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM

def rag(query):
    search_results = elastic_search(query)
    prompt = build_prompt(query, search_results)
    prompt = ChatPromptTemplate.from_template(prompt)
    llm = OllamaLLM(model="llama3.1")
    chain = prompt | llm
    answer = chain.invoke({})
    
    return answer

In [59]:
answer = rag('what do I say if I want to order a decaf coffee?')
print(answer)

Based on the provided CONTEXT, which is a cafe situation where you want to know if they can offer refills on coffee, here's the response:

**Question:** What do I say if I want to order a decaf coffee?

**English Response:** Do you have decaf coffee?
**Korean Response:** 디카페인 커피가 있어요?
