In [22]:
import logging
import sys
import os

import tiktoken
import openai
from dotenv import load_dotenv
load_dotenv()

from llama_index import (
   VectorStoreIndex,
   SimpleDirectoryReader,
   OpenAIEmbedding,
   PromptHelper,
   ServiceContext
)
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
from llama_index.storage.storage_context import StorageContext
from llama_index.node_parser import SentenceWindowNodeParser
from llama_index.vector_stores import ElasticsearchStore
from llama_index.postprocessor import SentenceTransformerRerank
from llama_index.schema import QueryBundle
from llama_index.llms import OpenAI

from prompts import REWRITE_QUERIES_TEMPLATE, text_qa_template


logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))


os.environ['OPENAI_API_KEY'] = os.environ['OPENAI_API_KEY']
openai.api_key = os.getenv('OPENAI_API_KEY')
llm = OpenAI(model="gpt-3.5-turbo")


def generate_queries(query: str, num_queries: int = 3):
   response = llm.predict(
      REWRITE_QUERIES_TEMPLATE, num_queries=num_queries, query=query
   )

   queries = response.split("\n")
   queries_str = "\n".join(queries)
   print(f"Generated queries:\n{queries_str}")
   print("="*100)

   return queries


In [23]:

class ChatEngine:
    def __init__(self, documents_path="../../data/", new_indexing=False):
        self.vector_store = ElasticsearchStore(
                                es_url="http://localhost:9200",
                                index_name="law_index",
                            )
        self.node_parser = SentenceWindowNodeParser.from_defaults(
            window_size=3,
            window_metadata_key="window",
            original_text_metadata_key="original_text",
        )
        self.llm = OpenAI(model='gpt-3.5-turbo', temperature=0.7, max_tokens=256)
        self.embed_model = OpenAIEmbedding()
        self.prompt_helper = PromptHelper(
                                context_window=4096,
                                num_output=256,
                                chunk_overlap_ratio=0.1,
                                chunk_size_limit=None
                            )
        self.service_context = ServiceContext.from_defaults(
                                llm=llm,
                                embed_model=self.embed_model,
                                node_parser=self.node_parser,
                                prompt_helper=self.prompt_helper
                            )

        self.documents = SimpleDirectoryReader(documents_path).load_data()
        self.sentence_nodes = self.node_parser.get_nodes_from_documents(self.documents)
        self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store)
        self.index = VectorStoreIndex.from_vector_store(
            self.vector_store,
            storage_context=self.storage_context,
            service_context=self.service_context
        )
        if new_indexing:
            self.index = VectorStoreIndex(
                self.sentence_nodes,
                storage_context=self.storage_context,
                service_context=self.service_context,
            )


    def chat_en(self, queries: list[str], query_origin):

        retriever = self.index.as_retriever(
            similarity_top_k=3,
            # vector_store_query_mode="hybrid",
            alpha=0.5,
            text_qa_template = text_qa_template
        )

        # Get all node after retrieval step
        retrieved_nodes = []
        for query in queries:
            retrieved_nodes += retriever.retrieve(query)
        retrieved_nodes += retriever.retrieve(query_origin)


        # Rerank
        query_bundle = QueryBundle(query_origin)


        rerank = SentenceTransformerRerank(
            top_n = 3,
            model = "BAAI/bge-reranker-base"
        )


        retrieved_nodes = rerank.postprocess_nodes(
            retrieved_nodes, query_bundle
        )

        # Replace with sentence window node
        postprocessor = MetadataReplacementPostProcessor(
            target_metadata_key="window",
        )

        window_nodes = postprocessor.postprocess_nodes(retrieved_nodes)
        for i in window_nodes:
            print('REFRENCES: \n')
            print(i.get_score())
            print(i.get_content())
            print('='*100)


        # Generate response with top_k result
        context_str = "\n\n".join([r.get_content() for r in window_nodes])

        llm = OpenAI(model="gpt-3.5-turbo")
        response = llm.predict(
            text_qa_template, context_str=context_str, query_str=query_origin
        )

        print(response)
        return response

In [24]:
if __name__ == "__main__":
    query = "Các cơ sở của trường đại học Tôn Đức Thắng?"
    queries = generate_queries(query)
    chat = ChatEngine().chat_en(queries, query)

    print(chat)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Generated queries:
1. Các ngành học phổ biến tại trường đại học Tôn Đức Thắng là gì?
2. Có bao nhiêu cơ sở của trường đại học Tôn Đức Thắng và chúng nằm ở đâu?
3. Trường đại học Tôn Đức Thắng có những chương trình đào tạo nào được đánh giá cao?
INFO:elastic_transport.t

Batches: 100%|██████████| 1/1 [00:00<00:00,  1.46it/s]


REFRENCES: 

0.8300757
Có 04 chương trình đạt chuẩn kiểm định FIBAA: Tháng 11/2021, Hội đồng xét công nhận chất lượng của tổ chức FIBAA (Foundation for International Business Administration Accreditation) đã chính thức công nhận và cấp con dấu chất lượng (Quality Seal) cho 4 chương trình đào tạo bậc đại học của Trường Đại học Tôn Đức Thắng (TDTU).  Thời hạn công nhận là 5 năm (2021-2026).  Các chương trình được kiểm định và chứng nhận bởi FIBAA bao gồm: Quan hệ lao động, Quy hoạch vùng và đô thị, Xã hội học, Việt Nam học - chuyên ngành Du lịch và Quản lý du lịch.

 Các cơ sở của trường đại học Tôn Đức Thắng
Trụ sở chính: 19 Nguyễn Hữu Thọ, phường Tân Phong, Quận 7.
 Phân hiệu Khánh Hòa: Số 22, đường Nguyễn Đình Chiểu, Phường Vĩnh Phước, Thành phố Nha Trang, tỉnh Khánh Hòa.
 Cơ sở Bảo Lộc: phường Lộc Tiến, Tp Bảo Lộc, Lâm Đồng.

REFRENCES: 

0.8300757
Có 04 chương trình đạt chuẩn kiểm định FIBAA: Tháng 11/2021, Hội đồng xét công nhận chất lượng của tổ chức FIBAA (Foundation for Internat

In [27]:
import logging
import sys
import os

import tiktoken
import openai
from dotenv import load_dotenv
load_dotenv()


from llama_index import ServiceContext, LLMPredictor, OpenAIEmbedding, PromptHelper
from llama_index.llms import OpenAI
from llama_index.text_splitter import TokenTextSplitter
from llama_index.node_parser import SimpleNodeParser
from llama_index import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores import ElasticsearchStore
from llama_index.storage.storage_context import StorageContext


from prompts import base_prompt_template

documents = SimpleDirectoryReader("../../data/").load_data()


node_parser = SimpleNodeParser.from_defaults(
  separator=" ",
  chunk_size=1024,
  chunk_overlap=20,
  tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode
)

llm = OpenAI(model='gpt-3.5-turbo', temperature=0.7, max_tokens=256)
embed_model = OpenAIEmbedding()

prompt_helper = PromptHelper(
  context_window=4096,
  num_output=256,
  chunk_overlap_ratio=0.1,
  chunk_size_limit=None
)


vector_store = ElasticsearchStore(
  index_name="law_bot",
  es_cloud_id="a360a60c18784a4288ef610006c3b861:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlvJDAwZGM4M2JiYjU3NjRjZTliZDJlYjEyNTAwNTA2N2MxJDQzOTI5MzIyNGNlMjRiZDZhOTRkODYzOWQyZTNlYWJl",
  es_api_key="bUR1b3lZMEIzSUxOY1MxYjRvMEQ6ZE9PMS01UGlSSVdvdEhncUVkWmlWQQ=="
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

service_context = ServiceContext.from_defaults(
  llm=llm,
  embed_model=embed_model,
  node_parser=node_parser,
  prompt_helper=prompt_helper
)


def indexing_simple_rag(flag=False, path="../../data/"):
    index = None
    if flag == True:
        documents = SimpleDirectoryReader(path).load_data()
        index = VectorStoreIndex.from_documents(
          documents,
          service_context=service_context,
          storage_context=storage_context,
        )
        return index

    index = VectorStoreIndex.from_vector_store(
            vector_store,
            storage_context=storage_context,
            service_context=service_context
    )
    return index


def genaration_qa(question, new_index=False, path="../../data/"):
  if new_index == True:
    index = indexing_simple_rag(flag=new_index, path=path)
  else:
     index = indexing_simple_rag(flag=False)
  query_engine = index.as_query_engine(text_qa_template=base_prompt_template)
  response = query_engine.query(question)
  return response


if __name__ == "__main__":
   ques = genaration_qa(question="Bạn là ai?")
   print(ques)

INFO:elastic_transport.transport:GET https://00dc83bbb5764ce9bd2eb125005067c1.us-central1.gcp.cloud.es.io:443/ [status:200 duration:1.004s]
GET https://00dc83bbb5764ce9bd2eb125005067c1.us-central1.gcp.cloud.es.io:443/ [status:200 duration:1.004s]
GET https://00dc83bbb5764ce9bd2eb125005067c1.us-central1.gcp.cloud.es.io:443/ [status:200 duration:1.004s]
GET https://00dc83bbb5764ce9bd2eb125005067c1.us-central1.gcp.cloud.es.io:443/ [status:200 duration:1.004s]
GET https://00dc83bbb5764ce9bd2eb125005067c1.us-central1.gcp.cloud.es.io:443/ [status:200 duration:1.004s]
GET https://00dc83bbb5764ce9bd2eb125005067c1.us-central1.gcp.cloud.es.io:443/ [status:200 duration:1.004s]
GET https://00dc83bbb5764ce9bd2eb125005067c1.us-central1.gcp.cloud.es.io:443/ [status:200 duration:1.004s]
GET https://00dc83bbb5764ce9bd2eb125005067c1.us-central1.gcp.cloud.es.io:443/ [status:200 duration:1.004s]
GET https://00dc83bbb5764ce9bd2eb125005067c1.us-central1.gcp.cloud.es.io:443/ [status:200 duration:1.004s]
INFO

In [41]:
import os
import nest_asyncio
from dotenv import load_dotenv

nest_asyncio.apply()
load_dotenv()

from datasets import Dataset

from ragas import evaluate
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)
from ragas.metrics.critique import harmfulness



os.environ['OPENAI_API_KEY'] = os.environ['OPENAI_API_KEY']
openai.api_key = os.getenv('OPENAI_API_KEY')


from prompts import text_qa_template
from simpleRAG import genaration_qa


data_list = [
    {
        'question': "Tác động hạn chế cạnh tranh là gì",
        'ground_truth': "Tác động hạn chế cạnh tranh là tác động loại trừ, làm giảm, sai lệch hoặc cản trở cạnh tranh trên thị trường.",
        'answer': "Tác động hạn chế cạnh tranh là tác động loại trừ, làm giảm, sai lệch hoặc cản trở cạnh tranh trên thị trường.",
        'contexts': ["France is a country located in Western Europe.", "The Eiffel Tower is located in Paris."]
    },
    {
        'question': "Who wrote 'Romeo and Juliet'?",
        'ground_truth': "William Shakespeare",
        'answer': "William Shakespeare",
        'contexts': ["'Romeo and Juliet' is a tragedy written by William Shakespeare.", "It was first published in 1597."]
    },
    {
        'question': "What is the chemical symbol for water?",
        'ground_truth': "H2O",
        'answer': "H2O",
        'contexts': ["Water is a chemical compound composed of two hydrogen atoms and one oxygen atom.", "It is essential for life on Earth."]
    }
]
ds = Dataset.from_list(data_list)



if __name__ == "__main__":
    result = evaluate(
        ds,
        metrics=[
            context_precision,
            faithfulness,
            answer_relevancy,
            context_recall,
        ],
    )

    result



Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST

Evaluating:   8%|▊         | 1/12 [00:02<00:23,  2.11s/it]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST

Evaluating:  17%|█▋        | 2/12 [00:03<00:14,  1.43s/it]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST

Evaluating:  33%|███▎      | 4/12 [00:04<00:06,  1.15it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST

Evaluating:  58%|█████▊    | 7/12 [00:04<00:02,  1.88it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST

Evaluating:  67%|██████▋   | 8/12 [00:05<00:02,  1.82it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://ap

Evaluating:  83%|████████▎ | 10/12 [00:06<00:01,  1.94it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST

Evaluating:  92%|█████████▏| 11/12 [00:06<00:00,  2.24it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Evaluating: 100%|██████████| 12/12 [00:09<00:00,  1.22it/s]
