# Cài đặt và import các module cần thiết

In [28]:
%pip install "unstructured[all-docs]" unstructured-client watermark langchain-groq langchain fastembed qdrant_client python-dotenv


[0m

In [29]:
import os
import textwrap

import chromadb
import langchain
import openai
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader, UnstructuredPDFLoader, YoutubeLoader, PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings, OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.llms import GPT4All
from pdf2image import convert_from_path
from langchain import LLMChain, PromptTemplate
from langchain_groq import ChatGroq



# Thiết lập call API của Groq và test mô hình trước khi thực hiện RAG

In [30]:
import getpass

from groq import Groq

# SECURITY: the API key must never be hardcoded in the notebook. It is read from the
# GROQ_API_KEY environment variable; if that is unset or empty, prompt for it
# interactively (getpass does not echo the value into the cell output).
# NOTE: the key that used to be embedded in this cell is exposed and should be revoked.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass.getpass("GROQ_API_KEY: ")

# Initialize the client using the environment variable
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# Baseline check: ask the model a multiple-choice question directly, without any
# retrieved context, so the answer can be compared with the RAG chain later on.
chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": """
<|im_start|>system
You are a Japanese history chatbot. You will receive a multi-choices question about Japanese history and you must present the right answer as well
as some explainations if possible

<|im_start|>user
### Question:
Who is the de-facto leader of the Western Army in 1600 :
### Choices:
A. Akechi Mitsuhide
B. Mori Terumoto
C. Tokugawa Ieyasu
D. Ishida Mitsunari
### Answer:

<|im_start|>assistant
""".strip(),
        }
    ],
    model="gemma-7b-it",
)

# Print only the text of the first returned choice
print(chat_completion.choices[0].message.content)


**Answer:** C. Tokugawa Ieyasu

**Explanation:** Tokugawa Ieyasu played a pivotal role in the Battle of Sekigahara in 1600, which effectively ended the Sengoku Period and paved the way for the establishment of the Tokugawa Shogunate. His military prowess and diplomatic skills enabled him to consolidate power and become the de-facto leader of the Western Army.


# RAG

## Load data và thực hiện chia nhỏ thành các chunks

In [31]:
# Convert the PDF file to text.
# Use load() instead of load_and_split(): load_and_split() already chunks the text with
# a default splitter, so running text_splitter afterwards would chunk the document twice.
pdf_loader = UnstructuredPDFLoader("data.pdf")
pdf_pages = pdf_loader.load()

# Split the loaded documents into overlapping chunks (the only splitting pass)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
texts = text_splitter.split_documents(pdf_pages)

# Number of chunks produced
len(texts)





58

## Thực hiện embedding và lưu vào vector db

In [33]:
# Embedding model used to turn each text chunk into a vector
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
hf_embeddings = HuggingFaceEmbeddings(model_name=MODEL_NAME)

# Embed every chunk and store the resulting vectors in a Chroma database
# that uses the "db" directory as its persist directory
db = Chroma.from_documents(
    documents=texts,
    embedding=hf_embeddings,
    persist_directory="db",
)




<langchain_community.vectorstores.chroma.Chroma at 0x793fe49c4040>

In [34]:
# Display the vector store object (its repr) to confirm it exists
db

<langchain_community.vectorstores.chroma.Chroma at 0x793fe49c4040>

## Thiết lập custom prompt và tạo chain

In [35]:
# Prompt text for the QA chain. It contains two placeholders, {context} and {question},
# and instructs the model to admit when it does not know the answer.
custom_prompt_template = """
You are a chatbot with extensive knowledge of Japanese history. Use the following information to answer the user's question.
If you don't know the answer, just say you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

"""

In [36]:
# Wrap the template string in a PromptTemplate, declaring the two placeholders it uses
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=custom_prompt_template,
)

In [37]:
from langchain.prompts.prompt import PromptTemplate
from langchain_groq import ChatGroq
from langchain.memory import ChatMessageHistory
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Retriever over the Chroma store with default settings.
# Note: the QA chain below does not use this object; it builds its own retriever with k=2.
retriever = db.as_retriever()

# Chat model served through Groq; the API key comes from the environment
model = ChatGroq(
    model_name="gemma-7b-it",
    groq_api_key=os.environ.get("GROQ_API_KEY"),
)

# Retrieval QA chain of type "stuff", using the custom prompt and a retriever
# configured with search_kwargs {"k": 2}
chain = RetrievalQA.from_chain_type(
    llm=model,
    retriever=db.as_retriever(search_kwargs={"k": 2}),
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
)


## Thử nghiệm mô hình sau khi RAG

In [40]:
# Same multiple-choice question as the baseline test, now sent through the RAG chain
mcq_question = """Who is the de-facto leader of the Western Army in 1600 :
### Choices:
A. Akechi Mitsuhide
B. Mori Terumoto
C. Tokugawa Ieyasu
D. Ishida Mitsunari"""

response = chain.run(mcq_question)
response

'**D. Ishida Mitsunari**\n\nThe provided text explicitly states that Ishida Mitsunari was the de facto head of the Western Army in 1600.'