<a href="https://colab.research.google.com/github/duongtran96/Project_LLM/blob/main/RAG_LLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Trắc Nghiệm**

In [None]:
#1: B Retrieval Augmented Generation.
#2: A Truy vấn thêm các thông tin bên ngoài.
#3

## Install the necessary library packages


In [None]:
!pip install  -q transformers==4.41.2
!pip install  -q  bitsandbytes==0.43.1
!pip install  -q  accelerate==0.31.0
!pip install  -q  langchain==0.2.5
!pip install  -q  langchainhub==0.1.20
!pip install  -q  langchain-chroma==0.1.1
!pip install  -q  langchain-community==0.2.5
!pip install  -q  langchain-openai==0.1.9
!pip install  -q  langchain_huggingface==0.0.3
!pip install  -q  chainlit==1.1.304
!pip install  -q  python-dotenv==1.0.1
!pip install  -q  pypdf==4.2.0
!pip install  -q  numpy=1.24.2
!npm install  -g  localtunnel

## Setup vector database

In [None]:
import torch
from transformers import BitsAndBytesConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_huggingface.llms import HuggingFacePipeline

from langchain.memory import ConversationBufferMemory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain.chains import ConversationalRetrievalChain

from langchain_chroma import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain import hub

# --- Load the source PDF -----------------------------------------------------
FILE_PATH = "/content/YOLOv10_Tutorials.pdf"
Loader = PyPDFLoader
loader = Loader(FILE_PATH)
documents = loader.load()

# --- Split the loaded documents into overlapping chunks ----------------------
# Chunks are at most 1000 characters long, with 100 characters of overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
docs = text_splitter.split_documents(documents)

# Quick sanity check: how many chunks were produced, and what the first one is.
print("Number of sub-documents: ", len(docs))
print(docs[0])

# --- Embed the chunks and index them in Chroma -------------------------------
# HuggingFaceEmbeddings is constructed with its default settings.
embedding = HuggingFaceEmbeddings()
vector_db = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
)

# Retriever view over the vector store, consumed by the RAG chain.
retriever = vector_db.as_retriever()

## Initialize Large Language Models

## Initialize Large Language Models

In [None]:
# 4-bit quantization settings: NF4 quant type, double quantization enabled,
# and bfloat16 as the compute dtype.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Checkpoint name shared by the model and the tokenizer.
model_name = "lmsys/vicuna-7b-v1.5"

# Load the causal LM with the quantization config above.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=nf4_config,
    low_cpu_mem_usage=True,
)

# Tokenizer matching the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Wrap model + tokenizer in a single text-generation pipeline.
# Generation is capped at 512 new tokens and the EOS token id doubles as the
# padding token id.
model_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    pad_token_id=tokenizer.eos_token_id,
    device_map="auto",
)

# Expose the pipeline through the LangChain LLM interface.
llm = HuggingFacePipeline(pipeline=model_pipeline)

# Fetch the RAG prompt template from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    """Concatenate the text of the given documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined by a blank line (``"\\n\\n"``);
        an empty string when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# Assemble the RAG chain:
#   1. "context"  <- retriever output passed through format_docs
#      "question" <- the chain input, forwarded unchanged
#   2. fill the prompt template with both values
#   3. run the LLM
#   4. parse the LLM output into a plain string
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)



In [None]:
USER_QUESTION = "Bài toán Object Detection"
output = rag_chain.invoke(USER_QUESTION)

# Keep the text that follows the first "Answer:" marker in the chain output
# (presumably the output echoes the prompt, which ends with that marker —
# confirm against the pulled prompt template).
# The marker is matched without a trailing space so an answer that starts on a
# new line is still found, and the full output is used as a fallback instead of
# raising IndexError when the marker is missing altogether.
segments = output.split("Answer:")
answer = (segments[1] if len(segments) > 1 else output).strip()
print(answer)