In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
from tqdm.autonotebook import tqdm as notebook_tqdm
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0,1"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
MILVUS_HOST = "localhost"
MILVUS_PORT = "19530"

In [6]:
from langchain_community.embeddings import HuggingFaceEmbeddings
# Load model embedding
model_name = "Alibaba-NLP/gte-large-en-v1.5"
model_kwargs = {'device': 'cuda', 'trust_remote_code': True}
encode_kwargs = {'normalize_embeddings': False}
cache_folder = "/workspace/vinhnq/cache_weights"
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
    cache_folder=cache_folder
)

  embeddings = HuggingFaceEmbeddings(


In [7]:
from langchain.vectorstores import Milvus
# Kết nối với database đã tạo sẵn
NAME="Vinhv2"
vector_db = Milvus(
    embeddings,
    connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT},
    collection_name=NAME,
)

  vector_db = Milvus(


In [8]:
# Chạy trước với RAG
from time import perf_counter as timer
start_time = timer()
query = "what is deep learning?"
docs = vector_db.similarity_search_with_score(query, k=3)
end_time = timer()

print(f"[INFO] Time taken to get scores: {end_time-start_time:.5f} seconds.")

# Score càng thấp càng tốt
print("Query:", query)
print("Result:")
for doc, score in docs:
    print("----------------------")
    print("Score", score)
    print(doc.page_content)

[INFO] Time taken to get scores: 0.66678 seconds.
Query: what is deep learning?
Result:
----------------------
Score 0.7429640293121338
Accepted as a long survey paper at ACM CSUR 2021
A Deep Learning Classiﬁers: Mathematical and Technical Background
A.1 Deep Neural Networks (DNNs)
Neural networks are a class of machine learning models made up of layers of neurons (elementary computing
units).
A neuron takes an n-dimensional feature vector x= [x1,x2...xn]from the input or the lower level neuron
and outputs a numerical output y= [y1,y2...ym], such that
yj=φ(∑n
i=1wjixi+bj) (15)
to the neurons in higher layers or the output layer. For the neuron j, yjis the output and bjis the bias term,
while wjiare the elements of a layer’s weight matrix. The function φis the nonlinear activation function,
----------------------
Score 0.7345472574234009
[55] and AlexNet [56] contain few layers. In 2014, Simonyan and Zisserman [57] explored a deeper CNN
model called VGGNet, which contains 19 layers, and

In [None]:
import transformers
import torch
from transformers import BitsAndBytesConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from huggingface_hub import login
from langchain.llms import HuggingFacePipeline
#login(token = "Your token")
os.environ["CUDA_VISIBLE_DEVICES"]="0,1"

# Gọi model llama
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
pipeline = transformers.pipeline(
    "text-generation",
    return_full_text=True,
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
    temperature=0.1,
    max_new_tokens=512,
    repetition_penalty=1.1,
    num_return_sequences=1,
    )
llm = HuggingFacePipeline(pipeline=pipeline)


2024-11-30 09:04:34.738787: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-30 09:04:34.738839: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-30 09:04:34.741113: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-30 09:04:34.749885: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/vinhnq/.cache/huggingface/token
Login successful


Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.11s/it]
  llm = HuggingFacePipeline(pipeline=pipeline)


In [None]:
from langchain.prompts import ChatPromptTemplate

template = """
Human: You are a chatbot designed to support experts in the field of computer science by using the Retrieval-Augmented Generation (RAG) technique. You need to provide accurate, detailed, and in-depth answers on topics related to computer science. Please answer the experts' queries with all your dedication. Here are the specific requirements:

1. Ensure that your answers are based on the most recent and accurate information available.
2. If the question exceeds your current knowledge, admit that you don't know and apologize to the experts.
3. For technical terms, provide answers with abbreviations, for example, Deep Learning (DL), Machine Learning (ML), Large Language Models (LLMs).
4. Structure your answers meticulously as if presenting a detailed report:
   - Emphasize major points with headings or numbered sections.
   - Use bullet points for key lists or steps.
   - Highlight important considerations or cautions with marked notes (*).
5. Provide references to the documents you use for your answer.
6. Consider the answer based on the context below, which is retrieved information from relevant documents, but remember not to explicitly state that you are answering based on context.
Context:
{context}

Question:
{question}

Assistant:

"""

prompt = ChatPromptTemplate.from_template(template)


In [15]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
retriever = vector_db.as_retriever(search_kwargs={"k": 3})
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [9]:
'''import pandas as pd
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables    import RunnablePassthrough

# Định nghĩa rag_chain của bạn
retriever = vector_db.as_retriever(search_kwargs={"k": 2})
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm.bind(skip_prompt=True)
    | StrOutputParser()
)
# Đọc file CSV
file_path = '/workspace/vinhnq/RAG_bot/QA for Quora and Chatbot - Trang tính1.csv'
df = pd.read_csv(file_path)

# Duyệt qua từng hàng trong cột "Question"
for index, row in df.iterrows():
    question = row['Question']  # Lấy câu hỏi từ cột "Question"
    # Chuẩn bị input cho rag_chain
    # Thực thi rag_chain
    answer = rag_chain.invoke(question)
    # Lưu câu trả lời vào cột "Vinbot"
    df.at[index, 'Vinbot'] = answer  # Điều chỉnh nếu cần thiết

# Lưu kết quả vào file CSV mới
output_file_path = '/workspace/vinhnq/RAG_bot/QA for Quora and Chatbot - Trang tính1.csv'
df.to_csv(output_file_path, index=False)

print(f"Đã lưu câu trả lời vào {output_file_path}")'''


'import pandas as pd\nfrom langchain_core.output_parsers import StrOutputParser\nfrom langchain_core.runnables    import RunnablePassthrough\n\n# Định nghĩa rag_chain của bạn\nretriever = vector_db.as_retriever(search_kwargs={"k": 2})\nrag_chain = (\n    {"context": retriever, "question": RunnablePassthrough()}\n    | prompt\n    | llm.bind(skip_prompt=True)\n    | StrOutputParser()\n)\n# Đọc file CSV\nfile_path = \'/workspace/vinhnq/RAG_bot/QA for Quora and Chatbot - Trang tính1.csv\'\ndf = pd.read_csv(file_path)\n\n# Duyệt qua từng hàng trong cột "Question"\nfor index, row in df.iterrows():\n    question = row[\'Question\']  # Lấy câu hỏi từ cột "Question"\n    # Chuẩn bị input cho rag_chain\n    # Thực thi rag_chain\n    answer = rag_chain.invoke(question)\n    # Lưu câu trả lời vào cột "Vinbot"\n    df.at[index, \'Vinbot\'] = answer  # Điều chỉnh nếu cần thiết\n\n# Lưu kết quả vào file CSV mới\noutput_file_path = \'/workspace/vinhnq/RAG_bot/QA for Quora and Chatbot - Trang tính1.cs

In [10]:
outputs = rag_chain.invoke("what is machine learning ?")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [11]:
# Định nghĩa hàm này để in dễ nhìn hơn
import textwrap

def print_wrapped(text, wrap_length=80):
    wrapped_text = textwrap.fill(text, wrap_length)
    print(wrapped_text)
print_wrapped(outputs)

Human:  Human: You are a chatbot designed to support experts in the field of
computer science by using the Retrieval-Augmented Generation (RAG) technique.
You need to provide accurate, detailed, and in-depth answers on topics related
to computer science. Please answer the experts' queries with all your
dedication. Here are the specific requirements:  1. Ensure that your answers are
based on the most recent and accurate information available. 2. If the question
exceeds your current knowledge, admit that you don't know and apologize to the
experts. 3. For technical terms, provide answers with abbreviations, for
example, Deep Learning (DL), Machine Learning (ML), Large Language Models
(LLMs). 4. Structure your answers meticulously as if presenting a detailed
report:    - Emphasize major points with headings or numbered sections.    - Use
bullet points for key lists or steps.    - Highlight important considerations or
cautions with marked notes (*). 5. Provide references to the documents y

In [17]:
import gradio as gr

# Biến lưu trữ mô hình và các tham số hiện tại
current_pipeline = llm.pipeline
current_temperature = 0.1
current_max_new_tokens = 512
current_repetition_penalty = 1.1

# Hàm khởi tạo pipeline với các tham số điều chỉnh
def create_pipeline(temperature, max_new_tokens, repetition_penalty):
    return transformers.pipeline(
        "text-generation",
        return_full_text=True,
        model=model_id,
        model_kwargs={"torch_dtype": torch.bfloat16},
        device_map="auto",
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        repetition_penalty=repetition_penalty,
        num_return_sequences=1,
    )

# Hàm xử lý đầu vào từ người dùng và trả về kết quả từ chatbot
def chat_with_bot(user_input, history, temperature, max_new_tokens, repetition_penalty):
    global current_pipeline, current_temperature, current_max_new_tokens, current_repetition_penalty

    # Kiểm tra xem các tham số có thay đổi không
    if (current_pipeline is None or 
        temperature != current_temperature or 
        max_new_tokens != current_max_new_tokens or 
        repetition_penalty != current_repetition_penalty):

        # Tạo pipeline với các tham số từ giao diện
        current_pipeline = create_pipeline(temperature, max_new_tokens, repetition_penalty)
        current_temperature = temperature
        current_max_new_tokens = max_new_tokens
        current_repetition_penalty = repetition_penalty
    
    llm = HuggingFacePipeline(pipeline=current_pipeline)

    # Cấu hình lại rag_chain với các thành phần mới
    rag_chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm.bind(skip_prompt=True)
        | StrOutputParser()
    )

    # Gọi rag_chain để lấy kết quả
    output = rag_chain.invoke(user_input)
    output= "Vinbot 🤓: "+  output
    # Cập nhật lịch sử chat
    history.append((user_input, output))
    return "", history

# CSS tùy chỉnh để tạo giao diện giống ChatGPT
css = """
.gradio-container {
    font-family: 'Arial', sans-serif;
    background: #f5f5f5;
    color: #333;
    padding: 20px;
    height: 100vh;
}

#chatbox {
    max-width: 90%;
    height: 100%;
    margin: auto;
    border: 1px solid #ccc;
    border-radius: 10px;
    padding: 10px;
    background: white;
    box-shadow: 0 4px 8px rgba(0,0,0,0.1);
    display: flex;
    flex-direction: column;
}

#header {
    text-align: center;
    padding: 10px;
    background-color: #4CAF50;
    color: white;
    border-radius: 10px 10px 0 0;
}

#footer {
    text-align: center;
    padding: 10px;
    color: #888;
    margin-top: 20px;
    font-size: 0.9em;
}

#user_input_row {
    display: flex;
    width: 100%;
    padding: 5px;
    border-top: 1px solid #ccc;
    background-color: #f9f9f9;
    justify-content: space-between;
}

#user_input {
    flex: 1; /* Ô nhập liệu sẽ chiếm hết không gian còn lại */
}

#submit_button {
    background-color: green;
    flex: 0.1;
    color: black;
    width: auto; /* Đặt lại chiều rộng tự động để phù hợp với nội dung */
    border: none;
    padding: 8px 16px; /* Điều chỉnh padding để nút nhìn đẹp hơn */
    border-radius: 5px;
    cursor: pointer;
    transition: background-color 0.3s ease;
}

#submit_button:hover {
    background-color: #45A049;
}
"""



# Tạo giao diện với Gradio
with gr.Blocks(css=css) as iface:
    gr.HTML(
        """
        <div id="header">
            <h1>Chatbot Khoa học máy tính</h1>
            <p>Được hỗ trợ bởi mô hình Meta-Llama. Đặt câu hỏi của bạn về khoa học máy tính và nhận câu trả lời chi tiết!</p>
        </div>
        """
    )
    
    with gr.Column(elem_id="chatbox"):
        chatbot = gr.Chatbot()
        state = gr.State([])
        
        with gr.Row(elem_id="user_input_row"):  # Sử dụng elem_id để áp dụng CSS riêng
            user_input = gr.Textbox(
                show_label=False, 
                placeholder="Nhập câu hỏi của bạn...",
                elem_id="user_input"
            )
            submit_button = gr.Button("Gửi câu hỏi", elem_id="submit_button")

        with gr.Row():
            temperature = gr.Slider(0.1, 1.0, value=current_temperature, step=0.1, label="Temperature")
            max_new_tokens = gr.Slider(10, 1024, value=current_max_new_tokens, step=10, label="Max New Tokens")
            repetition_penalty = gr.Slider(1.0, 2.0, value=current_repetition_penalty, step=0.1, label="Repetition Penalty")

        submit_button.click(chat_with_bot, [user_input, state, temperature, max_new_tokens, repetition_penalty], [user_input, chatbot])

        # Thêm script JavaScript để bắt sự kiện khi nhấn Enter trong input
        gr.HTML("""
        <script>
        document.getElementById('user_input').addEventListener('keyup', function(event) {
            if (event.key === 'Enter') {
                document.getElementById('submit_button').click();
            }
        });
        </script>
        """)
    
    gr.HTML(
        """
        <div id="footer">
            <p>&copy; 2024 Chatbot Khoa học máy tính. Được phát triển bởi Nhóm Khoa học Máy tính.</p>
        </div>
        """
    )

# Khởi chạy giao diện
iface.launch(share=True)




Running on local URL:  http://127.0.0.1:7865
Running on public URL: https://eee71818a9c1f8cafd.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


