In [1]:
URL = "https://drive.google.com/uc?export=download&id=1tixqlxq7INPVh_sTf4IMwVuviwBMtkH2"

!wget -O faiss_db.zip "$URL"

--2025-05-03 15:59:25--  https://drive.google.com/uc?export=download&id=1tixqlxq7INPVh_sTf4IMwVuviwBMtkH2
Resolving drive.google.com (drive.google.com)... 74.125.137.101, 74.125.137.100, 74.125.137.138, ...
Connecting to drive.google.com (drive.google.com)|74.125.137.101|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://drive.usercontent.google.com/download?id=1tixqlxq7INPVh_sTf4IMwVuviwBMtkH2&export=download [following]
--2025-05-03 15:59:25--  https://drive.usercontent.google.com/download?id=1tixqlxq7INPVh_sTf4IMwVuviwBMtkH2&export=download
Resolving drive.usercontent.google.com (drive.usercontent.google.com)... 74.125.137.132, 2607:f8b0:4023:c03::84
Connecting to drive.usercontent.google.com (drive.usercontent.google.com)|74.125.137.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 11537 (11K) [application/octet-stream]
Saving to: ‘faiss_db.zip’


2025-05-03 15:59:27 (45.0 MB/s) - ‘faiss_db.zip’ saved [11537/11537]

In [2]:
!unzip faiss_db.zip

Archive:  faiss_db.zip
   creating: faiss_db/
  inflating: faiss_db/index.pkl      
  inflating: faiss_db/index.faiss    


### 1. 安裝並引入必要套件

In [3]:
!pip install -U langchain langchain-community sentence-transformers faiss-cpu gradio openai



In [4]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain

In [5]:
from openai import OpenAI
import gradio as gr

### 2. 自訂 E5 embedding 類別

In [6]:
class CustomE5Embedding(HuggingFaceEmbeddings):
    """HuggingFaceEmbeddings variant that adds role prefixes to every input.

    Documents are embedded with a leading ``"passage: "`` and queries with a
    leading ``"query: "``; the actual embedding work is delegated to the
    parent class.
    """

    def embed_documents(self, texts):
        """Prefix each text with ``"passage: "`` and embed the resulting list."""
        prefixed_passages = []
        for passage in texts:
            prefixed_passages.append(f"passage: {passage}")
        return super().embed_documents(prefixed_passages)

    def embed_query(self, text):
        """Prefix the query with ``"query: "`` and embed it."""
        prefixed_query = f"query: {text}"
        return super().embed_query(prefixed_query)

### 3. 載入 `faiss_db`

In [7]:
# Embedding model with the passage/query prefixes applied by CustomE5Embedding.
embedding_model = CustomE5Embedding(model_name="intfloat/multilingual-e5-small")

# SECURITY: allow_dangerous_deserialization=True opts in to loading the pickled
# part of the index (faiss_db/index.pkl). Unpickling can execute arbitrary code,
# so only load archives from a source you trust.
db = FAISS.load_local(
    folder_path="faiss_db",
    embeddings=embedding_model,
    allow_dangerous_deserialization=True,
)

# Retriever with the vector store's default search settings.
retriever = db.as_retriever()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


### 4. 設定好我們要的 LLM

In [8]:
import os
from google.colab import userdata

和先前一樣，我們透過與 OpenAI API 相容的介面來呼叫 LLM。這裡以 Groq 服務為例，你也可以改成自己慣用的服務。

In [9]:
# Read the key from the Colab secret named "Groq" so it is never written into the notebook.
api_key = userdata.get("Groq")

In [10]:
# Expose the key through the environment; the OpenAI client below is created
# without an explicit api_key argument.
# NOTE(review): presumably the client reads OPENAI_API_KEY from the environment — confirm.
os.environ["OPENAI_API_KEY"] = api_key

這裡的模型名稱和 `base_url` 都是 Groq 的設定；如果改用其他服務，請自行修改這兩個值。

In [11]:
# Model id sent with every chat completion request.
# NOTE(review): confirm this id is still offered by the provider before running.
model = "llama3-70b-8192"

# OpenAI-compatible endpoint of the Groq service.
base_url = "https://api.groq.com/openai/v1"

In [12]:
# Point the OpenAI SDK at the configured endpoint.
# (Passing base_url is unnecessary when talking to OpenAI itself.)
client = OpenAI(base_url=base_url)

### 5. `prompt` 設計

In [19]:
# System message: persona, tone and output-language constraints for the assistant.
system_prompt = (
    "你是台北的 AI 約會嚮導，請根據資料來回應情侶們的問題。"
    "請親切、簡潔並附帶具體建議。"
    "只能使用台灣習慣的繁體中文回應。"
)

# User message template. Placeholders:
#   {retrieved_chunks} - text of the retrieved documents
#   {question}         - the user's question
# The leading and trailing newlines are part of the template.
prompt_template = (
    "\n"
    "根據下列資料嚴格限定使用台灣習慣的繁體中文回答問題：\n"
    "{retrieved_chunks}\n"
    "\n"
    "使用者的問題是：{question}\n"
    "\n"
    "請根據資料內容回覆。\n"
)

### 6. 使用 RAG 來回應

先搜尋與使用者問題相關的資料，再把資料套入我們的 prompt 樣板，讓 LLM 據此回應。

In [20]:
# Module-level log of (question, answer) pairs, appended to by chat_with_rag.
# It is not sent back to the model; every question is answered independently.
chat_history = []


def chat_with_rag(user_input):
    """Answer ``user_input`` using retrieval-augmented generation.

    Steps:
      1. Retrieve documents related to the question from ``retriever``.
      2. Fill ``prompt_template`` with the retrieved text and the question.
      3. Send ``system_prompt`` plus the filled prompt to the chat model.
      4. Record the (question, answer) pair in ``chat_history``.

    Args:
        user_input: The user's question.

    Returns:
        The content of the first choice returned by the chat completion.
    """
    # `invoke` is the current retriever entry point; the older
    # `get_relevant_documents` is deprecated in LangChain.
    docs = retriever.invoke(user_input)
    retrieved_chunks = "\n\n".join(doc.page_content for doc in docs)

    # Fill the custom prompt template.
    final_prompt = prompt_template.format(
        retrieved_chunks=retrieved_chunks,
        question=user_input,
    )

    # Call the OpenAI-compatible chat completion API.
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": final_prompt},
        ],
    )
    answer = response.choices[0].message.content

    # The list is only mutated, never rebound, so no `global` statement is needed.
    chat_history.append((user_input, answer))
    return answer

### 7. 用 Gradio 打造 Web App

In [21]:
with gr.Blocks() as demo:
    gr.Markdown("# AI 約會嚮導")
    # NOTE(review): the saved cell output echoes this line, which looks like a
    # truncated warning about gr.Chatbot's default message format — confirm and
    # consider migrating to the format recommended by the installed Gradio version.
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="請輸入你的問題...")

    def respond(message, chat_history_local):
        """Handle one textbox submission.

        Args:
            message: Text submitted by the user.
            chat_history_local: The Chatbot component's current list of
                (user, assistant) pairs.

        Returns:
            A pair ``("", history)``: the empty string clears the textbox and
            ``history`` becomes the Chatbot's new value.
        """
        # Tolerate a missing history value.
        chat_history_local = chat_history_local or []
        # Skip retrieval and the LLM call for empty or whitespace-only input.
        if not message or not message.strip():
            return "", chat_history_local
        response = chat_with_rag(message)
        chat_history_local.append((message, response))
        return "", chat_history_local

    msg.submit(respond, [msg, chatbot], [msg, chatbot])

# share=True publishes a temporary public URL; debug=True keeps the cell
# running so errors and logs stay visible (interrupt the cell to stop the server).
demo.launch(share=True, debug=True)

  chatbot = gr.Chatbot()


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://d99a9d9327f3442760.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://d99a9d9327f3442760.gradio.live


