In [1]:
from sentence_transformers import SentenceTransformer
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pypdf import PdfReader
import faiss, numpy as np
from groq import Groq
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Initialisation: load the sentence-embedding model and create the Groq client.
# The API key is taken from the GROQ_API_KEY environment variable so that it is
# never written into the notebook.
embedder = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2"
)
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)

In [4]:
# Read the PDF and collect the text of every page into one string.
pdf = PdfReader("data/sample.pdf")
# Each page is extracted exactly once; the same result is used both for the
# emptiness check and for the join. Pages that yield no text are skipped.
raw = "\n".join(
    text
    for text in (page.extract_text() for page in pdf.pages)
    if text
)

In [5]:
# Split the extracted text into overlapping chunks
# (chunk_size=800, chunk_overlap=100).
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
chunks = splitter.split_text(raw)

# Fail fast with a clear message: with no chunks there is nothing to embed,
# index, or retrieve, and the failure would otherwise only show up further
# down the notebook in a much less obvious form.
if not chunks:
    raise ValueError(
        "No text chunks were produced; the PDF yielded no extractable text."
    )

In [6]:
# Embedding: encode every chunk (asking the model for normalized vectors) and
# keep the result as a float32 array; `dim` is the size of its second axis.
emb = np.array(
    embedder.encode(chunks, normalize_embeddings=True),
    dtype=np.float32,
)
dim = emb.shape[1]

In [7]:
# Build the index: a flat inner-product FAISS index of width `dim`, filled
# with every chunk vector in `emb` (row i of `emb` corresponds to chunks[i]).
# NOTE(review): the vectors were requested with normalize_embeddings=True, so
# inner-product scores presumably behave like cosine similarity — confirm.
index = faiss.IndexFlatIP(dim)
index.add(emb)

In [8]:
def search(query, k=5):
    """Return up to ``k`` chunks that best match ``query``.

    The query is encoded with the notebook-level ``embedder`` (normalized,
    cast to float32) and looked up in the notebook-level FAISS ``index``;
    the returned labels are used as positions into ``chunks``.

    Args:
        query: The question or search string to embed.
        k: Maximum number of chunks to return.

    Returns:
        A list of chunk strings in the order FAISS returned them. The list
        may be shorter than ``k`` (or empty) when the index holds fewer
        vectors than requested.
    """
    # Never request more neighbours than the index actually holds.
    k = min(k, index.ntotal)
    if k <= 0:
        return []

    qv = embedder.encode([query], normalize_embeddings=True).astype("float32")
    _, ids = index.search(qv, k)

    # FAISS fills missing result slots with the label -1. Used as a Python
    # list index that would silently select the *last* chunk, so such
    # placeholder labels are dropped instead of being looked up.
    return [chunks[i] for i in ids[0] if i >= 0]

def rag(question, k=5):
    """Answer ``question`` in a single turn using retrieved chunks as context.

    The ``k`` chunks returned by ``search`` are joined with newlines and sent,
    together with the question, to the Groq chat-completions endpoint through
    the notebook-level ``client``.

    Args:
        question: The user's question.
        k: Number of chunks to retrieve for the context.

    Returns:
        The message content of the first completion choice.
    """
    retrieved = search(question, k)
    ctx = "\n".join(retrieved)

    system_msg = {
        "role": "system",
        "content": "You are a precise assistant. Answer ONLY using CONTEXT.",
    }
    user_msg = {
        "role": "user",
        "content": f"CONTEXT:\n{ctx}\n\nQUESTION: {question}",
    }

    completion = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=[system_msg, user_msg],
        temperature=0.0,
        max_tokens=400,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


In [9]:
# Try the single-turn RAG helper with a sample question; print() is used so
# the newlines in the returned answer are rendered.
print(rag("What methodology does the paper use?"))

The methodology used in the paper involves the following:

1. Data Collection and Preprocessing (Section 3.1), specifically setting up and composing data (Section 3.1.1)
2. Using Deep Learning for Sign Language Recognition (Section 2.2), including CNN-based Methods (Section 2.2.1)
3. Augmented model with superior generalization capabilities, demonstrated by uniform excellence across all metrics


In [10]:
class Memory:
    """Bounded log of recent (question, answer) turns.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, max_turns=5):
        # Upper bound on how many turns are kept.
        self.max_turns = max_turns
        # Stored turns as (question, answer) tuples, oldest first.
        self.turns = []

    def add(self, q, a):
        """Append the turn ``(q, a)``; drop the oldest turn if over the limit."""
        self.turns.append((q, a))
        over_limit = len(self.turns) > self.max_turns
        if over_limit:
            del self.turns[0]

    def text(self):
        """Return the stored turns as ``User:…``/``AI:…`` lines joined by newlines."""
        rendered = []
        for asked, replied in self.turns:
            rendered.append(f"User:{asked}\nAI:{replied}")
        return "\n".join(rendered)

In [11]:
# Notebook-level conversation memory (default max_turns); chat() reads its
# history from this object and records each new turn in it.
memory = Memory()

def chat(question, k=5):
    """Answer ``question`` using retrieved context plus the conversation history.

    The prompt contains the text of the notebook-level ``memory`` (HISTORY),
    the ``k`` chunks returned by ``search`` for the current question
    (CONTEXT), and the question itself. After the model replies, the
    question/answer pair is recorded in ``memory``.

    Note that retrieval is driven by the current question only; the history
    is given to the model but is not used to build the search query.

    Args:
        question: The user's question for this turn.
        k: Number of chunks to retrieve for the context. Defaults to 5,
            matching the default of ``rag``.

    Returns:
        The message content of the first completion choice.
    """
    history = memory.text()
    ctx = "\n".join(search(question, k))
    messages = [
        {"role": "system", "content": "You are a precise assistant. Use CONTEXT and HISTORY."},
        {"role": "user", "content": f"HISTORY:\n{history}\n\nCONTEXT:\n{ctx}\n\nQUESTION:{question}"},
    ]
    resp = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=messages,
        temperature=0.0,
        max_tokens=400,
    )
    answer = resp.choices[0].message.content
    # Store the turn only after a reply was obtained, so a failed request
    # leaves the history untouched.
    memory.add(question, answer)
    return answer

In [12]:
# First turn of the memory-backed chat; chat() records this question/answer
# pair in `memory` so that later turns can see it.
print(chat("What methodology does the paper use?"))


Based on the provided CONTEXT, the paper uses the following methodology:

1. Data Collection and Preprocessing (Section 3.1)
   - Data Setup and Composition (Section 3.1.1)

Additionally, it is mentioned that the paper uses the following approaches for Sign Language Recognition:

1. Deep Learning (Section 2.2)
   - CNN-based Methods (Section 2.2.1)

No specific information on Model Architecture (Section 3.5) is provided in the given context.


In [13]:
# Follow-up turn: the prompt built by chat() includes the turns already
# stored in `memory` as HISTORY.
print(chat("And what are the results?"))

Based on the provided CONTEXT, the results of the paper can be summarized as follows:

1. **Effect of Data Augmentation**: The results show that the data augmentation techniques had a substantial impact on model performance. The model trained with augmented data achieved near-perfect accuracy (99.94%) on the validation set, compared to 80.17% for the non-augmented model.

2. **Model Stability**: The augmented model demonstrated remarkable stability throughout training, with consistent performance and minimal fluctuations. In contrast, the non-augmented model exhibited pronounced instability throughout training.

3. **Generalization Capabilities**: The results indicate that the augmented model has superior generalization capabilities, as demonstrated by its uniform excellence across all metrics.

4. **Improved Accuracy**: The data augmentation techniques led to a significant improvement in model performance, with the augmented model achieving:
   - 99.94% accuracy on the validation set


In [14]:
import streamlit as st

# Minimal Streamlit front end around chat(). This code is meant to be executed
# by `streamlit run` (e.g. from a script file); executed directly in a notebook
# kernel Streamlit only emits warnings about the missing `streamlit run`.
st.title("PDF RAG Chatbot")
question = st.text_input("Ask a question:")

# Strip surrounding whitespace so that an input consisting only of blanks is
# treated as empty and does not trigger retrieval and an LLM call.
question = question.strip() if question else ""

if st.button("Submit") and question:
    answer = chat(question)
    st.write(answer)


2025-09-04 23:16:49.641 
  command:

    streamlit run /Users/duan/llm-engineer-roadmap/.llmvenv/lib/python3.13/site-packages/ipykernel_launcher.py [ARGUMENTS]
2025-09-04 23:16:49.643 Session state does not function when running a script without `streamlit run`


In [16]:
# Launch the Streamlit app from the separate app.py script via the shell.
# NOTE(review): the captured output shows a tokenizers fork warning; setting
# TOKENIZERS_PARALLELISM explicitly, as that message suggests, should silence it.
!streamlit run app.py


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://10.0.0.177:8501[0m
[0m
[34m[1m  For better performance, install the Watchdog module:[0m

  $ xcode-select --install
  $ pip install watchdog
            [0m
2025-09-04 23:17:12.841 Uncaught app execution
Traceback (most recent call last):
  File "/Users/duan/llm-engineer-roadmap/.llmvenv/lib/python3.13/site-packages/streamlit/runtime/scriptrunner/exec_code.py", line 128, in exec_func_with_error_handling
    result = func()
  File "/Users/duan/llm-engineer-roadmap/.llmvenv/lib/python3.13/site-packages/streamlit/runtime/scriptrunner/script_runner.py", line 669, in code_to_exec
    exec(code, module.__dict__)  # noqa: S102
    ~~~~^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/duan/llm-engineer-roadmap/quick-demo/app.py", line 50, in <module>
    answer = chat(question)
  File "/Users/duan/llm-engineer-roadmap/quick-demo/app.py"