In [1]:
# Cell 1: install libs
!pip install --upgrade pip
!pip install pdfplumber sentence-transformers faiss-cpu langchain openai


Collecting pip
  Downloading pip-25.2-py3-none-any.whl.metadata (4.7 kB)
Downloading pip-25.2-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.2
Collecting pdfplumber
  Downloading pdfplumber-0.11.7-py3-none-any.whl.metadata (42 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting pdfminer.six==20250506 (from pdfplumber)
  Downloading pdfminer_six-20250506-py3-none-any.whl.metadata (4.2 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-4.30.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
Downloading pdfplumber-0.11.7-py3-none-any.whl (60 kB)


In [2]:
# Cell 2: upload your VW Beetle manual PDF
from google.colab import files
uploaded = files.upload()  # click "Choose Files" and pick your vw_beetle_manual.pdf
print("Uploaded files:", list(uploaded.keys()))


Saving Disc & Drum.pdf to Disc & Drum.pdf
Uploaded files: ['Disc & Drum.pdf']


In [3]:
# Cell 3: extract text from uploaded PDF (adjust filename if different)
import pdfplumber
import os

# change this to the exact uploaded filename if needed
pdf_files = [f for f in os.listdir() if f.lower().endswith(".pdf")]
print("Found PDFs:", pdf_files)
pdf_path = pdf_files[0]  # uses first uploaded PDF

texts = []
with pdfplumber.open(pdf_path) as pdf:
    for i, page in enumerate(pdf.pages):
        page_text = page.extract_text()
        if page_text:
            texts.append(f"--- PAGE {i+1} ---\n" + page_text)

manual_text = "\n\n".join(texts)
print("✅ Extracted characters:", len(manual_text))
print("Preview:\n", manual_text[:1000])


Found PDFs: ['Disc & Drum.pdf']
✅ Extracted characters: 49133
Preview:
 --- PAGE 1 ---
22000033 VVoollkksswwaaggeenn NNeeww BBeeeettllee GGLLSS
11999988--22000044 BBRRAAKKEESS DDiisscc -- NNeeww BBeeeettllee
1998-2004 BRAKES
Disc - New Beetle
MODEL IDENTIFICATION
New Beetle is equipped with one of two brake systems on the front of the vehicle. To identify brake system
installed on a particular model, see BRAKE SYSTEM IDENTIFICATION table. For visual identification,
see Fig. 1 -Fig. 3 .
BRAKE SYSTEM IDENTIFICATION (NEW BEETLE)
Engine Size Brake System Axle
1.8L FN3 Front
1.9L & 2.0L FS III Front
1.8L, 1.9L & 2.0L C38 Rear
HHeellppmmeelleeaarrnn RReeppaaiirr MMaannuuaallss
WWeeddnneessddaayy,, DDeecceemmbbeerr 1144,, 22000055 77::5599::1266 AAMM PPaaggee 11 ©© 22000044 MMiittcchheellll RReeppaaiirr IInnffoorrmmaattiioonn CCoommppaannyy,, LLLLCC..

--- PAGE 2 ---
2003 Volkswagen New Beetle GLS
1998-2004 BRAKES Disc - New Beetle
Helpmelearn Repair Manuals
Wednesday, December 14, 2005 7:59:

In [4]:
# Cell 4: chunk the manual into pieces
!pip install --quiet tiktoken  # langchain's splitter may rely on tokenizers; safe to include

from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.split_text(manual_text)
print("✅ Number of chunks:", len(chunks))
# show first chunk preview
print("Chunk 0 preview:\n", chunks[0][:800])


✅ Number of chunks: 73
Chunk 0 preview:
 --- PAGE 1 ---
22000033 VVoollkksswwaaggeenn NNeeww BBeeeettllee GGLLSS
11999988--22000044 BBRRAAKKEESS DDiisscc -- NNeeww BBeeeettllee
1998-2004 BRAKES
Disc - New Beetle
MODEL IDENTIFICATION
New Beetle is equipped with one of two brake systems on the front of the vehicle. To identify brake system
installed on a particular model, see BRAKE SYSTEM IDENTIFICATION table. For visual identification,
see Fig. 1 -Fig. 3 .
BRAKE SYSTEM IDENTIFICATION (NEW BEETLE)
Engine Size Brake System Axle
1.8L FN3 Front
1.9L & 2.0L FS III Front
1.8L, 1.9L & 2.0L C38 Rear
HHeellppmmeelleeaarrnn RReeppaaiirr MMaannuuaallss
WWeeddnneessddaayy,, DDeecceemmbbeerr 1144,, 22000055 77::5599::1266 AAMM PPaaggee 11 ©© 22000044 MMiittcchheellll RReeppaaiirr IInnffoorrmmaattiioonn CCoommppaannyy,, LLLLCC..

--- PAGE 2 ---


In [5]:
# Cell 5: embed chunks and build FAISS index
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import pickle

embedder = SentenceTransformer('all-MiniLM-L6-v2')  # fast & small
embeddings = embedder.encode(chunks, show_progress_bar=True, convert_to_numpy=True)

d = embeddings.shape[1]
index = faiss.IndexFlatL2(d)
index.add(embeddings)
print("✅ Index built. Number of vectors:", index.ntotal)

# Save index and chunks to disk so you can reload later
faiss.write_index(index, "beetle_index.faiss")
with open("beetle_chunks.pkl", "wb") as f:
    pickle.dump(chunks, f)
print("✅ Saved index and chunks to files.")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Index built. Number of vectors: 73
✅ Saved index and chunks to files.


In [6]:
# Cell 6: search and show relevant context (no LLM yet)
import pickle, numpy as np, faiss

# load saved files if you restarted runtime
index = faiss.read_index("beetle_index.faiss")
with open("beetle_chunks.pkl", "rb") as f:
    chunks = pickle.load(f)

def retrieve(question, top_k=3):
    q_vec = embedder.encode([question], convert_to_numpy=True)
    distances, ids = index.search(q_vec, top_k)
    results = []
    for i, idx in enumerate(ids[0]):
        results.append({"score": float(distances[0][i]), "chunk_index": int(idx), "text": chunks[idx]})
    return results

# quick test
q = "Why is my Beetle producing white smoke from the exhaust?"
res = retrieve(q, top_k=3)
for r in res:
    print("----- Score:", r['score'], "Chunk:", r['chunk_index'])
    print(r['text'][:800], "\n\n")


----- Score: 1.274591326713562 Chunk: 67
--- PAGE 52 ---
2003 Volkswagen New Beetle GLS
1998-2004 BRAKES Disc - New Beetle
and damage. Replace defective parts as necessary.
Reassembly
1. Use all parts supplied in repair kit. Apply a thin coat of assembly lubricant G 052 150 A2 to piston and
seal before inserting. Fit piston seal into cylinder. Slide dust seal onto piston. Slowly insert piston into
bore. On self-adjusting rear calipers, use Special Tool (VW 3272) along with Collar (3272/1) or
equivalent, to help screw piston into housing by turning knurled wheel clockwise. If piston is pushed
back with a piston resetting tool or by operating foot brake automatic adjustment in the brake caliper is
destroyed.
2. Seat inner lip of dust seal in groove on cylinder housing. Open bleed screw and push piston into bore as
far as possible.  


----- Score: 1.305336594581604 Chunk: 0
--- PAGE 1 ---
22000033 VVoollkksswwaaggeenn NNeeww BBeeeettllee GGLLSS
11999988--22000044 BBRRAAKKEESS DDiisscc --

In [7]:
# Cell 7: optional LLM answer using OpenAI (set your API key securely)
import os
import openai

# set your key here or use Colab secrets (better) - NEVER share your key publicly
# os.environ["OPENAI_API_KEY"] = "sk-REPLACE_WITH_YOUR_KEY"
openai.api_key = os.environ.get("OPENAI_API_KEY")  # or set directly for quick testing

def ask_with_context(question, top_k=3):
    retrieved = retrieve(question, top_k)
    context = "\n\n---\n\n".join([r['text'] for r in retrieved])
    prompt = f"You are a vehicle service assistant. Use only the information from the manual context below to answer the question. Be concise and give step-by-step troubleshooting.\n\nCONTEXT:\n{context}\n\nQUESTION: {question}\n\nANSWER:"
    resp = openai.ChatCompletion.create(
        model="gpt-4o-mini",  # replace with any available model in your account
        messages=[{"role":"system","content":"You are a helpful vehicle service assistant."},
                  {"role":"user","content":prompt}],
        max_tokens=400,
        temperature=0.1
    )
    return resp.choices[0].message.content

# Example (ensure your OPENAI_API_KEY is set in the environment)
# print(ask_with_context("How do I adjust the clutch on a VW Beetle?"))


In [None]:
# Cell 8: simple interactive Q&A loop
print("Ask questions about the VW Beetle manual. Type 'quit' to exit.")
while True:
    q = input("\nQuestion: ").strip()
    if q.lower() in ("quit", "exit"):
        break
    retrieved = retrieve(q, top_k=3)
    print("\n--- Retrieved context (top 3) ---")
    for i, r in enumerate(retrieved):
        print(f"\n[{i+1}] chunk #{r['chunk_index']} (score={r['score']:.4f})\n{r['text'][:500]}\n")
    # optional LLM step
    use_llm = input("Call LLM to format answer? (y/n): ").strip().lower()
    if use_llm == "y":
        try:
            print("\nLLM answer:\n", ask_with_context(q, top_k=3))
        except Exception as e:
            print("LLM call failed:", e)
    else:
        print("You can read the retrieved context above or run the LLM step to generate a consolidated answer.")


Ask questions about the VW Beetle manual. Type 'quit' to exit.

--- Retrieved context (top 3) ---

[1] chunk #34 (score=1.2035)
--- PAGE 24 ---
2003 Volkswagen New Beetle GLS
1998-2004 BRAKES Disc - New Beetle
Fig. 22: Disconnecting Brake Pedal From Booster
Courtesy of VOLKSWAGEN UNITED STATES, INC.
Helpmelearn Repair Manuals
Wednesday, December 14, 2005 7:59:17 AM Page 24 © 2004 Mitchell Repair Information Company, LLC.


[2] chunk #58 (score=1.2313)
--- PAGE 43 ---
2003 Volkswagen New Beetle GLS
1998-2004 BRAKES Disc - New Beetle
PARKING BRAKE CABLES
Use following illustrations to aid in removal and installation of parking brake lever and cables. See Fig. 37 -Fig.
39 .
Fig. 37: Removing & Installing Parking Brake Cables (1 Of 3)
Courtesy of VOLKSWAGEN UNITED STATES, INC.
Helpmelearn Repair Manuals
Wednesday, December 14, 2005 7:59:18 AM Page 43 © 2004 Mitchell Repair Information Company, LLC.

--- PAGE 44 ---
2003 Volkswagen New Beetle GLS



[3] chunk #16 (score=1.2326)
stops. Apply