# Install Dependencies

In [1]:
%pip uninstall -y \
  langchain langchain-core langchain-community langchain-text-splitters \
  langchain-groq langsmith numpy pandas

%pip install --no-cache-dir \
  numpy==1.26.4 \
  pandas==2.2.1 \
  scikit-learn==1.4.2 \
  faiss-cpu==1.8.0 \
  sentence-transformers==2.6.1 \
  transformers==4.38.2 \
  pymupdf==1.23.26 \
  rank_bm25==0.2.2 \
  langchain==0.1.20 \
  langchain-core==0.1.53 \
  langchain-community==0.0.38 \
  langchain-text-splitters==0.0.1 \
  groq


Found existing installation: langchain 1.2.8
Uninstalling langchain-1.2.8:
  Successfully uninstalled langchain-1.2.8
Found existing installation: langchain-core 1.2.9
Uninstalling langchain-core-1.2.9:
  Successfully uninstalled langchain-core-1.2.9
[0mFound existing installation: langsmith 0.6.9
Uninstalling langsmith-0.6.9:
  Successfully uninstalled langsmith-0.6.9
Found existing installation: numpy 2.0.2
Uninstalling numpy-2.0.2:
  Successfully uninstalled numpy-2.0.2
Found existing installation: pandas 2.2.2
Uninstalling pandas-2.2.2:
  Successfully uninstalled pandas-2.2.2
Collecting numpy==1.26.4
  Downloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pandas==2.2.1
  Downloading pandas-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting scikit-learn==1.4.2

# Import Libraries

In [5]:
import os
import fitz  # PyMuPDF

from langchain_text_splitters import (
    RecursiveCharacterTextSplitter,
    CharacterTextSplitter
)

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder

from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
from langchain.prompts import PromptTemplate

from groq import Groq
import logging

logging.getLogger("langchain_text_splitters").setLevel(logging.ERROR)



# PDF Text Extraction

In [6]:
def extract_text_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    text = ""
    for page in doc:
        blocks = page.get_text("blocks")
        for block in blocks:
            if len(block) > 4 and block[4].strip():
                text += block[4] + "\n\n"
    doc.close()
    return text.strip()

pdf_path = "sample-service-manual 1.pdf"
full_text = extract_text_from_pdf(pdf_path)

print("Extracted text length:", len(full_text))
print("First 500 chars:\n", full_text[:500])

Extracted text length: 880957
First 500 chars:
 Suspension System 


Inspection and Verification 


1.
Road test. 


z Verify the customer concern by carrying out a road test on a smooth road. If any vibrations are 


apparent, refer to Section 100-04 . 


2.
Inspect tires. 


z Check the tire pressure with all normal loads in the vehicle and the tires cold. Refer to the 


Vehicle Certification (VC) label. 


z Verify that all tires are sized to specification. Refer to the VC label. 


z Inspect the tires for incorrect wear and damage. Insta


# Recursive Chunking

In [7]:
def recursive_chunking(text, size=500, overlap=50):
    return RecursiveCharacterTextSplitter(
        chunk_size=size,
        chunk_overlap=overlap
    ).split_text(text)

chunks = recursive_chunking(full_text)
print("Total chunks:", len(chunks))


Total chunks: 2125


# Embedding Model
Using Sentence-Transformers for embeddings (all-MiniLM-L6-v2).

In [8]:
embedding_model = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2"
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

# Hybrid Retrieval with Cross-Encoder Reranker

In [9]:
def create_bm25(chunks):
    return BM25Retriever.from_texts(chunks, k=5)

def create_dense(chunks):
    vectorstore = FAISS.from_texts(chunks, embedding_model)
    return vectorstore.as_retriever(search_kwargs={"k": 5})

def create_hybrid(bm25, dense):
    return EnsembleRetriever(
        retrievers=[bm25, dense],
        weights=[0.5, 0.5]
    )

bm25 = create_bm25(chunks)
dense = create_dense(chunks)
hybrid = create_hybrid(bm25, dense)


In [10]:
def create_reranked(hybrid_retriever, top_n=5):
    cross_encoder = HuggingFaceCrossEncoder(
        model_name="cross-encoder/ms-marco-MiniLM-L-6-v2"
    )

    compressor = CrossEncoderReranker(
        model=cross_encoder,
        top_n=top_n
    )

    return ContextualCompressionRetriever(
        base_retriever=hybrid_retriever,
        base_compressor=compressor
    )

retriever = create_reranked(hybrid, top_n=5)




config.json:   0%|          | 0.00/794 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

# Retrieval Augmentation and Groq LLaMA 3.3 Inference

> Get your Groq API key from https://console.groq.com  
> The key is hidden here for security reasons.


In [None]:
os.environ["GROQ_API_KEY"] = ""

client = Groq(api_key=os.environ["GROQ_API_KEY"])


In [12]:
prompt = PromptTemplate.from_template("""
You are an expert automotive technician extracting ONLY real specifications from the service manual context.
Extract NOTHING that is not explicitly stated.
Do NOT guess, infer, hallucinate, or add any extra text.

User query: {query}

Most relevant manual text (ranked by relevance):
{context}

Return ONLY a valid JSON array where the response will be easier to understand in key : value form where left side attribute tells about the the right side attribute

If no specification matches, return empty array: []
""")


In [13]:
def groq_llm(prompt_input) -> str:
    # LangChain may pass PromptValue or string
    if hasattr(prompt_input, "to_string"):
        prompt_text = prompt_input.to_string()
    else:
        prompt_text = str(prompt_input)

    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {"role": "user", "content": prompt_text}
        ],
        temperature=0.0
    )
    return response.choices[0].message.content
llm = RunnableLambda(groq_llm)

 # Retrieval-to-Generation Pipeline


In [14]:
rag_chain = (
    {
        "query": RunnablePassthrough(),
        "context": lambda q: retriever.invoke(q)
    }
    | RunnablePassthrough.assign(
        context=lambda inputs: "\n\n".join(
            doc.page_content for doc in inputs["context"]
        )
    )
    | prompt
    | llm
)


# Testing with few queries

In [15]:
query = "Torque specification for brake caliper bolts"
output = rag_chain.invoke(query)

print(output)

[
  {"Brake caliper anchor plate bolts": "250 Nm, 184 lb-ft"},
  {"Brake caliper flow bolt": "35 Nm, 26 lb-ft"},
  {"Brake caliper guide pin bolts": "37 Nm, 27 lb-ft"}
]


In [16]:
query = "Torque specification for brake caliper plate bolts"
output = rag_chain.invoke(query)

print(output)

["Brake caliper anchor plate bolts" : 250, "Brake caliper anchor plate bolts (lb-ft)" : 184]


In [17]:
query = "What is a pinpoint test?"
output = rag_chain.invoke(query)

print(output)

[
  {"Pinpoint Test G": "ONE OR MORE TPMS SENSORS DO NOT TRAIN AND NO DTCs ARE PRESENT"},
  {"Pinpoint Test I": "DTCs C109D, C109E and C109F"},
  {"Pinpoint Test B": "THE TRAILER BRAKES ARE INOPERATIVE"},
  {"Pinpoint Test C": "THE TRAILER BRAKES ARE ALWAYS ENGAGED"},
  {"Pinpoint Test A": "INNER EDGE/SHOULDER WEAR"}
]


In [18]:
query = "Explain Camber and Caster Adjustment"
output = rag_chain.invoke(query)

print(output)

[
  {"Increase caster": "In"},
  {"Decrease caster": "Out"},
  {"Increase camber": "In"},
  {"Decrease camber": "Out"},
  {"Increase caster and camber simultaneously": "In"},
  {"Decrease caster and camber simultaneously": "Out"},
  {"Tighten the nuts": "55 Nm (41 lb-ft)"}
]


In [19]:
query = "What are the components of front suspension"
output = rag_chain.invoke(query)

print(output)

[
  {"Front Suspension Components for RWD": "Lower ball joints, Lower control arms, Shock absorber and coil spring assemblies, Stabilizer bar and links, Upper control arms, Wheel bearings and wheel hubs, Wheel knuckles, Wheel studs"},
  {"Front Suspension Components for 4WD": "Ball joints, Coil springs, Lower control arms, Shock absorbers, Stabilizer bar and links, Upper control arms, Wheel bearing and wheel hubs, Wheel knuckles, Wheel studs"}
]
