In [2]:
from dotenv import load_dotenv
import chromadb
import os

load_dotenv('../.env.local')
storage_path = '../'+os.getenv('STORAGE_PATH')
print(storage_path)

../chromadb


In [3]:
from IPython.display import display, Markdown
def view_text_in_markdown(page_content):
    display(Markdown(page_content))

## Vector Embeddings

In [4]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma

In [5]:
embeddings = OllamaEmbeddings(model="nomic-embed-text", show_progress=True)

def get_vector_store(collection_name):
  persistent_client = chromadb.PersistentClient(path='../chromadb')
  return Chroma(client=persistent_client,
                embedding_function=embeddings,
                collection_name=collection_name)

In [7]:
# Load vector database
vector_db = get_vector_store('short_manual')

  return Chroma(client=persistent_client,


In [8]:
retriever = vector_db.as_retriever()

In [9]:
retriever.invoke('What is this document about?')

OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  3.23it/s]


[Document(metadata={'source': 'pdf_files/owner_manual_p283-p300.pdf'}, page_content='data that will assist in understanding how a vehicle’s systems performed under certain crash or near crash- like situations, such as an air bag deployment or hitting a road obstacle >> page 268.'),
 Document(metadata={'source': 'pdf_files/owner_manual_p283-p300.pdf'}, page_content='data that will assist in understanding how a vehicle’s systems performed under certain crash or near crash- like situations, such as an air bag deployment or hitting a road obstacle >> page 268.'),
 Document(metadata={'source': 'pdf_files/owner_manual_p283-p300.pdf'}, page_content="damage, or even failure, of the axle and tires. A\n\ntire could explode and injure someone. Do not spin your vehicle's wheels faster than 30 mph (48 km/h) or for longer than 30 seconds continuously without stopping when you are stuck and do not let anyone near a spinning wheel, no matter what the speed.\n\nTOWING A DISABLED VEHICLE\n\nThis section

## Retrieval

In [7]:
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

In [8]:
# LLM from Ollama
local_model = "mistral"
llm = ChatOllama(model=local_model)

In [9]:
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You will impersonate the owner's manual of the RAM 1500 vehicle, model year 2025, Crew Cab version. The answers should be as close to the source as possible.
    Your task is to generate five different versions of the given user question to retrieve relevant documents
    from a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}""",
)

In [10]:
retriever = MultiQueryRetriever.from_llm(
    vector_db.as_retriever(), 
    llm,
    prompt=QUERY_PROMPT
)

# RAG prompt
template = """Answer the question based ONLY on the following context:
{context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

In [20]:
retrieve_docs = retriever.invoke('What is the vehicle line') # get relevant documents

OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  6.86it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 13.31it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  5.94it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 16.96it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  7.58it/s]


In [21]:
# small doc
retrieve_docs

[]

In [18]:
# big doc
retrieve_docs

 Document(metadata={'source': '../pdf_files/Owners_Manual-Ram_1500_25_Crew_Cab.pdf'}, page_content='The auxiliary switches manage the relays that power four or six blunt cut wires. These wires are located under the hood to the right, near the battery.\n\nIn addition to the four or six auxiliary switch wires, a fused battery wire and ignition wire are also found in this location.\n\nSERVICING AND MAINTENANCE 327\n\nA kit of splices and heat shrink tubing are provided with the auxiliary switches to aid in the connection/ installation of your electrical devices.\n\nFuse And Wire Color Chart\n\nNOTE:\n\nFuses for the auxiliary switches can be found in the auxiliary Power Distribution Center (PDC), located in the engine compartment toward the front of the vehicle, in front of the main PDC. Remove upper shield to access. If equipped, additional auxiliary switch fuses will be located in the main PDC.'),
 Document(metadata={'source': '../pdf_files/Owners_Manual-Ram_1500_25_Crew_Cab.pdf'}, page

In [19]:
len(retrieve_docs)

11

In [20]:
view_text_in_markdown(retrieve_docs[1].page_content)

SAFETY

281

282 IN CASE OF EMERGENCY

IN CASE OF EMERGENCY

HAZARD WARNING FLASHERS

The Hazard Warning Flashers button is located on the upper switch bank just below the radio.

Hazard Warning Flashers Button

NOTE:

If your vehicle is equipped with a 12-inch Uconnect display, the Hazard Warning Flashers button is located above the display.

Hazard Warning Flashers Button with 12-inch display

NOTE:

If your vehicle is equipped with a 14.5-inch Uconnect display, the Hazard Warning Flashers button is located to the left of the display.

Hazard Warning Flashers Button with 14.5-inch display

In [15]:
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [16]:
resposta = chain.invoke("Where is located the hazard flashers button?")
view_text_in_markdown(resposta)

OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  7.54it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 14.98it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  5.69it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 12.24it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  8.42it/s]


 Based on the provided context, there's no information about a location of a "hazard flashers button." This question cannot be answered.

In [31]:
resposta = chain.invoke("Please list all the support centers that assist button can connect")
view_text_in_markdown(resposta)

OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  8.26it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 14.69it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  5.21it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 10.15it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  8.54it/s]


1. Roadside Assistance
  2. Brand Connect Customer Care (If available)
  3. Vehicle Customer Care
  4. Uconnect Customer Care

In [24]:
resposta = chain.invoke("Please list all the support centers that assist button can connect")
view_text_in_markdown(resposta)

OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  3.03it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 52.69it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  5.29it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 12.23it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  5.57it/s]


1. Roadside Assistance
  2. Brand Connect Customer Care (If available)
  3. Vehicle Customer Care
  4. Uconnect Customer Care

In [25]:
resposta = chain.invoke("This is an owner manual of a vehicle. Can you specify which vehicle?")
view_text_in_markdown(resposta)

OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  8.54it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 13.80it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  5.99it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 10.89it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  7.72it/s]


 The document does not specify an exact model or make of the vehicle, but based on the source file name, it appears to be a Ram 1500 25 Crew Cab vehicle.

In [26]:
resposta = chain.invoke("Can you specify which vehicle?")
view_text_in_markdown(resposta)

OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  6.60it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 10.78it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  4.86it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 12.57it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  8.50it/s]


 I can't definitively say which vehicle as the provided information does not contain enough details to identify a specific model. The given data are excerpts from the owner's manual of a Ram 1500 truck, but that's only one possibility among many vehicles with similar manuals.

In [27]:
# Delete all collections in the db
vector_db.delete_collection()