In [26]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [27]:
import os
import sys

project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.insert(0, project_root)
    os.chdir(project_root)

In [28]:
import torch

if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"GPU available: {torch.cuda.get_device_name(0)}")
    print(f"Number of GPUs: {torch.cuda.device_count()}")
else:
    device = torch.device("cpu")
    print("GPU not available, using CPU")

print(f"Using device: {device}")

GPU available: NVIDIA GeForce RTX 3060 Laptop GPU
Number of GPUs: 1
Using device: cuda


In [29]:
from dotenv import load_dotenv

load_dotenv()

False

In [30]:
import os

from langsmith import Client

client = Client()

#### Load document

In [31]:
from langchain_community.document_loaders import PDFPlumberLoader

from paths import DATA_DIR

pdf_loader = PDFPlumberLoader(DATA_DIR / "docs" / "company_overview.pdf")
docs = pdf_loader.load()

docs

[Document(metadata={'source': '/home/tufman/src/personal/ticket-agent/data/docs/company_overview.pdf', 'file_path': '/home/tufman/src/personal/ticket-agent/data/docs/company_overview.pdf', 'page': 0, 'total_pages': 2, 'Title': 'Untitled document', 'Producer': 'Skia/PDF m139 Google Docs Renderer'}, page_content='Company Name: EchoNova Audio\nDocument Title: Company Overview & Core Values\nDate: June 29, 2025\nCompany Overview\nEchoNova Audio is a multinational retailer and support provider specializing in premium\naudio electronics and related services. We distribute world-class products such as wireless\nheadphones, portable speakers, earbuds, and smart audio gear from top manufacturers\nincluding Sony, Bose, JBL, Apple, and Sennheiser.\nOperating across the United States and key European markets—such as Germany, France,\nthe UK, Netherlands, and Sweden—we serve both individual consumers and corporate\nclients with a seamless customer experience, fast logistics, and industry-recognized

#### Split docs 

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)
splits = text_splitter.split_documents(docs)

splits

[Document(metadata={'source': '/home/tufman/src/personal/ticket-agent/data/docs/company_overview.pdf', 'file_path': '/home/tufman/src/personal/ticket-agent/data/docs/company_overview.pdf', 'page': 0, 'total_pages': 2, 'Title': 'Untitled document', 'Producer': 'Skia/PDF m139 Google Docs Renderer'}, page_content='Company Name: EchoNova Audio\nDocument Title: Company Overview & Core Values\nDate: June 29, 2025\nCompany Overview\nEchoNova Audio is a multinational retailer and support provider specializing in premium\naudio electronics and related services. We distribute world-class products such as wireless'),
 Document(metadata={'source': '/home/tufman/src/personal/ticket-agent/data/docs/company_overview.pdf', 'file_path': '/home/tufman/src/personal/ticket-agent/data/docs/company_overview.pdf', 'page': 0, 'total_pages': 2, 'Title': 'Untitled document', 'Producer': 'Skia/PDF m139 Google Docs Renderer'}, page_content='headphones, portable speakers, earbuds, and smart audio gear from top man

#### Vector Store

In [33]:
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langsmith import Client

client = Client()

vectorstore = Chroma.from_documents(
    documents=splits, embedding=HuggingFaceEmbeddings(model_name="all-mpnet-base-v2")
)

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


In [34]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 2}, name="Ticket Agent")

results = retriever.invoke("What is the company name?")

results

[Document(id='c7e37100-4ad5-4b68-bc20-06aed51dfc74', metadata={'file_path': '/home/tufman/src/personal/ticket-agent/data/docs/company_overview.pdf', 'source': '/home/tufman/src/personal/ticket-agent/data/docs/company_overview.pdf', 'page': 0, 'Title': 'Untitled document', 'total_pages': 2, 'Producer': 'Skia/PDF m139 Google Docs Renderer'}, page_content='global brands.\n● Paid Services: Extended warranties, device protection plans, music service bundles,\nand setup assistance.\n● Customer Support: Specialized departments for Tech Support, Billing, Shipping,\nLegal, Sales, and Customer Care.'),
 Document(id='33df6c9a-2ef9-4946-9959-98d4de5470be', metadata={'page': 0, 'source': '/home/tufman/src/personal/ticket-agent/data/docs/company_overview.pdf', 'Producer': 'Skia/PDF m139 Google Docs Renderer', 'total_pages': 2, 'Title': 'Untitled document', 'file_path': '/home/tufman/src/personal/ticket-agent/data/docs/company_overview.pdf'}, page_content='global brands.\n● Paid Services: Extended wa

#### Multi Query Retriever

In [35]:
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_deepseek import ChatDeepSeek

mq_llm = ChatDeepSeek(model="deepseek-chat", temperature=0)

mq_retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=mq_llm)

mq_retriever.invoke("What is the company name?")

[Document(id='33df6c9a-2ef9-4946-9959-98d4de5470be', metadata={'Title': 'Untitled document', 'source': '/home/tufman/src/personal/ticket-agent/data/docs/company_overview.pdf', 'file_path': '/home/tufman/src/personal/ticket-agent/data/docs/company_overview.pdf', 'total_pages': 2, 'page': 0, 'Producer': 'Skia/PDF m139 Google Docs Renderer'}, page_content='global brands.\n● Paid Services: Extended warranties, device protection plans, music service bundles,\nand setup assistance.\n● Customer Support: Specialized departments for Tech Support, Billing, Shipping,\nLegal, Sales, and Customer Care.'),
 Document(id='bfe2a3e0-febd-497b-a63a-3de91e5e8248', metadata={'file_path': '/home/tufman/src/personal/ticket-agent/data/docs/company_overview.pdf', 'Title': 'Untitled document', 'Producer': 'Skia/PDF m139 Google Docs Renderer', 'source': '/home/tufman/src/personal/ticket-agent/data/docs/company_overview.pdf', 'total_pages': 2, 'page': 0}, page_content='global brands.\n● Paid Services: Extended wa

#### Generation

In [36]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

rag_prompt = hub.pull("rlm/rag-prompt")
llm = ChatDeepSeek(model="deepseek-chat", temperature=0)


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


chain = (
    {
        "context": mq_retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | rag_prompt
    | llm
    | StrOutputParser()
)

In [37]:
chain.invoke("What is the company name?")

"The company name is not mentioned in the provided context. I don't know."