In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys

project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.insert(0, project_root)
    os.chdir(project_root)

In [3]:
import torch

if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"GPU available: {torch.cuda.get_device_name(0)}")
    print(f"Number of GPUs: {torch.cuda.device_count()}")
else:
    device = torch.device("cpu")
    print("GPU not available, using CPU")

print(f"Using device: {device}")

GPU available: NVIDIA GeForce RTX 3060 Laptop GPU
Number of GPUs: 1
Using device: cuda


In [4]:
from dotenv import load_dotenv

load_dotenv()

True

In [5]:
import os

from langsmith import Client

client = Client()

#### Load document

##### docx

In [6]:
from paths import DATA_DIR

docx_files = list((DATA_DIR / "docs").glob("*.docx"))

docx_files

[PosixPath('/home/tufman/src/personal/ticket-agent/data/docs/company_overview.docx'),
 PosixPath('/home/tufman/src/personal/ticket-agent/data/docs/services_documentation.docx')]

In [7]:
from langchain_community.document_loaders import Docx2txtLoader

loaders = [Docx2txtLoader(file) for file in docx_files]

docs = []
for loader in loaders:
    docs.extend(loader.load())

len(docs)

2

In [8]:
print(docs[1].page_content)

EchoNova Audio – Paid Services Documentation



1. Extended Warranty

Description:
 Adds extra protection beyond the standard manufacturer’s warranty. Covers hardware defects, internal component failures, and general wear within coverage scope.

Plans & Pricing:

Coverage Length

Price (per device)

+1 year

$19.99

+2 years

$34.99

+3 years

$49.99

Included:

Replacement or repair for eligible malfunctions


Free two-way shipping for warranty claims


24/7 access to warranty claim portal


Exclusions:

Accidental damage (use Device Protection Plan)


Water or fire damage


Cosmetic wear (scratches, dents)




2. Device Protection Plan

Description:
 Full accident coverage including drops, spills, water exposure, and cracked components. Covers repairs or one-time full replacement.

Plans & Pricing (per device):

Device Category

Monthly

Annual

Headphones / Earbuds

$4.99

$49.99

Portable Speakers (<$250)

$5.99

$59.99

Premium Speakers (>$250)

$7.99

$79.99

Included:

Unlimited

#### Split docs 

In [9]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800, chunk_overlap=0, separators=["\n\n\n\n", "\n\n", "\n", " ", ""]
)

splits = text_splitter.split_documents(docs)

len(splits)

11

In [10]:
for doc in splits:
    print(len(doc.page_content))
    print(doc.page_content + "\n" + "-" * 100 + "\n")

97
Company Name: EchoNova Audio
 Document Title: Company Overview & Core Values
 Date: June 29, 2025
----------------------------------------------------------------------------------------------------

792
Company Overview

EchoNova Audio is a multinational retailer and support provider specializing in premium audio electronics and related services. We distribute world-class products such as wireless headphones, portable speakers, earbuds, and smart audio gear from top manufacturers including Sony, Bose, JBL, Apple, and Sennheiser.

Operating across the United States and key European markets—such as Germany, France, the UK, Netherlands, and Sweden—we serve both individual consumers and corporate clients with a seamless customer experience, fast logistics, and industry-recognized service standards.

We partner with respected B2B distributors such as TechDepot Europe, SmartAudio Direct, and HiFi Distribution Group, and also directly serve end users via our online and retail operations.


#### Vector Store

In [None]:
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langsmith import Client

client = Client()

vectorstore = Chroma.from_documents(
    documents=splits, embedding=HuggingFaceEmbeddings(model_name="all-mpnet-base-v2")
)

In [None]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 2}, name="Ticket Agent")

results = retriever.invoke("What is the company name?")

results

Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given


[Document(id='d5db00ae-4ee9-4dd4-b880-e8b63ac6df5e', metadata={'source': '/home/tufman/src/personal/ticket-agent/data/docs/company_overview.docx'}, page_content='Company Name: EchoNova Audio\n Document Title: Company Overview & Core Values\n Date: June 29, 2025'),
 Document(id='53fb3b6c-6d3a-40e6-9b7a-33191008bbc1', metadata={'source': '/home/tufman/src/personal/ticket-agent/data/docs/company_overview.docx'}, page_content='Mission Statement\n\nTo deliver premium audio solutions with industry-leading customer care, ensuring a smooth experience from purchase to long-term product support.\n\n\n\nCore Services\n\nRetail & Distribution: Headphones, speakers, and audio accessories from leading global brands.\n\nPaid Services: Extended warranties, device protection plans, music service bundles, and setup assistance.\n\nCustomer Support: Specialized departments for Tech Support, Billing, Shipping, Legal, Sales, and Customer Care.\n\nB2B Sales: Reseller partnerships, corporate bulk orders, infl

#### Multi Query Retriever

In [None]:
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_deepseek import ChatDeepSeek
from langchain_openai import ChatOpenAI

mq_llm = ChatOpenAI(temperature=0)

mq_retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=mq_llm)

mq_retriever.invoke("What is the company name?")

[Document(id='d5db00ae-4ee9-4dd4-b880-e8b63ac6df5e', metadata={'source': '/home/tufman/src/personal/ticket-agent/data/docs/company_overview.docx'}, page_content='Company Name: EchoNova Audio\n Document Title: Company Overview & Core Values\n Date: June 29, 2025'),
 Document(id='53fb3b6c-6d3a-40e6-9b7a-33191008bbc1', metadata={'source': '/home/tufman/src/personal/ticket-agent/data/docs/company_overview.docx'}, page_content='Mission Statement\n\nTo deliver premium audio solutions with industry-leading customer care, ensuring a smooth experience from purchase to long-term product support.\n\n\n\nCore Services\n\nRetail & Distribution: Headphones, speakers, and audio accessories from leading global brands.\n\nPaid Services: Extended warranties, device protection plans, music service bundles, and setup assistance.\n\nCustomer Support: Specialized departments for Tech Support, Billing, Shipping, Legal, Sales, and Customer Care.\n\nB2B Sales: Reseller partnerships, corporate bulk orders, infl

#### Generation

In [None]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

rag_prompt = hub.pull("rlm/rag-prompt")
gen_llm = ChatDeepSeek(model="deepseek-chat", temperature=0)


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


chain = (
    {
        "context": mq_retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | rag_prompt
    | gen_llm
    | StrOutputParser()
)

In [None]:
query = "What is the company name?"
response = chain.invoke(query)
print(response)

The company name is EchoNova Audio.


#### Evaluation

In [None]:
general_docs_test_cases = [
    ("What is the name of the company?", "EchoNova Audio"),
    (
        "Which countries in Europe does EchoNova serve directly",
        "Germany, France, UK, Netherlands, Sweden.",
    ),
    (
        "What kind of audio products does EchoNova sell?",
        "Wireless headphones, portable speakers, earbuds, smart audio gear.",
    ),
    (
        "What are core values of the company?",
        "Responsiveness, Clarity, Respect, Trust and Flexibility",
    ),
    (
        "What does EchoNova mean by “Trust” in their core values?",
        "Secure billing, GDPR compliance, and accountable privacy practices.",
    ),
    (
        "What’s the main difference between the Extended Warranty and the Device Protection Plan?",
        "Extended Warranty covers hardware/internal failures over time; Protection Plan covers accidental damage like drops or water.",
    ),
    (
        "How much does it cost to protect a pair of headphones annually under the Device Protection Plan?",
        "$49.99 per year.",
    ),
    (
        "What’s included in the Premium Support Plan that speeds up issue resolution?",
        "Priority phone/chat, brand-trained agents, engineer access, 1–2 day RMA.",
    ),
    (
        "Can you get multiple Setup Assistance sessions in different languages?",
        "Yes, sessions are available in English, German, and French.",
    ),
]

product_manual_test_cases = []

In [None]:
from ragas import EvaluationDataset

dataset = []

for query_set, class_label in [
    (general_docs_test_cases, "general"),
    (product_manual_test_cases, "manual"),
]:
    for query, reference in query_set:
        dataset.append(
            {
                "user_input": query,
                "retrieved_contexts": [
                    doc.page_content for doc in mq_retriever.invoke(query)
                ],
                "response": chain.invoke(query),
                "reference": reference,
                "metadata": {"class": class_label},
            }
        )

evaluation_dataset = EvaluationDataset.from_list(dataset)

In [None]:
evaluation_dataset

EvaluationDataset(features=['user_input', 'retrieved_contexts', 'response', 'reference'], len=2)

In [None]:
from ragas import evaluate
from ragas.metrics import AnswerAccuracy, ContextRelevance, ResponseGroundedness

eval_llm = ChatDeepSeek(model="deepseek-chat", temperature=0)


result = evaluate(
    dataset=evaluation_dataset,
    metrics=[AnswerAccuracy(), ContextRelevance(), ResponseGroundedness()],
    llm=eval_llm,
).to_pandas()

Evaluating:   0%|          | 0/6 [00:00<?, ?it/s]

##### Results

In [None]:
result.mean(numeric_only=True)

nv_accuracy                 1.0
nv_context_relevance        1.0
nv_response_groundedness    1.0
dtype: float64