In [33]:
# Show the kernel's current working directory.
%pwd

'c:\\Users\\em\\Desktop\\PFA\\Customer_Support_LLMOps'

In [4]:
import os 
os.chdir("../")

%pwd

'c:\\Users\\em\\Desktop\\PFA\\Customer_Support_LLMOps'

In [34]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [35]:
# Extract text from PDF files
def load_pdf_files(data):
    """Load the PDF files of a folder as LangChain documents.

    Args:
        data: Path to the folder containing the PDF files.

    Returns:
        The documents returned by ``DirectoryLoader.load()`` for the files
        matching the ``*.pdf`` glob, each file being read with ``PyPDFLoader``.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [36]:
# Load the PDFs from the local "data" folder and display the loaded documents.
extracted_data = load_pdf_files("data")
extracted_data

[Document(metadata={'producer': 'ReportLab PDF Library - www.reportlab.com', 'creator': '(unspecified)', 'creationdate': '2025-08-13T21:14:05+00:00', 'author': '(anonymous)', 'keywords': '', 'moddate': '2025-08-13T21:14:05+00:00', 'subject': '(unspecified)', 'title': '(anonymous)', 'trapped': '/False', 'source': 'data\\CODNetwork_Complete_Updated.pdf', 'total_pages': 4, 'page': 0, 'page_label': '1'}, page_content="CODNetwork\nWe Enable Everyone to Start Grow and Scale Ecommerce Business in\nthe Middle East, Africa and Beyond.\nSourcing Fulfillment Call-Center Warehousing Shipping Remittance\n Join us today!\nYou can join us today by choosing the model that suits you. We offer both Seller and Affiliate\nmodels.\nCODNetwork Seller :\nStart selling your own products and goods in multiple countries in MENA region by taking\nadvantage of our services. Transform Your Cash on Delivery Business with COD Network Services.\nFrom sourcing to delivery, we manage your business needs securely, effic

In [37]:
# Number of Document objects returned by the loader.
len(extracted_data)

6

In [38]:
from typing import List
from langchain.schema import Document

def filter_to_minimal_docs(docs: List[Document]) -> List[Document]:
    """
    Strip each document's metadata down to its 'source' entry.

    Args:
        docs: Documents whose page_content and metadata['source'] are kept.

    Returns:
        A new list of Document objects, in the same order as ``docs``. Each one
        carries the original page_content and metadata of the form
        {"source": ...} (the value is None when the input has no 'source' key).
    """
    return [
        Document(
            page_content=original.page_content,
            metadata={"source": original.metadata.get("source")},
        )
        for original in docs
    ]

In [39]:
# Keep only the 'source' metadata of each loaded document, then display the result.
minimal_docs = filter_to_minimal_docs(extracted_data)
minimal_docs

[Document(metadata={'source': 'data\\CODNetwork_Complete_Updated.pdf'}, page_content="CODNetwork\nWe Enable Everyone to Start Grow and Scale Ecommerce Business in\nthe Middle East, Africa and Beyond.\nSourcing Fulfillment Call-Center Warehousing Shipping Remittance\n Join us today!\nYou can join us today by choosing the model that suits you. We offer both Seller and Affiliate\nmodels.\nCODNetwork Seller :\nStart selling your own products and goods in multiple countries in MENA region by taking\nadvantage of our services. Transform Your Cash on Delivery Business with COD Network Services.\nFrom sourcing to delivery, we manage your business needs securely, efficiently, and globally.\nOur Solutions\nEnabling online sellers with efficient, quality-focused procurement to delivery solutions, freeing you\nto innovate and market.\nAccount Manager\nYour Ultimate Partner in Business Growth. With an Account Manager at COD Network, reach your\ngoals effortlessly. We enhance your sales and delivery

### Split the documents into smaller chunks

In [40]:
def text_split(minimal_docs, chunk_size: int = 500, chunk_overlap: int = 20):
    """Split documents into smaller chunks.

    Args:
        minimal_docs: Documents to split; passed to ``split_documents``.
        chunk_size: Value given to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``. Defaults to 500, the value that used to be
            hard-coded here.
        chunk_overlap: Value given to ``RecursiveCharacterTextSplitter`` as
            ``chunk_overlap``. Defaults to 20, the value that used to be
            hard-coded here.

    Returns:
        The chunks returned by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(minimal_docs)

In [41]:
# Split the minimal documents into chunks, report how many were produced,
# and display them.
texts_chunk = text_split(minimal_docs)
print(f"Number of chunks: {len(texts_chunk)}")
texts_chunk

Number of chunks: 22


[Document(metadata={'source': 'data\\CODNetwork_Complete_Updated.pdf'}, page_content='CODNetwork\nWe Enable Everyone to Start Grow and Scale Ecommerce Business in\nthe Middle East, Africa and Beyond.\nSourcing Fulfillment Call-Center Warehousing Shipping Remittance\n Join us today!\nYou can join us today by choosing the model that suits you. We offer both Seller and Affiliate\nmodels.\nCODNetwork Seller :\nStart selling your own products and goods in multiple countries in MENA region by taking\nadvantage of our services. Transform Your Cash on Delivery Business with COD Network Services.'),
 Document(metadata={'source': 'data\\CODNetwork_Complete_Updated.pdf'}, page_content='From sourcing to delivery, we manage your business needs securely, efficiently, and globally.\nOur Solutions\nEnabling online sellers with efficient, quality-focused procurement to delivery solutions, freeing you\nto innovate and market.\nAccount Manager\nYour Ultimate Partner in Business Growth. With an Account Ma

### Embedding model

In [42]:
from langchain_huggingface import HuggingFaceEmbeddings

def download_embeddings():
    """
    Build the HuggingFace embeddings object used by this notebook.

    Returns:
        A HuggingFaceEmbeddings instance configured with the
        "sentence-transformers/all-MiniLM-L6-v2" model.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

# Create the embeddings object once; later cells pass it to the Pinecone vector store.
embedding = download_embeddings()

In [43]:
# Display the embeddings object to inspect its configuration.
embedding

HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)

In [56]:
# Load the variables of the local .env file into the process environment.
from dotenv import load_dotenv
import os
load_dotenv()

# Read the API keys used later in the notebook. A key that is not configured
# comes back as None.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Re-export only the keys that are actually set: assigning None to os.environ
# raises TypeError, which used to abort this cell as soon as one key was
# missing, even when the corresponding provider is never used.
_api_keys = {
    "PINECONE_API_KEY": PINECONE_API_KEY,
    "OPENAI_API_KEY": OPENAI_API_KEY,
    "GOOGLE_API_KEY": GOOGLE_API_KEY,
}
for _key_name, _key_value in _api_keys.items():
    if _key_value is not None:
        os.environ[_key_name] = _key_value

# Report missing or empty keys up front instead of failing later with an
# opaque authentication error.
_missing_keys = [_key_name for _key_name, _key_value in _api_keys.items() if not _key_value]
if _missing_keys:
    print(f"Warning: missing environment variables: {', '.join(_missing_keys)}")

In [45]:
from pinecone import Pinecone 
# Key read from the environment in the previous cell.
pinecone_api_key = PINECONE_API_KEY

# Create the Pinecone client with that key, then display it.
pinecone_client = Pinecone(api_key=pinecone_api_key)
pinecone_client

<pinecone.pinecone.Pinecone at 0x29ca6c94aa0>

In [46]:
from pinecone import ServerlessSpec 

# Name of the Pinecone index used by every vector-store cell below.
index_name = "customer-support-chatbot-with-llmops-index"

# Create the index only when it does not exist yet, so this cell can be re-run.
if not pinecone_client.has_index(index_name):
    pinecone_client.create_index(
        name = index_name,
        # Vector size of the index. NOTE(review): presumably the output size of
        # the all-MiniLM-L6-v2 embedding model — confirm if the model changes.
        dimension=384,
        metric= "cosine",                                       # Cosine similarity
        # Serverless index hosted on AWS in us-east-1.
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )


# Handle to the (existing or newly created) index.
index = pinecone_client.Index(index_name)

In [47]:
# store our vectors
from langchain_pinecone import PineconeVectorStore

import hashlib
from collections import Counter

# Derive a stable id for every chunk from its source and text. Without explicit
# ids each run of this cell stores the same chunks again under fresh ids, so the
# index accumulates duplicates; with stable ids a re-run overwrites instead.
# The occurrence counter keeps ids unique when two chunks are identical.
# NOTE: vectors already stored under earlier ids are not removed by this cell.
_digest_counts = Counter()
chunk_ids = []
for _chunk in texts_chunk:
    _digest = hashlib.sha256(
        f"{_chunk.metadata.get('source')}|{_chunk.page_content}".encode("utf-8")
    ).hexdigest()
    _digest_counts[_digest] += 1
    chunk_ids.append(f"{_digest}-{_digest_counts[_digest]}")

# Embed each chunk and upsert the embeddings into the Pinecone index.
docsearch = PineconeVectorStore.from_documents(
    documents=texts_chunk,
    embedding=embedding,
    index_name=index_name,
    ids=chunk_ids,
)



In [48]:
# Load the existing index

# Connect to the Pinecone index by name without passing any documents; the
# embeddings object is supplied to the vector store for later use.
# This rebinds `docsearch`, replacing the object created by the previous cell.
docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embedding
)


In [49]:
# Build a similarity-search retriever configured with k=3, then run a sample
# query and display the retrieved documents.
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":3})
retrieved_docs = retriever.invoke("What is cod network?")
retrieved_docs

[Document(id='ab52ce50-efa5-46fa-972d-33f790e68490', metadata={'source': 'data\\CODNetwork_Info.pdf'}, page_content="An all-in-one digital platform COD Network's Affiliate platform is a service that allows you to sell\nproducts without having capital to buy the commodity wholesale. We provide all services from\nimporting goods, call centers, warehousing, shipping and collecting funds and sending them to our\nclient accounts.\nWe help people achieve and earn highly profitable commissions through their marketing of the\nproducts we make available to them on the Affiliate website."),
 Document(id='e5048b18-850c-4fbf-935f-436c65b58bc0', metadata={'source': 'data\\CODNetwork_Complete_Updated.pdf'}, page_content="An all-in-one digital platform\nCOD Network's Affiliate platform is a service that allows you to sell products without having capital\nto buy the commodity wholesale. We provide all services from importing goods, call centers,\nwarehousing, shipping and collecting funds and sending 

### Refine the response with the LLM model (GPT-4o mini)

In [50]:
# Create the OpenAI chat model (gpt-4o-mini).
from langchain_openai import ChatOpenAI

# NOTE(review): the recorded run of this section fails with a 401 "Incorrect
# API key" error, and the masked key in that message starts with `sk-or-v1`.
# That does not look like an OpenAI-issued key — confirm that OPENAI_API_KEY
# holds a key for the endpoint this client talks to.
chatModel = ChatOpenAI(model="gpt-4o-mini")

In [51]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [53]:
# System instructions for the support assistant. The text ends with a
# "{context}" placeholder, which is filled in when the chain is invoked.
system_prompt = (
    "You are CODNetwork’s official customer support assistant.\n"
    "Provide clear, concise, and polite answers to customer questions.\n"
    "Use ONLY the provided context from CODNetwork’s official documentation, FAQs, and product/service descriptions.\n"
    "If the answer is not in the context, politely say you don’t have that information and suggest contacting human support.\n"
    "Always maintain a professional and friendly tone consistent with CODNetwork’s brand.\n"
    "Prioritize accuracy over creativity.\n"
    "Never make up information, policies, or offers.\n"
    "Use simple language customers can understand.\n"
    "If a question is unclear, ask for clarification before answering.\n"
    "\n\n"
    "{context}"
)


# Two-message chat prompt: the system instructions above, followed by the
# user's question supplied through the "{input}" placeholder.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [54]:
# Chain built from the chat model and the prompt defined above.
question_answer_chain = create_stuff_documents_chain(chatModel, prompt)
# Retrieval chain combining `retriever` with the question-answering chain.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [55]:
# Ask a sample question through the RAG chain and print the generated answer.
# NOTE(review): the recorded run of this cell ended with a 401
# AuthenticationError (invalid API key), so no answer was produced.
response = rag_chain.invoke({"input": "what is codNetwork"})
print(response["answer"])

AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-or-v1*************************************************************2712. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

### Refine the response with the LLM model (Google Gemini)

In [28]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Rebind `chatModel` (previously the OpenAI model) to a Gemini chat model.
chatModel = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",  # or another available variant
    temperature=0.4,
    max_tokens=500
)

In [29]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

In [30]:
# Prompt setup
system_prompt = (
    "You are CODNetwork’s official customer support assistant.\n"
    "Provide clear, concise, and polite answers to customer questions.\n"
    "Use ONLY the provided context from CODNetwork’s official documentation, FAQs, and product/service descriptions.\n"
    "If the answer is not in the context, politely say you don’t have that information and suggest contacting human support.\n"
    "Always maintain a professional and friendly tone consistent with CODNetwork’s brand.\n"
    "Prioritize accuracy over creativity.\n"
    "Never make up information, policies, or offers.\n"
    "Use simple language customers can understand.\n"
    "If a question is unclear, ask for clarification before answering.\n"
    "\n\n"
    "{context}"
)


prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [31]:
# Rebuild both chains so they use the current `chatModel` and `prompt`.
question_answer_chain = create_stuff_documents_chain(chatModel, prompt)
# Retrieval chain combining `retriever` with the question-answering chain.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [34]:
# Ask a sample question through the RAG chain and print the generated answer.
response = rag_chain.invoke({"input": "where codNetwork is working in which countries?"})
print(response["answer"])

CODNetwork enables e-commerce businesses to start, grow, and scale in the Middle East, Africa, and beyond. For sellers, you can sell your products in multiple countries within the MENA region.


# Test Code 