In [1]:
import ads
import os
import tempfile

from ads.llm.deploy import ChainDeployment

from langchain.chains import RetrievalQA
from langchain_community.chat_models import ChatOCIGenAI
from langchain_community.document_loaders import UnstructuredPowerPointLoader
from langchain_community.embeddings import OCIGenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Authenticate ADS with the resource principal of the environment running this notebook.
ads.set_auth(auth="resource_principal")

### Load Documents
# Collect every PowerPoint file found directly inside directory_path and parse
# each one into LangChain Document objects.
file_extension = ".pptx"
directory_path = "/home/datascience/demo/pptx"
documents = []

# os.listdir() returns entries in arbitrary order; sorting keeps the document
# order stable from one run to the next.
for filename in sorted(os.listdir(directory_path)):
    file_path = os.path.join(directory_path, filename)
    # Compare in lower case so files such as "Deck.PPTX" are not silently skipped.
    if filename.lower().endswith(file_extension) and os.path.isfile(file_path):
        loader = UnstructuredPowerPointLoader(file_path)
        documents.extend(loader.load())

# Fail fast with a clear message instead of letting the later embedding /
# indexing steps fail on an empty document list.
if not documents:
    raise FileNotFoundError(
        f"No '{file_extension}' files could be loaded from '{directory_path}'."
    )

# The 'documents' variable now holds a list of LangChain Document objects.
# You can inspect the content and metadata of the documents:
#print(documents[0].page_content)
#print(documents[0].metadata)

## Split Text
# Cut the loaded documents into small, overlapping chunks.
#   chunk_size          - upper bound on a chunk's length, as measured by length_function
#   chunk_overlap       - amount of text shared by consecutive chunks to preserve context
#   length_function     - len, i.e. lengths are counted in characters
#   is_separator_regex  - False, i.e. separators are matched literally
splitter_options = dict(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Produce the chunk list consumed by the later cells.
split_docs = text_splitter.split_documents(documents)

# Report how many documents went in and how many chunks came out.
print(f"Number of original documents: {len(documents)}")
print(f"Number of split chunks: {len(split_docs)}\n")

# Uncomment to eyeball the individual chunks while debugging:
#for i, chunk in enumerate(split_docs):
#    print(f"Chunk {i+1}:\n{chunk.page_content}\n---")


Number of original documents: 3
Number of split chunks: 588



In [2]:
### Connect to OCI embeddings and generative AI

# The embedding and chat clients use the same inference endpoint and
# compartment, so those settings are written once and shared.
genai_endpoint = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"
genai_compartment_id = "ocid1.compartment.oc1..aaaaaaaa52sp42nqmtwwzzvmp5mmldri26razhrbyw7cvixmims7p5crsg7a"

# Embedding model used to vectorize the document chunks.
oci_embeddings = OCIGenAIEmbeddings(
    model_id="cohere.embed-english-light-v3.0",
    service_endpoint=genai_endpoint,
    compartment_id=genai_compartment_id,
)

# Chat model that generates the answers; generation settings are passed via model_kwargs.
chat_model_kwargs = {"temperature": 0.7, "max_tokens": 500}
oci_chat = ChatOCIGenAI(
    model_id="cohere.command-a-03-2025",
    service_endpoint=genai_endpoint,
    compartment_id=genai_compartment_id,
    model_kwargs=chat_model_kwargs,
)

print("Connected to OCI embeddings and generative AI.")

Connected to OCI embeddings and generative AI.


In [3]:
### Build Vector Store (FAISS)
# Embed every chunk with the OCI embedding model and index the vectors in FAISS.
vectorstore = FAISS.from_documents(
    documents=split_docs,
    embedding=oci_embeddings,
)

print("FAISS vector store built.")

INFO:faiss.loader:Loading faiss.
INFO:faiss.loader:Successfully loaded faiss.
FAISS vector store built.


In [4]:
### Build Chain (OCI chat-based Retrieval QA)
# Expose the FAISS index through the retriever interface used by RetrievalQA.
retriever = vectorstore.as_retriever()

# Prompt for the question-answering step: the retrieved chunks are substituted
# for {context} and the user's query for {question}.
#
# Alternative wordings to experiment with:
#   - Strict grounding:
#       "Answer the question based only on the following context:"
#   - Append "You don't have to mention whether you did or did not use the
#     context." to the first line to discourage remarks about the sources.
rag_prompt_template = """Try to answer the question using only the following context, but, if that fails, use your general knowledge.
{context}
Question: {question}
"""

rag_prompt = PromptTemplate(
    template=rag_prompt_template,
    input_variables=["context", "question"],
)

# Hand the prompt to the chain explicitly. Without chain_type_kwargs the chain
# is built with the library's default prompt and rag_prompt is never used.
rag = RetrievalQA.from_chain_type(
    llm=oci_chat,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": rag_prompt},
)

print("RAG chain built.")

RAG chain built.


In [5]:
### Invoke the chain (unit test)
# Smoke-test the chain: ask each sample question in turn and print the raw
# response returned by the chain.
sample_questions = (
    "What are the 4 pillars of performance?",
    "Why is Productivity important?",
    "Can I ignore any of the 4 pillars of performance?",
    "Which customers has Yudhvir worked with?",
)

for question in sample_questions:
    print(rag.invoke(question))

{'query': 'What are the 4 pillars of performance?', 'result': 'The 4 pillars of performance, as outlined in the provided context, are:\n\n1. **Growth**  \n2. **Productivity**  \n3. **Health**  \n4. **Innovation**  \n\nThese pillars form the basis of the "Evolving 4-Pillar Approach" for evaluating or driving performance.'}
{'query': 'Why is Productivity important?', 'result': "Productivity is important because it measures an individual's contribution to revenue. This metric helps assess how effectively an employee is generating value for the organization, which is crucial for understanding performance and making informed decisions about resource allocation and growth strategies."}
{'query': 'Can I ignore any of the 4 pillars of performance?', 'result': 'Based on the provided context, it seems that the 4 pillars of performance (Growth, Productivity, Health, and Innovation) are presented as an "Evolving 4-Pillar Approach," which implies that they are interconnected and work together to dr

In [6]:
#### Use ADS to deploy the unit-tested chain
### Create the ADS deployment object
#artifact_dir = tempfile.mkdtemp()
#
#ads_deployment = ChainDeployment(
#    chain=rag,
#    artifact_dir=artifact_dir,
#    force_overwrite=True
#)
#
#ads_deployment.summary_status()

In [7]:
## Prepare the ADS deployment
#ads_deployment.prepare(
#    inference_conda_env="python_p312_any_x86_64_v1",
#    inference_python_version="3.12",
#)
#
## Summarize the checkpoint ADS workflow status
#ads_deployment.summary_status()