In [1]:
import os
from langchain.document_loaders import PyPDFLoader, CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_models.oci_generative_ai import ChatOCIGenAI
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OCIGenAIEmbeddings
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, END, MessagesState, StateGraph
from langchain.schema import AIMessage, HumanMessage, SystemMessage

In [2]:
from dotenv import load_dotenv
# Load variables from a .env file into the process environment.
# NOTE(review): the os.getenv("CON_GEN_AI_...") lookups in later cells presumably
# depend on this call having run first — confirm the .env file defines them.
load_dotenv()

# Step 1: Load the PDF document
def load_documents(pdf_path="demo.pdf"):
    """Load a PDF file through ``PyPDFLoader``.

    Args:
        pdf_path: Location of the PDF to read. Defaults to ``"demo.pdf"``,
            the file this notebook was written against, so calling the
            function without arguments behaves as before.

    Returns:
        The documents produced by ``PyPDFLoader(pdf_path).load()``.
    """
    pdf_loader = PyPDFLoader(pdf_path)
    return pdf_loader.load()
 
# Step 2: Split the documents into smaller chunks for better processing
def split_documents(documents, chunk_size=1000, chunk_overlap=100):
    """Split documents into smaller, overlapping chunks.

    Args:
        documents: Documents to split, as accepted by
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Maximum chunk size handed to the splitter. Defaults to
            1000, the value previously hard-coded here.
        chunk_overlap: Overlap between consecutive chunks handed to the
            splitter. Defaults to 100, the value previously hard-coded here.

    Returns:
        The chunked documents returned by the splitter.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(documents)

# Load and process documents: read the PDF, then split it into chunks.
# `chunks` is the input the FAISS vector store is built from in a later cell.
documents = load_documents()
chunks = split_documents(documents)

In [None]:

# Embedding client for OCI Generative AI. The model id, service endpoint and
# compartment id are read from environment variables.
# NOTE(review): auth_type is hard-coded to "API_KEY" here, whereas the chat
# model cell reads it from CON_GEN_AI_AUTH_TYPE — confirm the difference is intended.
# NOTE(review): input_type is "SEARCH_DOCUMENT"; the same embeddings object is
# also used by the retriever to embed queries — confirm that is acceptable.
embeddings = OCIGenAIEmbeddings(
    model_id=os.getenv("CON_GEN_AI_EMB_MODEL_ID"),
    service_endpoint=os.getenv("CON_GEN_AI_SERVICE_ENDPOINT"),
    compartment_id=os.getenv("CON_GEN_AI_COMPARTMENT_ID"),
    auth_type="API_KEY",
    model_kwargs={"input_type": "SEARCH_DOCUMENT"}
)

# Build a FAISS vector store from the chunks using the embedding client above.
db = FAISS.from_documents(chunks, embeddings)
# Expose the store as a retriever using the "mmr" search type with k=50 and
# fetch_k=150.
# NOTE(review): 50 chunks of up to 1000 characters each is a lot of context to
# put in a single prompt — confirm it fits the chat model's input limit.
retriever = db.as_retriever(
    search_type="mmr",
    search_kwargs={'k': 50, 'fetch_k': 150}
)

In [None]:
# Chat model client for OCI Generative AI. Model id, endpoint, compartment and
# auth type all come from environment variables; the sampling settings are
# fixed in model_kwargs below.
chat = ChatOCIGenAI(
            model_id=os.getenv("CON_GEN_AI_CHAT_MODEL_ID"),
            service_endpoint=os.getenv("CON_GEN_AI_SERVICE_ENDPOINT"),
            compartment_id=os.getenv("CON_GEN_AI_COMPARTMENT_ID"),
            provider="meta",
            is_stream=True,
            auth_type=os.getenv("CON_GEN_AI_AUTH_TYPE"),
            model_kwargs={
                "max_tokens": 1024,
                "temperature": 0.6,
                "top_p": 0.7,
                "top_k": 20,
                "frequency_penalty": 0,
            },
        )

# Graph builder whose state schema is MessagesState; nodes and edges are
# registered further down in this cell.
workflow = StateGraph(state_schema=MessagesState)

# Define the retriever function
def retrieve_relevant_documents(state: MessagesState):
    """Retrieve context for the latest message and attach it to the conversation.

    The content of the last message in ``state["messages"]`` is used as the
    query for the module-level ``retriever``. The ``page_content`` of every
    returned document is joined with newlines and wrapped in a
    ``SystemMessage`` whose content starts with ``"Context: "``.

    Args:
        state: Graph state holding at least one message under ``"messages"``.

    Returns:
        A new state mapping whose ``"messages"`` list is the incoming messages
        followed by the context message.

    Raises:
        IndexError: If ``state["messages"]`` is empty.
    """
    query = state["messages"][-1].content  # last message is the query
    retrieved_docs = retriever.invoke(query)
    retrieved_content = "\n".join(doc.page_content for doc in retrieved_docs)
    context_message = SystemMessage(content=f"Context: {retrieved_content}")
    # Build a fresh list instead of appending to state["messages"]: the caller's
    # state object must not be modified as a side effect of running this node.
    return {**state, "messages": [*state["messages"], context_message]}

# Define the function that calls the model
def call_model(state: MessagesState):
    """Send the conversation to the chat model and return its reply.

    A fixed instruction prompt is placed in a ``SystemMessage`` ahead of the
    messages already in ``state["messages"]``; the combined list is passed to
    the module-level ``chat`` client.

    Args:
        state: Graph state whose ``"messages"`` entry holds the conversation.

    Returns:
        A dict with the model's response under the ``"messages"`` key.
    """
    instruction_parts = [
        "You are a helpful assistant that call me by my name if posible. ",
        "Use the following retrieved documents as context to provide accurate responses. ",
        "Answer all questions to the best of your ability and if the answer is not in the context, respond with 'The question is out of my context.' .",
    ]
    instructions = SystemMessage(content="".join(instruction_parts))

    # Instructions first, then the conversation in its existing order.
    conversation = [instructions, *state["messages"]]
    return {"messages": chat.invoke(conversation)}

# Define the nodes and edges: a linear graph START -> retriever -> model -> END.
workflow.add_node("retriever", retrieve_relevant_documents)
workflow.add_node("model", call_model)
workflow.add_edge(START, "retriever")
workflow.add_edge("retriever", "model")
workflow.add_edge("model", END)  # The run finishes once the model node has executed



<langgraph.graph.state.StateGraph at 0x1a1ac0eb0e0>

In [None]:
def translate_chat_history(lista):
    """Convert role/message dicts into LangChain message objects.

    Args:
        lista: Iterable of dicts, each with a ``'role'`` and a ``'message'``
            key. The role is compared case-insensitively.

    Returns:
        A list with a ``HumanMessage`` for every ``'human'`` entry and an
        ``AIMessage`` for every ``'assistant'`` entry, in input order.
        Entries with any other role are left out.

    Raises:
        KeyError: If an entry lacks the ``'role'`` or ``'message'`` key.
    """
    def _to_message(entry):
        # Both keys are read before the role is checked, so a malformed entry
        # fails even when its role would have been skipped.
        speaker = entry['role'].lower()
        text = entry['message']
        if speaker == 'human':
            return HumanMessage(content=text)
        if speaker == 'assistant':
            return AIMessage(content=text)
        return None

    converted = (_to_message(entry) for entry in lista)
    return [msg for msg in converted if msg is not None]

# Example usage: a sample request body with the query and a JSON-encoded
# chat history.
import json
# "p_history" is a JSON string, not a list; the trailing backslash continues
# the string literal onto the next line.
body = {
    "p_query":"what are the top risks mentioned in the document?",
    "p_role" :"private",
    "p_history": "[{\"role\":\"Assistant\",\"message\":\"Hello, what is your name?\"},\
    {\"role\":\"Human\",\"message\":\"Hello, my name is Evelyn\"}]"
}
# Decode the history and convert it into LangChain message objects.
p_history = json.loads(body['p_history'])
chat_history = translate_chat_history(p_history)

# Add simple in-memory checkpointer
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)
# Run the graph with the prior history followed by the new query as the final
# human message; the run is stored under thread_id "1".
response  = app.invoke(
    {
        "messages": chat_history
        + [HumanMessage(content=body["p_query"])]
    },
    config={"configurable": {"thread_id": "1"}},
)
# Print the last message of the resulting state.
response["messages"][-1].pretty_print()



Hola Evelyn, 

The top risks mentioned in the document include:

1. Dependence on advertising revenue: The company generates a significant portion of its revenue from advertising, and any decline in advertising spending or loss of partners could harm its business.

2. Competition: The company faces intense competition in the technology industry, which could lead to a loss of users, advertisers, and revenue.

3. Regulatory risks: The company is subject to various laws and regulations, including those related to data protection, intellectual property, and antitrust, and any failure to comply with these laws could result in significant liabilities and penalties.

4. Cybersecurity risks: The company is vulnerable to cyber attacks, which could compromise user data and harm its business.

5. International risks: The company's international operations expose it to additional risks, including restrictions on foreign ownership and investments, import and export requirements, and fluctuations i