In [None]:
%%capture --no-stderr
# Install (or upgrade, via -U) the packages this notebook relies on into the kernel's environment.
# `%%capture --no-stderr` hides pip's regular output while still letting stderr through.
# NOTE(review): versions are not pinned, so a fresh run may pull newer releases — consider pinning.
%pip install -U langgraph langchain_openai langchain_community pypdf chromadb

In [None]:
from google.colab import userdata
from openai import OpenAI
import os

# The OpenAI key is read from Colab's `userdata` secrets, so it is never written into the notebook.
api_key = userdata.get('OPENAI_API_KEY')
# Name of the chat model; passed to ChatOpenAI where the LLM is created.
MODEL="gpt-4o-mini"

# LangSmith tracing (optional): uncomment the lines below to enable it.
# Each value is read from a Colab secret with the same name as the environment variable.
# os.environ["LANGCHAIN_TRACING_V2"] = userdata.get('LANGCHAIN_TRACING_V2')
# os.environ["LANGCHAIN_ENDPOINT"] = userdata.get('LANGCHAIN_ENDPOINT')
# os.environ["LANGCHAIN_PROJECT"] = userdata.get('LANGCHAIN_PROJECT')
# os.environ["LANGCHAIN_API_KEY"] = userdata.get('LANGCHAIN_API_KEY')

In [None]:
from typing_extensions import TypedDict
from IPython.display import Image, display
from langgraph.graph import StateGraph, START, END, MessagesState
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
import os
from pydantic import BaseModel, Field
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage

In [None]:
# Show where the kernel is running so the relative PDF path used later can be checked.
current_directory = os.getcwd()
print("The current working directory is: " + current_directory)

# Print each directory entry on its own line, in the order the OS reports them.
print("\nContents of the current directory:")
directory_entries = os.listdir(current_directory)
for entry in directory_entries:
    print(entry)

In [None]:
# The paper to index; the path is relative to the current working directory.
file_name = "grokkin-paper.pdf"
pdf_path = str(file_name)

# Stop right here when the PDF is absent, so the cells below never run without input.
pdf_is_missing = not os.path.exists(pdf_path)
if pdf_is_missing:
    print(f"Error: The file {pdf_path} does not exist.")
    raise FileNotFoundError(f"The file {pdf_path} does not exist.")

In [None]:
# 1. Load every PDF and gather the documents the loader yields into one flat list
pdf_paths = [pdf_path]
docs = []
for path in pdf_paths:
    docs += PyPDFLoader(path).load()

# 2. Split the documents into overlapping chunks (sizes are counted by the tiktoken encoder)
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1000,
    chunk_overlap=200,
)
doc_splits = text_splitter.split_documents(docs)

# 3. Embed the chunks with OpenAI embeddings and store them in the "pdf-rag" Chroma collection
embeddings = OpenAIEmbeddings(api_key=api_key)
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="pdf-rag",
    embedding=embeddings,
)

# 4. Expose the store as a retriever that is asked for k=3 results per query
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

In [None]:
from langchain.tools.retriever import create_retriever_tool

# Tool description handed to the LLM together with the tool name. The model reads
# this text when deciding whether to call the tool, so the topic is spelled
# correctly ("grokking") and phrased as a full sentence.
text = "Search the paper and return information about grokking in neural networks."
retriever_tool = create_retriever_tool(
    retriever,
    # Tool name deliberately left unchanged so earlier tool calls in a
    # checkpointed conversation still refer to an existing tool.
    "retrieve_grokkin_info",
    text,
)

# Tools offered to the chat model and executed by the graph's tool node.
tools = [retriever_tool]

In [None]:
from langgraph.checkpoint.memory import MemorySaver

# Checkpointer passed to `graph_flow.compile(checkpointer=memory)` below, so the
# conversation state is kept between turns that share the same thread_id.
memory = MemorySaver()

In [None]:
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import ToolNode
from langgraph.prebuilt import tools_condition

# Chat model used by the chatbot node; the key and model name come from the config cell.
llm = ChatOpenAI(api_key=api_key,model=MODEL)


# Upper bound on model calls per turn; keeps an always-empty model from looping forever.
MAX_MODEL_ATTEMPTS = 3


def _has_real_output(result) -> bool:
    """Return True when the model reply carries tool calls or non-empty text."""
    if result.tool_calls:
        return True
    content = result.content
    if not content:
        return False
    if isinstance(content, list):
        first_block = content[0]
        # A content block is either a dict (text lives under "text") or a plain string.
        if isinstance(first_block, dict):
            return bool(first_block.get("text"))
        return bool(first_block)
    return True


def bot_node(state: MessagesState) -> MessagesState:
    """Chatbot node: ask the tool-enabled model for the next message.

    The model is invoked on the conversation in ``state["messages"]``. A reply
    with neither tool calls nor text is rejected: a "Respond with a real
    output." user message is appended to a local copy of the conversation and
    the model is asked again, at most ``MAX_MODEL_ATTEMPTS`` times in total.

    Args:
        state: Graph state; only ``state["messages"]`` is read. It is not mutated.

    Returns:
        A state update ``{"messages": [...]}`` holding either the model's reply
        or a single ("system", text) message describing the failure.

    Raises:
        Nothing: every exception is printed and converted into a failure message.
    """
    # Work on a copy so the retry nudges never leak into the stored conversation.
    messages = list(state["messages"])

    try:
        # Binding the tools does not depend on the retry loop, so do it once.
        model = llm.bind_tools(tools)

        for _ in range(MAX_MODEL_ATTEMPTS):
            # NOTE(review): no timeout is configured in this function; if one is
            # needed it has to be set where `llm` is created. Confirm which
            # exception type the client raises on timeout — anything that is not
            # the builtin TimeoutError ends up in the generic handler below.
            result = model.invoke(messages)
            if _has_real_output(result):
                return {"messages": [result]}
            # Empty reply: ask again with an explicit request for real output.
            messages = messages + [("user", "Respond with a real output.")]
    except TimeoutError as e:
        print(e)
        return {"messages": [("system", "Operation failed, timeout error. Please try again.")]}
    except Exception as e:
        print(e)
        return {"messages": [("system", "An error occurred. Please try again.")]}

    # Every attempt came back empty: report it instead of retrying without end.
    return {"messages": [("system", "The model returned no usable output. Please try again.")]}


# Agent graph: a chatbot node that can hand off to a tool node and get the results back.
graph_flow = StateGraph(MessagesState)

# Nodes: the LLM step and the step that executes the requested tools.
retrieve = ToolNode(tools)
graph_flow.add_node("chatbot", bot_node)
graph_flow.add_node("retrieve", retrieve)

# After the chatbot runs, tools_condition picks the branch; its outputs are
# mapped onto this graph's node names ("tools" -> "retrieve", END -> END).
branch_targets = {"tools": "retrieve", END: END}
graph_flow.add_conditional_edges("chatbot", tools_condition, branch_targets)

# Fixed edges: start at the chatbot, and return to it once the tools have run.
graph_flow.add_edge(START, "chatbot")
graph_flow.add_edge("retrieve", "chatbot")

# Compile with the checkpointer so state is stored per thread_id.
graph = graph_flow.compile(checkpointer=memory)

In [None]:
# Drawing the graph is best-effort: a failure is printed rather than stopping the notebook.
try:
    graph_png = graph.get_graph().draw_mermaid_png()
    display(Image(graph_png))
except Exception as render_error:
    print(render_error)

In [None]:
# Interactive chat loop: each user turn is streamed through the compiled graph.
# Enter "q" to quit.
# The thread_id selects which checkpointed conversation the graph continues, so
# re-running this cell with the same id carries on with the stored history.
thread_id = "12222"
config = {"configurable": {"thread_id": thread_id}}

while True:
    try:
        user_input = input("\nUser: ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the prompt was interrupted: leave the loop quietly.
        break
    if user_input.lower() == "q":
        break
    if not user_input:
        # Nothing was typed: prompt again instead of sending an empty message.
        continue
    events = graph.stream(
        {"messages": [HumanMessage(content=user_input)]}, config, stream_mode="values"
    )
    # stream_mode="values" yields the state after each step; show its newest message.
    for event in events:
        event["messages"][-1].pretty_print()