In [None]:
# Ref: https://python.langchain.com/docs/tutorials/chatbot/
# Monitor and evaluate an LLM application using LangSmith
import getpass
import os

# Turn on LangSmith tracing for this session.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
# Reuse an API key that is already exported; prompt (without echo) only when none
# is set, so a pre-configured environment can run the notebook unattended.
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass(prompt="LangChain API Key: ")
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
# Placeholder: replace {PROJECT_NAME} with the LangSmith project to log traces under.
os.environ["LANGCHAIN_PROJECT"] = "{PROJECT_NAME}"

# Ref: https://python.langchain.com/docs/integrations/chat/ollama/
# Instantiation of model using Ollama
from langchain_ollama import ChatOllama

# Chat model served through Ollama.
llm = ChatOllama(
    model="tinyllama",
    # Sampling temperature; the keyword has to be spelled "temperature" for the
    # value to reach the model settings.
    temperature=0.3,
)

# Ref: https://python.langchain.com/docs/tutorials/rag/
# Ref: https://www.educative.io/blog/ollama-guide
# Ref: https://community.deeplearning.ai/t/try-filtering-complex-metadata-from-the-document-using-langchain-community-vectorstores-utils-filter-complex-metadata/628474/2
# Using Chroma as the vector store
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores.utils import filter_complex_metadata
# Embedding function backed by the "tinyllama" Ollama model, handed to the
# Chroma vector store that the later cells add documents to and search.
embeddings = OllamaEmbeddings(
    model="tinyllama",
)
vector_store = Chroma(
    embedding_function=embeddings,
)

In [None]:
# Ref: https://python.langchain.com/docs/integrations/chat/ollama/
# Ref: https://python.langchain.com/docs/how_to/sequence/
# Ref: https://github.com/REZ3LIET/personal_chatbot/blob/main/Scripts/qa_chatbot.py
# Ref: https://medium.com/@ankit_data_scientist/end-to-end-creation-and-deployment-of-chatbot-with-ollama-langchain-langserve-and-langsmith-5b2f6f500c37
# Ref: https://python.langchain.com/docs/tutorials/rag/
# Ref: https://github.com/langchain-ai/langchain/issues/4838
# Ref: https://python.langchain.com/docs/integrations/document_loaders/browserbase/
# Ref: https://python.langchain.com/docs/integrations/document_loaders/firecrawl/
# Ref: https://python.langchain.com/api_reference/unstructured/document_loaders/langchain_unstructured.document_loaders.UnstructuredLoader.html

import bs4
from langchain import hub
from langchain_community.document_loaders.firecrawl import FireCrawlLoader
#from langchain_unstructured import UnstructuredLoader
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

# Load the content of the page
WEB_PATH = "https://en.wikipedia.org/wiki/Baseball"
# WEB_PATH = "https://onlinelibrary.wiley.com/doi/full/10.1002/mef2.70004"

# Take the Firecrawl key from the environment, or ask for it without echoing,
# rather than keeping a literal key in the notebook. `os` and `getpass` are
# imported in the first cell.
FIRECRAWL_API_KEY = os.environ.get("FIRECRAWL_API_KEY") or getpass.getpass(
    prompt="Firecrawl API Key: "
)
web_loader = FireCrawlLoader(
    api_key=FIRECRAWL_API_KEY, url=WEB_PATH, mode="scrape"
)
web_docs = web_loader.load()

# Split the text into smaller, overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
split_web_docs = text_splitter.split_documents(web_docs)

# The chunks go through filter_complex_metadata before being added to the store
# (presumably to drop metadata values Chroma cannot store; see the Ref in the first cell).
# chunk_index keeps whatever add_documents returns (presumably the stored chunk IDs; TODO confirm).
chunk_index = vector_store.add_documents(documents=filter_complex_metadata(split_web_docs))

In [None]:
# Ref: https://python.langchain.com/v0.2/docs/tutorials/rag/
# Pull the prompt published as "rlm/rag-prompt" through the LangChain hub client.
# generate() later invokes it with "question" and "context" values.
prompt = hub.pull("rlm/rag-prompt")

In [None]:
# Define state (data structure) for the RAG application, which consists of question, context, and answer
# Ref: https://python.langchain.com/docs/tutorials/rag/
# Ref: https://langchain-ai.github.io/langgraph/concepts/low_level/#graphs
# Ref: https://www.getzep.com/ai-agents/langgraph-tutorial
class State(TypedDict):
    """State passed between the steps of the RAG graph.

    The graph is invoked with only ``question`` set; ``retrieve`` returns
    ``context`` and ``generate`` returns ``answer``.
    """

    # The user's question; read by both retrieve() and generate().
    question: str
    # Documents returned by the vector-store similarity search in retrieve().
    context: List[Document]
    # Text content of the LLM response produced in generate().
    answer: str

In [None]:
# Define retrieval step
# Ref: https://python.langchain.com/docs/tutorials/rag/

def retrieve(state: State):
    """Search the vector store for documents similar to the question.

    Args:
        state: Current graph state; only ``state["question"]`` is read.

    Returns:
        A dict with a single ``"context"`` key holding the result of
        ``vector_store.similarity_search``.
    """
    return {"context": vector_store.similarity_search(state["question"])}

In [None]:
# Define generate step
# Ref: https://python.langchain.com/docs/tutorials/rag/
def generate(state: State):
    """Answer the question with the LLM, using the retrieved documents.

    Args:
        state: Current graph state; ``state["question"]`` and
            ``state["context"]`` are read.

    Returns:
        A dict with a single ``"answer"`` key holding the ``content`` of the
        LLM response.
    """
    # Concatenate the text of every retrieved document, separated by blank lines.
    page_texts = [doc.page_content for doc in state["context"]]
    joined_context = "\n\n".join(page_texts)

    # Fill the prompt with the question and the joined context, then query the model.
    prompt_input = {"question": state["question"], "context": joined_context}
    llm_reply = llm.invoke(prompt.invoke(prompt_input))
    return {"answer": llm_reply.content}

In [None]:
# Using LangGraph to implement an application, consisting of retrieval and generation steps
# Ref: https://www.getzep.com/ai-agents/langgraph-tutorial
# Ref: https://python.langchain.com/docs/tutorials/rag/

# Build the graph over State: retrieve and generate are added as a sequence,
# and START is wired to the "retrieve" node as the entry point.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")

# Compile the builder into the runnable graph used by the question cell.
graph = graph_builder.compile()

In [None]:
# Ref: https://python.langchain.com/v0.2/docs/tutorials/rag/

# Read the question interactively from the user.
user_question = input("Enter your question about baseball: ")

# Run the compiled graph; only "question" is supplied in the input state.
response = graph.invoke({"question": user_question})

# Show the value stored under "answer" in the graph result.
print(f'Answer: {response["answer"]}\n\n')

In [None]:
# Clear the database: uncomment the next line to delete the vector store's collection.
#vector_store.delete_collection()