In [1]:
# Ref: https://python.langchain.com/docs/tutorials/chatbot/
# Monitor and evaluate an LLM application using LangSmith
import getpass
import os

os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = getpass.getpass(prompt="LangChain API Key: ")
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] = "{PROJECT_NAME}"

# Ref: https://python.langchain.com/docs/tutorials/rag/
# Ref: https://www.educative.io/blog/ollama-guide
# Ref: https://community.deeplearning.ai/t/try-filtering-complex-metadata-from-the-document-using-langchain-community-vectorstores-utils-filter-complex-metadata/628474/2
# Using Chroma as the vector store
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores.utils import filter_complex_metadata
embeddings=OllamaEmbeddings(model="tinyllama");
vector_store = Chroma(embedding_function=embeddings)

LangChain API Key:  ········


In [2]:
# Ref: https://python.langchain.com/docs/integrations/chat/ollama/
# Ref: https://python.langchain.com/docs/how_to/sequence/
# Ref: https://github.com/REZ3LIET/personal_chatbot/blob/main/Scripts/qa_chatbot.py
# Ref: https://medium.com/@ankit_data_scientist/end-to-end-creation-and-deployment-of-chatbot-with-ollama-langchain-langserve-and-langsmith-5b2f6f500c37
# Ref: https://python.langchain.com/docs/tutorials/rag/
# Ref: https://github.com/langchain-ai/langchain/issues/4838
# Ref: https://python.langchain.com/docs/integrations/document_loaders/browserbase/
# Ref: https://python.langchain.com/docs/integrations/document_loaders/firecrawl/
# Ref: https://python.langchain.com/api_reference/unstructured/document_loaders/langchain_unstructured.document_loaders.UnstructuredLoader.html

import bs4
from langchain_community.document_loaders.firecrawl import FireCrawlLoader
#from langchain_unstructured import UnstructuredLoader
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from typing_extensions import List, TypedDict

# load content on the page 
WEB_PATH = "https://en.wikipedia.org/wiki/Baseball"
FIRECRAWL_API_KEY = "{FIRECRAWL_API_KEY}"
web_loader = FireCrawlLoader(
    api_key=FIRECRAWL_API_KEY, url=WEB_PATH, mode="scrape"
)
web_docs = web_loader.load()

# split text into smaller chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
split_web_docs = text_splitter.split_documents(web_docs)
chunk_index = vector_store.add_documents(documents=filter_complex_metadata(split_web_docs))

In [3]:
%%writefile graph.py
# Define state (data structure) for the RAG application, which consists of question, context, and answer
# Ref: https://python.langchain.com/docs/tutorials/rag/
# Ref: https://langchain-ai.github.io/langgraph/concepts/low_level/#graphs
# Ref: https://www.getzep.com/ai-agents/langgraph-tutorial
# Ref: https://mlflow.org/blog/langgraph-model-from-code
# Ref: https://python.langchain.com/v0.2/docs/tutorials/rag/
# Ref: https://python.langchain.com/docs/integrations/chat/ollama/

from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
from langgraph.graph.state import CompiledStateGraph
from langchain_core.documents import Document
import mlflow
from langchain import hub
from langchain_ollama import ChatOllama


def build_graph(vector_store) -> CompiledStateGraph:
    # Instantiation of model using Ollama
    llm = ChatOllama(
        model="tinyllama",
        temperatute=0
    )

    prompt = hub.pull("rlm/rag-prompt")
    
    class State(TypedDict):
        question: str
        context: List[Document]
        answer: str

# Define retrieval step
# Ref: https://python.langchain.com/docs/tutorials/rag/

    def retrieve(state: State):
        retrieved_info = vector_store.similarity_search(state["question"])
        return {"context": retrieved_info}

# Define generate step
# Ref: https://python.langchain.com/docs/tutorials/rag/
    def generate(state: State):
        context_content = "\n\n".join(ext_doc.page_content for ext_doc in state["context"])
        ext_messages = prompt.invoke({"question": state["question"],"context": context_content})
        response = llm.invoke(ext_messages)
        return {"answer": response.content}
    
# Using LangGraph to implement an application, consisting of retrieval and generation steps
# Ref: https://www.getzep.com/ai-agents/langgraph-tutorial
# Ref: https://python.langchain.com/docs/tutorials/rag/

    graph_builder = StateGraph(State).add_sequence([retrieve,generate])
    graph_builder.add_edge(START,"retrieve")
    graph = graph_builder.compile()
    return graph

Overwriting graph.py


In [4]:
import mlflow
from graph import build_graph
from typing_extensions import TypedDict
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
from langgraph.graph.state import CompiledStateGraph

mlflow.models.set_model(build_graph(vector_store))

In [5]:
# Using MLFlow to track 
# Ref: https://mlflow.org/docs/latest/getting-started/running-notebooks/index.html
# Ref: https://python.langchain.com/docs/integrations/providers/mlflow_tracking/
# Ref: https://mlflow.org/docs/latest/getting-started/intro-quickstart/index.html
# Ref: https://mlflow.org/docs/latest/llms/langchain/guide/index.html
# Ref: https://mlflow.org/docs/latest/llms/langchain/notebooks/langchain-retriever.html
# Ref: https://mlflow.org/blog/langgraph-model-from-code
import mlflow
from graph import build_graph

mlflow.set_tracking_uri(uri="http://127.0.0.1:8081")

with mlflow.start_run():
    model_info = mlflow.langchain.log_model(
        lc_model="graph.py",
        artifact_path="chatbot_rag"
    )
    model_uri=model_info.model_uri

# Enable tracing
mlflow.set_experiment("{EXPERIMENT_NAME}")
mlflow.langchain.autolog()

# Load the model
loaded_mlflow_model = mlflow.langchain.load_model(model_uri)



🏃 View run dapper-dolphin-13 at: http://127.0.0.1:8081/#/experiments/0/runs/33a798dcc2af419fa50b321068e04986
🧪 View experiment at: http://127.0.0.1:8081/#/experiments/0


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

In [7]:
# Ref: https://python.langchain.com/v0.2/docs/tutorials/rag/

user_question = input("Enter your question about baseball: ")
payload = {"question": user_question}
response = loaded_mlflow_model.invoke(payload)
print(f'Answer: {response.get("answer")}\n\n')

Enter your question about baseball:  how many strikes are needed to get a batter out?


Answer: Question: How many strikeouts are needed to get a batter out in amateur and professional play around the world?

Context: Baseball's history is as deep-rooted as it is on North American and American continents. The basic fielding statistics include [runners scored per inning](/wiki/Run_per_inning "Run per inning") and [runners stolen per inning](/wiki/Stolen_bases_per_inning "Runners stolen per inning"), but there is no game clock or pace-of-play regulation.

References:
[153] (cite_note) [Pitch Clock](https://en.wikipedia.org/wiki/PiTCh_clock).




In [None]:
# clear the database
#vector_store.delete_collection()