In [1]:
import os
import gradio as gr
import requests
import re
import json
from bs4 import BeautifulSoup
from langchain.docstore.document import Document

from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain.agents import AgentType, initialize_agent, Tool
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_experimental.tools import PythonREPLTool
from langchain_community.utilities import SerpAPIWrapper

# --- Configuration ---
os.environ["SERPAPI_API_KEY"] = "7ffdbf28402b8d9ea42603fe9c84e6cb7e963a17ba330bcc120abc8f219b6548"

LLM_MODEL = "llama3:latest"
EMBEDDING_MODEL = "nomic-embed-text"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- Initialize LLM and Embeddings ---
llm = ChatOllama(model=LLM_MODEL, temperature=0.0)
embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL)

In [3]:
# --- Document URLs to Scrape ---
LANGCHAIN_DOC_URLS = [
    "https://python.langchain.com/docs/get_started/introduction",
    "https://python.langchain.com/docs/how_to/migrate_agent/",
    "https://python.langchain.com/docs/versions/migrating_chains/",
    "https://python.langchain.com/docs/integrations/llms/ollama",
    "https://python.langchain.com/docs/integrations/vectorstores/chroma",
    "https://python.langchain.com/docs/concepts/#llms",
    "https://python.langchain.com/docs/tutorials/rag/",
    "https://python.langchain.com/docs/expression_language/",
    "https://python.langchain.com/docs/integrations/document_loaders/web_base/",
    "https://python.langchain.com/docs/integrations/tools/searx_search/",
]


scraped_docs_dir = "scraped_langchain_docs"
persist_db_directory = "langchain_chroma_db_persistent"
collection_name = "langchain_docs_scraped_persistent_v1"

os.makedirs(scraped_docs_dir, exist_ok=True)

In [4]:
# --- Scraping Function ---
def scrape_and_extract_text(url):
    print(f"Scraping: {url}")
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        resp = requests.get(url, headers=headers, timeout=30)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.content, 'html.parser')
        
        # Try more specific selectors first
        main_content_selectors = [
            'article[role="main"]',
            'main',
            'div.theme-doc-markdown', # Docusaurus
            'div[class*="markdown"]',
            'div.content',
            'article'
        ]
        main = None
        for selector in main_content_selectors:
            main = soup.select_one(selector)
            if main:
                break
        
        if main:
            for tag_selector in ['script', 'style', 'nav', 'footer', 'header', 'aside', '.toc', 'button[class*="theme-edit-this-page"]']:
                for tag in main.select(tag_selector):
                    tag.decompose()
            parts = [p.get_text(separator=' ', strip=True) for p in main.find_all(['h1','h2','h3','h4','p','li','code','pre','td','th'])]
            text = "\n\n".join(filter(None, parts))
            if not text.strip() and len(text) < 200: # Fallback for less structured pages
                text = main.get_text(separator='\n', strip=True)
        else:
            print(f"  Warning: Could not find a specific main content element for {url}. Using body.")
            for tag in soup(['script','style','nav','footer','header','aside']):
                tag.decompose()
            text = soup.body.get_text(separator='\n', strip=True) if soup.body else ''
        
        print(f"  Successfully scraped. Text length: {len(text)}")
        return text.strip()
    except requests.exceptions.Timeout:
        print(f"  Timeout error scraping {url}")
        return None
    except requests.exceptions.RequestException as e:
        print(f"  Request error scraping {url}: {e}")
        return None
    except Exception as e:
        print(f"  Generic error processing {url}: {e}")
        return None


In [5]:
# --- Load or Build Vector Store ---
vectorstore = None
retriever = None

if embeddings:
    if os.path.isdir(persist_db_directory) and any(os.scandir(persist_db_directory)):
        print(f"Attempting to load vector store from: {persist_db_directory}")
        try:
            vectorstore = Chroma(
                persist_directory=persist_db_directory,
                embedding_function=embeddings,
                collection_name=collection_name
            )
            if vectorstore._collection.count() > 0:
                retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
                print(f"Successfully loaded vector store with {vectorstore._collection.count()} documents.")
            else:
                print("Vector store loaded but appears to be empty. Will rebuild.")
                retriever = None
        except Exception as e:
            print(f"Error loading vector store: {e}. Will attempt to rebuild.")
            retriever = None
    else:
        print(f"Vector store not found at {persist_db_directory} or is empty. Building new one.")

    if not retriever:
        print("Building new vector store...")
        docs = []
        for i, url in enumerate(LANGCHAIN_DOC_URLS):
            # Sanitize filename from URL
            sanitized_url_part = re.sub(r'[^a-zA-Z0-9_-]+', '_', url.split("//")[-1])
            filename = os.path.join(scraped_docs_dir, f"doc_{i}_{sanitized_url_part[:50]}.txt")
            text = None
            
            if os.path.exists(filename):
                with open(filename, 'r', encoding='utf-8') as f:
                    text = f.read().strip()
                if text:
                    print(f"Loaded cached content from {filename}")
                else:
                    print(f"Cached file {filename} is empty. Re-scraping.")
                    text = None
            
            if not text:
                text = scrape_and_extract_text(url)
                if text:
                    with open(filename, 'w', encoding='utf-8') as f:
                        f.write(text)
                    print(f"Saved scraped content to {filename}")
                else:
                    print(f"Failed to scrape or extract text from {url}. Skipping.")
                    continue
            
            if text:
                docs.append(Document(page_content=text, metadata={"source": url}))

        if docs:
            print(f"Collected {len(docs)} documents for vector store.")
            splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
            chunks = splitter.split_documents(docs)
            print(f"Split documents into {len(chunks)} chunks.")
            
            try:
                vectorstore = Chroma.from_documents(
                    documents=chunks,
                    embedding=embeddings,
                    collection_name=collection_name,
                    persist_directory=persist_db_directory
                )
                vectorstore.persist()
                retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
                print(f"Successfully built and persisted vector store with {vectorstore._collection.count()} chunks.")
            except Exception as e:
                print(f"Error building vector store: {e}")
                retriever = None
        else:
            print("No documents were successfully processed to build the vector store.")
            retriever = None
else:
    print("Embeddings not available. Skipping vector store setup.")


Attempting to load vector store from: langchain_chroma_db_persistent


  vectorstore = Chroma(


Successfully loaded vector store with 203 documents.


In [6]:
# --- Build RAG QA Chain ---
rag_chain = None
if retriever:
    rag_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True
)

search_utility = SerpAPIWrapper()

In [7]:
# --- Helper to get top similarity score ---
def get_top_similarity_score(query: str, retriever, k=1) -> float:
    try:
        docs_and_scores = retriever.vectorstore.similarity_search_with_score(query, k=k)
        if docs_and_scores:
            _, score = docs_and_scores[0]
            return score
    except Exception as e:
        print(f"Error computing similarity score: {e}")
    return 0.0

In [8]:
def rag_tool_func(query: str, similarity_threshold: float = 0.8) -> str:
    if rag_chain and retriever:
        try:
            # Check similarity score
            top_score = get_top_similarity_score(query, retriever)
            print(f"Top similarity score: {top_score}")
            if top_score > similarity_threshold:
                raise ValueError("Low similarity score — falling back to web search")

            result = rag_chain.invoke({"query": query})
            answer = result.get("result", "").strip()
            sources = result.get("source_documents", [])

            if answer and sources:
                citations = "\n".join(f"- {doc.metadata.get('source')}" for doc in sources)
                return (
                    f"{answer}\n\n"
                    f"Sources:\n"
                    f"{citations}"
                )
            else:
                raise ValueError("RAG returned no answer or sources")

        except Exception as e:
            print(f"RAG fallback reason: {e}")

    try:
        web_results = search_utility.run(query)
    except Exception as e:
        print(f"WebSearch error: {e}")
        web_results = "Sorry, I'm unable to perform a web search right now."

    return (
        "I couldn't find a definitive answer in my local documents, so here's what I found online:\n"
        f"{web_results}"
    )


In [9]:
rag_tool = Tool(
    name="LangChainDocsQA",
    func=rag_tool_func,
    description="Ask questions about LangChain internals, code examples, and integrations. Falls back to web search if not found locally."
)

def safe_web_search(query: str) -> str:
    try:
        return search_utility.run(query)
    except Exception as e:
        print(f"WebSearch error: {e}")
        return "Sorry, I couldn't perform a web search right now."

search_tool = Tool(
    name="WebSearch",
    func=safe_web_search,
    description="General web search via SerpAPI (with built-in error handling)."
)


python_tool = Tool(
    name="PythonInterpreter",
    func=PythonREPLTool().run,
    description="Execute Python code for calculations or data manipulations."
)

tools = [rag_tool, search_tool, python_tool]

In [10]:
# --- Memory & Agent ---
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
system = SystemMessagePromptTemplate.from_template("""
You are an expert assistant that outputs **only** a JSON object in this exact form:

```json
{"action":"<ToolName>","action_input":"<string>"}
action must be one of: LangChainDocsQA, WebSearch, PythonInterpreter, Final Answer
action_input must be exactly the string to pass to that tool (no extra quotes, no nested dicts).
Do not output any other text.
""")

human = HumanMessagePromptTemplate.from_template("{input}")
prompt = ChatPromptTemplate.from_messages([system, human])

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
agent_executor = initialize_agent(
    tools,
    llm,
    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
    memory=memory,
    agent_kwargs={"prompt": prompt},
    verbose=True,
    handle_parsing_errors="force",
    max_iterations=7,
    early_stopping_method="generate"
)

  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
  agent_executor = initialize_agent(


In [11]:
import pandas as pd

relevant_qs = [
    "How do I build a custom agent in LangChain?",
    "What's the structure of a RetrievalQA chain?",
    "How do I integrate Ollama LLM with LangChain?",
    "Show me an example of SerpAPIWrapper usage.",
    "How does RecursiveCharacterTextSplitter work?",
    "What are the key parameters for Chroma.from_documents?",
    "Explain the `chain_type='stuff'` in RetrievalQA.",
    "How can I persist a Chroma vector store?",
    "How do I add source citations in a RetrievalQA response?",
    "What's the role of ConversationBufferMemory?",
    "How to call PythonREPLTool from a LangChain agent?",
    "What is the YAML config for a chat prompt template?",
    "How do you chunk documents with overlap?",
    "Explain AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION.",
    "How to handle parsing errors with a LangChain agent?",
    "What's the default k in as_retriever(search_kwargs={'k':…})?",
    "How to migrate from Chains v0 to v1 in LangChain?",
    "What does `OllamaEmbeddings(model=…)` produce?",
    "How to scrape and cache docs before vectorizing?",
    "How do I use Docusaurus-style HTML selectors in scraping?"
]

irrelevant_qs = [
    "Who won the 2022 World Cup?",
    "What's the recipe for tiramisu?",
    "Latest trends in hip-hop music",
    "How to plant a rose garden?",
    "What's the capital of Mongolia?",
    "Define quantum entanglement in simple terms.",
    "Who wrote 'Pride and Prejudice'?",
    "Best smartphone cameras in 2025",
    "What is the Pythagorean theorem?",
    "How to do a backflip on a skateboard?",
    "Explain blockchain in under 100 words.",
    "What is the population of Brazil?",
    "How to brew the perfect espresso?",
    "Symptoms of vitamin D deficiency",
    "What causes auroras in the sky?",
    "How to solve a Rubik's cube",
    "Who painted the Mona Lisa?",
    "What's the history of the Eiffel Tower?",
    "How to knit a scarf for beginners?",
    "Latest box office hits this week"
]

rows = []
for q in relevant_qs:
    score = get_top_similarity_score(q, retriever)
    rows.append({"label": "relevant", "query": q, "score": score})
for q in irrelevant_qs:
    score = get_top_similarity_score(q, retriever)
    rows.append({"label": "irrelevant", "query": q, "score": score})

df = pd.DataFrame(rows)


rel_max = df[df.label=="relevant"].score.max()
rel_min = df[df.label=="relevant"].score.min()
irr_max = df[df.label=="irrelevant"].score.max()
irr_min = df[df.label=="irrelevant"].score.min()

print("Relevant scores range:  ", rel_min, "…", rel_max)
print("Irrelevant scores range:", irr_min, "…", irr_max)

midpoint = (rel_max + irr_min) / 2
print(f"\nSuggested threshold ≈ ({rel_max:.3f} + {irr_min:.3f})/2 = {midpoint:.3f}")
print("You might use a slightly conservative value, e.g.:")
print(f"    SIMILARITY_THRESHOLD = {midpoint:.3f}")

print("\nFull scores:")
print(df.sort_values(by="score", ascending=False).to_string(index=False))


Relevant scores range:   0.4264291524887085 … 0.882327675819397
Irrelevant scores range: 0.8145221471786499 … 1.2147629261016846

Suggested threshold ≈ (0.882 + 0.815)/2 = 0.848
You might use a slightly conservative value, e.g.:
    SIMILARITY_THRESHOLD = 0.848

Full scores:
     label                                                        query    score
irrelevant                                  Who won the 2022 World Cup? 1.214763
irrelevant                             Who wrote 'Pride and Prejudice'? 1.160347
irrelevant                             Symptoms of vitamin D deficiency 1.157709
irrelevant                      What's the history of the Eiffel Tower? 1.118858
irrelevant                            What is the population of Brazil? 1.115735
irrelevant                              What's the capital of Mongolia? 1.112212
irrelevant                                   Who painted the Mona Lisa? 1.096067
irrelevant                             Latest box office hits this week 1.09

In [12]:
# Fallback test
test_query = "Who won the 2022 World Cup?"

print("\n[TEST] Sending query to rag_tool_func (with fallback enabled):")
response = rag_tool_func(test_query)

print("\n--- Response ---")
print(response)


[TEST] Sending query to rag_tool_func (with fallback enabled):
Top similarity score: 1.2147629261016846
RAG fallback reason: Low similarity score — falling back to web search

--- Response ---
I couldn't find a definitive answer in my local documents, so here's what I found online:
Argentina national football team


In [None]:
# ___ Gradio Interface ____
def agent_chat(message , chat_history):
    try:
        bot_message = agent_executor.run(message)
    except Exception as e:
        print(f"Agent error: {e}")
        bot_message = "An internal error occurred."
    return bot_message

iface = gr.ChatInterface(
    fn=agent_chat,
    title="LangChain RAG Chatbot",
    description="Ask LangChain questions with local RAG and web fallback.",
    examples=[ 
        "How do I build a custom agent in LangChain?",
        "What are the key features of llama3?",
        "What's 2 to the power of 20 in Python?",
        "Give me an intro to LangChain.",
        "What is the capital of France?",
        "Currnet US president?",
        "What is the fastest growing religion in the world?",
    ],
)

if __name__ == "__main__":
    iface.launch(share=False, debug=True)

  self.chatbot = Chatbot(


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


  bot_message = agent_executor.run(message)




[1m> Entering new AgentExecutor chain...[0m


Python REPL can execute arbitrary code. Use with caution.


[32;1m[1;3mHere is my response:

```json
{
  "action": "PythonInterpreter",
  "action_input": "sorted([3,2,6,2,8,4,2])"
}
```

This will execute the Python code to sort the list and return the result.[0m
Observation: [38;5;200m[1;3m[0m
Thought:[32;1m[1;3m```json
{
  "action": "Final Answer",
  "action_input": "The sorted list is: [2, 2, 2, 3, 4, 6, 8]"
}[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
  "action": "WebSearch",
  "action_input": "Who is the current US President?"
}
```

(Note: I've used Option 1 to suggest using WebSearch to find the answer)[0m
Observation: [33;1m[1;3mDonald Trump[0m
Thought:[32;1m[1;3m```json
{
  "action": "Final Answer",
  "action_input": "The current US President is Donald Trump."
}
```[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
  "action": "WebSearch",
  "action_input": "Donald Trump children"
}
```

Note: I used the WebSearc

In [None]:
print("\nConversation Memory:")
print(agent_executor.memory.load_memory_variables({}))


Conversation Memory:
{'chat_history': [HumanMessage(content='How do I build a custom agent in LangChain?', additional_kwargs={}, response_metadata={}), AIMessage(content="Unfortunately, it seems that LangChain is not related to customs brokers or agents. The provided information appears to be about customs brokers and their services. If you're looking for information on building a custom agent in LangChain, I couldn't find any relevant information.", additional_kwargs={}, response_metadata={}), HumanMessage(content='How do I build a custom agent in LangChain?', additional_kwargs={}, response_metadata={}), AIMessage(content='To build a custom agent in LangChain, you can follow the steps outlined in this notebook: [insert link to notebook]. This will guide you through creating your own custom agent using OpenAI Tool Calling.', additional_kwargs={}, response_metadata={}), HumanMessage(content='How do I build a custom agent in LangChain?', additional_kwargs={}, response_metadata={}), AIMe