In [2]:
import requests
from bs4 import BeautifulSoup
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, Tool
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings


In [3]:
# Load settings from a local .env file (python-dotenv) before any model client is created.
# NOTE(review): presumably this provides the API key for the Google models used below — confirm.
# NOTE(review): the recorded output warns that line 4 of the .env file could not be parsed,
# so a value defined on that line may not have been loaded; check the file's syntax.
from dotenv import load_dotenv
load_dotenv()

python-dotenv could not parse statement starting at line 4


True

In [4]:
# STEP 1: Scrape Web Content
def scrape_webpage(url, timeout=30):
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The paragraph texts joined by single spaces, with leading and
        trailing whitespace removed. An empty string if the page contains
        no ``<p>`` elements.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error pages instead of silently indexing their HTML.
    response.raise_for_status()
    # Hand BeautifulSoup the raw bytes so it detects the encoding from the
    # document itself. `response.text` falls back to ISO-8859-1 when the
    # server sends no charset, which turns UTF-8 punctuation such as
    # apostrophes into mojibake in the scraped text.
    soup = BeautifulSoup(response.content, "html.parser")
    return ' '.join(p.get_text() for p in soup.find_all('p')).strip()

In [5]:
# STEP 2: Chunk and Embed the Content
def create_vector_db(text, chunk_size=500, chunk_overlap=100):
    """Split ``text`` into overlapping chunks and index them in a FAISS store.

    Args:
        text: The raw document text to index.
        chunk_size: Maximum size of each chunk passed to the splitter.
        chunk_overlap: Amount of overlap between consecutive chunks.

    Returns:
        A FAISS vector store built from the chunks, embedded with the Google
        ``models/embedding-001`` embedding model.

    Raises:
        ValueError: If ``text`` is empty or whitespace-only, or if splitting
            yields no chunks. Without this check the failure would surface
            later as an opaque error while the index is being built.
    """
    # Validate before touching the splitter or the embedding service so the
    # caller gets a clear message (e.g. when the scraped page had no text).
    if not text or not text.strip():
        raise ValueError("Cannot build a vector DB from empty text")

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = splitter.create_documents([text])
    if not chunks:
        raise ValueError("Text splitting produced no chunks to index")

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    return FAISS.from_documents(chunks, embeddings)

In [9]:
# STEP 3: Define tools for the Agent
def setup_tools(db, llm):
    """Build the two tools the agent may call: page search and summarization.

    Args:
        db: Vector store exposing ``as_retriever()``.
        llm: Language model used by the summarization chain.

    Returns:
        A list ``[WebSearch, Summarizer]`` of ``Tool`` objects. Despite its
        name, ``WebSearch`` queries the vector store's retriever rather than
        the internet.
    """
    page_retriever = db.as_retriever()

    def _lookup(query):
        # Return the text of every retrieved chunk, separated by newlines.
        hits = page_retriever.get_relevant_documents(query)
        return '\n'.join(hit.page_content for hit in hits)

    # Single-variable prompt: the whole text to condense goes into {context}.
    chain = LLMChain(
        llm=llm,
        prompt=PromptTemplate(
            input_variables=["context"],
            template="Summarize the following content:\n\n{context}",
        ),
    )

    def _condense(text):
        return chain.run(context=text)

    return [
        Tool(
            name="WebSearch",
            func=_lookup,
            description="Useful for searching the webpage for relevant content",
        ),
        Tool(
            name="Summarizer",
            func=_condense,
            description="Use this to summarize large context or search results",
        ),
    ]

In [10]:
# STEP 4: Run the Agent on a Query
def run_agentic_rag(url, user_query):
    """Answer ``user_query`` about the page at ``url`` with a tool-using agent.

    The page is scraped, chunked and embedded into a vector DB, and a
    zero-shot ReAct agent backed by ``gemini-2.0-flash`` is given search and
    summarize tools over that DB. Progress messages are printed at each
    stage, and the agent runs in verbose mode so its trace is printed too.

    Args:
        url: Address of the page to use as the knowledge source.
        user_query: The question to put to the agent.

    Returns:
        The agent's final answer, as returned by ``agent.run``.
    """
    print(f"\n🔗 Scraping webpage: {url}")
    page_text = scrape_webpage(url)

    print("🧠 Creating vector DB...")
    vector_db = create_vector_db(page_text)

    print("🤖 Setting up agent...")
    chat_model = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.3)
    executor = initialize_agent(
        tools=setup_tools(vector_db, chat_model),
        llm=chat_model,
        agent="zero-shot-react-description",
        verbose=True,
    )

    print("💬 Running agent on query...")
    return executor.run(user_query)

In [13]:
# Page to index and the first question to ask about it.
url = "https://www.confident-ai.com/blog/llm-guardrails-the-ultimate-guide-to-safeguard-llm-systems"  # blog post on LLM guardrails; swap in any text-rich page
query = "What are prompt injections?"


In [14]:
# Run the whole pipeline (scrape, index, agent) for the question defined above.
# The agent is created with verbose=True, so its reasoning trace is printed below.
response = run_agentic_rag(url, query)
# The final answer is displayed by the cells that follow.


🔗 Scraping webpage: https://www.confident-ai.com/blog/llm-guardrails-the-ultimate-guide-to-safeguard-llm-systems
🧠 Creating vector DB...
🤖 Setting up agent...
💬 Running agent on query...


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to find out what prompt injections are. I will use a web search to find a definition.
Action: WebSearch
Action Input: "prompt injection definition"[0m
Observation: [36;1m[1;3mattempts to bypass instructions or coerce the system into executing unauthorized tasks. An example of an input that attempts a prompt injection is as follows: Fortunately, you can guard against it using DeepEval like this: The Jailbreaking Guard identifies and mitigates attempts to override system restrictions or ethical boundaries. Techniques it defends against include hypothetical scenarios, role-playing exploits, and logic-based attacks. Example of a jailbreaking input: You can guard it in
way to safeguard against harmful user inputs. This not only conserve

In [15]:
# Show the agent's final answer as this cell's output value.
response

'Prompt injections are malicious inputs designed to manipulate prompts, attempting to bypass instructions or coerce the system into executing unauthorized tasks.'

In [16]:
# Print the same answer under a header (plain-text form of the value shown above).
print("\n🔍 Answer:\n", response)


🔍 Answer:
 Prompt injections are malicious inputs designed to manipulate prompts, attempting to bypass instructions or coerce the system into executing unauthorized tasks.


In [17]:
# Ask a second question about the same page.
# NOTE: run_agentic_rag scrapes the URL and rebuilds the vector DB on every call,
# so this repeats the download and embedding work already done for the first question.
query = "Can LLM be used as guardrails?"
response = run_agentic_rag(url, query)



🔗 Scraping webpage: https://www.confident-ai.com/blog/llm-guardrails-the-ultimate-guide-to-safeguard-llm-systems
🧠 Creating vector DB...
🤖 Setting up agent...
💬 Running agent on query...


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to find out if Large Language Models (LLMs) can be used as guardrails. I will use a web search to gather information on this topic.
Action: WebSearch
Action Input: "LLM as guardrails"[0m
Observation: [36;1m[1;3mthatâs not the worst-case scenario. To be honest, guarding something based on functionality instead of safety is a recipe for disaster. This is because functionality is rarely perfect, which means youâll end up in needless regeneration land (NRL!!) if you choose to guard against functionality criteria instead. So, what are the guards you should be using for your LLM guardrails? You should first red-team your LLM application to detect what vulnerabilities it is susceptible to, or choose from
legal and ethical standards, s

In [18]:
# Print the answer to the second question under a header.
print("\n🔍 Answer:\n", response)


🔍 Answer:
 Yes, LLMs can be used as guardrails to protect other LLM applications from malicious inputs and outputs. They can assess inputs and outputs for safety based on criteria like jailbreaking, prompt injection, toxicity, and bias. These guardrails are essential for building secure and reliable LLM applications at scale.
