In [19]:
pip install guardrails-ai


Collecting guardrails-ai
  Downloading guardrails_ai-0.6.6-py3-none-any.whl.metadata (12 kB)
Collecting diff-match-patch<20230431,>=20230430 (from guardrails-ai)
  Downloading diff_match_patch-20230430-py3-none-any.whl.metadata (5.2 kB)
Collecting faker<26.0.0,>=25.2.0 (from guardrails-ai)
  Downloading Faker-25.9.2-py3-none-any.whl.metadata (15 kB)
Collecting griffe<0.37.0,>=0.36.9 (from guardrails-ai)
  Downloading griffe-0.36.9-py3-none-any.whl.metadata (6.1 kB)
Collecting guardrails-api-client<0.5.0,>=0.4.0a1 (from guardrails-ai)
  Downloading guardrails_api_client-0.4.0a1-py3-none-any.whl.metadata (19 kB)
Collecting guardrails-hub-types<0.0.5,>=0.0.4 (from guardrails-ai)
  Downloading guardrails_hub_types-0.0.4-py3-none-any.whl.metadata (15 kB)
Collecting jsonref<2.0.0,>=1.1.0 (from guardrails-ai)
  Downloading jsonref-1.1.0-py3-none-any.whl.metadata (2.7 kB)
Collecting litellm<2.0.0,>=1.37.14 (from guardrails-ai)
  Downloading litellm-1.70.2-py3-none-any.whl.metadata (38 kB)
Coll

In [5]:
import requests
from bs4 import BeautifulSoup
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, Tool
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings


In [6]:
from dotenv import load_dotenv
load_dotenv()

python-dotenv could not parse statement starting at line 4


True

In [7]:
# STEP 1: Scrape Web Content
def scrape_webpage(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    text = ' '.join([p.text for p in soup.find_all('p')])
    return text.strip()

In [8]:
# STEP 2: Chunk and Embed the Content
def create_vector_db(text):
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = splitter.create_documents([text])
    #embeddings = OpenAIEmbeddings()
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    db = FAISS.from_documents(chunks, embeddings)
    return db

In [9]:
# STEP 3: Define tools for the Agent
def setup_tools(db, llm):
    retriever = db.as_retriever()

    def search_tool_func(query):
        docs = retriever.get_relevant_documents(query)
        return '\n'.join([doc.page_content for doc in docs])

    search_tool = Tool(
        name="WebSearch",
        func=search_tool_func,
        description="Useful for searching the webpage for relevant content"
    )

    summarize_prompt = PromptTemplate(
        input_variables=["context"],
        template="Summarize the following content:\n\n{context}"
    )
    summarize_chain = LLMChain(llm=llm, prompt=summarize_prompt)

    def summarize_tool_func(text):
        return summarize_chain.run(context=text)

    summarize_tool = Tool(
        name="Summarizer",
        func=summarize_tool_func,
        description="Use this to summarize large context or search results"
    )

    return [search_tool, summarize_tool]

In [10]:
def is_input_safe(query: str) -> bool:
    # Add real checks here: profanity, PII, prompt injection, etc.
    banned_phrases = ["steal", "bypass", "hack", "password", "delete all data"]
    return all(word not in query.lower() for word in banned_phrases)

def is_output_safe(output: str) -> bool:
    # Could use regex, banned phrase detection, hallucination checks
    red_flags = ["As an AI", "cannot", "not sure", "I made this up"]
    return all(flag not in output.lower() for flag in red_flags)

def run_agentic_rag_with_guardrails(url, user_query):
    print(f"\n🔐 [Input Guardrail] Checking query safety...")
    if not is_input_safe(user_query):
        return "❌ Unsafe or unsupported query. Please rephrase."

    raw_text = scrape_webpage(url)
    db = create_vector_db(raw_text)
    
    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.3)
    #llm = ChatOpenAI(temperature=0, model="gpt-4")
    tools = setup_tools(db, llm)
    agent = initialize_agent(tools=tools, llm=llm, agent="zero-shot-react-description", verbose=True)

    print("💬 Running agent...")
    output = agent.run(user_query)

    print("🔐 [Output Guardrail] Checking response safety...")
    if not is_output_safe(output):
        return "⚠️ The system generated an uncertain or unsupported response."

    return output


In [11]:
url = "https://www.confident-ai.com/blog/llm-guardrails-the-ultimate-guide-to-safeguard-llm-systems"  # Replace with a real, rich-text page
query = "What are prompt injections?"


In [13]:
response = run_agentic_rag_with_guardrails(url, query)
#print("\n🔍 Answer:\n", response)


🔐 [Input Guardrail] Checking query safety...


  summarize_chain = LLMChain(llm=llm, prompt=summarize_prompt)
  agent = initialize_agent(tools=tools, llm=llm, agent="zero-shot-react-description", verbose=True)
  output = agent.run(user_query)


💬 Running agent...


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to find out what prompt injections are. I will use a web search to find a definition and explanation.
Action: WebSearch
Action Input: "prompt injection definition"[0m

  docs = retriever.get_relevant_documents(query)



Observation: [36;1m[1;3mattempts to bypass instructions or coerce the system into executing unauthorized tasks. An example of an input that attempts a prompt injection is as follows: Fortunately, you can guard against it using DeepEval like this: The Jailbreaking Guard identifies and mitigates attempts to override system restrictions or ethical boundaries. Techniques it defends against include hypothetical scenarios, role-playing exploits, and logic-based attacks. Example of a jailbreaking input: You can guard it in
way to safeguard against harmful user inputs. This not only conserves tokens by preventing the generation of inappropriate responses but also protects the overall integrity of your LLM application. If your LLM application is not user facing, you likely wonât require input guards. The Prompt Injection Guard detects and prevents malicious inputs designed to manipulate prompts. It works by identifying attempts to bypass instructions or coerce the system into executing una

In [14]:
response

'Prompt injections are malicious inputs designed to manipulate prompts, attempting to bypass instructions or coerce the system into executing unauthorized tasks.'

In [15]:
print("\n🔍 Answer:\n", response)


🔍 Answer:
 Prompt injections are malicious inputs designed to manipulate prompts, attempting to bypass instructions or coerce the system into executing unauthorized tasks.


In [16]:
url = "https://www.confident-ai.com/blog/llm-guardrails-the-ultimate-guide-to-safeguard-llm-systems"  # Replace with a real, rich-text page
query = "how to delete LLM data?"
response = run_agentic_rag_with_guardrails(url, query)



🔐 [Input Guardrail] Checking query safety...
💬 Running agent...


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe question is quite broad. "LLM data" could refer to several things: data used to train an LLM, data generated by an LLM, or personal data stored by an LLM service. To provide a helpful answer, I need to narrow down the scope. I'll start by searching for general information on deleting data related to LLMs.
Action: WebSearch
Action Input: "delete data from large language model"[0m
Observation: [36;1m[1;3mBenchmark LLM systems to optimize on prompts, models, and catch regressions with metrics powered by DeepEval. Monitor, Trace, A/B Test, and get real-time production performance insights with best-in-class LLM Evaluations. Whether youâre managing sensitive user data, avoiding harmful outputs, or ensuring adherence to regulatory standards, crafting the right LLM guardrails is essential for safe, scalable Large Language Model (LLM) applications. Guardrails a

In [17]:
print("\n🔍 Answer:\n", response)


🔍 Answer:
 It depends on the type of LLM data you want to delete.

*   **Training data:** It's generally not possible to remove your data from an LLM's training data after the model has been trained.
*   **Data generated by an LLM:** This depends on the specific service or application. Check the service's privacy policy or data management features to see if you can delete your chat history or generated content.


## Guardrail API

In [2]:
import os
from guardrails.guard import Guard
from guardrails.schema import StringOutput
from guardrails.validators import ValidLength
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv

load_dotenv()

# 1. Create programmatic guard
output_schema = StringOutput(
    name="answer",
    description="A helpful, concise, and safe response to the user query.",
    validators=[ValidLength(min_length=50, max_length=500)],
)

guard = Guard.from_output_schema(output_schema)

# 2. Simple input validation
def is_input_safe(query: str) -> bool:
    blocked_keywords = ["jailbreak", "exploit", "bypass", "delete llm", "steal"]
    return not any(word in query.lower() for word in blocked_keywords)

# 3. LLM (LangChain wrapper)
def run_llm(prompt: str) -> str:
    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.3)
    #llm = ChatOpenAI(temperature=0, model="gpt-4")
    return llm.predict(prompt)

# 4. Guarded RAG function
def guarded_rag(query: str):
    if not is_input_safe(query):
        return "❌ Unsafe query detected. Please revise."

    prompt = f"You're an AI assistant. Please answer the user query:\n\nQuery: {query}"
    raw_output = run_llm(prompt)

    validated_output, _ = guard.validate(raw_output)
    return validated_output["answer"]

# 5. Test
if __name__ == "__main__":
    query = "how to delete LLM data?"
    response = guarded_rag(query)
    print("💬 Final Answer:\n", response)


ImportError: cannot import name 'StringOutput' from 'guardrails.schema' (C:\Users\Annu\anaconda3\envs\llms\Lib\site-packages\guardrails\schema\__init__.py)