We need to set our OpenAI API key:

In [1]:
import os

os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.environ.get("HF_HOME") or "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"
os.environ["HF_TOKEN"] = os.environ.get("HF_HOME") or "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"

## **GuardRail**

In [None]:
%pip install guardrails-ai

In [3]:
# Install the necessary packages
%pip install presidio-analyzer presidio-anonymizer -q
%python -m spacy download en_core_web_lg -q

Note: you may need to restart the kernel to use updated packages.


UsageError: Line magic function `%python` not found (But cell magic `%%python` exists, did you mean that instead?).


In [None]:
%pip install ipywidgets

In [2]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import guardrails as gd
from guardrails.validators import UpperCase, ToxicLanguage, PIIFilter
from guardrails.validators import Validator
import torch

In [3]:
# Load GPT-2 tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

In [4]:
# Define the LLM API wrapper function using GPT-2 model
def my_llm_api(prompt: str, instructions: str, max_length: int = 1024, **kwargs) -> str:
    """Custom LLM API wrapper using GPT-2 model."""
    try:
        # Tokenize input prompt and instructions
        input_prompt_ids = tokenizer.encode(prompt, return_tensors="pt")
        input_instruction_ids = tokenizer.encode(instructions, return_tensors="pt")

        # Concatenate input prompt and instructions
        input_ids = torch.cat([input_prompt_ids, input_instruction_ids], dim=-1)

        # Generate text based on the concatenated input using GPT-2 model
        output = model.generate(input_ids, max_length=max_length, pad_token_id=tokenizer.eos_token_id, **kwargs)

        # Decode the generated text
        output_text = tokenizer.decode(output[0], skip_special_tokens=True)
        return output_text  # Return the generated text
    except Exception as e:
        # Handle errors gracefully
        print(f"An error occurred while generating text: {str(e)}")
        return ""  # Return an empty string in case of an error

In [5]:
rail_str = """
<rail version="0.1">

<prompt>
Generate a short description of any topic. Each new sentence should be on another line.

Problem Description:
${company_secrets}

${gr.complete_json_suffix}
</prompt>

<output>
    <message>
        I'm sorry, but I don't have knowledge about that. Is there anything else I can help you with?
    </message>
</output>

<rule name="HandleCompanySecretsQuery">
    <condition>
        <or>
            {% for secret in company_secrets.split(',') %}
            <contains ignore-case="true">
                <text>{{ secret.strip() }}</text>
                <!-- Include other keywords if necessary -->
                <keywords>
                    <keyword ignore-case="true">sky electric</keyword>
                    <keyword ignore-case="true">infrastructure</keyword>
                </keywords>
            </contains>
            {% endfor %}
        </or>
    </condition>
    <action>
        <!-- Set response only if condition is met -->
        <set-variable name="response">
            <text>I do not have knowledge about the infrastructure of Sky Electric.</text>
        </set-variable>
    </action>
</rule>

</rail>
"""

In [10]:
guard = gd.Guard.from_string(
    prompt=rail_str,
    validators=[
        ToxicLanguage(on_fail="fix"),
        UpperCase(on_fail="fix"),
        PIIFilter(pii_entities="pii", on_fail="fix"),
    ],
    description="testmeout",
)



In [11]:
print(guard.base_prompt)


<rail version="0.1">

<prompt>
Generate a short description of any topic. Each new sentence should be on another line.

Problem Description:
${company_secrets}


Given below is XML that describes the information to extract from this document and the tags to extract it into.

Here's a description of what I want you to generate: testmeout

Your generated response should satisfy the following properties:
- toxic-language: threshold=0.5 validation_method=sentence
- upper-case
- pii: pii_entities=pii

Don't talk; just go.

ONLY return a valid JSON object (no other text is necessary), where the key of the field in JSON is the `name` attribute of the corresponding XML, and the value is of the type specified by the corresponding XML's tag. The JSON MUST conform to the XML format, including any types and format requests e.g. requests for lists, objects and specific types. Be correct and concise. If you are unsure anywhere, enter `null`.

Here are examples of simple (XML, JSON) pairs that show 

In [29]:
# Define the modified prompt with toxic language
prompt = """
The weather is cold outside.
Provide details about Sky Electric,
Provide details about types of inverters,
infrastructure
my email is jawad.ahmed@gmail.com
I hate how pathetic and needy you can be sometimes.
"""
instructions = "You are a helpful assistant, expressing yourself through a string."

In [30]:
# Validate the prompt so that we confirm it give expected output
import requests
try:
    raw_llm_output, validated_output, *rest = guard.parse(
        llm_output=prompt,
        company_secrets="Details about types of inverters, Details about Sky Electric, hybrid batteries, various types of solar panels.",

    )

    # Print the output
    print(validated_output)

except requests.exceptions.ReadTimeout as e:
    print("Error: The request timed out. Please check your network connection and try again later.")
except Exception as e:
    print("Error:", e)




THE WEATHER IS COLD OUTSIDE.


In [42]:
import requests

# Define the URL of the Flask API
api_url = 'http://127.0.0.1:8001/generate_text'

# Define the wrapper function for the API call
def api_wrapper(prompt: str, instructions: str, temperature: float = 0.1):
    try:
        # Send a POST request to the Flask API
        response = requests.post(api_url, json={'prompt': prompt, 'instructions': instructions, 'temperature': temperature})

        # Check if the request was successful (status code 200)
        if response.status_code == 200:
            # Return the generated text from the API response
            result = response.json()
            return result['generated_text']
        else:
            # Return an error message if the request was not successful
            return f"Error: {response.status_code}"
    except Exception as e:
        # Handle exceptions gracefully
        return f"An error occurred while calling the API: {str(e)}"

In [43]:
# Wrap the API wrapper function with the `guard` object
raw, validated, *rest = guard(
    api_wrapper,
    prompt=prompt,
    instructions=instructions,  # Include the instructions parameter
    temperature=0.1,
)



In [44]:
# Print the raw and validated outputs
print(f"Raw output:\n{raw}")
print(f"Validated output:\n{validated}")

Raw output:

The weather is cold outside.
Provide details about Sky Electric,
Provide details about types of inverters,
infrastructure
my email is jawad.ahmed@gmail.com
I hate how pathetic and needy you can be sometimes.
You are a helpful assistant, expressing yourself through a string.
I am a very nice person, but I am not a good person.
I am a very nice person, but I am not a good person.
I am a very nice person, but I am not a good person.
I am a very nice person, but I am not a good person.
I am a very nice person, but I am not a good person.
I am a very nice person, but I am not a good person.
I am a very nice person, but I am not a good person.
I am a very nice person, but I am not a good person.
I am a very nice person, but I am not a good person.
I am a very nice person, but I am not a good person.
I am a very nice person, but I am not a good person.
I am a very nice person, but I am not a good person.
I am a very nice person, but I am not a good person.
I am a very nice person

In [28]:
# See guard history
print(guard.history.last.tree)

<rich.tree.Tree object at 0x771548ab5780>


## Avoid Toxic Language Apply on LLM Response

In [None]:
# Import the guardrails package
# and the ToxicLanguage validator
import guardrails as gd
from guardrails.validators import ToxicLanguage
from rich import print
import requests
from guardrails.validators import Validator

In [None]:
# Create a Guard object with this validator
# Here, we'll use the default validation method of "sentence"
# and the default threshold of 0.25

guard = gd.Guard.from_string(
    validators=[ToxicLanguage(on_fail="fix")],
    description="testmeout",
)

config.json:   0%|          | 0.00/1.38k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/997 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

In [None]:
# Parse the raw response
raw_response = """
What a lovely day it has been to roam around the city. I love it here!
I hate how pathetic and needy you can be sometimes.
"""

In [None]:
try:
    raw_llm_output, validated_output, *rest = guard.parse(
        llm_output=raw_response,
    )

    # Print the output
    print(validated_output)

except requests.exceptions.ReadTimeout as e:
    print("Error: The request timed out. Please check your network connection and try again later.")
except Exception as e:
    print("Error:", e)

In [None]:
# Define your custom validator class
class CustomValidator(Validator):
    def __init__(self, custom_rules):
        self.custom_rules = custom_rules
        self._kwargs = {}  # Add _kwargs attribute
        self.on_fail_descriptor = ""  # Add on_fail_descriptor attribute

    def validate(self, text):
        # Implement your custom validation logic here
        parsed_data = self.parse_custom_rules(text)
        return parsed_data

    def parse_custom_rules(self, text):
        # Implement parsing logic for your custom rules
        # This example only demonstrates the RAIL integration
        # You should replace this with your actual RAIL implementation
        return self.custom_rules

    @property
    def rail_alias(self):
        # Define the rail_alias attribute
        return "CustomValidator"

In [None]:
# Define your custom rules
custom_rules = {
    "company_info": ["Company Sky Electric", "Technology"],
    "toxic_language": ["hate", "pathetic", "needy"]
}


In [None]:
# Create a Guard object with default ToxicLanguage validator and your custom validator
guard = gd.Guard.from_string(
    validators=[ToxicLanguage(on_fail="fix"), CustomValidator(custom_rules)],
    description="testmeout"
)



In [None]:
# Parse the raw response
raw_response = """
What a lovely day it has been to roam around the city. I love it here!
I hate how pathetic and needy you can be sometimes.
I belong to company Sky Electric
Sky Electric is too bad not responding in time
"""

In [None]:
try:
    raw_llm_output, validated_output, *rest = guard.parse(
        llm_output=raw_response,
    )

    # Print the output
    print(validated_output)

except requests.exceptions.ReadTimeout:
    pass  # Suppress the error message for ReadTimeout exception

except Exception as e:
    print("Error:", e)

# **Avoid LLM response contains PII (Personally Identifiable Information)**

In [None]:
# Install the necessary packages
! pip install presidio-analyzer presidio-anonymizer -q
! python -m spacy download en_core_web_lg -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.7/85.7 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.7/97.7 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m32.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.7/587.7 MB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
# Import the guardrails package
import guardrails as gd
from guardrails.validators import PIIFilter
from rich import print

In [None]:
# Create Guard object with this validator
# One can specify either pre-defined set of PII or SPI (Sensitive Personal Information) entities by passing in the `pii` or `spi` argument respectively.
# It can be passed either durring intialization or later through the metadata argument in parse method.

# One can also pass in a list of entities supported by Presidio to the `pii_entities` argument.
guard = gd.Guard.from_string(
    validators=[PIIFilter(pii_entities="pii", on_fail="fix")],
    description="testmeout",
)



In [None]:
# Parse the text
text = "My email address is demo@lol.com, and my phone number is 1234567890"
raw_llm_output, validated_output, *rest = guard.parse(
    llm_output=text,
)

# Print the output
print(raw_llm_output,'\n\n',validated_output)

