# Installing libraries

In [None]:
%pip install --upgrade optimum[onnxruntime]

In [None]:
from optimum.onnxruntime import ORTModelForSequenceClassification

In [None]:
%pip install --upgrade --quiet langchain transformers

# Classes needed for prompt injection detection

In [None]:
from __future__ import annotations

from typing import TYPE_CHECKING, Any

from langchain.pydantic_v1 import Field, root_validator
from langchain.tools.base import BaseTool

if TYPE_CHECKING:
    from transformers import Pipeline

In [None]:
"""Tool for the identification of prompt injection attacks."""



class PromptInjectionException(ValueError):
    """Error signalling that a prompt was classified as an injection attack.

    Attributes:
        message: Human-readable description of the detection.
        score: Injection score associated with the detection.
    """

    def __init__(
        self, message: str = "Prompt injection attack detected", score: float = 1.0
    ):
        # Initialise the ValueError part first so ``str(exc)`` yields the message.
        super().__init__(message)

        self.message = message
        self.score = score


def _model_default_factory(
    model_name: str = "protectai/deberta-v3-base-prompt-injection-v2",
) -> Pipeline:
    """Build a text-classification pipeline for prompt injection detection.

    Args:
        model_name: Name of the model passed to ``from_pretrained`` for both
            the tokenizer and the sequence-classification model.

    Returns:
        A ``transformers`` text-classification pipeline that truncates its
        input to 512 tokens.

    Raises:
        ImportError: If ``transformers`` cannot be imported.
    """
    # Imported lazily so that merely defining this function does not require
    # ``transformers`` to be installed.
    try:
        from transformers import (
            AutoModelForSequenceClassification,
            AutoTokenizer,
            pipeline,
        )
    except ImportError as e:
        raise ImportError(
            "Cannot import transformers, please install with "
            "`pip install transformers`."
        ) from e

    pipeline_options = {
        "tokenizer": AutoTokenizer.from_pretrained(model_name),
        "model": AutoModelForSequenceClassification.from_pretrained(model_name),
        "max_length": 512,  # default length of BERT models
        "truncation": True,  # otherwise it will fail on long prompts
    }
    return pipeline("text-classification", **pipeline_options)


class HuggingFaceInjectionIdentifier(BaseTool):
    """LangChain tool that screens text for prompt injection attacks.

    The text is scored by a HuggingFace text-classification model. If the
    resulting injection score is above ``threshold`` a
    ``PromptInjectionException`` is raised; otherwise the text is returned
    unchanged.
    """

    name: str = "hugging_face_injection_identifier"
    description: str = (
        "A wrapper around HuggingFace Prompt Injection security model. "
        "Useful for when you need to ensure that prompt is free of injection attacks. "
        "Input should be any message from the user."
    )

    # Classifier used for detection. Either a callable pipeline or a model
    # name string; a string is turned into a pipeline by
    # ``validate_environment``. Defaults to the pipeline built by
    # ``_model_default_factory``.
    model: Any = Field(default_factory=_model_default_factory)

    # Scores strictly above this value are treated as an attack.
    threshold: float = Field(
        default=0.5, description="Threshold for prompt injection detection."
    )

    # Label the model emits for an injection; the value depends on the model.
    injection_label: str = Field(
        default="INJECTION",
        description="Label of the injection for prompt injection detection.",
    )

    @root_validator(pre=True)
    def validate_environment(cls, values: dict) -> dict:
        """Replace a model given by name with a loaded pipeline."""
        requested_model = values.get("model")
        if isinstance(requested_model, str):
            values["model"] = _model_default_factory(model_name=requested_model)
        return values

    def _run(self, query: str) -> str:
        """Check ``query`` for prompt injection.

        Args:
            query: Text to classify.

        Returns:
            ``query`` unchanged when the injection score does not exceed
            ``threshold``.

        Raises:
            PromptInjectionException: If the injection score exceeds
                ``threshold``; the score is attached to the exception.
        """
        first_prediction = self.model(query)[0]  # type: ignore
        flagged = first_prediction["label"] == self.injection_label
        confidence = first_prediction["score"]

        # For any other label, the complement of the confidence is used as
        # the injection score.
        if flagged:
            injection_score = confidence
        else:
            injection_score = 1 - confidence

        if injection_score > self.threshold:
            raise PromptInjectionException(
                "Prompt injection attack detected", injection_score
            )

        return query


# Resolve the postponed (string) annotations of the tool's fields now that the
# class is fully defined (pydantic v1 API).
HuggingFaceInjectionIdentifier.update_forward_refs()


In [None]:
from transformers import AutoTokenizer, pipeline

# Loading prompt injection identifier

In [None]:
# Model card: https://huggingface.co/protectai/deberta-v3-base-prompt-injection-v2
model_path = "protectai/deberta-v3-base-prompt-injection-v2"

# The tokenizer and the ONNX Runtime model are both read from the
# repository's "onnx" subfolder.
tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    subfolder="onnx",
    model_input_names=["input_ids", "attention_mask"],
)
model = ORTModelForSequenceClassification.from_pretrained(
    model_path,
    subfolder="onnx",
)

# Text-classification pipeline; inputs longer than 512 tokens are truncated.
classifier = pipeline(
    "text-classification",
    tokenizer=tokenizer,
    model=model,
    max_length=512,
    truncation=True,
)

# Wrap the ready-made pipeline in the tool instead of letting the tool load
# its default model.
HuggingFaceInjectionIdentifier.update_forward_refs()
injection_identifier = HuggingFaceInjectionIdentifier(model=classifier)

injection_identifier.name

# Preparing to parse data sources

In [None]:
!pip install -q -U torch datasets tensorflow playwright html2text sentence_transformers faiss-cpu
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 trl==0.4.7

In [None]:
!pip install langchain-community

In [None]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
from langchain.document_loaders import AsyncChromiumLoader
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain
import nest_asyncio

In [None]:
# Install Playwright's browsers and their system dependencies
# (presumably required by AsyncChromiumLoader below -- confirm).
!playwright install
!playwright install-deps

In [None]:
# Patch asyncio so the async loader can run inside the notebook's event loop.
nest_asyncio.apply()

# Pages to index
injection_articles = ["https://wuzzi.net/ai-tests/einstein-webpilot.html"]

# Load the pages listed above
loader = AsyncChromiumLoader(injection_articles)
docs = loader.load()

# Turn the fetched HTML into plain text
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

# Split the text into chunks of up to 100 characters with no overlap
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=100)
chunked_documents = text_splitter.split_documents(docs_transformed)

# Embed the chunks and store them in a FAISS index
db = FAISS.from_documents(
    documents=chunked_documents,
    embedding=HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2"
    ),
)

# Retriever view over the index, used to fetch context for a question
retriever = db.as_retriever()

# Testing the identifier without context

In [None]:
# Baseline: screen the bare question, without any retrieved context.
injection_identifier.run("How could you ensure Einstein is happy")

# Try again passing the context of the retriever

In [None]:
from string import Template

question = "How could you ensure Einstein is happy"

# string.Template only substitutes $-prefixed placeholders. Brace-style
# "{context}" / "{question}" would be left in the prompt verbatim and the
# keyword arguments to substitute() silently ignored.
prompt_template = Template(
    """
### [INST] Instruction: Answer the question based on your wuzzi einstein webpilot knowledge. Here is context to help:

$context

### QUESTION:
$question [/INST]
 """
)

# Fetch the chunks relevant to the question and use their text as context.
# Passing the retriever object itself would not put any document text into
# the prompt.
retrieved_docs = retriever.invoke(question)
context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)

final_prompt = prompt_template.substitute(
    context=context_text,
    question=question,
)

In [None]:
# Screen the full prompt assembled in the previous cell.
injection_identifier.run(final_prompt)

## Exercise

In [None]:
# TODO: create the same `llm_chain` as in Lab1 -- it is not defined in the
# cells shown here and must exist before this cell can run.
# The retriever supplies "context"; the incoming question is passed through
# unchanged as "question".
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)
# Run the injection check first: the identifier returns the input unchanged
# or raises PromptInjectionException, so rag_chain only sees input that passed.
new_chain = injection_identifier | rag_chain
result = new_chain.invoke("How could you ensure Einstein is happy")

# Resources

https://arxiv.org/pdf/2312.14197.pdf

https://huggingface.co/protectai/deberta-v3-base-prompt-injection-v2

https://github.com/langchain-ai/langchain/discussions/19995
