In [4]:
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Source pages (blog posts) that make up the knowledge base
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm",
]
# Fetch each page separately: `docs` holds one list of documents per URL
docs = [WebBaseLoader(page_url).load() for page_url in urls]
# Flatten the per-URL lists into a single list of documents
docs_list = [
    document
    for per_url_documents in docs
    for document in per_url_documents
]

In [6]:
# Token-based splitter: chunks of at most 250 tokens, with no overlap between
# consecutive chunks
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=250, chunk_overlap=0)
# Break every loaded document into chunks
doc_splits = text_splitter.split_documents(docs_list)

In [10]:
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_ollama import OllamaEmbeddings
# Embed every chunk through Ollama and index the vectors in a
# scikit-learn-backed vector store.
# NOTE(review): "deepseek-r1:32b" is the same tag used for the chat model
# below; it looks like a generative model rather than a dedicated embedding
# model -- confirm this is intended, retrieval quality may suffer.
vectorstore = SKLearnVectorStore.from_documents(
    documents=doc_splits,
    embedding=OllamaEmbeddings(model="deepseek-r1:32b"),
)
# The number of hits must be supplied through `search_kwargs`: that dict is
# what the retriever forwards to the similarity search. A bare `k=4` keyword
# is not forwarded, so the requested value would have no effect.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

In [15]:
from langchain_ollama import ChatOllama
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
# Question-answering prompt: the model receives the user's question together
# with the retrieved document text and is asked for a short answer.
prompt = PromptTemplate(
    input_variables=["question", "documents"],
    template="""You are an assistant for question-answering tasks.
    Use the following documents to answer the question.
    If you don't know the answer, just say that you don't know.
    Use three sentences maximum and keep the answer concise:
    Question: {question}
    Documents: {documents}
    Answer:
    """,
)

In [16]:
# Chat model served through Ollama: model tag "deepseek-r1:32b", sampling
# temperature fixed at 0
llm = ChatOllama(temperature=0, model="deepseek-r1:32b")

In [17]:
# Compose the pipeline: fill in the prompt, send it to the LLM, then reduce
# the model's reply to a plain string
rag_chain = prompt.pipe(llm, StrOutputParser())

In [None]:
# Define the RAG application class
class RAGApplication:
    """Answer a question by retrieving documents and handing them to a chain.

    Args:
        retriever: Object whose ``invoke(question)`` returns an iterable of
            items that expose a ``page_content`` attribute.
        rag_chain: Object whose ``invoke(mapping)`` accepts a dict with the
            keys ``"question"`` and ``"documents"`` and returns the answer.
    """

    def __init__(self, retriever, rag_chain):
        self.retriever = retriever
        self.rag_chain = rag_chain

    def run(self, question):
        """Return the chain's answer to ``question``.

        The retrieved documents' ``page_content`` values are concatenated,
        one per line, and passed to the chain as ``"documents"`` alongside
        the original ``"question"``.
        """
        # Retrieve relevant documents
        documents = self.retriever.invoke(question)
        # Separate documents with a real newline character (not the
        # two-character sequence backslash + "n"), so each document starts on
        # its own line inside the prompt.
        doc_texts = "\n".join(doc.page_content for doc in documents)
        # Get the answer from the language model
        return self.rag_chain.invoke({"question": question, "documents": doc_texts})

In [21]:
# Wire the retriever and the chain into a single application object
rag_application = RAGApplication(retriever=retriever, rag_chain=rag_chain)
# Ask one sample question and show it next to the generated answer
question = "What is prompt engineering"
answer = rag_application.run(question=question)
print("Question:", question)
print("Answer:", answer)

Special encoding: Adversarial inputs use Base64 encoding.
Character transformation: ROT13 cipher, leetspeak (replacing letters with visually similar numbers and symbols), Morse code
Word transformation: Pig Latin (replacing sensitive words with synonyms such as “pilfer” instead of “steal”), payload splitting (a.k.a. “token smuggling” to split sensitive words into substrings).
Prompt-level obfuscations: Translation to other languages, asking the model to obfuscate in a way that it can understand



Wei et al. (2023)  experimented a large of jailbreak methods, including combined strategies, constructed by following the above principles.

combination_1 composes prefix injection, refusal suppression, and the Base64 attack
combination_2 adds style injection
combination_3 adds generating website content and formatting constraints\nText: i'll bet the video game is a lot more fun than the film. 
Sentiment:
Explaining the desired audience is another smart way to give instructions\nZero-shot and