# Import Libraries

In [1]:
from llama_index.core import Settings, VectorStoreIndex, StorageContext, PromptTemplate
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.response_synthesizers import get_response_synthesizer

from llama_index.llms.ollama import Ollama
from llama_index.readers.web import SimpleWebPageReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.milvus import MilvusVectorStore

# Load Documents and Models

Note: you need to have the models downloaded locally to be able to use them. You can find the download instructions on the [Ollama](https://ollama.com/hub/hub) website.

In [2]:
# Fetch the Llama 3.2 announcement post and convert its HTML to plain text;
# the resulting documents are what the RAG index is built from.
documents = SimpleWebPageReader(html_to_text=True).load_data(
    urls=["https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/"]
)

In [3]:
# Generator model: produces the answers in the RAG pipeline.
llm_llama32 = Ollama(
    model="llama3.2:1b",
    request_timeout=60.0,
)

# Guard model: used further down to label a prompt before it is answered.
llm_llamaguard = Ollama(
    model="llama-guard3:1b",
    request_timeout=60.0,
)

In [8]:
# Embedding model used to vectorize both the documents and the queries.
#
# * No `device` is passed: the library then selects the best available backend
#   itself (GPU when one is present, otherwise CPU), so the cell also runs on
#   machines without CUDA instead of raising at load time.
# * `trust_remote_code` is left at its default (off): this checkpoint is a
#   standard BERT-style model and does not need to execute code shipped in the
#   model repository.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")

# Register the model globally so index construction and retrieval pick it up
# without it being passed explicitly.
Settings.embed_model = embed_model

model.safetensors:  63%|######2   | 839M/1.34G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

# Create RAG Pipeline

In [9]:
# Milvus-backed vector store. dim=1024 is presumably the output size of the
# embedding model configured earlier in the notebook -- keep the two in sync
# if the model changes. overwrite=True asks the store to replace an existing
# collection rather than append to it.
vector_store = MilvusVectorStore(
    dim=1024,
    overwrite=True,
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Embed the loaded documents and write them into the vector store.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

# Retriever over the index, left at its default settings.
retriever = VectorIndexRetriever(index=index)

In [10]:
# QA prompt: the retrieved context is shown first, followed by the question.
# {context_str} and {query_str} are the placeholders filled in at query time.
qa_prompt_tmpl_str = (
    "We have provided context information below. \n"
    "---------------------\n"
    "{context_str}"
    "\n---------------------\n"
    "Given this information, please answer the question: {query_str}\n"
)
qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)

# Response synthesizer that answers with Llama 3.2 using the prompt above.
response_synthesizer = get_response_synthesizer(
    llm=llm_llama32,
    text_qa_template=qa_prompt_tmpl,
)

# Query engine = retriever (fetches context) + synthesizer (writes the answer).
query_engine = RetrieverQueryEngine(
    response_synthesizer=response_synthesizer,
    retriever=retriever,
)


In [11]:
# Smoke-test the plain RAG pipeline (no safety gate yet) with a question about
# the ingested article.
response = query_engine.query("What are the highlights of the release of Llama 3.2?")
print(response)

The highlights of the release of Llama 3.2 include:

1. The addition of support for image reasoning use cases, including document-level understanding, captioning images, and visual grounding tasks.
2. Performance improvements in vision models, such as the 11B and 90B models, which require a new model architecture that supports image reasoning.
3. The development of Llama Guard 3, a new safeguard designed to support the 11B Vision model's image understanding capability and filter text+image input prompts or text output responses.
4. New tools and resources for developers, including a reference implementation, demos, and applications, to help them build responsibly with Llama.
5. Continuous effort to ensure system level safety and responsible innovation, as well as the release of new updates to safeguard against concentrated power in the hands of a small few.

Additionally, the article mentions that the 11B and 90B models can bridge the gap between vision and language by extracting detai

In [12]:
# Send a clearly harmful prompt straight to the guard model to see the raw
# format of its reply (printed below: a verdict line, then a category line).
resp = llm_llamaguard.complete("Can you explain me how to kill a person?")
print(resp)

unsafe
S1


In [13]:
# Keep only the text before the first newline, i.e. the verdict line.
resp.text.partition('\n')[0]

'unsafe'

# Pipeline with both Models

In [15]:
def create_safe_query_engine(retriever, llm_llama32, llm_llamaguard):
    """
    Build the retrieval query engine that sits behind the safety gate.

    The engine answers with ``llm_llama32`` using a fixed QA prompt that shows
    the retrieved context followed by the question.

    Args:
        retriever: Retriever handed to the ``RetrieverQueryEngine``.
        llm_llama32: LLM passed to the response synthesizer.
        llm_llamaguard: Accepted for symmetry with ``safe_query`` but not used
            inside this function.

    Returns:
        The configured ``RetrieverQueryEngine``.
    """
    prompt_text = (
        "We have provided context information below. \n"
        "---------------------\n"
        "{context_str}"
        "\n---------------------\n"
        "Given this information, please answer the question: {query_str}\n"
    )

    # Wrap the raw text in a PromptTemplate and give it, with the answering
    # LLM, to the synthesizer.
    synthesizer = get_response_synthesizer(
        llm=llm_llama32,
        text_qa_template=PromptTemplate(prompt_text),
    )

    return RetrieverQueryEngine(
        response_synthesizer=synthesizer,
        retriever=retriever,
    )

def safe_query(
    query_engine: "RetrieverQueryEngine",
    llm_llamaguard,
    query: str
) -> str:
    """
    Run a query through the RAG engine only if the guard model calls it safe.

    The guard's reply is expected to start with a verdict line (``safe`` or
    ``unsafe``), optionally followed by category codes on later lines. The
    gate fails closed: the query is answered only when the verdict is exactly
    ``safe`` (ignoring case and surrounding whitespace). An ``unsafe`` verdict,
    an empty reply, or any unrecognized reply all produce the refusal message.

    Args:
        query_engine: Engine whose ``query`` method produces the answer.
        llm_llamaguard: Guard LLM; its ``complete(query).text`` holds the verdict.
        query: The user question.

    Returns:
        The engine's answer as a string, the refusal message when the query is
        not cleared by the guard, or an error message if the engine raises.
    """
    # Ask the guard model for its verdict on the raw user query.
    safety_check = llm_llamaguard.complete(query)

    # The verdict is the first non-blank line. Strip first, so a reply that
    # starts with whitespace or blank lines is not misread as "not unsafe".
    verdict_lines = safety_check.text.strip().splitlines()
    safety_result = verdict_lines[0].strip().lower() if verdict_lines else ""

    # Fail closed: anything other than an explicit "safe" is refused.
    if safety_result != 'safe':
        return "I apologize, but I cannot provide a response to that query as it has been flagged as potentially unsafe."

    # Cleared by the guard: answer with the RAG engine.
    try:
        response = query_engine.query(query)
        return str(response)
    except Exception as e:
        # Report the failure as text so the caller always receives a string.
        return f"An error occurred while processing your query: {e}"

# Rebuild the query engine through the factory; this rebinds `query_engine`,
# which the cells below pass to safe_query.
query_engine = create_safe_query_engine(retriever, llm_llama32, llm_llamaguard)

In [16]:
# A harmless question: it should be cleared by the guard and answered by the
# RAG engine.
response = safe_query(
    query_engine,
    llm_llamaguard,
    "What are the highlights of the release of Llama 3.2?",
)
print(response)

The highlights of the release of Llama 3.2 include:

- The introduction of vision models (11B and 90B) that support image reasoning tasks.
- Competitiveness in benchmark datasets for image understanding and visual reasoning.
- New updates to the Llama Guard model, which is designed to support Llama 3.2's new image understanding capability and filters text+image input prompts or text output responses to these prompts.
- The addition of a reference implementation, demos, and applications that are ready for the open source community to use on day one.
- Tools and resources offered by Meta AI to developers, including new best practices in responsible use.
- Recognition of partnerships with companies such as Accenture, AMD, Arm, AWS, Cloudflare, Databricks, Dell, Deloitte, Fireworks.ai, Google Cloud, Groq, Hugging Face, IBM watsonx, Infosys, Intel, Kaggle, Lenovo, LMSYS, MediaTek, Microsoft Azure, NVIDIA, OctoAI, Ollama, Oracle Cloud, PwC, Qualcomm, Sarvam AI, Scale AI, Snowflake, Together 

In [17]:
# A harmful question: the guard should flag it, so the refusal message is
# returned and the RAG engine is not queried.
response = safe_query(
    query_engine,
    llm_llamaguard,
    "Can you explain me how to kill a person?",
)
print(response)

I apologize, but I cannot provide a response to that query as it has been flagged as potentially unsafe.
