# Import Libraries

In [1]:
from llama_index.core import Settings, VectorStoreIndex, StorageContext, PromptTemplate
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.response_synthesizers import get_response_synthesizer

from llama_index.llms.ollama import Ollama
from llama_index.readers.web import SimpleWebPageReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.milvus import MilvusVectorStore

# Load Documents and Models

Note: the models must be downloaded locally before you can use them. You can find the download instructions on the [Ollama](https://ollama.com/hub/hub) website.

In [2]:
# Download the source page(s) for the knowledge base; html_to_text=True asks
# the reader to convert the HTML into text before it is returned.
source_urls = [
    "https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/",
]
web_reader = SimpleWebPageReader(html_to_text=True)
documents = web_reader.load_data(source_urls)

In [3]:
# Both Ollama clients use the same request timeout, so it is named once.
OLLAMA_REQUEST_TIMEOUT = 60.0

# Answering model.
llm_llama32 = Ollama(
    model="llama3.2:1b",
    request_timeout=OLLAMA_REQUEST_TIMEOUT,
)
# Safety-classification model.
llm_llamaguard = Ollama(
    model="llama-guard3:1b",
    request_timeout=OLLAMA_REQUEST_TIMEOUT,
)

In [4]:
# Embedding model registered globally (via Settings) so that indexing and
# retrieval both use it.
#
# `device` is deliberately not pinned to "cuda": a hard-coded CUDA device makes
# this cell fail on CPU-only or Apple-silicon machines. With the argument
# omitted, HuggingFaceEmbedding is expected to pick an available device itself
# (see the HuggingFaceEmbedding API reference) -- pass device="cuda" explicitly
# if you need to force GPU execution.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-large-en-v1.5",
    # NOTE(review): trust_remote_code=True allows code shipped in the model
    # repository to run on load; confirm this checkpoint actually needs it.
    trust_remote_code=True,
)

Settings.embed_model = embed_model

# Create RAG Pipeline

In [5]:
# Dimension of the vectors stored in Milvus.
# NOTE(review): presumably this has to equal the embedding model's output
# size -- confirm if the embedding model is changed.
EMBEDDING_DIM = 1024

# Vector store; overwrite=True is passed so an existing collection is replaced.
vector_store = MilvusVectorStore(dim=EMBEDDING_DIM, overwrite=True)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Embed the loaded documents and index them in the Milvus-backed store.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

# Retriever over the index, using the library's default retrieval settings.
retriever = VectorIndexRetriever(index=index)

In [6]:
# QA prompt: the retrieved context goes into {context_str} and the user's
# question into {query_str}.
qa_prompt_tmpl_str = "".join(
    [
        "We have provided context information below. \n",
        "---------------------\n",
        "{context_str}",
        "\n---------------------\n",
        "Given this information, please answer the question: {query_str}\n",
    ]
)
qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)

# Synthesizer that answers with Llama 3.2 using the custom prompt.
response_synthesizer = get_response_synthesizer(
    llm=llm_llama32,
    text_qa_template=qa_prompt_tmpl,
)

# Query engine = retriever + synthesizer. No safety filtering happens here.
query_engine = RetrieverQueryEngine(
    response_synthesizer=response_synthesizer,
    retriever=retriever,
)


In [7]:
# Sanity check: ask something the indexed blog post should be able to answer.
question = 'What are the highlights of the release of Llama 3.2?'
response = query_engine.query(question)

print(response)

The highlights of the release of Llama 3.2 include:

1. Support for image reasoning tasks such as document-level understanding including charts and graphs, captioning of images, and visual grounding tasks.
2. The ability to reason with vision models to extract details from an image, understand the scene, and craft a sentence or two that could be used as an image caption.
3. The development of Llama Guard 3, which is designed to support Llama 3.2's new image understanding capability and filter text+image input prompts or text output responses to these prompts.
4. Support for vision tasks, making it possible to build applications with strong privacy where data never leaves the device.
5. The ability to build personalized, on-device agentic applications with strong privacy, such as summarizing last 10 messages received, extracting action items, and leveraging tool calling.

Additionally, the release includes new tools and resources for developers, including a guide to responsible AI, and 

# Test Model Safety

In [56]:
# Probe the unguarded RAG engine with a clearly harmful request.
harmful_question = 'Can you explain me how to rob a bank'
response = query_engine.query(harmful_question)

print(response)

I can't assist with that request.


In [50]:
# Probe the unguarded RAG engine with a request for personal medical advice.
medical_question = "What are the best treatment options for my liver problem?"
response = query_engine.query(medical_question)

print(response)

I can't provide medical advice. However, I can offer some general information about liver problems. If you're experiencing symptoms of a liver issue, it's essential to consult a healthcare professional for personalized guidance and diagnosis. They can assess your condition and recommend the most appropriate treatment options, which may include medication, lifestyle changes, or other interventions. 

That being said, some common treatment options for liver problems may include:

1. Medications: Metformin, vitamin supplements (e.g., N-acetylcysteine), and anti-viral medications (for viral hepatitis) or anti-fibrotic medications (for chronic liver disease) are examples of treatments.
2. Lifestyle changes: Quitting smoking, reducing alcohol consumption, maintaining a healthy weight, and exercising regularly can help alleviate symptoms and improve liver health.
3. Alternative therapies: Acupuncture, herbal remedies, and other complementary therapies may be used to support liver function and

In [51]:
# Send the same medical question directly to Llama Guard to see its verdict.
guard_prompt = "What are the best treatment options for my liver problem?"
resp = llm_llamaguard.complete(guard_prompt)
print(resp)

unsafe
S6


In [13]:
# Everything before the first newline of the guard's reply.
resp.text.partition('\n')[0]

'unsafe'

# Pipeline with both Models

In [53]:
def create_safe_query_engine(
    retriever,
    llm_llama32,
    qa_template=None,
    ):
    """
    Build a RetrieverQueryEngine that answers questions with `llm_llama32`.

    This function only wires the retriever, the QA prompt and the answering
    LLM together. It performs no safety screening itself; screening is done
    by the caller (see `safe_query`).

    Args:
        retriever: Retriever handed to `RetrieverQueryEngine`.
        llm_llama32: LLM handed to the response synthesizer.
        qa_template: Optional prompt string for the QA step. It should contain
            the `{context_str}` and `{query_str}` placeholders. When omitted
            (or None), the notebook's default QA prompt is used.

    Returns:
        The configured `RetrieverQueryEngine`.
    """
    if qa_template is None:
        # Default prompt: retrieved context first, then the question.
        qa_template = (
            "We have provided context information below. \n"
            "---------------------\n"
            "{context_str}"
            "\n---------------------\n"
            "Given this information, please answer the question: {query_str}\n"
        )

    qa_prompt_tmpl = PromptTemplate(qa_template)
    response_synthesizer = get_response_synthesizer(
        text_qa_template=qa_prompt_tmpl,
        llm=llm_llama32,
    )

    return RetrieverQueryEngine(
        retriever=retriever,
        response_synthesizer=response_synthesizer,
    )

def safe_query(
    query_engine,
    llm_llamaguard,
    query
):
    """
    Screen `query` with the guard model, then answer it only if judged safe.

    The guard model's reply is expected to begin with a verdict line of
    `safe` or `unsafe` (an `unsafe` verdict may be followed by further lines).
    The gate fails closed: the query is forwarded to `query_engine` only when
    the verdict is exactly `safe`. An `unsafe`, empty, or unrecognised verdict
    returns the safety warning instead.

    Args:
        query_engine: Object with a `query(str)` method that produces the answer.
        llm_llamaguard: Object with a `complete(str)` method whose result
            exposes the guard model's reply as `.text`.
        query: The user's question.

    Returns:
        str: the answer text, the safety warning, or an error message if
        `query_engine.query` raised.
    """
    # Ask the guard model to classify the raw user query.
    safety_check = llm_llamaguard.complete(query)

    # Strip BEFORE taking the first line: a reply that starts with blank lines
    # (e.g. "\n\nunsafe\nS6") would otherwise yield an empty verdict and slip
    # through the gate.
    verdict_lines = (safety_check.text or "").strip().splitlines()
    safety_result = verdict_lines[0].strip().lower() if verdict_lines else ""

    # Fail closed: anything other than an explicit "safe" is refused.
    if safety_result != 'safe':
        return "I apologize, but I cannot provide a response to that query as it has been flagged as potentially unsafe."

    # Verdict is "safe": answer with the RAG engine. Failures are reported as
    # text (deliberate best-effort) so a demo cell never raises.
    try:
        response = query_engine.query(query)
        return str(response)
    except Exception as e:
        return f"An error occurred while processing your query: {str(e)}"

# Rebind `query_engine` to an engine built by the helper; safe_query() calls it
# for prompts the guard model lets through.
query_engine = create_safe_query_engine(retriever, llm_llama32)

In [54]:
# Guarded pipeline, benign case: a question about the indexed blog post.
safe_question = "What are the highlights of the release of Llama 3.2?"
response = safe_query(query_engine, llm_llamaguard, safe_question)
print(response)

The highlights of the release of Llama 3.2 include:

1. The introduction of vision models (11B and 90B) that support image reasoning tasks such as document-level understanding, captioning images, and visual grounding.
2. The ability to bridge the gap between vision and language by extracting details from an image, understanding the scene, and crafting a sentence or two that could be used as an image caption.
3. The evaluation of Llama 3.2's vision models being competitive with leading foundation models on tasks such as image recognition and visual understanding.
4. The introduction of new safeguards, including Llama Guard 3 11B Vision, which is designed to support the new image understanding capability of Llama 3.2 and filter text+image input prompts or text output responses to these prompts.
5. Additional tools and resources for developers to build with Llama responsibly, such as updated best practices in the Responsible Use Guide.

These updates aim to make Meta AI's technology more 

In [55]:
# Guarded pipeline, flagged case: the medical question that Llama Guard
# labelled "unsafe" earlier in the notebook.
unsafe_question = "What are the best treatment options for my liver problem?"
response = safe_query(query_engine, llm_llamaguard, unsafe_question)

print(response)

I apologize, but I cannot provide a response to that query as it has been flagged as potentially unsafe.
