# Basic Implementation of Llama Guard 3 1B


### Log in with your Hugging Face token to get access to the model

In [None]:
# Authenticate with the Hugging Face Hub; login() asks for an access token,
# which is needed to download the model used below.
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Hub identifier of the Llama Guard 3 1B checkpoint.
model_id = "meta-llama/Llama-Guard-3-1B"

# Load the weights in bfloat16; device_map="auto" leaves device placement to the loader.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
# Tokenizer from the same checkpoint; its chat template is used to build the guard prompt.
tokenizer = AutoTokenizer.from_pretrained(model_id)

config.json:   0%|          | 0.00/877 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.00G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/53.2k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]



safe<|eot_id|>


In [None]:
def is_unsafe(prompt):
    """Classify a single user prompt with the Llama Guard model.

    Uses the module-level `model` and `tokenizer`.

    Args:
        prompt: The user message to check, as plain text.

    Returns:
        True if the guard's generated verdict contains "unsafe", else False.
    """
    # The guard's chat template consumes message content as a list of typed
    # parts, so the prompt is wrapped in a single {"type": "text"} part.
    conversation = [
        {
            "role": "user",
            "content": [{"type": "text", "text": prompt}],
        }
    ]

    input_ids = tokenizer.apply_chat_template(
        conversation, return_tensors="pt"
    ).to(model.device)

    output = model.generate(
        input_ids,
        max_new_tokens=20,
        pad_token_id=0,
        # Greedy decoding: a safety verdict should not change between runs
        # because of sampling defaults inherited from the checkpoint config.
        do_sample=False,
    )

    # generate() returns prompt + continuation; keep only the new tokens.
    prompt_len = input_ids.shape[1]
    verdict = tokenizer.decode(output[0, prompt_len:]).strip()
    return "unsafe" in verdict

In [None]:
# Smoke test with a benign cooking question.
apple_pie_flagged = is_unsafe("What is the recipe for apple pie?")
print(apple_pie_flagged)

In [None]:
# Second smoke test with another benign question.
sleep_flagged = is_unsafe("what is sleep?")
print(sleep_flagged)

- First run: took about **4 mins**
- Second run: again about 4 mins 20 s





---






# Llama Guard 3 1B integration into LLM workflows

**Note:** Please refer to the evaluation section and test the safeguards before deployment to ensure they meet the safety requirements of your application.

- Reference implementations for input and output guardrails:
  [prompt_format_utils.py](https://github.com/meta-llama/llama-recipes/blob/main/src/llama_recipes/inference/prompt_format_utils.py)


Workflow:

- An LLM (e.g. an OpenAI, Llama, or Gemini model).

  
- Llama Guard to check both the user prompt and the LLM response.



---

In [None]:
# Same Hugging Face Hub login as at the top of the notebook
# (presumably repeated so this section can be run on its own).
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

## Let's start by defining the Llama Guard model

We are using **Llama-Guard-3-1B**. It comes in two versions: 1B and 1B pruned.




In [None]:
# Llama Guard model settings: edit these three values to change what is loaded.
model_id = "meta-llama/Llama-Guard-3-1B"
device = "auto"  # passed as device_map; "auto" lets the loader pick GPU or CPU
dtype = torch.bfloat16


# Both `dtype` and `device` are passed through, so changing the settings
# above actually takes effect.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=dtype,
    device_map=device,
)
# Tokenizer from the same checkpoint; its chat template builds the guard prompt.
tokenizer = AutoTokenizer.from_pretrained(model_id)

## LLM Model
Now let's define our LLM. We will use `Llama-3.2-3B-Instruct` from Hugging Face.

You can use any LLM of your choice to generate the response.

In [None]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
)
import torch

# Text-generation pipelines keyed by model name, so the weights are loaded
# once per kernel session instead of on every LLM_response() call.
_LLM_GENERATORS = {}


def _get_llm_generator(model_name):
    """Return the cached text-generation pipeline for `model_name`, loading it on first use."""
    if model_name not in _LLM_GENERATORS:
        llm_tokenizer = AutoTokenizer.from_pretrained(model_name)
        llm_model = AutoModelForCausalLM.from_pretrained(model_name)
        _LLM_GENERATORS[model_name] = pipeline(
            'text-generation', model=llm_model, tokenizer=llm_tokenizer
        )
    return _LLM_GENERATORS[model_name]


def LLM_response(prompt, context):
    """Generate the LLM's answer to `prompt`, given external `context`.

    Args:
        prompt: The user query.
        context: Third-party text placed in front of the query.

    Returns:
        The generated continuation as a string, without the input prompt
        echoed back.
    """
    generator = _get_llm_generator("meta-llama/Llama-3.2-3B-Instruct")

    LLM_prompt = f"""
    This is the context {context}
    and this is the user query {prompt}
    """

    response = generator(
        LLM_prompt,
        # max_new_tokens bounds only the answer; max_length would also count
        # the (potentially long) context and leave little room to generate.
        max_new_tokens=200,
        num_return_sequences=1,
        # Return only the continuation so the output guard and the caller
        # see the model's answer rather than prompt + answer.
        return_full_text=False,
    )

    return response[0]['generated_text']

## Inference from Guard Model

In [None]:
def _as_guard_messages(chat):
    """Return a copy of `chat` with plain-string contents wrapped as text parts."""
    messages = []
    for message in chat:
        content = message["content"]
        if isinstance(content, str):
            content = [{"type": "text", "text": content}]
        messages.append({**message, "content": content})
    return messages


def Llama_guard(chat):
    """Run the Llama Guard model on a conversation and return its raw verdict.

    Uses the module-level `model` and `tokenizer`.

    Args:
        chat: List of {"role", "content"} messages. `content` may be a plain
            string or a list of {"type": "text", "text": ...} parts; strings
            are wrapped into the list form, which is the format the
            Llama Guard 3 1B chat template is documented to consume.

    Returns:
        The decoded generated text, special tokens included. The verdict is
        also printed.
    """
    input_ids = tokenizer.apply_chat_template(
        _as_guard_messages(chat), return_tensors="pt"
    ).to(model.device)
    output = model.generate(
        input_ids=input_ids,
        max_new_tokens=20,
        pad_token_id=0,
        # Greedy decoding so repeated checks of the same chat agree.
        do_sample=False,
    )

    # generate() returns prompt + continuation; decode only the new tokens.
    prompt_len = input_ids.shape[-1]
    verdict = tokenizer.decode(output[0, prompt_len:], skip_special_tokens=False)
    print(verdict)

    return verdict


## Prompt and Response Safety Check

In [None]:
def _text_message(role, text):
    """Build one chat message whose content is a single text part."""
    return {"role": role, "content": [{"type": "text", "text": text}]}


def Safety_check(prompt,context):
    """Guard an LLM call on both the input and the output side.

    The prompt is screened by Llama_guard first. Only if it passes is
    LLM_response called, and the resulting prompt/response pair is then
    screened again. `context` is forwarded to the LLM only; it is not part
    of the chat sent to the guard.

    Args:
        prompt: The user query.
        context: External context passed to LLM_response.

    Returns:
        'unsafe prompt' if the guard flags the prompt, 'unsafe response' if
        it flags the LLM's answer, otherwise the string "safe", a newline,
        and the LLM response.
    """
    # Input guardrail. Messages use the list-of-parts content format that the
    # Llama Guard 3 1B chat template is documented to consume.
    if 'unsafe' in Llama_guard([_text_message("user", prompt)]):
        return 'unsafe prompt'

    response = LLM_response(prompt, context)

    # Output guardrail: the guard judges the assistant turn in the context of
    # the user turn that produced it.
    response_chat = [
        _text_message("user", prompt),
        _text_message("assistant", response),
    ]
    if 'unsafe' in Llama_guard(response_chat):
        return 'unsafe response'

    return "safe" + "\n" + response


## Third Party Data or External Context

In [None]:
# External (third-party) context that is passed to the LLM alongside the user
# query: a sample patient medical history used in the safety checks below.
context= """ Below is the medical history about the patient.
Medical History:
John Doe is a 52-year-old male with a history of hypertension and type 2 diabetes,
both diagnosed 10 years ago. He also has a history of mild chronic obstructive pulmonary disease (COPD) due to smoking,
though he quit smoking 5 years ago. In the last year, he developed sleep apnea and was prescribed a CPAP machine.
His diabetes has been moderately controlled with metformin, and he regularly monitors his blood sugar.
Recently, he experienced chest pain during physical activity, leading to a diagnosis of stable angina for which he is now on medication.
Regular follow-ups are scheduled for cardiovascular risk management.
"""

In [None]:
# A clearly harmful query against the patient context; the input guard should reject it.
Safety_check(
    prompt="How to harm the person given his previous medical history? ",
    context=context,
)




unsafe
S12<|eot_id|>


'unsafe prompt'

In [None]:
# A benign query against the same patient context.
Safety_check(prompt="What is sleep?", context=context)



unsafe
S3<|eot_id|>


'unsafe prompt'

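- "What is sleep apnea?" is flagged as unsafe.
- "What is sleep?" is also flagged as unsafe, with category S2 ("Non-Violent Crimes").
- Running it again gives S3 instead.
- False positives on benign prompts, with a category that changes between runs, suggest that the guard is not actually seeing the message text (the Llama Guard 3 1B chat template expects `content` as a list of `{"type": "text", "text": ...}` parts rather than a plain string) and that decoding is sampled rather than greedy.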

- It took around **4 mins**