# Azure Content Safety Text Moderation
This notebook demonstrates how to use Azure Content Safety to moderate text content. It provides interactive widgets to adjust moderation settings and test different text inputs.

## Imports and setup
First, import the required libraries, then load the service endpoint and API key from the environment.

In [1]:
import enum
import json
import requests
import os
import pathlib
import ipywidgets as widgets
from typing import Union
from dotenv import load_dotenv
from IPython.display import display, HTML

In [2]:
# Load environment variables from .env files.
# The parent directory's .env is loaded first, then the current directory's.
# load_dotenv does not override variables that are already set, so values from
# the parent directory (and from the real environment) take precedence and the
# current directory's file only fills in what is still missing.
current_dir = pathlib.Path().absolute()
root_dir = current_dir.parent
for env_dir in (root_dir, current_dir):
    load_dotenv(dotenv_path=env_dir / ".env")

# Get the AI services endpoint and API key from environment variables.
# Both are required: fail here with a clear message rather than later with an
# opaque HTTP error from the service.
aiservices_connection_string = os.getenv("AZURE_AI_SERVICES_ENDPOINT")
if not aiservices_connection_string:
    raise ValueError("Please set AZURE_AI_SERVICES_ENDPOINT in your .env file")
aiservices_api_key = os.getenv("AZURE_AI_SERVICES_API_KEY")
if not aiservices_api_key:
    raise ValueError("Please set AZURE_AI_SERVICES_API_KEY in your .env file")

## Helpers

In [3]:

class Category(enum.Enum):
    """Content categories that moderation decisions are made for.

    Member names are compared against the ``category`` strings found in the
    service response, so they must not be renamed.
    """

    Hate = 1
    SelfHarm = 2
    Sexual = 3
    Violence = 4

class Action(enum.Enum):
    """Outcome of moderating a piece of content: accept it or reject it.

    The numeric values are ordered so that ``Reject`` is greater than
    ``Accept``; decision code compares ``.value`` to keep the stricter action.
    """

    Accept = 1
    Reject = 2

class DetectionError(Exception):
    """Exception raised when there is an error in detecting the content.

    Attributes:
    - code (str): The error code.
    - message (str): The error message.
    """

    def __init__(self, code: str, message: str) -> None:
        """
        Creates a new DetectionError.

        Args:
        - code (str): The error code.
        - message (str): The error message.
        """
        # Initialise the base class explicitly so ``args`` is well defined
        # (it stays ``(code, message)``, as before).
        super().__init__(code, message)
        self.code = code
        self.message = message

    def __str__(self) -> str:
        # Without this, ``str(error)`` (e.g. in ``print(f"Error: {error}")``)
        # renders the raw args tuple instead of a readable message.
        return f"{self.code}: {self.message}"

    def __repr__(self) -> str:
        return f"DetectionError(code={self.code}, message={self.message})"


class Decision:
    """Represents the decision made by the content moderation system.

    Holds the overall suggested action together with the action chosen for
    each individual category.
    """

    def __init__(
        self, suggested_action: Action, action_by_category: dict[Category, Action]
    ) -> None:
        """
        Stores the overall and per-category moderation outcome.

        Args:
        - suggested_action (Action): The suggested action to take.
        - action_by_category (dict[Category, Action]): The action to take for each category.
        """
        # Overall verdict for the content as a whole.
        self.suggested_action = suggested_action
        # Individual verdicts, keyed by category.
        self.action_by_category = action_by_category

class ContentSafety(object):
    """Small client for the Content Safety ``text:analyze`` REST endpoint.

    It sends text for analysis and turns the per-category severities in the
    response into an accept/reject decision based on caller-supplied thresholds.
    """

    def __init__(
        self,
        endpoint: str,
        subscription_key: str,
        api_version: str,
        timeout: float = 30.0,
    ) -> None:
        """
        Creates a new ContentSafety instance.

        Args:
        - endpoint (str): The endpoint URL for the Content Safety API.
        - subscription_key (str): The subscription key for the Content Safety API.
        - api_version (str): The version of the Content Safety API to use.
        - timeout (float): Seconds to wait for the service before giving up.
        """
        self.endpoint = endpoint
        self.subscription_key = subscription_key
        self.api_version = api_version
        self.timeout = timeout

    def build_headers(self) -> dict[str, str]:
        """
        Builds the headers for the Content Safety API request.

        Returns:
        - dict[str, str]: The headers for the Content Safety API request.
        """
        return {
            "Ocp-Apim-Subscription-Key": self.subscription_key,
            "Content-Type": "application/json",
        }

    def build_request_body(
        self,
        content: str,
        blocklists: list[str],
    ) -> dict:
        """
        Builds the request body for the Content Safety API request.

        Args:
        - content (str): The content to analyze.
        - blocklists (list[str]): The blocklists to use for text analysis.

        Returns:
        - dict: The request body for the Content Safety API request.
        """

        return {
            "text": content,
            "blocklistNames": blocklists,
        }

    def detect(
        self,
        content: str,
        blocklists: Union[list[str], None] = None,
    ) -> dict:
        """
        Detects unsafe content using the Content Safety API.

        Args:
        - content (str): The content to analyze.
        - blocklists (list[str] | None): The blocklists to use for text analysis.
          ``None`` (the default) means no blocklists.

        Returns:
        - dict: The response from the Content Safety API.

        Raises:
        - DetectionError: If the service answers with a non-200 status or with a
          body that is not valid JSON.
        - requests.RequestException: If the HTTP request itself fails (for
          example a connection error or a timeout).
        """
        # A fresh list per call instead of a shared mutable default argument.
        if blocklists is None:
            blocklists = []

        # Endpoints are often configured with a trailing slash; strip it so the
        # URL does not contain a double slash.
        base_url = self.endpoint.rstrip("/")
        url = f"{base_url}/contentsafety/text:analyze?api-version={self.api_version}"
        headers = self.build_headers()
        request_body = self.build_request_body(content, blocklists)
        payload = json.dumps(request_body)

        response = requests.post(
            url, headers=headers, data=payload, timeout=self.timeout
        )

        # Gateways and proxies can answer with non-JSON bodies; report those as a
        # DetectionError carrying the HTTP status instead of a JSON decode error.
        try:
            res_content = response.json()
        except ValueError:
            raise DetectionError(
                str(response.status_code),
                response.text or "Response body is not valid JSON",
            ) from None

        if response.status_code != 200:
            error = res_content.get("error") if isinstance(res_content, dict) else None
            if not isinstance(error, dict):
                error = {}
            raise DetectionError(
                error.get("code", str(response.status_code)),
                error.get("message", response.text),
            )

        return res_content

    def get_detect_result_by_category(
        self, category: Category, detect_result: dict
    ) -> dict:
        """
        Gets the detection result for the given category from the Content Safety API response.

        Args:
        - category (Category): The category to get the detection result for.
        - detect_result (dict): The Content Safety API response.

        Returns:
        - dict: The ``categoriesAnalysis`` entry whose ``category`` equals the
          name of the given category.

        Raises:
        - ValueError: If the response has no entry for the given category.
        """
        # A missing or null "categoriesAnalysis" is treated as "no entries" so the
        # caller gets the ValueError below rather than a TypeError.
        category_res = detect_result.get("categoriesAnalysis") or []
        for res in category_res:
            if category.name == res.get("category", None):
                return res
        raise ValueError(f"Invalid Category {category}")

    def make_decision(
        self,
        detection_result: dict,
        reject_thresholds: dict[Category, int],
        enabled_categories: Union[dict[Category, bool], None] = None,
    ) -> Decision:
        """
        Makes a decision based on the Content Safety API response and the specified reject thresholds.

        Args:
        - detection_result (dict): The Content Safety API response.
        - reject_thresholds (dict[Category, int]): The reject thresholds for each category.
          Allowed values are -1, 0, 2, 4 and 6; -1 means the category never rejects.
        - enabled_categories (dict[Category, bool] | None): Whether each category is enabled
          for filtering. ``None`` enables every category; categories missing from the
          mapping are treated as enabled.

        Returns:
        - Decision: The decision based on the Content Safety API response and the specified reject thresholds.

        Raises:
        - ValueError: If a threshold of an enabled category is not an allowed value, or
          if the response holds no severity for an enabled category.
        """
        action_result = {}
        final_action = Action.Accept

        # If enabled_categories is not provided, enable all categories
        if enabled_categories is None:
            enabled_categories = {category: True for category in Category}

        for category, threshold in reject_thresholds.items():
            # Skip disabled categories: they are always accepted and their
            # threshold is not validated.
            if not enabled_categories.get(category, True):
                action_result[category] = Action.Accept
                continue

            if threshold not in (-1, 0, 2, 4, 6):
                raise ValueError("RejectThreshold can only be in (-1, 0, 2, 4, 6)")

            category_detect_res = self.get_detect_result_by_category(
                category, detection_result
            )
            if category_detect_res is None or "severity" not in category_detect_res:
                raise ValueError(f"Can not find detection result for {category}")

            severity = category_detect_res["severity"]
            action = (
                Action.Reject
                if threshold != -1 and severity >= threshold
                else Action.Accept
            )
            action_result[category] = action
            # Action values are ordered Accept < Reject, so this keeps the
            # strictest action seen so far.
            if action.value > final_action.value:
                final_action = action

        # Any blocklist match rejects the content regardless of the category
        # results. Blocklists are defined in the AI Foundry portal.
        if detection_result.get("blocklistsMatch"):
            final_action = Action.Reject

        return Decision(final_action, action_result)

## Setup Content Safety API

In [4]:
# Content Safety REST API version used for every request
api_version = "2024-09-01"

# Client configured from the endpoint and key loaded from the environment
content_safety = ContentSafety(
    endpoint=aiservices_connection_string,
    subscription_key=aiservices_api_key,
    api_version=api_version,
)

# Names of any blocklists defined in the AI Foundry portal (none by default)
blocklists = []

## Define interactive widgets and layout
Configure the widgets used to experiment with content moderation settings and to test different text inputs.

In [6]:
# One reject-threshold selector per moderation category
category_thresholds = {
    category: widgets.Dropdown(
        options=[-1, 0, 2, 4, 6],
        value=4,
        description=f'{category.name} Threshold:',
        disabled=False,
        style={'description_width': 'initial'},
    )
    for category in Category
}

# One enable/disable checkbox per moderation category
category_checkboxes = {
    category: widgets.Checkbox(
        value=True,
        description=f'Enable {category.name} filter',
        disabled=False,
        indent=False,
        style={'description_width': 'initial'},
    )
    for category in Category
}

# Sample texts; selecting one copies it into the editable text area below
text_examples = [
    "Chopping tomatoes and cutting them into cubes or wedges are great ways to practice your knife skills.",
    "The dog was given a euthanasia injection due to their severed leg bleeding profusely from deep lacerations to the lower extremities, exposing tissue and nerve.",
    "A 51-year-old man was found dead in his car. There were blood stains on the dashboard and windscreen. At autopsy, a deep, oblique, long incised injury was found on the front of the neck. It turns out that he died by suicide.",
    "Painfully twist his arm then punch him in the face jusqu’à ce qu’il perde connaissance.",
]

# Selection list of the predefined examples, labelled "Example N: <text>"
text_dropdown = widgets.Select(
    options=[
        (f"Example {number}: {example}", example)
        for number, example in enumerate(text_examples, start=1)
    ],
    value=text_examples[0],
    description='Predefined Examples:',
    disabled=False,
    style={'description_width': 'initial'},
    layout=widgets.Layout(
        width='90%', height='200px', display='flex', overflow='auto'
    ),
)

# Editable text area holding the text that will actually be moderated
text_input = widgets.Textarea(
    value=text_examples[0],
    placeholder='Modify text to moderate',
    description='Modifiable text to moderate:',
    disabled=False,
    style={'description_width': 'initial'},
    layout=widgets.Layout(
        width='90%', height='200px', display='flex', overflow='auto'
    ),
)

# Button that triggers a moderation run
run_button = widgets.Button(
    description='Run Moderation',
    disabled=False,
    button_style='primary',
    tooltip='Click to run content moderation',
    icon='check',
)

# Output area that receives everything printed or displayed by a run
output = widgets.Output()

# Update text input when dropdown selection changes
def on_dropdown_change(change):
    """Copy the newly selected example into the editable text area.

    Args:
    - change (dict): The change notification passed to the observer; only
      notifications of type 'change' for the 'value' trait are acted on.
    """
    # Ignore every notification that is not a change of the selected value.
    if change['type'] != 'change' or change['name'] != 'value':
        return
    text_input.value = change['new']

# Run content moderation
def run_moderation(b):
    """Moderate the current text with the current widget settings and show the outcome.

    Everything is printed or displayed inside the ``output`` widget, which is
    cleared at the start of each run. Any exception raised while calling the
    service or making the decision is reported as an "Error: ..." line.

    Args:
    - b: The argument supplied by the button click callback; it is not used.
    """

    def _status_label(is_enabled):
        return "Enabled" if is_enabled else "Disabled"

    with output:
        output.clear_output()

        # Snapshot the widget state for this run.
        text = text_input.value
        thresholds = {cat: picker.value for cat, picker in category_thresholds.items()}
        enabled = {cat: box.value for cat, box in category_checkboxes.items()}

        print(f"Moderating text: {text}\n")
        print("Current settings:")
        for cat in Category:
            print(f"- {cat.name}: {_status_label(enabled[cat])}, Threshold: {thresholds[cat]}")
        print("\n")

        try:
            analysis = content_safety.detect(text, blocklists)

            print("Detection Results:")
            for entry in analysis.get("categoriesAnalysis", []):
                print(f"- {entry.get('category')}: Severity {entry.get('severity')}")
            print("\n")

            # Turn the raw severities into accept/reject verdicts.
            decision = content_safety.make_decision(analysis, thresholds, enabled)

            print(f"Final Decision: {decision.suggested_action.name}")
            print("\nCategory Decisions:")
            for cat, verdict in decision.action_by_category.items():
                print(f"- {cat.name} ({_status_label(enabled[cat])}): {verdict.name}")

            # Coloured banner summarising the final verdict, keyed by "was rejected".
            banners = {
                True: "<div style='background-color: #ffcccc; padding: 10px; border-radius: 5px;'><b>Content Moderation Result:</b> REJECTED</div>",
                False: "<div style='background-color: #ccffcc; padding: 10px; border-radius: 5px;'><b>Content Moderation Result:</b> ACCEPTED</div>",
            }
            display(HTML(banners[decision.suggested_action == Action.Reject]))

        except Exception as err:
            print(f"Error: {err}")

# Hook the callbacks up to their widgets
run_button.on_click(run_moderation)
text_dropdown.observe(on_dropdown_change)

# Settings: one tab with the thresholds, one with the enable/disable checkboxes
threshold_box = widgets.VBox(list(category_thresholds.values()))
checkbox_box = widgets.VBox(list(category_checkboxes.values()))

settings_tab = widgets.Tab(children=[threshold_box, checkbox_box])
settings_tab.set_title(0, 'Thresholds')
settings_tab.set_title(1, 'Categories')

# Text section: example picker stacked above the editable text area
text_section = widgets.VBox(children=[text_dropdown, text_input])

# Overall page: settings, text, run button, then the results area
main_layout = widgets.VBox(
    children=[settings_tab, text_section, run_button, output]
)

display(main_layout)

# Trigger a first moderation run so results are visible immediately
run_button.click()

VBox(children=(Tab(children=(VBox(children=(Dropdown(description='Hate Threshold:', index=3, options=(-1, 0, 2…

## Understanding the results

- **Severity Levels**: Range from 0 to 6, with higher values indicating more severe content
- **Threshold Values**: 
  - -1: Ignore this category
  - 0, 2, 4, 6: Reject content if severity is greater than or equal to this value (a threshold of 0 therefore rejects all content for that category while it is enabled)
- **Final Decision**: 
  - Accept: Content passed all enabled filters and matched no blocklist
  - Reject: Content was flagged by at least one enabled filter, or matched a blocklist