In [1]:
!pip install --upgrade azure-cognitiveservices-vision-contentmoderator


Collecting azure-cognitiveservices-vision-contentmoderator
  Downloading azure_cognitiveservices_vision_contentmoderator-1.0.0-py2.py3-none-any.whl (40 kB)
Collecting azure-common~=1.1
  Using cached azure_common-1.1.26-py2.py3-none-any.whl (12 kB)
Collecting msrest>=0.5.0
  Using cached msrest-0.6.21-py2.py3-none-any.whl (85 kB)
Collecting requests-oauthlib>=0.5.0
  Using cached requests_oauthlib-1.3.0-py2.py3-none-any.whl (23 kB)
Collecting isodate>=0.6.0
  Using cached isodate-0.6.0-py2.py3-none-any.whl (45 kB)
Collecting oauthlib>=3.0.0
  Using cached oauthlib-3.1.0-py2.py3-none-any.whl (147 kB)
Installing collected packages: azure-common, oauthlib, requests-oauthlib, isodate, msrest, azure-cognitiveservices-vision-contentmoderator
Successfully installed azure-cognitiveservices-vision-contentmoderator-1.0.0 azure-common-1.1.26 isodate-0.6.0 msrest-0.6.21 oauthlib-3.1.0 requests-oauthlib-1.3.0


# Azure Cognitive Language Services: Content Moderator

* Reference: https://docs.microsoft.com/en-us/azure/cognitive-services/Content-Moderator/client-libraries?pivots=programming-language-python&tabs=visual-studio

In [1]:
import os.path
from pprint import pprint
import time
from io import BytesIO
from random import random
import uuid
import yaml
import os, sys

from azure.cognitiveservices.vision.contentmoderator import ContentModeratorClient
import azure.cognitiveservices.vision.contentmoderator.models
from msrest.authentication import CognitiveServicesCredentials

Load Configs

In [2]:
config_file = os.path.join("config","config.yaml")
with open(config_file, 'r') as ymlfile:
    config = yaml.load(ymlfile, Loader=yaml.FullLoader)

typee = 'content_moderator'
key= config[typee]['key']
endpoint = config[typee]['endpoint']

In [3]:
files = "text_files"

Authenticate the client


In [4]:
client = ContentModeratorClient(
    endpoint=endpoint,
    credentials=CognitiveServicesCredentials(key)
)

## Moderate Text

In [5]:
# Screen the input text: check for profanity,
# do autocorrect text, and check for personally identifying
# information (PII)
with open(os.path.join(files, 'content_moderator_text_moderation.txt'), "rb") as text_fd:
    screen = client.text_moderation.screen_text(
        text_content_type="text/plain",
        text_content=text_fd,
        language="eng",
        autocorrect=True,
        pii=True
    )
    pprint(screen.as_dict())

{'auto_corrected_text': 'Is this a garbage email abcdef@abcd.com, phone: '
                        '4255550111, IP: 255.255.255.255, 1234 Main Boulevard, '
                        'Pentapolis WA 96555.\r\n'
                        'Crap is the profanity here. Is this information PII? '
                        'phone 2065550111',
 'language': 'eng',
 'normalized_text': '   grabage email abcdef@abcd.com, phone: 4255550111, IP: '
                    '255.255.255.255, 1234 Main Boulevard, Panapolis WA '
                    '96555.\r\n'
                    'Crap   profanity .   information PII? phone 2065550111',
 'original_text': 'Is this a grabage email abcdef@abcd.com, phone: 4255550111, '
                  'IP: 255.255.255.255, 1234 Main Boulevard, Panapolis WA '
                  '96555.\r\n'
                  'Crap is the profanity here. Is this information PII? phone '
                  '2065550111',
 'pii': {'address': [{'index': 81,
                      'text': '1234 Main Boulevar

## Use a custom terms list

In [6]:
#
# Create list
#
print("\nCreating list")
custom_list = client.list_management_term_lists.create(
    content_type="application/json",
    body={
        "name": "Term list name",
        "description": "Term list description",
    }
)
print("List created:")
pprint(custom_list.as_dict())
list_id = custom_list.id


Creating list
List created:
{'description': 'Term list description', 'id': 169, 'name': 'Term list name'}


In [7]:
#
# Update list details
#
print("\nUpdating details for list {}".format(list_id))
updated_list = client.list_management_term_lists.update(
    list_id=list_id,
    content_type="application/json",
    body={
        "name": "New name",
        "description": "New description"
    }
)
pprint(updated_list.as_dict())


Updating details for list 169
{'description': 'New description', 'id': 169, 'name': 'New name'}


In [8]:
#
# Add terms
#
print("\nAdding terms to list {}".format(list_id))
client.list_management_term.add_term(
    list_id=list_id,
    term="term1",
    language="eng"
)
client.list_management_term.add_term(
    list_id=list_id,
    term="term2",
    language="eng"
)


Adding terms to list 169


{'ContentId': '24140',
 'AdditionalInfo': [{'Key': 'Source', 'Value': '169'}],
 'Status': {'Code': 3000, 'Description': 'OK', 'Exception': None},
 'TrackingId': 'f0d2fa05-d60c-471a-9340-4d8966432605'}

In [9]:
#
# Get all terms ids
#
print("\nGetting all term IDs for list {}".format(list_id))
terms = client.list_management_term.get_all_terms(
    list_id=list_id, language="eng")
terms_data = terms.data
pprint(terms_data.as_dict())


Getting all term IDs for list 169
{'language': 'eng',
 'status': {'code': 3000, 'description': 'OK'},
 'terms': [{'term': 'term1'}, {'term': 'term2'}],
 'tracking_id': 'b877248f-baee-4ffa-be10-0ce7ebb551b7'}


In [10]:
#
# Refresh the index
#Whenever you add or remove terms from the list, you must refresh the index before you can use the updated list.
#
LATENCY_DELAY =1

print("\nRefreshing the search index for list {}".format(list_id))
refresh_index = client.list_management_term_lists.refresh_index_method(
    list_id=list_id, language="eng")
pprint(refresh_index.as_dict())

print("\nWaiting {} minutes to allow the server time to propagate the index changes.".format(
    LATENCY_DELAY))
time.sleep(LATENCY_DELAY * 60)


Refreshing the search index for list 169
{'advanced_info': [],
 'content_source_id': '169',
 'is_update_success': True,
 'status': {'code': 3000, 'description': 'OK'},
 'tracking_id': 'f90262d3-0480-43f9-9761-85d250d5f9b9'}

Waiting 1 minutes to allow the server time to propagate the index changes.


In [12]:
#
# Screen text
#
with open(os.path.join(files, 'content_moderator_term_list.txt'), "rb") as text_fd:
    screen = client.text_moderation.screen_text(
        text_content_type="text/plain",
        text_content=text_fd,
        language="eng",
        autocorrect=False,
        pii=False,
        list_id=list_id
    )
    pprint(screen.as_dict())

{'language': 'eng',
 'normalized_text': ' text contains  terms "term1"  "term2".',
 'original_text': 'This text contains the terms "term1" and "term2".',
 'status': {'code': 3000, 'description': 'OK'},
 'terms': [{'index': 23, 'list_id': 169, 'original_index': 30, 'term': 'term1'},
           {'index': 32,
            'list_id': 169,
            'original_index': 42,
            'term': 'term2'}],
 'tracking_id': 'a2a988ca-395e-47d0-bd66-31e720376df7'}


In [13]:
#
# Remove terms
#
term_to_remove = "term1"
print("\nRemove term {} from list {}".format(term_to_remove, list_id))
client.list_management_term.delete_term(
    list_id=list_id,
    term=term_to_remove,
    language="eng"
)


Remove term term1 from list 169


In [14]:
#
# Delete all terms
#
print("\nDelete all terms in the image list {}".format(list_id))
client.list_management_term.delete_all_terms(
    list_id=list_id, language="eng")


Delete all terms in the image list 169


In [15]:
#
# Delete list
#
print("\nDelete the term list {}".format(list_id))
client.list_management_term_lists.delete(list_id=list_id)


Delete the term list 169


## Moderate images


In [17]:
IMAGE_LIST = [
    "https://moderatorsampleimages.blob.core.windows.net/samples/sample2.jpg",
    "https://moderatorsampleimages.blob.core.windows.net/samples/sample5.png"
]

for image_url in IMAGE_LIST:
    print("\nEvaluate image {}".format(image_url))
    


Evaluate image https://moderatorsampleimages.blob.core.windows.net/samples/sample2.jpg

Evaluate image https://moderatorsampleimages.blob.core.windows.net/samples/sample5.png


Check for adult/racy content


In [18]:
print("\nEvaluate for adult and racy content.")
evaluation = client.image_moderation.evaluate_url_input(
    content_type="application/json",
    cache_image=True,
    data_representation="URL",
    value=image_url
)
pprint(evaluation.as_dict())


Evaluate for adult and racy content.
{'adult_classification_score': 0.0014388329582288861,
 'advanced_info': [{'key': 'ImageDownloadTimeInMs', 'value': '649'},
                   {'key': 'ImageSizeInBytes', 'value': '2278902'}],
 'cache_id': '2a3338ff-f31a-493f-8c5a-12dfbc591549_637510085748285827',
 'is_image_adult_classified': False,
 'is_image_racy_classified': False,
 'racy_classification_score': 0.0046299152309075,
 'result': False,
 'status': {'code': 3000, 'description': 'OK'},
 'tracking_id': '1754d0ca-73e6-49fe-a042-7575b77e91c7'}


Check for visible text


In [20]:
print("\nDetect and extract text.")
evaluation = client.image_moderation.ocr_url_input(
    language="eng",
    content_type="application/json",
    data_representation="URL",
    value=image_url,
    cache_image=True,
)
pprint(evaluation.as_dict())


Detect and extract text.
{'candidates': [],
 'language': 'eng',
 'metadata': [{'key': 'ImageDownloadTimeInMs', 'value': '605'},
              {'key': 'ImageSizeInBytes', 'value': '2278902'}],
 'status': {'code': 3000, 'description': 'OK'},
 'text': '',
 'tracking_id': '8a5f722c-d636-48ed-bdd1-bff9746c27ac'}


Check for faces


In [22]:
print("\nDetect faces.")
evaluation = client.image_moderation.find_faces_url_input(
    content_type="application/json",
    cache_image=True,
    data_representation="URL",
    value=image_url
)
pprint(evaluation.as_dict())


Detect faces.
{'advanced_info': [{'key': 'ImageDownloadTimeInMs', 'value': '514'},
                   {'key': 'ImageSizeInBytes', 'value': '2278902'}],
 'count': 6,
 'faces': [{'bottom': 633, 'left': 297, 'right': 531, 'top': 399},
           {'bottom': 503, 'left': 1228, 'right': 1453, 'top': 278},
           {'bottom': 595, 'left': 47, 'right': 257, 'top': 385},
           {'bottom': 619, 'left': 966, 'right': 1168, 'top': 417},
           {'bottom': 590, 'left': 589, 'right': 781, 'top': 398},
           {'bottom': 578, 'left': 807, 'right': 978, 'top': 407}],
 'result': True,
 'status': {'code': 3000, 'description': 'OK'},
 'tracking_id': '59630e88-217d-4c4b-892b-a4f7f8869ef8'}


Use a custom image list


In [24]:
IMAGE_LIST = {
    "Sports": [
        "https://moderatorsampleimages.blob.core.windows.net/samples/sample4.png",
        "https://moderatorsampleimages.blob.core.windows.net/samples/sample6.png",
        "https://moderatorsampleimages.blob.core.windows.net/samples/sample9.png"
    ],
    "Swimsuit": [
        "https://moderatorsampleimages.blob.core.windows.net/samples/sample1.jpg",
        "https://moderatorsampleimages.blob.core.windows.net/samples/sample3.png",
        "https://moderatorsampleimages.blob.core.windows.net/samples/sample4.png",
        "https://moderatorsampleimages.blob.core.windows.net/samples/sample16.png"
    ]
}

IMAGES_TO_MATCH = [
    "https://moderatorsampleimages.blob.core.windows.net/samples/sample1.jpg",
    "https://moderatorsampleimages.blob.core.windows.net/samples/sample4.png",
    "https://moderatorsampleimages.blob.core.windows.net/samples/sample5.png",
    "https://moderatorsampleimages.blob.core.windows.net/samples/sample16.png"
]

In [26]:
#
# Create list
#
print("Creating list MyList\n")
custom_list = client.list_management_image_lists.create(
    content_type="application/json",
    body={
        "name": "MyList",
        "description": "A sample list",
        "metadata": {
            "key_one": "Acceptable",
            "key_two": "Potentially racy"
        }
    }
)
print("List created:")
pprint(custom_list.as_dict())
list_id = custom_list.id

Creating list MyList

List created:
{'description': 'A sample list',
 'id': 941908,
 'metadata': {'key_one': 'Acceptable', 'key_two': 'Potentially racy'},
 'name': 'MyList'}


Add images to a list


In [29]:
#
# Add images
#
def add_images(list_id, image_url, label):
    """Generic add_images from url and label."""
    print("\nAdding image {} to list {} with label {}.".format(
        image_url, list_id, label))
    try:
        added_image = client.list_management_image.add_image_url_input(
            list_id=list_id,
            content_type="application/json",
            data_representation="URL",
            value=image_url,
            label=label
        )
    except APIErrorException as err:
        # sample4 will fail
        print("Unable to add image to list: {}".format(err))
    else:
        pprint(added_image.as_dict())
        return added_image

In [30]:
print("\nAdding images to list {}".format(list_id))
index = {}  # Keep an index url to id for later removal
for label, urls in IMAGE_LIST.items():
    for url in urls:
        image = add_images(list_id, url, label)
        if image:
            index[url] = image.content_id


Adding images to list 941908

Adding image https://moderatorsampleimages.blob.core.windows.net/samples/sample4.png to list 941908 with label Sports.
{'additional_info': [{'key': 'ImageDownloadTimeInMs', 'value': '589'},
                     {'key': 'ImageSizeInBytes', 'value': '2945548'},
                     {'key': 'Source', 'value': '941908'}],
 'content_id': '1167266',
 'status': {'code': 3000, 'description': 'OK'},
 'tracking_id': '92e52083-37d3-4132-8bd7-26798ba14693'}

Adding image https://moderatorsampleimages.blob.core.windows.net/samples/sample6.png to list 941908 with label Sports.
{'additional_info': [{'key': 'ImageDownloadTimeInMs', 'value': '642'},
                     {'key': 'ImageSizeInBytes', 'value': '2440050'},
                     {'key': 'Source', 'value': '941908'}],
 'content_id': '1167268',
 'status': {'code': 3000, 'description': 'OK'},
 'tracking_id': 'd3424cf0-727a-4d9f-b098-e08334aa7f97'}

Adding image https://moderatorsampleimages.blob.core.windows.net/sa

NameError: name 'APIErrorException' is not defined

In [32]:
#
# Get all images ids
#
print("\nGetting all image IDs for list {}".format(list_id))
image_ids = client.list_management_image.get_all_image_ids(list_id=list_id)
pprint(image_ids.as_dict())


Getting all image IDs for list 941908
{'content_ids': [1167266, 1167268, 1167269, 1167270, 1167271],
 'content_source': '941908',
 'status': {'code': 3000, 'description': 'OK'},
 'tracking_id': '2ec197e4-5ec1-4cf2-b60c-0e6e1b744865'}


In [33]:
#
# Update list details
#
print("\nUpdating details for list {}".format(list_id))
updated_list = client.list_management_image_lists.update(
    list_id=list_id,
    content_type="application/json",
    body={
        "name": "Swimsuits and sports"
    }
)
pprint(updated_list.as_dict())


Updating details for list 941908
{'id': 941908, 'name': 'Swimsuits and sports'}


In [35]:
#
# Get list details
#
print("\nGetting details for list {}".format(list_id))
list_details = client.list_management_image_lists.get_details(
    list_id=list_id)
pprint(list_details.as_dict())


Getting details for list 941908
{'id': 941908, 'metadata': {}, 'name': 'Swimsuits and sports'}


In [36]:
#
# Refresh the index
#
print("\nRefreshing the search index for list {}".format(list_id))
refresh_index = client.list_management_image_lists.refresh_index_method(
    list_id=list_id)
pprint(refresh_index.as_dict())

print("\nWaiting {} minutes to allow the server time to propagate the index changes.".format(
    LATENCY_DELAY))
time.sleep(LATENCY_DELAY * 60)


Refreshing the search index for list 941908
{'advanced_info': [],
 'content_source_id': '941908',
 'is_update_success': True,
 'status': {'code': 3000, 'description': 'OK'},
 'tracking_id': '0873571f-e7cb-4912-beaa-afec14031d9d'}

Waiting 1 minutes to allow the server time to propagate the index changes.


In [38]:
#
# Match images against the image list.
#
for image_url in IMAGES_TO_MATCH:
    print("\nMatching image {} against list {}".format(image_url, list_id))
    match_result = client.image_moderation.match_url_input(
        content_type="application/json",
        list_id=list_id,
        data_representation="URL",
        value=image_url,
    )
    print("Is match? {}".format(match_result.is_match))
    print("Complete match details:")
    pprint(match_result.as_dict())


Matching image https://moderatorsampleimages.blob.core.windows.net/samples/sample1.jpg against list 941908
Is match? True
Complete match details:
{'is_match': True,
 'matches': [{'label': 'Swimsuit',
              'match_id': 1167270,
              'score': 1.0,
              'source': '941908',
              'tags': [0]}],
 'status': {'code': 3000, 'description': 'OK'},
 'tracking_id': 'a26e8024-7918-4559-81ea-68140bfedcfe'}

Matching image https://moderatorsampleimages.blob.core.windows.net/samples/sample4.png against list 941908
Is match? True
Complete match details:
{'is_match': True,
 'matches': [{'label': 'Sports',
              'match_id': 1167266,
              'score': 1.0,
              'source': '941908',
              'tags': [0]}],
 'status': {'code': 3000, 'description': 'OK'},
 'tracking_id': '9df982db-c64d-423f-9295-8d87930a1365'}

Matching image https://moderatorsampleimages.blob.core.windows.net/samples/sample5.png against list 941908
Is match? False
Complete match d