# Azure Cognitive Language Services: Text Analytics 
## Entity Recognition
### REST API
* Reference: https://docs.microsoft.com/en-us/learn/modules/recognize-entities-text-analytics-api/4-exercise-structure-submit-request?pivots=python

In [4]:
from pip._vendor import requests
# pprint is used to format the JSON response
from pprint import pprint
import os, yaml

In [19]:
config_file = os.path.join("config","config.yaml")
with open(config_file, 'r') as ymlfile:
    config = yaml.load(ymlfile, Loader=yaml.FullLoader)

typee = 'text_analytics'
key= config[typee]['key']
endpoint = config[typee]['endpoint'] + "/text/analytics/{}/".format(config[typee]['api-version'])


In [20]:

# variable to store a JSON formatted document that contains two entries in a JSON array.
documents = {"documents": [
    {"id": "1", "text": "Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975, to develop and sell BASIC interpreters for the Altair 8800."},
    {"id": "2", "text": "La sede principal de Microsoft se encuentra en la ciudad de Redmond, a 21 kilómetros de Seattle."}
]}

In [21]:
def get_data_from_text_analytics(endpoint_api):
    ''' Endpoints: entities, sentiment, languages, keyPhrases'''

    # Setup the header information for the REST request passing in the subscription key
    headers = {"Ocp-Apim-Subscription-Key": key}

    # Build the REST request by passing in the complete URL, header information for authentication, and the JSON document
    response = requests.post(endpoint + endpoint_api, headers=headers, json=documents)

    # Create a variable to store the results that are returned from the REST request
    entities = response.json()

    # Output the result using pprint.
    pprint(entities)

In [22]:
get_data_from_text_analytics("entities")

{'documents': [{'entities': [{'bingId': 'a093e9b9-90f5-a3d5-c4b8-5855e1b01f85',
                              'matches': [{'entityTypeScore': 1.0,
                                           'length': 9,
                                           'offset': 0,
                                           'text': 'Microsoft',
                                           'wikipediaScore': 0.5080587142195991}],
                              'name': 'Microsoft',
                              'type': 'Organization',
                              'wikipediaId': 'Microsoft',
                              'wikipediaLanguage': 'en',
                              'wikipediaUrl': 'https://en.wikipedia.org/wiki/Microsoft'},
                             {'bingId': '0d47c987-0042-5576-15e8-97af601614fa',
                              'matches': [{'entityTypeScore': 1.0,
                                           'length': 10,
                                           'offset': 25,
                       

## Key Phrases
### REST API
* Reference: https://docs.microsoft.com/en-us/learn/modules/extract-key-phrases-text-analytics-api/2-exercise-prepare-json-document-use-key-phrase-extraction

In [24]:
get_data_from_text_analytics('keyPhrases')

{'documents': [{'id': '1',
                'keyPhrases': ['Bill Gates',
                               'Paul Allen',
                               'BASIC interpreters',
                               'Altair',
                               'Microsoft']},
               {'id': '2',
                'keyPhrases': ['Microsoft se encuentra en',
                               'Redmond',
                               'ciudad',
                               'sede principal',
                               'kilómetros',
                               'Seattle']}],
 'errors': []}


### SDK
* Reference: https://docs.microsoft.com/en-us/learn/modules/extract-key-phrases-text-analytics-api/3-exercise-structure-submit-request?pivots=python

In [31]:
!pip install --upgrade azure-cognitiveservices-language-textanalytics

Collecting azure-cognitiveservices-language-textanalytics
  Downloading azure_cognitiveservices_language_textanalytics-0.2.0-py2.py3-none-any.whl (43 kB)
Installing collected packages: azure-cognitiveservices-language-textanalytics
Successfully installed azure-cognitiveservices-language-textanalytics-0.2.0


In [32]:
import os
from azure.cognitiveservices.language.textanalytics import TextAnalyticsClient
from msrest.authentication import CognitiveServicesCredentials

In [35]:
def authenticateClient():
    credentials = CognitiveServicesCredentials(key)
    text_analytics_client = TextAnalyticsClient(
        endpoint=endpoint, credentials=credentials)
    return text_analytics_client

In [40]:
def key_phrases(documents):

    client = authenticateClient()

    try:
        for document in documents:
            print(
                "Asking key-phrases on '{}' (id: {})".format(document['text'], document['id']))

        response = client.key_phrases(documents=documents)

        for document in response.documents:
            print("Document Id: ", document.id)
            print("\tKey Phrases:")
            for phrase in document.key_phrases:
                print("\t\t", phrase)

    except Exception as err:
        print("Encountered exception. {}".format(err))

In [38]:
documents = [
            {"id": "1", "language": "ja", "text": "猫は幸せ"},
            {"id": "2", "language": "de",
                "text": "Fahrt nach Stuttgart und dann zum Hotel zu Fu."},
            {"id": "3", "language": "en",
                "text": "My cat might need to see a veterinarian."},
            {"id": "4", "language": "es", "text": "A mi me encanta el fútbol!"}
        ]

In [42]:
key_phrases(documents)

Asking key-phrases on '猫は幸せ' (id: 1)
Asking key-phrases on 'Fahrt nach Stuttgart und dann zum Hotel zu Fu.' (id: 2)
Asking key-phrases on 'My cat might need to see a veterinarian.' (id: 3)
Asking key-phrases on 'A mi me encanta el fútbol!' (id: 4)
Encountered exception. Operation returned an invalid status code 'Resource Not Found'


## Sentiment


In [25]:
# 1 positive, 0 negative, 0.5 neutral
get_data_from_text_analytics('sentiment')

{'documents': [{'id': '1', 'score': 0.5}, {'id': '2', 'score': 0.5}],
 'errors': []}
