# Azure Text Analytics

# Create a project and import required modules

In [3]:
import requests
# pprint is used to format the JSON response
from pprint import pprint
import os

# Set the subscription key and endpoint

In [4]:
# Read the credentials from the environment when available so a real key never
# has to be saved inside the notebook; the placeholders stay as fallback values.
subscription_key = os.environ.get('TEXT_ANALYTICS_SUBSCRIPTION_KEY', '<SUBSCRIPTION_KEY>')
# Strip any trailing slash: later cells append "/text/analytics/..." to this
# value, and a trailing slash here would yield a double slash in the URL.
endpoint = os.environ.get(
    'TEXT_ANALYTICS_ENDPOINT', 'https://<LOCATION>.cognitiveservices.azure.com'
).rstrip('/')

# Detect Language

In [5]:
# URL of the v2.1 "languages" operation under the configured endpoint.
language_api_url = f"{endpoint}/text/analytics/v2.1/languages"

In [6]:
# Sample sentences, one per document; each id is the 1-based position as a string.
sample_texts = [
    "This is a document written in English.",
    "Este es un document escrito en Español.",
    "这是一个用中文写的文件",
    "Bu döküman Türkçe'dir.",
]
documents = {
    "documents": [
        {"id": str(position), "text": sentence}
        for position, sentence in enumerate(sample_texts, start=1)
    ]
}

In [7]:
# The subscription key is sent in the Ocp-Apim-Subscription-Key header.
headers = {"Ocp-Apim-Subscription-Key": subscription_key}
# Without a timeout, requests waits indefinitely for the server; bound the wait
# (in seconds) so the cell cannot hang forever.
response = requests.post(language_api_url, headers=headers, json=documents, timeout=30)
# Fail here on an HTTP error status instead of letting a later lookup such as
# languages['documents'] fail with a KeyError on an error payload.
response.raise_for_status()
languages = response.json()
pprint(languages)

{'documents': [{'detectedLanguages': [{'iso6391Name': 'en',
                                       'name': 'English',
                                       'score': 1.0}],
                'id': '1'},
               {'detectedLanguages': [{'iso6391Name': 'es',
                                       'name': 'Spanish',
                                       'score': 1.0}],
                'id': '2'},
               {'detectedLanguages': [{'iso6391Name': 'zh_chs',
                                       'name': 'Chinese_Simplified',
                                       'score': 1.0}],
                'id': '3'},
               {'detectedLanguages': [{'iso6391Name': 'tr',
                                       'name': 'Turkish',
                                       'score': 1.0}],
                'id': '4'}],
 'errors': []}


In [8]:
# Score of the first detected language reported for the first document.
first_document = languages['documents'][0]
first_document['detectedLanguages'][0]['score']

1.0

# Sentiment Analysis

In [9]:
# URL of the v2.1 "sentiment" operation under the configured endpoint.
sentiment_url = f"{endpoint}/text/analytics/v2.1/sentiment"

In [13]:
# (language code, text) pairs; each id is the 1-based position as a string.
samples = [
    ("en", "I had a wonderful experience! The rooms were wonderful and the staff was helpful."),
    ("en", "I had a terrible time at the hotel. The staff was rude and the food was awful."),
    ("es", "Los caminos que llevan hasta Monte Rainier son espectaculares y hermosos."),
    ("tr", " Manyak güzel bir etkinlikti. "),
]
documents = {
    "documents": [
        {"id": str(number), "language": language, "text": text}
        for number, (language, text) in enumerate(samples, start=1)
    ]
}

In [14]:
# The subscription key is sent in the Ocp-Apim-Subscription-Key header.
headers = {"Ocp-Apim-Subscription-Key": subscription_key}
# Without a timeout, requests waits indefinitely for the server; bound the wait
# (in seconds) so the cell cannot hang forever.
response = requests.post(sentiment_url, headers=headers, json=documents, timeout=30)
# Raise on an HTTP error status rather than printing an error payload as if it
# were a sentiment result.
response.raise_for_status()
sentiments = response.json()
pprint(sentiments)

{'documents': [{'id': '1', 'score': 0.9708490371704102},
               {'id': '2', 'score': 0.0019068121910095215},
               {'id': '3', 'score': 0.7456425428390503},
               {'id': '4', 'score': 0.6754799485206604}],
 'errors': []}


# Key Phrases (no Turkish support)

In [12]:
# URL of the v2.1 "keyphrases" operation under the configured endpoint.
keyphrase_url = f"{endpoint}/text/analytics/v2.1/keyphrases"

In [13]:
# (language code, text) pairs; each id is the 1-based position as a string.
# The last entry is tagged "tr"; the recorded output for this section lists
# that document under 'errors' as an unsupported language.
samples = [
    ("en", "I had a wonderful experience! The rooms were wonderful and the staff was helpful."),
    ("en", "I had a terrible time at the hotel. The staff was rude and the food was awful."),
    ("es", "Los caminos que llevan hasta Monte Rainier son espectaculares y hermosos."),
    ("tr", "Arçelik şirketi olarak yeni bir sürü projeye başlıyor. Yapay zeka ile tüm projeler harika olacak. "),
]
documents = {
    "documents": [
        {"id": str(number), "language": language, "text": text}
        for number, (language, text) in enumerate(samples, start=1)
    ]
}

In [14]:
# The subscription key is sent in the Ocp-Apim-Subscription-Key header.
headers = {"Ocp-Apim-Subscription-Key": subscription_key}
# Without a timeout, requests waits indefinitely for the server; bound the wait
# (in seconds) so the cell cannot hang forever.
response = requests.post(keyphrase_url, headers=headers, json=documents, timeout=30)
# Raise on an HTTP error status. Per-document problems (such as an unsupported
# language) are reported inside the JSON body under 'errors', as the recorded
# output for this section shows.
response.raise_for_status()
key_phrases = response.json()
pprint(key_phrases)

{'documents': [{'id': '1',
                'keyPhrases': ['wonderful experience', 'staff', 'rooms']},
               {'id': '2',
                'keyPhrases': ['food', 'terrible time', 'hotel', 'staff']},
               {'id': '3', 'keyPhrases': ['Monte Rainier', 'caminos']}],
 'errors': [{'id': '4',
             'message': 'Supplied language is not supported. Pass in one of: '
                        'da,de,en,es,fi,fr,it,ja,ko,nl,no,pl,pt-BR,pt-PT,ru,sv. '
                        'See '
                        'https://docs.microsoft.com/en-us/azure/cognitive-services/text-analytics/overview '
                        'for the complete list of support languages.'}]}


# Entities

In [18]:
# URL of the v2.1 "entities" operation under the configured endpoint.
entities_url = f"{endpoint}/text/analytics/v2.1/entities"

In [17]:
# Two sample texts (one English, one Turkish); ids are 1-based positions.
# NOTE(review): the following cell assigns `documents` again before any request
# is sent, so this value appears to be unused - confirm whether it is still needed.
entity_texts = [
    "Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975, to develop and sell BASIC interpreters for the Altair 8800.",
    'Damla Alkan Kuveyttürk bankasında İstanbulda bir etkinlik gerçekleştirdi.',
]
documents = {
    "documents": [
        {"id": str(position), "text": passage}
        for position, passage in enumerate(entity_texts, start=1)
    ]
}

In [23]:
# Single-document request body; assigning it replaces any earlier `documents` value.
entity_text = 'Damla Alkan İstanbulda bir etkinlik gerçekleştirdi. Kuveyttürk bankasında bir etkinlik yaptık. Damla Alkan mail adresi damlaalkan@hotmail.com '
documents = {"documents": [{"id": "1", "text": entity_text}]}

In [24]:
# The subscription key is sent in the Ocp-Apim-Subscription-Key header.
headers = {"Ocp-Apim-Subscription-Key": subscription_key}
# Without a timeout, requests waits indefinitely for the server; bound the wait
# (in seconds) so the cell cannot hang forever.
response = requests.post(entities_url, headers=headers, json=documents, timeout=30)
# Raise on an HTTP error status rather than printing an error payload as if it
# were an entity-recognition result.
response.raise_for_status()
entities = response.json()
pprint(entities)

{'documents': [{'entities': [{'matches': [{'entityTypeScore': 0.8929335474967957,
                                           'length': 11,
                                           'offset': 0,
                                           'text': 'Damla Alkan'}],
                              'name': 'Damla Alkan',
                              'type': 'Person'},
                             {'matches': [{'entityTypeScore': 0.5409102439880371,
                                           'length': 21,
                                           'offset': 52,
                                           'text': 'Kuveyttürk bankasında'}],
                              'name': 'Kuveyttürk bankasında',
                              'type': 'Person'},
                             {'matches': [{'entityTypeScore': 0.6876459121704102,
                                           'length': 11,
                                           'offset': 95,
                                           'text': 'D