In [1]:
pip install azure-ai-textanalytics==5.1.0

Collecting azure-ai-textanalytics==5.1.0
  Downloading azure_ai_textanalytics-5.1.0-py2.py3-none-any.whl (153 kB)
[?25l[K     |██▏                             | 10 kB 16.8 MB/s eta 0:00:01[K     |████▎                           | 20 kB 19.4 MB/s eta 0:00:01[K     |██████▍                         | 30 kB 19.7 MB/s eta 0:00:01[K     |████████▋                       | 40 kB 18.1 MB/s eta 0:00:01[K     |██████████▊                     | 51 kB 20.2 MB/s eta 0:00:01[K     |████████████▉                   | 61 kB 22.1 MB/s eta 0:00:01[K     |███████████████                 | 71 kB 13.2 MB/s eta 0:00:01[K     |█████████████████▏              | 81 kB 14.2 MB/s eta 0:00:01[K     |███████████████████▎            | 92 kB 15.4 MB/s eta 0:00:01[K     |█████████████████████▍          | 102 kB 15.8 MB/s eta 0:00:01[K     |███████████████████████▌        | 112 kB 15.8 MB/s eta 0:00:01[K     |█████████████████████████▊      | 122 kB 15.8 MB/s eta 0:00:01[K     |███████████████

In [9]:
file1 = open('azure.txt', 'r')
lines = file1.readlines()
key = lines[0].rstrip("\n")
endpoint = lines[1]

In [10]:
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential

def authenticate_client():
    ta_credential = AzureKeyCredential(key)
    text_analytics_client = TextAnalyticsClient(
            endpoint=endpoint, 
            credential=ta_credential)
    return text_analytics_client

client = authenticate_client()

# **Sentiment analysis**

In [11]:
def sentiment_analysis_example(client):

    documents = ["I had the best day of my life. I wish you were there with me."]
    response = client.analyze_sentiment(documents=documents)[0]
    print("Document Sentiment: {}".format(response.sentiment))
    print("Overall scores: positive={0:.2f}; neutral={1:.2f}; negative={2:.2f} \n".format(
        response.confidence_scores.positive,
        response.confidence_scores.neutral,
        response.confidence_scores.negative,
    ))
    for idx, sentence in enumerate(response.sentences):
        print("Sentence: {}".format(sentence.text))
        print("Sentence {} sentiment: {}".format(idx+1, sentence.sentiment))
        print("Sentence score:\nPositive={0:.2f}\nNeutral={1:.2f}\nNegative={2:.2f}\n".format(
            sentence.confidence_scores.positive,
            sentence.confidence_scores.neutral,
            sentence.confidence_scores.negative,
        ))
          
sentiment_analysis_example(client)

Document Sentiment: positive
Overall scores: positive=1.00; neutral=0.00; negative=0.00 

Sentence: I had the best day of my life.
Sentence 1 sentiment: positive
Sentence score:
Positive=1.00
Neutral=0.00
Negative=0.00

Sentence: I wish you were there with me.
Sentence 2 sentiment: neutral
Sentence score:
Positive=0.21
Neutral=0.77
Negative=0.02



**Test on other dataset**

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [14]:
data = pd.read_csv('combined_data.csv')
sentences = data['text'].tolist()
labels = data['sentiment'].tolist()
print('Total sentences ', len(sentences))

Total sentences  1992


In [16]:
sentiment_count = {}

for element in labels:
   if element in sentiment_count:
      sentiment_count[element] += 1
   else:
      sentiment_count[element] = 1

for key, value in sentiment_count.items():
   print(f"{key}: {value}")

0: 996
1: 996


**0** indicated **negative sentiment**

**1** indicated **positive sentiment**

In [19]:
from random import seed
from random import randint
seed(1)

n = randint(0, len(sentences))

data1 = sentences[n]
true_label1 = labels[n]
print(data1)
print(true_label1)

Reversible plug works great.
1


In [48]:
response = client.analyze_sentiment(documents=[data1])[0]
print("Predicted Sentiment: {}".format(response.sentiment))
print("Overall scores: positive={0:.2f}; neutral={1:.2f}; negative={2:.2f} \n".format(
    response.confidence_scores.positive,
    response.confidence_scores.neutral,
    response.confidence_scores.negative,
))
print("Sentiment Prediction Score :" ,round(response.confidence_scores.positive))

Predicted Sentiment: positive
Overall scores: positive=1.00; neutral=0.00; negative=0.00 

Sentiment Prediction Score : 1


In [None]:
predicted_result = []
for i in range(len(sentences)):
  test_data = [sentences[i]]
  response = client.analyze_sentiment(documents=test_data)[0]
  result = round(response.confidence_scores.positive)
  predicted_result.append(result)

Due to using free tier, the calls are limited, therefore, only small portion of test data predicted.

In [57]:
from sklearn.metrics import accuracy_score

y_true = labels[:len(predicted_result)]
y_pred = predicted_result
print('The overall prediction score is', accuracy_score(y_true, y_pred))

The overall prediction score is 0.9035532994923858


# **Opinion mining**

In [58]:
def sentiment_analysis_with_opinion_mining_example(client):

    documents = [
        "The food and service were unacceptable, but the concierge were nice"
    ]

    result = client.analyze_sentiment(documents, show_opinion_mining=True)
    doc_result = [doc for doc in result if not doc.is_error]

    positive_reviews = [doc for doc in doc_result if doc.sentiment == "positive"]
    negative_reviews = [doc for doc in doc_result if doc.sentiment == "negative"]

    positive_mined_opinions = []
    mixed_mined_opinions = []
    negative_mined_opinions = []

    for document in doc_result:
        print("Document Sentiment: {}".format(document.sentiment))
        print("Overall scores: positive={0:.2f}; neutral={1:.2f}; negative={2:.2f} \n".format(
            document.confidence_scores.positive,
            document.confidence_scores.neutral,
            document.confidence_scores.negative,
        ))
        for sentence in document.sentences:
            print("Sentence: {}".format(sentence.text))
            print("Sentence sentiment: {}".format(sentence.sentiment))
            print("Sentence score:\nPositive={0:.2f}\nNeutral={1:.2f}\nNegative={2:.2f}\n".format(
                sentence.confidence_scores.positive,
                sentence.confidence_scores.neutral,
                sentence.confidence_scores.negative,
            ))
            for mined_opinion in sentence.mined_opinions:
                target = mined_opinion.target
                print("......'{}' target '{}'".format(target.sentiment, target.text))
                print("......Target score:\n......Positive={0:.2f}\n......Negative={1:.2f}\n".format(
                    target.confidence_scores.positive,
                    target.confidence_scores.negative,
                ))
                for assessment in mined_opinion.assessments:
                    print("......'{}' assessment '{}'".format(assessment.sentiment, assessment.text))
                    print("......Assessment score:\n......Positive={0:.2f}\n......Negative={1:.2f}\n".format(
                        assessment.confidence_scores.positive,
                        assessment.confidence_scores.negative,
                    ))
            print("\n")
        print("\n")
          
sentiment_analysis_with_opinion_mining_example(client)

Document Sentiment: positive
Overall scores: positive=0.84; neutral=0.00; negative=0.16 

Sentence: The food and service were unacceptable, but the concierge were nice
Sentence sentiment: positive
Sentence score:
Positive=0.84
Neutral=0.00
Negative=0.16

......'negative' target 'food'
......Target score:
......Positive=0.01
......Negative=0.99

......'negative' assessment 'unacceptable'
......Assessment score:
......Positive=0.01
......Negative=0.99

......'negative' target 'service'
......Target score:
......Positive=0.01
......Negative=0.99

......'negative' assessment 'unacceptable'
......Assessment score:
......Positive=0.01
......Negative=0.99

......'positive' target 'concierge'
......Target score:
......Positive=1.00
......Negative=0.00

......'positive' assessment 'nice'
......Assessment score:
......Positive=1.00
......Negative=0.00







# **Language detection**

In [59]:
def language_detection_example(client):
    try:
        documents = ["我在用电脑"]
        response = client.detect_language(documents = documents, country_hint = 'us')[0]
        print("Language: ", response.primary_language.name)

    except Exception as err:
        print("Encountered exception. {}".format(err))
language_detection_example(client)

Language:  Chinese_Simplified


# **Named Entity Recognition (NER)**

In [60]:
def entity_recognition_example(client):

    try:
        documents = ["I had a wonderful trip to Seattle last week."]
        result = client.recognize_entities(documents = documents)[0]

        print("Named Entities:\n")
        for entity in result.entities:
            print("\tText: \t", entity.text, "\tCategory: \t", entity.category, "\tSubCategory: \t", entity.subcategory,
                    "\n\tConfidence Score: \t", round(entity.confidence_score, 2), "\tLength: \t", entity.length, "\tOffset: \t", entity.offset, "\n")

    except Exception as err:
        print("Encountered exception. {}".format(err))
entity_recognition_example(client)

Named Entities:

	Text: 	 trip 	Category: 	 Event 	SubCategory: 	 None 
	Confidence Score: 	 0.73 	Length: 	 4 	Offset: 	 18 

	Text: 	 Seattle 	Category: 	 Location 	SubCategory: 	 GPE 
	Confidence Score: 	 1.0 	Length: 	 7 	Offset: 	 26 

	Text: 	 last week 	Category: 	 DateTime 	SubCategory: 	 DateRange 
	Confidence Score: 	 0.8 	Length: 	 9 	Offset: 	 34 



# **Personally Identifiable Information (PII) recognition**

In [62]:
def pii_recognition_example(client):
    documents = [
        "The office contact number is 859-98-0987.",
        "While the director personal contact phone number is 555-555-5555."
    ]
    response = client.recognize_pii_entities(documents, language="en")
    result = [doc for doc in response if not doc.is_error]
    for doc in result:
        print("Redacted Text: {}".format(doc.redacted_text))
        for entity in doc.entities:
            print("Entity: {}".format(entity.text))
            print("\tCategory: {}".format(entity.category))
            print("\tConfidence Score: {}".format(entity.confidence_score))
            print("\tOffset: {}".format(entity.offset))
            print("\tLength: {}".format(entity.length))
pii_recognition_example(client)

Redacted Text: The office contact number is 859-98-0987.
Redacted Text: While the ******** personal contact phone number is ************.
Entity: director
	Category: PersonType
	Confidence Score: 0.95
	Offset: 10
	Length: 8
Entity: 555-555-5555
	Category: PhoneNumber
	Confidence Score: 0.8
	Offset: 52
	Length: 12


seems like some context is not captured as entity.

# **Entity linking**

In [63]:
def entity_linking_example(client):

    try:
        documents = ["""Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975, 
        to develop and sell BASIC interpreters for the Altair 8800. 
        During his career at Microsoft, Gates held the positions of chairman,
        chief executive officer, president and chief software architect, 
        while also being the largest individual shareholder until May 2014."""]
        result = client.recognize_linked_entities(documents = documents)[0]

        print("Linked Entities:\n")
        for entity in result.entities:
            print("\tName: ", entity.name, "\tId: ", entity.data_source_entity_id, "\tUrl: ", entity.url,
            "\n\tData Source: ", entity.data_source)
            print("\tMatches:")
            for match in entity.matches:
                print("\t\tText:", match.text)
                print("\t\tConfidence Score: {0:.2f}".format(match.confidence_score))
                print("\t\tOffset: {}".format(match.offset))
                print("\t\tLength: {}".format(match.length))
            
    except Exception as err:
        print("Encountered exception. {}".format(err))
entity_linking_example(client)

Linked Entities:

	Name:  Microsoft 	Id:  Microsoft 	Url:  https://en.wikipedia.org/wiki/Microsoft 
	Data Source:  Wikipedia
	Matches:
		Text: Microsoft
		Confidence Score: 0.55
		Offset: 0
		Length: 9
		Text: Microsoft
		Confidence Score: 0.55
		Offset: 168
		Length: 9
	Name:  Bill Gates 	Id:  Bill Gates 	Url:  https://en.wikipedia.org/wiki/Bill_Gates 
	Data Source:  Wikipedia
	Matches:
		Text: Bill Gates
		Confidence Score: 0.63
		Offset: 25
		Length: 10
		Text: Gates
		Confidence Score: 0.63
		Offset: 179
		Length: 5
	Name:  Paul Allen 	Id:  Paul Allen 	Url:  https://en.wikipedia.org/wiki/Paul_Allen 
	Data Source:  Wikipedia
	Matches:
		Text: Paul Allen
		Confidence Score: 0.60
		Offset: 40
		Length: 10
	Name:  April 4 	Id:  April 4 	Url:  https://en.wikipedia.org/wiki/April_4 
	Data Source:  Wikipedia
	Matches:
		Text: April 4
		Confidence Score: 0.32
		Offset: 54
		Length: 7
	Name:  BASIC 	Id:  BASIC 	Url:  https://en.wikipedia.org/wiki/BASIC 
	Data Source:  Wikipedia
	Matches:
		

# **Key phrase extraction**

In [65]:
def key_phrase_extraction_example(client):

    try:
        documents = ["I going to travel around the World for 5 years"]

        response = client.extract_key_phrases(documents = documents)[0]

        if not response.is_error:
            print("\tKey Phrases:")
            for phrase in response.key_phrases:
                print("\t\t", phrase)
        else:
            print(response.id, response.error)

    except Exception as err:
        print("Encountered exception. {}".format(err))
        
key_phrase_extraction_example(client)

	Key Phrases:
		 World
		 5 years


# **Extract health entities**

In [None]:
def health_example(client):
    documents = [
        """
        Patient needs to take 50 mg of ibuprofen.
        """
    ]

    poller = client.begin_analyze_healthcare_entities(documents)
    result = poller.result()

    docs = [doc for doc in result if not doc.is_error]

    for idx, doc in enumerate(docs):
        for entity in doc.entities:
            print("Entity: {}".format(entity.text))
            print("...Normalized Text: {}".format(entity.normalized_text))
            print("...Category: {}".format(entity.category))
            print("...Subcategory: {}".format(entity.subcategory))
            print("...Offset: {}".format(entity.offset))
            print("...Confidence score: {}".format(entity.confidence_score))
        for relation in doc.entity_relations:
            print("Relation of type: {} has the following roles".format(relation.relation_type))
            for role in relation.roles:
                print("...Role '{}' with entity '{}'".format(role.name, role.entity.text))
        print("------------------------------------------")
health_example(client)

**Healthcare analysis is currently only supported for Text Analytics Standard tier.**