In [73]:
pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.
'C:\Users\David' is not recognized as an internal or external command,
operable program or batch file.


In [1]:
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential

In [2]:
import os

# Prefer environment variables so real credentials are never saved inside the
# notebook. The placeholders stay as defaults, so replacing them inline still
# works when the variables are not set.
ANALYTICS_KEY = os.environ.get("ANALYTICS_KEY", "<your analytics key>")
ANALYTICS_ENDPOINT = os.environ.get("ANALYTICS_ENDPOINT", "<your analytics endpoint>")

## Authenticate the client

In [4]:
def authenticate_client():
    """Build a TextAnalyticsClient from the notebook's key and endpoint constants.

    Returns:
        A ``TextAnalyticsClient`` constructed with ``ANALYTICS_ENDPOINT`` and an
        ``AzureKeyCredential`` wrapping ``ANALYTICS_KEY``.
    """
    return TextAnalyticsClient(
        credential=AzureKeyCredential(ANALYTICS_KEY),
        endpoint=ANALYTICS_ENDPOINT,
    )

# Create the client once; each example cell below receives it as its argument.
client = authenticate_client()

## Sentiment analysis

In [8]:
def sentiment_analysis_example(client):
    """Analyze one sample text and print document- and sentence-level sentiment.

    Args:
        client: Object exposing ``analyze_sentiment(documents=...)`` that
            returns one result per input document (the notebook passes the
            client built by ``authenticate_client``).

    Returns:
        None. Everything is written to standard output.
    """
    documents = ["I had the best day of my life. I wish you were there with me."]
    print("Document: {}\n".format(documents[0]))
    response = client.analyze_sentiment(documents=documents)[0]

    # A failed document is returned as an error entry rather than a sentiment
    # result; report it the same way key_phrase_extraction_example does instead
    # of failing on the attribute reads below.
    if response.is_error:
        print(response.id, response.error)
        return

    print("Document Sentiment: {}".format(response.sentiment))
    print("Overall scores: positive={0:.2f}; neutral={1:.2f}; negative={2:.2f} \n".format(
        response.confidence_scores.positive,
        response.confidence_scores.neutral,
        response.confidence_scores.negative,
    ))
    # Sentences are numbered from 1 in the printed report.
    for idx, sentence in enumerate(response.sentences):
        print("Sentence: {}".format(sentence.text))
        print("Sentence {} sentiment: {}".format(idx+1, sentence.sentiment))
        print("Sentence score:\nPositive={0:.2f}\nNeutral={1:.2f}\nNegative={2:.2f}\n".format(
            sentence.confidence_scores.positive,
            sentence.confidence_scores.neutral,
            sentence.confidence_scores.negative,
        ))
          
# Run the sentiment example with the client created by authenticate_client().
sentiment_analysis_example(client)

Document: I had the best day of my life. I wish you were there with me.

Document Sentiment: positive
Overall scores: positive=1.00; neutral=0.00; negative=0.00 

Sentence: I had the best day of my life.
Sentence 1 sentiment: positive
Sentence score:
Positive=1.00
Neutral=0.00
Negative=0.00

Sentence: I wish you were there with me.
Sentence 2 sentiment: neutral
Sentence score:
Positive=0.21
Neutral=0.77
Negative=0.02



## Opinion mining

In [9]:
def _aspect_and_opinions(mined_opinion):
    """Return the (aspect, opinions) pair under either SDK attribute naming."""
    # Newer azure-ai-textanalytics releases name these attributes
    # target/assessments; the release this notebook was recorded with uses
    # aspect/opinions.
    if hasattr(mined_opinion, "target"):
        return mined_opinion.target, mined_opinion.assessments
    return mined_opinion.aspect, mined_opinion.opinions


def sentiment_analysis_with_opinion_mining_example(client):
    """Run sentiment analysis with opinion mining on a sample review and print it.

    For every non-error document the report lists the document sentiment, each
    sentence's sentiment, and for each mined opinion the aspect together with
    the opinions attached to it.

    Args:
        client: Object exposing
            ``analyze_sentiment(documents, show_opinion_mining=True)`` (the
            notebook passes the client built by ``authenticate_client``).

    Returns:
        None. Everything is written to standard output.
    """
    documents = [
        "The food and service were unacceptable, but the concierge were nice"
    ]
    print("Document: {}\n".format(documents[0]))

    result = client.analyze_sentiment(documents, show_opinion_mining=True)
    # Error entries carry no sentiment data, so they are left out of the report.
    doc_result = [doc for doc in result if not doc.is_error]

    for document in doc_result:
        print("Document Sentiment: {}".format(document.sentiment))
        print("Overall scores: positive={0:.2f}; neutral={1:.2f}; negative={2:.2f} \n".format(
            document.confidence_scores.positive,
            document.confidence_scores.neutral,
            document.confidence_scores.negative,
        ))
        for sentence in document.sentences:
            print("Sentence: {}".format(sentence.text))
            print("Sentence sentiment: {}".format(sentence.sentiment))
            print("Sentence score:\nPositive={0:.2f}\nNeutral={1:.2f}\nNegative={2:.2f}\n".format(
                sentence.confidence_scores.positive,
                sentence.confidence_scores.neutral,
                sentence.confidence_scores.negative,
            ))
            for mined_opinion in sentence.mined_opinions:
                aspect, opinions = _aspect_and_opinions(mined_opinion)
                print("......'{}' aspect '{}'".format(aspect.sentiment, aspect.text))
                print("......Aspect score:\n......Positive={0:.2f}\n......Negative={1:.2f}\n".format(
                    aspect.confidence_scores.positive,
                    aspect.confidence_scores.negative,
                ))
                for opinion in opinions:
                    print("......'{}' opinion '{}'".format(opinion.sentiment, opinion.text))
                    print("......Opinion score:\n......Positive={0:.2f}\n......Negative={1:.2f}\n".format(
                        opinion.confidence_scores.positive,
                        opinion.confidence_scores.negative,
                    ))
            # Blank spacer after each sentence, and again after each document.
            print("\n")
        print("\n")
          
# Run the opinion-mining example with the client created by authenticate_client().
sentiment_analysis_with_opinion_mining_example(client)

Document: The food and service were unacceptable, but the concierge were nice

Document Sentiment: positive
Overall scores: positive=0.84; neutral=0.00; negative=0.16 

Sentence: The food and service were unacceptable, but the concierge were nice
Sentence sentiment: positive
Sentence score:
Positive=0.84
Neutral=0.00
Negative=0.16

......'negative' aspect 'food'
......Aspect score:
......Positive=0.01
......Negative=0.99

......'negative' opinion 'unacceptable'
......Opinion score:
......Positive=0.01
......Negative=0.99

......'negative' aspect 'service'
......Aspect score:
......Positive=0.01
......Negative=0.99

......'negative' opinion 'unacceptable'
......Opinion score:
......Positive=0.01
......Negative=0.99

......'positive' aspect 'concierge'
......Aspect score:
......Positive=1.00
......Negative=0.00

......'positive' opinion 'nice'
......Opinion score:
......Positive=1.00
......Negative=0.00







## Language detection

In [14]:
def language_detection_example(client):
    """Detect the language of a French sample sentence and print its name.

    Args:
        client: Object exposing ``detect_language(documents, country_hint=...)``.

    Any exception raised inside the example is caught and reported on standard
    output instead of propagating.
    """
    try:
        documents = ["Ce document est rédigé en Français."]
        sample_text = documents[0]
        print("Document: {}\n".format(sample_text))

        # An empty country hint is passed, as in the original call.
        detections = client.detect_language(documents, country_hint = '')
        detected = detections[0].primary_language
        print("Language: ", detected.name)

    except Exception as err:
        print("Encountered exception. {}".format(err))

# Run the language-detection example with the client created by authenticate_client().
language_detection_example(client)

Document: Ce document est rédigé en Français.

Language:  French


## Named entity recognition (NER)

In [22]:
def entity_recognition_example(client):
    """Recognize named entities in a sample sentence and print each one.

    Args:
        client: Object exposing ``recognize_entities(documents=...)``.

    For every entity the text, category, subcategory, rounded confidence score
    and offset are printed; the length is printed too when the entity has one.
    Any exception is caught and reported on standard output.
    """
    try:
        documents = ["I had a wonderful trip to Seattle last week."]
        print("Document: {}\n".format(documents[0]))

        first_result = client.recognize_entities(documents = documents)[0]

        print("Named Entities:\n")
        for found in first_result.entities:
            # Collect label/value pairs first, then print them space-separated.
            report_fields = (
                "\tText: \t", found.text,
                "\tCategory: \t", found.category,
                "\tSubCategory: \t", found.subcategory,
                "\n\tConfidence Score: \t", round(found.confidence_score, 2),
                "\n\tOffset: \t", found.offset,
                "\n",
            )
            print(*report_fields)
            # Only some entity objects carry a length attribute.
            if hasattr(found, 'length'):
                print("\n\tLength: \t", found.length)

    except Exception as err:
        print("Encountered exception. {}".format(err))

# Run the entity-recognition example with the client created by authenticate_client().
entity_recognition_example(client)

Document: I had a wonderful trip to Seattle last week.

Named Entities:

	Text: 	 trip 	Category: 	 Event 	SubCategory: 	 None 
	Confidence Score: 	 0.61 
	Offset: 	 18 

	Text: 	 Seattle 	Category: 	 Location 	SubCategory: 	 GPE 
	Confidence Score: 	 0.82 
	Offset: 	 26 

	Text: 	 last week 	Category: 	 DateTime 	SubCategory: 	 DateRange 
	Confidence Score: 	 0.8 
	Offset: 	 34 



## Entity linking

In [25]:
def entity_linking_example(client):
    """Link entities in a sample paragraph to a data source and print the matches.

    Args:
        client: Object exposing ``recognize_linked_entities(documents=...)``.

    For every linked entity the name, id, URL and data source are printed,
    followed by each match's text and confidence score, plus its offset and
    length when the match has them. Any exception is caught and reported on
    standard output.
    """
    try:
        # Same multi-line sample as before, written with explicit newlines so
        # the trailing spaces and leading indentation of each line are visible.
        sample_text = (
            "Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975, \n"
            "        to develop and sell BASIC interpreters for the Altair 8800. \n"
            "        During his career at Microsoft, Gates held the positions of chairman,\n"
            "        chief executive officer, president and chief software architect, \n"
            "        while also being the largest individual shareholder until May 2014."
        )
        documents = [sample_text]
        print("Document: {}\n".format(documents[0]))

        first_result = client.recognize_linked_entities(documents = documents)[0]

        print("Linked Entities:\n")
        for linked in first_result.entities:
            entity_fields = (
                "\tName: ", linked.name,
                "\tId: ", linked.data_source_entity_id,
                "\tUrl: ", linked.url,
                "\n\tData Source: ", linked.data_source,
            )
            print(*entity_fields)
            print("\tMatches:")
            for hit in linked.matches:
                print("\t\tText:", hit.text)
                print("\t\tConfidence Score: {0:.2f}".format(hit.confidence_score))
                # Offset and length are optional on match objects.
                if hasattr(hit, 'offset'):
                    print("\t\tOffset: {}".format(hit.offset))
                if hasattr(hit, 'length'):
                    print("\t\tLength: {}".format(hit.length))

    except Exception as err:
        print("Encountered exception. {}".format(err))

# Run the entity-linking example with the client created by authenticate_client().
entity_linking_example(client)

Document: Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975, 
        to develop and sell BASIC interpreters for the Altair 8800. 
        During his career at Microsoft, Gates held the positions of chairman,
        chief executive officer, president and chief software architect, 
        while also being the largest individual shareholder until May 2014.

Linked Entities:

	Name:  Microsoft 	Id:  Microsoft 	Url:  https://en.wikipedia.org/wiki/Microsoft 
	Data Source:  Wikipedia
	Matches:
		Text: Microsoft
		Confidence Score: 0.55
		Offset: 0
		Text: Microsoft
		Confidence Score: 0.55
		Offset: 168
	Name:  Bill Gates 	Id:  Bill Gates 	Url:  https://en.wikipedia.org/wiki/Bill_Gates 
	Data Source:  Wikipedia
	Matches:
		Text: Bill Gates
		Confidence Score: 0.63
		Offset: 25
		Text: Gates
		Confidence Score: 0.63
		Offset: 179
	Name:  Paul Allen 	Id:  Paul Allen 	Url:  https://en.wikipedia.org/wiki/Paul_Allen 
	Data Source:  Wikipedia
	Matches:
		Text: Paul Allen
		Conf

## Personally Identifiable Information recognition (PII)

In [28]:
def pii_recognition_example(client):
    """Recognize PII in two sample sentences and print the redacted text and entities.

    Args:
        client: Object exposing ``recognize_pii_entities(documents, language=...)``.

    Documents flagged with ``is_error`` are skipped. For each remaining
    document the redacted text is printed, followed by every entity's text,
    category and confidence score, plus offset and length when present.
    """
    documents = [
        "The employee's SSN is 859-98-0987.",
        "The employee's phone number is 555-555-5555."
    ]
    print("Document 1: {}\n".format(documents[0]))
    print("Document 2: {}\n".format(documents[1]))

    response = client.recognize_pii_entities(documents, language="en")
    # Materialise the successful results before printing any of them.
    successful = list(filter(lambda item: not item.is_error, response))

    # Optional entity attributes paired with the line used to report them.
    optional_lines = (
        ('offset', "\tOffset: {}"),
        ('length', "\tLength: {}"),
    )
    for analysed in successful:
        print("\nRedacted Text: {}".format(analysed.redacted_text))
        for found in analysed.entities:
            print("Entity: {}".format(found.text))
            print("\tCategory: {}".format(found.category))
            print("\tConfidence Score: {}".format(found.confidence_score))
            for attribute, line in optional_lines:
                if hasattr(found, attribute):
                    print(line.format(getattr(found, attribute)))

# Run the PII example with the client created by authenticate_client().
pii_recognition_example(client)

Document 1: The employee's SSN is 859-98-0987.

Document 2: The employee's phone number is 555-555-5555.


Redacted Text: The employee's SSN is ***********.
Entity: 859-98-0987
	Category: U.S. Social Security Number (SSN)
	Confidence Score: 0.65
	Offset: 22

Redacted Text: The employee's phone number is ************.
Entity: 555-555-5555
	Category: Phone Number
	Confidence Score: 0.8
	Offset: 31


## Key phrase extraction

In [29]:
def key_phrase_extraction_example(client):
    """Extract key phrases from a sample sentence and print them one per line.

    Args:
        client: Object exposing ``extract_key_phrases(documents=...)``.

    When the first result is flagged with ``is_error`` its id and error are
    printed instead of key phrases. Any exception is caught and reported on
    standard output.
    """
    try:
        documents = ["My cat might need to see a veterinarian."]
        print("Document: {}\n".format(documents[0]))

        first_result = client.extract_key_phrases(documents = documents)[0]

        # Handle the failure case first, then fall through to the listing.
        if first_result.is_error:
            print(first_result.id, first_result.error)
        else:
            print("\tKey Phrases:")
            for key_phrase in first_result.key_phrases:
                print("\t\t", key_phrase)

    except Exception as err:
        print("Encountered exception. {}".format(err))

# Run the key-phrase example with the client created by authenticate_client().
key_phrase_extraction_example(client)

Document: My cat might need to see a veterinarian.

	Key Phrases:
		 cat
		 veterinarian
