In [2]:
import os
import requests
import json

from dotenv import load_dotenv
from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics import TextAnalyticsClient

# Read settings from a local .env file. override=True asks python-dotenv to let
# values from .env replace variables already present in the process environment.
load_dotenv(override=True)

# Fail fast with an actionable message. A missing variable would otherwise come
# back from os.environ.get() as None and only surface later as an opaque error
# raised while building the credential or the client.
_required_settings = ("AZURE_LANGUAGE_ENDPOINT", "AZURE_LANGUAGE_API_KEY")
_missing_settings = [name for name in _required_settings if not os.environ.get(name)]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

# Trailing slashes are dropped so that URLs built later in this notebook as
# f"{endpoint}/language/..." never contain a doubled "//".
endpoint = os.environ["AZURE_LANGUAGE_ENDPOINT"].rstrip("/")
api_key = os.environ["AZURE_LANGUAGE_API_KEY"]
credential = AzureKeyCredential(api_key)

# Shared SDK client used by every "SDK" cell below.
text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=credential)

# Sample passage used as the input document by the analysis cells in this notebook.
# The triple-quoted literal begins and ends with a newline, and those newlines are
# part of the text that gets submitted.
example_text = """
While we’ve just been discussing expertise, nothing can replace hands-on experience in developing and delivering solutions. Experience should include a healthy mix of implementing business logic, managing data, designing interfaces, and integrating systems.

Look for opportunities to build solutions on different technical platforms. Some examples include the .NET C# ecosystem, Node, JavaScript/client side, and Java.

You may end up specializing in a given area, but being well-rounded still applies. For example, a DevOps architect should have a good grasp of technology generally while also knowing the specifics of DevOps very well. An Integration Architect may have most of their experience integrating technologies but can still build an application if needed. Meanwhile, a Data Architect would be expected to have experience designing data systems and implementing Business Intelligence solutions. An architect with narrow experience may miss out on better solutions for problems because they resort to the limited solutions they are familiar with.

Keep in mind that sometimes, the best lessons an architect will learn are by doing things the wrong way or making an incorrect choice. One must be humble and self-aware enough to learn from their mistakes to minimize the chance of making them in the future.
"""

In [4]:
# Detect Language (SDK)

# A single document is submitted, so the result of interest is at index 0.
language_detection_results = text_analytics_client.detect_language(
    documents=[example_text]
)
detected_language_result = language_detection_results[0]
print(detected_language_result)

# Pull the primary language out once and report its name and confidence.
primary_language = detected_language_result.primary_language
print(f"Detected language: {primary_language.name}")
print(f"Confidence: {primary_language.confidence_score}")

Detected language: English
Confidence: 1.0


In [6]:
# Detect Language HTTP
# Calls the analyze-text REST route directly with `requests`.

# rstrip("/") keeps the URL free of "//" when the configured endpoint ends with a slash.
url = f"{endpoint.rstrip('/')}/language/:analyze-text?api-version=2023-11-15-preview"

headers = {
    "Content-Type": "application/json",
    "Ocp-Apim-Subscription-Key": api_key,
}

payload = {
    "kind": "LanguageDetection",
    "parameters": {"modelVersion": "latest"},
    "analysisInput": {"documents": [{"id": "1", "text": example_text}]},
}

# requests waits indefinitely unless a timeout is given; cap the wait (seconds)
# so an unreachable endpoint cannot hang the cell.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Raise requests.HTTPError on a 4xx/5xx reply here, instead of letting an error
# body fail further down with a misleading KeyError on "results".
response.raise_for_status()

detected_language_response = response.json()

print(detected_language_response)

# One document was sent, so read the first (only) entry of the documents list.
detected_language = detected_language_response["results"]["documents"][0][
    "detectedLanguage"
]["name"]

print(f"Detected Language: {detected_language}")

Detected Language: English


In [7]:
# Extract Key Phrases (SDK)
# ex: quick article "summary", precomputed alternative to full text search

key_phrases_reponse = text_analytics_client.extract_key_phrases(
    documents=[example_text]
)
print(key_phrases_reponse)

# Only one document was submitted; its result is the first element.
first_key_phrase_result = key_phrases_reponse[0]
key_phrases = first_key_phrase_result.key_phrases
print(key_phrases)

['different technical platforms', 'An Integration Architect', 'Business Intelligence solutions', 'business logic', 'healthy mix', 'JavaScript/client side', 'good grasp', 'best lessons', 'incorrect choice', 'limited solutions', 'Data Architect', 'DevOps architect', 'narrow experience', 'data systems', 'expertise', 'hands', 'interfaces', 'opportunities', 'examples', 'Node', 'area', 'technology', 'specifics', 'technologies', 'application', 'problems', 'mind', 'things', 'wrong', 'mistakes', 'chance', 'future']


In [10]:
# Extract Key Phrases HTTP
# Calls the analyze-text REST route directly with `requests`.

# rstrip("/") keeps the URL free of "//" when the configured endpoint ends with a slash.
url = f"{endpoint.rstrip('/')}/language/:analyze-text?api-version=2023-11-15-preview"

headers = {
    "Content-Type": "application/json",
    "Ocp-Apim-Subscription-Key": api_key,
}

payload = {
    "kind": "KeyPhraseExtraction",
    "parameters": {"modelVersion": "latest"},
    "analysisInput": {"documents": [{"id": "1", "text": example_text}]},
}

# requests waits indefinitely unless a timeout is given; cap the wait (seconds)
# so an unreachable endpoint cannot hang the cell.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Raise requests.HTTPError on a 4xx/5xx reply here, instead of letting an error
# body fail further down with a misleading KeyError on "results".
response.raise_for_status()

# One document was sent, so read the first (only) entry of the documents list.
key_phrases = response.json()["results"]["documents"][0]["keyPhrases"]

print(f"Key phrases: {key_phrases}")

Key phrases: ['different technical platforms', 'An Integration Architect', 'Business Intelligence solutions', 'business logic', 'healthy mix', 'JavaScript/client side', 'good grasp', 'best lessons', 'incorrect choice', 'limited solutions', 'Data Architect', 'DevOps architect', 'narrow experience', 'data systems', 'expertise', 'hands', 'interfaces', 'opportunities', 'examples', 'Node', 'area', 'technology', 'specifics', 'technologies', 'application', 'problems', 'mind', 'things', 'wrong', 'mistakes', 'chance', 'future']


In [8]:
# Analyze Sentiment (SDK)
# ex. analyze tweets

# A single document is submitted, so the result of interest is at index 0.
sentiment_results = text_analytics_client.analyze_sentiment(documents=[example_text])
sentiment_result = sentiment_results[0]

# Overall label first, then the per-class confidence scores.
print(sentiment_result.sentiment)
print(sentiment_result.confidence_scores)

negative
{'positive': 0.04, 'neutral': 0.25, 'negative': 0.71}


In [12]:
# Analyze Sentiment HTTP
# Calls the analyze-text REST route directly with `requests`.

# rstrip("/") keeps the URL free of "//" when the configured endpoint ends with a slash.
url = f"{endpoint.rstrip('/')}/language/:analyze-text?api-version=2023-11-15-preview"

headers = {
    "Content-Type": "application/json",
    "Ocp-Apim-Subscription-Key": api_key,
}

payload = {
    "kind": "SentimentAnalysis",
    "parameters": {"modelVersion": "latest"},
    "analysisInput": {"documents": [{"id": "1", "text": example_text}]},
}

# requests waits indefinitely unless a timeout is given; cap the wait (seconds)
# so an unreachable endpoint cannot hang the cell.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Raise requests.HTTPError on a 4xx/5xx reply here, instead of letting an error
# body fail further down with a misleading KeyError on "results".
response.raise_for_status()

# One document was sent, so read the first (only) entry of the documents list.
sentiment_result = response.json()["results"]["documents"][0]
print(f"Sentiment: {sentiment_result['sentiment']}")
print(f"Confidence Scores: {sentiment_result['confidenceScores']}")

Sentiment: negative
Confidence Scores: {'positive': 0.04, 'neutral': 0.25, 'negative': 0.71}


In [13]:
# Recognize Entities (SDK)
# ex. trending in "people"

# A single document is submitted, so the result of interest is at index 0.
entity_recognition_results = text_analytics_client.recognize_entities(
    documents=[example_text]
)
entity_result = entity_recognition_results[0]

print(entity_result.entities)

[CategorizedEntity(text=developing, category=Skill, subcategory=None, length=10, offset=88, confidence_score=1.0), CategorizedEntity(text=delivering solutions, category=Skill, subcategory=None, length=20, offset=103, confidence_score=0.99), CategorizedEntity(text=business logic, category=Skill, subcategory=None, length=14, offset=181, confidence_score=0.82), CategorizedEntity(text=managing data, category=Skill, subcategory=None, length=13, offset=197, confidence_score=0.85), CategorizedEntity(text=designing interfaces, category=Skill, subcategory=None, length=20, offset=212, confidence_score=0.97), CategorizedEntity(text=integrating systems, category=Skill, subcategory=None, length=19, offset=238, confidence_score=0.99), CategorizedEntity(text=build, category=Skill, subcategory=None, length=5, offset=286, confidence_score=0.8), CategorizedEntity(text=technical platforms, category=Skill, subcategory=None, length=19, offset=315, confidence_score=1.0), CategorizedEntity(text=Node, categor

In [14]:
# Recognize Entities HTTP
# Calls the analyze-text REST route directly with `requests`.

# rstrip("/") keeps the URL free of "//" when the configured endpoint ends with a slash.
url = f"{endpoint.rstrip('/')}/language/:analyze-text?api-version=2023-11-15-preview"

headers = {
    "Content-Type": "application/json",
    "Ocp-Apim-Subscription-Key": api_key,
}

payload = {
    "kind": "EntityRecognition",
    "parameters": {"modelVersion": "latest"},
    "analysisInput": {"documents": [{"id": "1", "text": example_text}]},
}

# requests waits indefinitely unless a timeout is given; cap the wait (seconds)
# so an unreachable endpoint cannot hang the cell.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Raise requests.HTTPError on a 4xx/5xx reply here, instead of letting an error
# body fail further down with a misleading KeyError on "results".
response.raise_for_status()

# One document was sent, so read the first (only) entry of the documents list.
entity_recognition_result = response.json()["results"]["documents"][0]
print(f"Entities: {entity_recognition_result['entities']}")

Entities: [{'text': 'developing', 'category': 'Skill', 'type': 'Skill', 'offset': 88, 'length': 10, 'confidenceScore': 1.0, 'tags': [{'name': 'Skill', 'confidenceScore': 1.0}]}, {'text': 'delivering solutions', 'category': 'Skill', 'type': 'Skill', 'offset': 103, 'length': 20, 'confidenceScore': 0.99, 'tags': [{'name': 'Skill', 'confidenceScore': 0.99}]}, {'text': 'business logic', 'category': 'Skill', 'type': 'Skill', 'offset': 181, 'length': 14, 'confidenceScore': 0.82, 'tags': [{'name': 'Skill', 'confidenceScore': 0.82}]}, {'text': 'managing data', 'category': 'Skill', 'type': 'Skill', 'offset': 197, 'length': 13, 'confidenceScore': 0.85, 'tags': [{'name': 'Skill', 'confidenceScore': 0.85}]}, {'text': 'designing interfaces', 'category': 'Skill', 'type': 'Skill', 'offset': 212, 'length': 20, 'confidenceScore': 0.97, 'tags': [{'name': 'Skill', 'confidenceScore': 0.97}]}, {'text': 'integrating systems', 'category': 'Skill', 'type': 'Skill', 'offset': 238, 'length': 19, 'confidenceScore

In [15]:
# Linked Entities (SDK)
# ex. bat vs bat

# A single document is submitted, so the result of interest is at index 0.
linked_entity_results = text_analytics_client.recognize_linked_entities(
    documents=[example_text]
)
entity_result = linked_entity_results[0]

print(entity_result.entities)

[LinkedEntity(name=Experience, matches=[LinkedEntityMatch(confidence_score=0.01, text=Experience, length=10, offset=125)], language=en, data_source_entity_id=Experience, url=https://en.wikipedia.org/wiki/Experience, data_source=Wikipedia, bing_entity_search_api_id=d64099b9-75e3-ad0d-6ab1-59abb4352e95), LinkedEntity(name=Node.js, matches=[LinkedEntityMatch(confidence_score=0.07, text=Node, length=4, offset=381)], language=en, data_source_entity_id=Node.js, url=https://en.wikipedia.org/wiki/Node.js, data_source=Wikipedia, bing_entity_search_api_id=f76e2d03-f85b-1317-2921-c588e887566a), LinkedEntity(name=JavaScript, matches=[LinkedEntityMatch(confidence_score=0.9, text=JavaScript, length=10, offset=387)], language=en, data_source_entity_id=JavaScript, url=https://en.wikipedia.org/wiki/JavaScript, data_source=Wikipedia, bing_entity_search_api_id=cb655991-5b3a-0e86-fe49-e353c7291b47), LinkedEntity(name=Java (programming language), matches=[LinkedEntityMatch(confidence_score=0.02, text=Java,

In [16]:
# Linked Entities HTTP
# Calls the analyze-text REST route directly with `requests`.

# rstrip("/") keeps the URL free of "//" when the configured endpoint ends with a slash.
url = f"{endpoint.rstrip('/')}/language/:analyze-text?api-version=2023-11-15-preview"

headers = {
    "Content-Type": "application/json",
    "Ocp-Apim-Subscription-Key": api_key,
}

payload = {
    "kind": "EntityLinking",
    "parameters": {"modelVersion": "latest"},
    "analysisInput": {"documents": [{"id": "1", "text": example_text}]},
}

# requests waits indefinitely unless a timeout is given; cap the wait (seconds)
# so an unreachable endpoint cannot hang the cell.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Raise requests.HTTPError on a 4xx/5xx reply here, instead of letting an error
# body fail further down with a misleading KeyError on "results".
response.raise_for_status()

# One document was sent, so read the first (only) entry of the documents list.
linked_entity_result = response.json()["results"]["documents"][0]
print(f"Linked Entities: {linked_entity_result['entities']}")



Linked Entities: [{'bingId': 'd64099b9-75e3-ad0d-6ab1-59abb4352e95', 'name': 'Experience', 'matches': [{'text': 'Experience', 'offset': 125, 'length': 10, 'confidenceScore': 0.01}], 'language': 'en', 'id': 'Experience', 'url': 'https://en.wikipedia.org/wiki/Experience', 'dataSource': 'Wikipedia'}, {'bingId': 'f76e2d03-f85b-1317-2921-c588e887566a', 'name': 'Node.js', 'matches': [{'text': 'Node', 'offset': 381, 'length': 4, 'confidenceScore': 0.07}], 'language': 'en', 'id': 'Node.js', 'url': 'https://en.wikipedia.org/wiki/Node.js', 'dataSource': 'Wikipedia'}, {'bingId': 'cb655991-5b3a-0e86-fe49-e353c7291b47', 'name': 'JavaScript', 'matches': [{'text': 'JavaScript', 'offset': 387, 'length': 10, 'confidenceScore': 0.9}], 'language': 'en', 'id': 'JavaScript', 'url': 'https://en.wikipedia.org/wiki/JavaScript', 'dataSource': 'Wikipedia'}, {'bingId': '8df814a3-19ce-e658-49c2-0b09b61a9c10', 'name': 'Java (programming language)', 'matches': [{'text': 'Java', 'offset': 415, 'length': 4, 'confiden

In [9]:
# Personally Identifiable Information Entities (SDK)
# ex. Remove emails, truly anonymous salary reviews

# Two documents go in: the sample passage and a bare email address.
pii_input_documents = [example_text, "clayton@email.com"]
pii_entities = text_analytics_client.recognize_pii_entities(
    documents=pii_input_documents
)

# Print the full list of per-document results.
print(pii_entities)

[RecognizePiiEntitiesResult(id=0, entities=[PiiEntity(text=DevOps architect, category=PersonType, subcategory=None, length=16, offset=520, confidence_score=0.7), PiiEntity(text=Integration Architect, category=PersonType, subcategory=None, length=21, offset=643, confidence_score=0.69), PiiEntity(text=Data Architect, category=PersonType, subcategory=None, length=14, offset=783, confidence_score=0.89), PiiEntity(text=architect, category=PersonType, subcategory=None, length=9, offset=911, confidence_score=0.98), PiiEntity(text=architect, category=PersonType, subcategory=None, length=9, offset=1109, confidence_score=1.0)], redacted_text=
While we’ve just been discussing expertise, nothing can replace hands-on experience in developing and delivering solutions. Experience should include a healthy mix of implementing business logic, managing data, designing interfaces, and integrating systems.



In [13]:
# Personally Identifiable Information Entities HTTP
# Calls the analyze-text REST route directly with `requests`.

# rstrip("/") keeps the URL free of "//" when the configured endpoint ends with a slash.
url = f"{endpoint.rstrip('/')}/language/:analyze-text?api-version=2023-11-15-preview"

headers = {
    "Content-Type": "application/json",
    "Ocp-Apim-Subscription-Key": api_key,
}

payload = {
    "kind": "PiiEntityRecognition",
    "parameters": {"modelVersion": "latest"},
    "analysisInput": {"documents": [{"id": "1", "text": example_text}]},
}

# requests waits indefinitely unless a timeout is given; cap the wait (seconds)
# so an unreachable endpoint cannot hang the cell.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Raise requests.HTTPError on a 4xx/5xx reply here, instead of letting an error
# body fail further down with a misleading KeyError on "results".
response.raise_for_status()

# One document was sent, so read the first (only) entry of the documents list.
pii_entities = response.json()["results"]["documents"][0]

# Full per-document result first, then just its entity list.
print(pii_entities, '\n')

print(f"PII Entities: {pii_entities['entities']}")




PII Entities: [{'text': 'DevOps architect', 'category': 'PersonType', 'offset': 520, 'length': 16, 'confidenceScore': 0.7}, {'text': 'Integration Architect', 'category': 'PersonType', 'offset': 643, 'length': 21, 'confidenceScore': 0.69}, {'text': 'Data Architect', 'category': 'PersonType', 'offset': 783, 'length': 14, 'confidenceScore': 0.89}, {'text': 'architect', 'category': 'PersonType', 'offset': 911, 'length': 9, 'confidenceScore': 0.98}, {'text': 'architect', 'category': 'PersonType', 'offset': 1109, 'length': 9, 'confidenceScore': 1.0}]
