# Trabajando con APIs preentrenadas en Amazon Comprehend

## Configuración inicial

In [None]:
import boto3
import json
import pandas as pd
import numpy as np
import sagemaker
from sagemaker import get_execution_role

In [None]:
# Region of the default boto3 session; reused when creating service clients.
region = boto3.Session().region_name
# Execution role as reported by the SageMaker SDK for this environment.
role = get_execution_role()

## Iniciamos el cliente de Comprehend

In [None]:
# Amazon Comprehend client bound to the region resolved above.
comprehend = boto3.client(
    service_name='comprehend',
    region_name=region,
)

### Texto de ejemplo

In [None]:
# Sample English notification used as input for every Comprehend call below.
# It contains a person name, account numbers, a postal address, phone numbers
# and an email address. The literal is sent verbatim, whitespace included.
sample_text = '''
Hello Zhang Wei. Your AnyCompany Financial Services, LLC credit card account 1111-0000-1111-0000 has a minimum payment of $24.53 that is due by July 31st. Based on your autopay settings, we will withdraw your payment on the due date from your bank account XXXXXX1111 with the routing number XXXXX0000. 
Your latest statement was mailed to 100 Main Street, Anytown, WA 98121. 
After your payment is received, you will receive a confirmation text message at 206-555-0100. 
If you have questions about your bill, AnyCompany Customer Service is available by phone at 206-555-0199 or email at support@anycompany.com.
'''

## 1- Reconocimiento de entidades nombradas

In [None]:
# Ask Comprehend to detect entities in the English sample text.
detected_entities = comprehend.detect_entities(
    LanguageCode='en',
    Text=sample_text,
)
# Show the raw response as indented JSON with keys in sorted order.
print(json.dumps(detected_entities, indent=4, sort_keys=True))

### En formato legible

In [None]:
# Flatten the 'Entities' list of the response into a table with one row per
# entity: the matched text, its type and its score.
# The variable was previously misspelled `detectec_entities_df`; it now
# follows the `detected_*_df` naming used by the other cells.
detected_entities_df = pd.DataFrame(
    [
        (entity['Text'], entity['Type'], entity['Score'])
        for entity in detected_entities['Entities']
    ],
    columns=['Text', 'Type', 'Score'],
)

print('Texto Analizado:')
print(sample_text)
print('Resultado:')
# `display` is not imported here; it is expected to come from the notebook
# environment.
display(detected_entities_df)

## 2- Detección de frases clave

In [None]:
# Ask Comprehend for the key phrases of the English sample text.
detected_key_phrases = comprehend.detect_key_phrases(
    LanguageCode='en',
    Text=sample_text,
)
# Show the raw response as indented JSON with keys in sorted order.
print(json.dumps(detected_key_phrases, indent=4, sort_keys=True))

### En formato legible

In [None]:
# One row per key phrase from the response: the phrase text and its score.
detected_key_phrases_df = pd.DataFrame(
    [
        [phrase['Text'], phrase['Score']]
        for phrase in detected_key_phrases['KeyPhrases']
    ],
    columns=['Text', 'Score'],
)

# Heading, analysed text and result label, each on its own line.
print('Texto Analizado', sample_text, 'Resultado', sep='\n')
display(detected_key_phrases_df)

## 3- Identificación del idioma dominante

In [None]:
# Ask Comprehend which language(s) dominate the sample text; no language
# code is passed to this call.
detected_language = comprehend.detect_dominant_language(
    Text=sample_text,
)
# Show the raw response as indented JSON with keys in sorted order.
print(json.dumps(detected_language, indent=4, sort_keys=True))

### En formato legible

In [None]:
# One row per detected language from the response: its code and its score.
detected_language_df = pd.DataFrame(
    [
        [lang['LanguageCode'], lang['Score']]
        for lang in detected_language['Languages']
    ],
    columns=['Language Code', 'Score'],
)

# Heading, analysed text and result label, each on its own line.
print('Texto analizado:', sample_text, 'Resultado:', sep='\n')
display(detected_language_df)

## 4- Determinación del sentimiento emocional

In [None]:
# Ask Comprehend for the sentiment of the English sample text.
detected_sentiment = comprehend.detect_sentiment(
    LanguageCode='en',
    Text=sample_text,
)
# Show the raw response as indented JSON with keys in sorted order.
print(json.dumps(detected_sentiment, indent=4, sort_keys=True))

### En formato legible

In [None]:
# Overall sentiment label reported in the response.
predominant_sentiment = detected_sentiment['Sentiment']

# One row per entry of the 'SentimentScore' mapping: sentiment name and score.
# The first column was previously mislabelled 'Language Code' (copied from
# the language-detection cell); it holds sentiment names.
detected_sentiments_df = pd.DataFrame(
    list(detected_sentiment['SentimentScore'].items()),
    columns=['Sentiment', 'Score'],
)

print('Texto Analizado:')
print(sample_text)
print()
print('El sentimiento predominante es {}.'.format(predominant_sentiment))
print()
# `display` is not imported here; it is expected to come from the notebook
# environment.
display(detected_sentiments_df)

## 5- Determinación de la sintaxis

In [None]:
# Ask Comprehend for the syntax tokens of the English sample text.
detected_syntax = comprehend.detect_syntax(
    LanguageCode='en',
    Text=sample_text,
)
# Show the raw response as indented JSON with keys in sorted order.
print(json.dumps(detected_syntax, indent=4, sort_keys=True))

### En formato legible

In [None]:
# One row per syntax token from the response: the token text plus the tag
# and score stored under its 'PartOfSpeech' entry.
detected_syntax_df = pd.DataFrame(
    [
        [token['Text'], token['PartOfSpeech']['Tag'], token['PartOfSpeech']['Score']]
        for token in detected_syntax['SyntaxTokens']
    ],
    columns=['Text', 'Part Of Speech', 'Score'],
)

# Heading, analysed text, a blank line and the table caption.
print('Texto analizado:', sample_text, '', 'Primeros 20 tokens:', sep='\n')
# Only the first 20 rows are shown.
display(detected_syntax_df.head(20))

## 6- Detección de información de identificación personal (PII)

In [None]:
# Ask Comprehend to locate PII entities in the English sample text.
detected_pii_entities = comprehend.detect_pii_entities(
    LanguageCode='en',
    Text=sample_text,
)
# Show the raw response as indented JSON with keys in sorted order.
print(json.dumps(detected_pii_entities, indent=4, sort_keys=True))

### En formato legible

In [None]:
# One row per PII entity from the response: its type and its score.
detected_pii_entities_df = pd.DataFrame(
    [
        [pii['Type'], pii['Score']]
        for pii in detected_pii_entities['Entities']
    ],
    columns=['Type', 'Score'],
)

# Heading, analysed text and result label, each on its own line.
print('Texto analizado:', sample_text, 'Resultado', sep='\n')
display(detected_pii_entities_df)

## 7- Etiquetado de documentos de texto con PII

In [None]:
# Ask Comprehend which PII labels apply to the English sample text.
detected_pii_labels = comprehend.contains_pii_entities(
    LanguageCode='en',
    Text=sample_text,
)
# Show the raw response as indented JSON with keys in sorted order.
print(json.dumps(detected_pii_labels, indent=4, sort_keys=True))

### En formato legible

In [None]:
# One row per PII label from the response: its name and its score.
detected_pii_labels_df = pd.DataFrame(
    [
        [label['Name'], label['Score']]
        for label in detected_pii_labels['Labels']
    ],
    columns=['Name', 'Score'],
)

# Heading, analysed text and result label, each on its own line.
print('Texto analizado:', sample_text, 'Resultado', sep='\n')
display(detected_pii_labels_df)