# Fun with AI Services

In this notebook we'll start with an image and work our way through the following AWS AI services:

- text detection with Rekognition
- text to speech with Polly
- audio to text with Transcribe
- English text to French text with Translate
- syntax and sentiment analysis of text with Comprehend

# Some setup code

In [None]:
import boto3

import time
import requests
import json
import uuid
import IPython
import io
from PIL import Image, ImageDraw, ExifTags, ImageColor, ImageFont

import helper as helper # some extra python code hidden from the notebook to make it simpler

# Let's start with uploading an image to S3

Make sure you have an S3 bucket. If you don't, create one. We'll be using it throughout the demo.

In [None]:
import boto3

# Demo configuration: point bucket_name at an S3 bucket you own.
region_name = 'us-west-2'
bucket_name = '<<your-own-bucket>>'
image_file = './free-pizza.jpg'   # local image that gets uploaded
image_key = 'img/free-pizza.jpg'  # object key the image is stored under

bucket = boto3.resource('s3', region_name=region_name).Bucket(bucket_name)

# Open the file in a context manager so the handle is closed as soon as the
# upload finishes instead of staying open for the lifetime of the kernel.
with open(image_file, 'rb') as image_stream:
    image_object = bucket.put_object(Key=image_key, Body=image_stream)

print(image_object)

# Last expression of the cell: renders the local image inline.
image = Image.open(image_file)
image

# Rekognition - Detect Text

In [None]:
# Rekognition reads the image straight from S3, so we only pass a reference
# (bucket + key) to the object uploaded earlier rather than the image bytes.
rekog = boto3.client('rekognition', region_name=region_name)

image = {'S3Object': {'Bucket': bucket_name, 'Name': image_key}}
results = rekog.detect_text(Image=image)

# Show the detections returned by the call.
results['TextDetections']


In [None]:
# Hand the detection results to the project helper for display.
# NOTE(review): 90 looks like a minimum-confidence cutoff — confirm against helper.display_image_text.
helper.display_image_text(bucket_name,image_key,results,90,region_name) # only show confident text

In [None]:
# Keep only detections of type LINE whose Confidence is at least 95,
# then join them with spaces into a single string.
lines = [
    detection['DetectedText']
    for detection in results['TextDetections']
    if detection['Type'] == 'LINE' and detection['Confidence'] >= 95  # Pick a threshold!
]

text = ' '.join(lines)
text

# Polly speaks the audio

In [None]:
import boto3

polly = boto3.client('polly', region_name=region_name)
voices_result = polly.describe_voices(LanguageCode='en-US')

# Choose a voice by its position in the returned list and use the first
# engine that voice reports as supported.
voice_num = 1
selected_voice = voices_result['Voices'][voice_num]
voice_id = selected_voice['Id']
voice_engine = selected_voice['SupportedEngines'][0]

print("Other available voices include...")
voices_result['Voices'][:3]

In [None]:
print(f"Using voice: {voice_id}")
print(f"Using engine: {voice_engine}")

# Synthesize the detected text as MP3 with the voice and engine chosen above.
request = dict(
    Text=text,
    VoiceId=voice_id,
    LanguageCode='en-US',
    Engine=voice_engine,
    OutputFormat='mp3',
)

result = polly.synthesize_speech(**request)

# Read the whole audio stream into memory so it can be played inline here
# and uploaded afterwards.
audio = result['AudioStream'].read()

IPython.display.Audio(audio)


In [None]:
audio_key = 'audio/pizza.mp3'

# Store the synthesized audio in the bucket under audio_key and print the
# URL that the helper returns for it.
signed_url = helper.upload_and_get_url(
    bucket_name=bucket_name,
    key=audio_key,
    audio=audio,
    region_name=region_name,
)

print(signed_url)

# Transcribe recreates text from audio

Note: You can optionally specify an S3 bucket for the results; otherwise, Transcribe manages the location of the results for you.

For streaming audio, see https://github.com/awslabs/amazon-transcribe-streaming-sdk


In [None]:
# Transcribe client, created with the same region_name used for the S3 bucket.
transcribe = boto3.client('transcribe',region_name=region_name)

In [None]:
job_name = uuid.uuid4().hex # using random job names for demo

# MediaSampleRateHertz is deliberately left out. The Polly engine is picked at
# runtime, and the MP3 sample rate depends on it (22050 for standard, 24000 for
# neural), so a hard-coded rate can disagree with the actual audio. Transcribe
# fails the job on a mismatch, but detects the rate itself when it is omitted.
request = {
    "TranscriptionJobName": job_name,
    "LanguageCode": 'en-US',
    "MediaFormat": 'mp3',
    "Media": {"MediaFileUri": f"s3://{bucket_name}/{audio_key}"},
}

# Starts the asynchronous job; the response only acknowledges the submission.
result = transcribe.start_transcription_job(**request)

In [None]:
# Hand the job name and client to the helper, then pull the transcript text
# out of whatever it returns.
# NOTE(review): presumably wait_for_job polls until the job finishes — confirm in helper.py.
# `text` is rebound here: it previously held the text detected in the image.
result = helper.wait_for_job(job_name,transcribe) 
text = helper.get_text_from_transcription_job(result)
      
text


# Translate

In [None]:
# `text` is overwritten with a fixed sample sentence here, so the translation
# (and the Comprehend analysis that follows) does not depend on the
# transcription result from the previous section.
text = "This is a really cool pizza. Thank you so much. I'm verry happy."

# Pass region_name explicitly, like every other client in this notebook, so
# the call does not depend on a default region being configured.
translate = boto3.client('translate', region_name=region_name)
request = {
    'Text': text,
    'SourceLanguageCode': 'en',
    'TargetLanguageCode': 'fr'
}

result = translate.translate_text(**request)
french_text = result['TranslatedText']
french_text

# Comprehend

In [None]:
# Pass region_name explicitly, like the other clients in this notebook, so the
# call does not depend on a default region being configured.
comprehend = boto3.client('comprehend', region_name=region_name)

In [None]:
# Ask Comprehend which language the translated text is written in.
result = comprehend.detect_dominant_language(Text=french_text)
detected_languages = result['Languages']

# The first entry's code is reused as LanguageCode for the remaining calls.
lang = detected_languages[0]['LanguageCode']
detected_languages

In [None]:
# Sentiment of the translated text, analysed in the language detected above.
result = comprehend.detect_sentiment(Text=french_text, LanguageCode=lang)

print(f"Sentiment : {result['Sentiment']}")
print('SentimentScore:')  # label fixed: it previously read 'SentimentSore:'
result['SentimentScore']

In [None]:
# Syntax tokens for the translated text, using the detected language code.
syntax_request = {'Text': french_text, 'LanguageCode': lang}
result = comprehend.detect_syntax(**syntax_request)
result['SyntaxTokens']

In [None]:
# Key phrases found in the translated text, using the detected language code.
key_phrase_request = {'Text': french_text, 'LanguageCode': lang}
result = comprehend.detect_key_phrases(**key_phrase_request)
result['KeyPhrases']

In [None]:
# Entities found in the translated text. Like the other Comprehend cells, keep
# the response in `result` and show only the payload, not the raw response
# with its request metadata.
result = comprehend.detect_entities(Text=french_text, LanguageCode=lang)
result['Entities']

# END

# Appendix


## Try the other voices

This is some code for experimenting with more voices.

In [None]:
# Setup for the appendix: it repeats the imports, region and sample text that
# its own cells use.
import IPython
import boto3

region_name = 'us-west-2'
text = "Free pizza in the kitchen"

polly = boto3.client('polly', region_name=region_name)


In [None]:
import voice_selectors

In [None]:

# Synthesize the sample text with the voice and language currently returned
# by the voice_selectors module, then play the result inline.
request = dict(
    Text=text,
    VoiceId=voice_selectors.get_voice(),
    LanguageCode=voice_selectors.get_lang(),
    OutputFormat='mp3',
)

result = polly.synthesize_speech(**request)
audio = result['AudioStream'].read()
IPython.display.Audio(audio)