In [88]:
import boto3
import json
from botocore.config import Config
import argparse
import os
import io
import pandas as pd

# Configuration
Before using Boto3, you need to set up authentication credentials for your AWS account using either the IAM Console or the AWS CLI.
- If you have the AWS CLI installed, run `aws configure` in a terminal to set up your credentials.  
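- Otherwise, pass the following two keyword arguments when calling `boto3.client()` (load the values from environment variables or a secrets manager rather than hard-coding them in the notebook):  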
    - aws_access_key_id = YOUR_ACCESS_KEY  
    - aws_secret_access_key = YOUR_SECRET_KEY  
   
Here we use AWS CLI to set up authentication credentials and connect to the AWS account we use.

# Use Case Analysis
We use audio data to mimic a phone-call scenario. In this simple use-case demo, we show how to set up an S3 -> Transcribe -> Comprehend pipeline that converts customer phone-call audio to text and then uses Comprehend to run sentiment analysis on the transcribed text.  
The dataset link:  https://github.com/jim-schwoebel/sample_voice_data

## Setting
Before the analysis, we store all the audio data in the bucket '6330projectaudio' and create two more buckets: '6330projecttransribe' (note the spelling; this is the actual bucket name used in the code below) to store the transcribed text, and '6330sentimentanalysis' to store the results of the sentiment analysis.

In [16]:
# Region comes from the AWS_REGION environment variable when set, so the
# notebook can be pointed at another region without editing code.
REGION = os.getenv('AWS_REGION', default='us-east-1')

# No credentials are passed here: boto3 resolves them itself (for example
# from the profile written by `aws configure`). Do not hard-code access keys
# in the notebook.
# Every handle is created with the same explicit region so they all agree.
s3 = boto3.resource('s3', region_name=REGION)
s3_client = boto3.client('s3', region_name=REGION)
transcribe = boto3.client('transcribe', region_name=REGION)
comprehend = boto3.client('comprehend', region_name=REGION)

In [90]:
# Sanity check: print the name of every bucket returned by s3.buckets.all().
bucket_names = [b.name for b in s3.buckets.all()]
for name in bucket_names:
    print(name)

6330projectaudio
6330projecttransribe
6330sentimentanalysis


In [20]:
def get_keys(bucket):
    """Return the keys of all objects stored in an S3 bucket.

    A single ``list_objects_v2`` call returns at most one page of results, so
    the listing is driven through a paginator to collect every page.

    Parameters
    ----------
    bucket : str
        Name of the bucket to list.

    Returns
    -------
    list of str
        Object keys in the order S3 returns them; an empty list when the
        bucket holds no objects.
    """
    paginator = s3_client.get_paginator('list_objects_v2')
    keys = []
    for page in paginator.paginate(Bucket=bucket):
        # 'Contents' is missing from the response when there are no objects,
        # so default to an empty list instead of raising KeyError.
        keys.extend(entry['Key'] for entry in page.get('Contents', []))
    return keys

## Transcribe

In [31]:
def trans_audio(bucket, output_bucket='6330projecttransribe', media_format='wav',
                job_prefix=''):
    """Start one Amazon Transcribe job per object in an S3 bucket.

    Jobs are named ``job_prefix`` followed by a counter starting at 1, in the
    order ``get_keys`` returns the objects. The call only *starts* the jobs;
    it does not wait for them to finish, so the transcripts may not be in
    ``output_bucket`` yet when this function returns.

    Parameters
    ----------
    bucket : str
        Bucket holding the audio files; every key in it is submitted.
    output_bucket : str, optional
        Bucket passed to Transcribe as ``OutputBucketName``.
    media_format : str, optional
        Value passed as ``MediaFormat`` for every file.
    job_prefix : str, optional
        Text prepended to each job name. NOTE(review): Transcribe job names
        are expected to be unique per account, so re-running with the same
        prefix will likely be rejected -- pass a fresh prefix per run.

    Returns
    -------
    list of str
        Names of the jobs that were started, in submission order.
    """
    settings = {'ShowSpeakerLabels': False}
    started_jobs = []

    for index, key in enumerate(get_keys(bucket), start=1):
        job_name = job_prefix + str(index)
        transcribe.start_transcription_job(
            TranscriptionJobName=job_name,
            LanguageCode='en-US',
            MediaFormat=media_format,
            Settings=settings,
            OutputBucketName=output_bucket,
            Media={'MediaFileUri': 's3://' + bucket + '/' + key},
        )
        started_jobs.append(job_name)

    return started_jobs

In [32]:
# Submit a transcription job for every audio file in the source bucket.
trans_audio(bucket='6330projectaudio')

## Comprehend

In [68]:
def compre_sen(bucket):
    """Run Comprehend sentiment analysis on every transcript in a bucket.

    Each object in ``bucket`` (other than the write-access marker file) is
    read as a Transcribe result JSON, its first transcript is extracted, and
    the text is sent to ``comprehend.detect_sentiment``.

    Parameters
    ----------
    bucket : str
        Bucket holding the transcription result JSON files.

    Returns
    -------
    pandas.DataFrame
        One row per transcript with columns ``Audio`` (the object key of the
        transcript file), ``Transcript``, ``Sentiment``, ``Positive``,
        ``Negative`` and ``Neutral``. Rows whose transcript is empty have
        missing values in the sentiment columns.
    """
    columns = ['Audio', 'Transcript', 'Sentiment', 'Positive', 'Negative', 'Neutral']
    # Presumably written by Transcribe when it checks it can write to the
    # output bucket (TODO confirm); it is not a transcript. Filtering it out,
    # rather than list.remove(), avoids a ValueError when the file is absent.
    marker_key = '.write_access_check_file.temp'
    # detect_sentiment rejects oversized requests; 5000 UTF-8 bytes is the
    # documented ceiling for Text -- TODO confirm against the current API docs.
    max_text_bytes = 5000

    records = []
    for key in get_keys(bucket):
        if key == marker_key:
            continue

        obj = s3_client.get_object(Bucket=bucket, Key=key)
        object_content = json.loads(obj['Body'].read())
        transcript = object_content['results']['transcripts'][0]['transcript']

        if transcript:
            # Trim on the encoded form so the byte limit is respected, and drop
            # any multi-byte character cut in half by the slice.
            text = transcript.encode('utf-8')[:max_text_bytes].decode('utf-8', errors='ignore')
            senti = comprehend.detect_sentiment(Text=text, LanguageCode='en')
            label = senti['Sentiment']
            scores = senti['SentimentScore']
        else:
            # An empty transcript cannot be sent to detect_sentiment (Text must
            # be non-empty); keep the row, leave the sentiment missing.
            label = None
            scores = {}

        records.append({
            'Audio': key,
            'Transcript': transcript,  # full text is kept even if the request was trimmed
            'Sentiment': label,
            'Positive': scores.get('Positive'),
            'Negative': scores.get('Negative'),
            'Neutral': scores.get('Neutral'),
        })

    # Passing columns explicitly keeps the schema stable when there are no rows.
    return pd.DataFrame(records, columns=columns)

In [69]:
# Score the sentiment of every transcript stored in the transcription bucket.
sentiment = compre_sen(bucket='6330projecttransribe')

In [70]:
# Display the sentiment table returned by compre_sen.
sentiment

Unnamed: 0,Audio,Transcript,Sentiment,Positive,Negative,Neutral
0,0.json,"No, no, I think it's more than that, but I'm r...",POSITIVE,0.928965,0.004506,0.01381
1,1.json,"No, no, I think it's more than that, but I'm r...",POSITIVE,0.928965,0.004506,0.01381
2,10.json,Um Sometimes playing in the rain. Um Sometimes...,MIXED,0.063626,0.06437,0.385759
3,11.json,"Three weeks ago now, no pain whatsoever. I jus...",MIXED,0.255559,0.054976,0.017408
4,12.json,Yeah me little girl loved it when when I did i...,POSITIVE,0.988143,0.00087,0.009605
5,13.json,And ideas and just everything and all the time...,POSITIVE,0.842263,0.00413,0.140382
6,14.json,I think I can center everybody like let's watc...,POSITIVE,0.580105,0.019696,0.39971
7,15.json,"It helps, like cancer is to have someone that ...",POSITIVE,0.780631,0.011591,0.130128
8,16.json,I think my character empathizes with the alien...,NEUTRAL,0.044746,0.408376,0.534522
9,17.json,There's a feel for kindergarten that's called ...,POSITIVE,0.794209,0.072136,0.101282


## Upload results to S3

In [89]:
# Serialise the sentiment table to CSV text (no index column) and store it
# as a single object in the results bucket.
csv_text = sentiment.to_csv(index=False)
response = s3_client.put_object(
    Bucket='6330sentimentanalysis',
    Key="sentiment_analysis.csv",
    Body=csv_text,
)