# Lesson 2: Summarize an audio file

### Import all needed packages

In [1]:
import os
from IPython.display import Audio
import boto3
import uuid
import time
import json
from jinja2 import Template

### Let's start with transcribing an audio file

In [None]:
# Wrap the local MP3 in an IPython Audio object and display it in the notebook
# so the recording can be checked before it is transcribed.
audio = Audio(filename="dialog.mp3")
display(audio)

In [None]:
# S3 client (us-west-2) used to upload the recording and to fetch the transcript.
s3_client = boto3.client('s3', region_name='us-west-2')

In [None]:
# Name of the working S3 bucket, read from the BucketName environment variable.
# Indexing os.environ raises KeyError when the variable is not set.
bucket_name = os.environ['BucketName']

In [None]:
# Local path of the recording; the upload cell reuses this string as the S3 object key.
file_name = 'dialog.mp3'

In [None]:
# Copy the local recording into the bucket, keeping the file name as the object key.
s3_client.upload_file(Filename=file_name, Bucket=bucket_name, Key=file_name)

In [None]:
# Amazon Transcribe client in the same region as the bucket client.
transcribe_client = boto3.client('transcribe', region_name='us-west-2')

In [None]:
# Give every run its own job name by appending a random UUID.
job_name = f'transcription-job-{uuid.uuid4()}'

In [None]:
# Show the generated job name; it is also the stem of the transcript file names used below.
job_name

In [None]:
# Submit the uploaded MP3 for English transcription. The result is written back
# to the same bucket, and speaker labelling is enabled for up to two speakers.
response = transcribe_client.start_transcription_job(
    TranscriptionJobName=job_name,
    LanguageCode='en-US',
    MediaFormat='mp3',
    Media=dict(MediaFileUri=f's3://{bucket_name}/{file_name}'),
    OutputBucketName=bucket_name,
    Settings=dict(ShowSpeakerLabels=True, MaxSpeakerLabels=2),
)

In [None]:
# Poll Amazon Transcribe until the job reaches a terminal state, but give up
# after MAX_WAIT_SECONDS so a stuck job cannot hang the notebook forever.
POLL_INTERVAL_SECONDS = 2
MAX_WAIT_SECONDS = 600  # generous for a short recording; raise it for long audio
deadline = time.monotonic() + MAX_WAIT_SECONDS
while True:
    status = transcribe_client.get_transcription_job(TranscriptionJobName=job_name)
    job_status = status['TranscriptionJob']['TranscriptionJobStatus']
    if job_status in ('COMPLETED', 'FAILED'):
        break
    if time.monotonic() >= deadline:
        raise TimeoutError(
            f"Transcription job {job_name} is still {job_status} "
            f"after {MAX_WAIT_SECONDS} seconds"
        )
    time.sleep(POLL_INTERVAL_SECONDS)
print(job_status)
if job_status == 'FAILED':
    # Surface the service-provided explanation instead of only the bare status.
    print(status['TranscriptionJob'].get('FailureReason', 'No failure reason reported'))

In [None]:
# Stop here with a clear message when the job did not complete; otherwise
# transcript_text would stay undefined and the next cell would fail with a
# confusing NameError.
job = status['TranscriptionJob']
if job['TranscriptionJobStatus'] != 'COMPLETED':
    raise RuntimeError(
        f"Transcription job {job_name} did not complete: "
        f"{job.get('FailureReason', job['TranscriptionJobStatus'])}"
    )

# Load the transcript from S3. The object is fetched from the output bucket
# under the key "<job_name>.json".
transcript_key = f"{job_name}.json"
transcript_obj = s3_client.get_object(Bucket=bucket_name, Key=transcript_key)
transcript_text = transcript_obj['Body'].read().decode('utf-8')

In [None]:
# Parse the raw transcript text into Python objects for inspection and formatting.
transcript_json = json.loads(transcript_text)

In [None]:
# List the keys available under the 'results' section of the transcript.
transcript_json['results'].keys()

In [None]:
# Look at the first entry of 'items' to see the per-item structure used by the loop below.
transcript_json['results']['items'][0]

In [None]:
# Flatten the transcript items into readable "speaker: text" lines.
output_text = ""
current_speaker = None

items = transcript_json['results']['items']

for entry in items:
    label = entry.get('speaker_label')
    token = entry['alternatives'][0]['content']

    # A change to a new, known speaker opens a fresh line prefixed with the label.
    if label is not None and label != current_speaker:
        current_speaker = label
        output_text += f"\n{current_speaker}: "

    # Punctuation attaches to the previous word, so trailing whitespace is
    # removed before it is appended.
    if entry['type'] == 'punctuation':
        output_text = output_text.rstrip()

    output_text += f"{token} "

In [None]:
# Preview only the first 200 characters of the formatted transcript.
print(output_text[:200])

In [None]:
# Persist the formatted transcript next to the notebook as "<job_name>.txt"
# so the LLM section can read it back from disk.
with open(f'{job_name}.txt', 'w') as transcript_out:
    transcript_out.write(output_text)

### Now, let's use an LLM

In [2]:
# Bedrock runtime client (us-west-2) used to invoke the text model.
bedrock_runtime = boto3.client('bedrock-runtime', region_name='us-west-2')

In [6]:
# Keep the job name produced by the transcription section when it ran in this
# kernel, so the summary is built from the transcript that was just created.
# The hard-coded name of a previously saved transcript is only a fallback for
# resuming the notebook from this section on a fresh kernel.
if 'job_name' not in globals():
    job_name = 'transcription-job-c11a942e-93b4-4666-900f-b7a83d1a4f23'

In [7]:
# Read the saved "<job_name>.txt" transcript back into memory.
with open(f'{job_name}.txt', "r") as transcript_file:
    transcript = transcript_file.read()

In [132]:
%%writefile prompt_template.txt

Instructions:
Summarize the conversation provided between the <data> XML like tags.
The summary must contain a one word sentiment analysis, and a list of issues, problems or causes of friction during the conversation.

<data>
{{transcript}}
</data>

Response:
You MUST answer in JSON format only.
DO NOT use any other format while answering the
question.
Please wrap the entire output in JSON format. You can
use markdown ticks like
```json

{
    "sentiment": "sentiment goes here",
    "issues": [
        {
            "topic": "topic goes here",
            "summary": "summary goes here"
        },
        {
            "..."
        }
    ]
}

```

Overwriting prompt_template.txt


In [133]:
# Load the prompt template text written to prompt_template.txt by the %%writefile cell.
with open('prompt_template.txt', "r") as template_file:
    template_string = template_file.read()

In [134]:
# Values for the template placeholders; the template only references "transcript".
data = dict(transcript=transcript)

In [135]:
# Build a Jinja2 Template from the prompt text.
template = Template(template_string)

In [136]:
# Render the final prompt, substituting the {{transcript}} placeholder with the transcript text.
prompt = template.render(data)

In [137]:
# Show the rendered prompt to confirm the transcript landed between the <data> tags.
print(prompt)


Instructions:
Summarize the conversation provided between the <data> XML like tags.
The summary must contain a one word sentiment analysis, and a list of issues, problems or causes of friction during the conversation.

<data>

spk_0: Hi, is this the Crystal Heights Hotel in Singapore? 
spk_1: Yes, it is. Good afternoon. How may I assist you today? 
spk_0: Fantastic. Good afternoon. I was looking to book a room for my 10th wedding anniversary. I've heard your hotel offers exceptional views and services. Could you tell me more? 
spk_1: Absolutely, Alex, and congratulations on your upcoming anniversary. That's a significant milestone, and we'd be honored to make it a special occasion for you. We have several room types that offer stunning views of the city skyline and the fictional Sapphire Bay. Our special diamond suite even comes with exclusive access to the moonlit pool and star deck. We also have in-house spa services, world-class dining options, and a shopping arcade. 
spk_0: That s

In [138]:
# Request for the Titan Text Express model: the rendered prompt plus the
# generation settings, serialized to JSON as the request body.
kwargs = dict(
    modelId="amazon.titan-text-express-v1",
    contentType="application/json",
    accept="*/*",
    body=json.dumps(
        dict(
            inputText=prompt,
            textGenerationConfig=dict(
                maxTokenCount=512,
                temperature=0,
                topP=0.9,
            ),
        )
    ),
)

In [None]:
# Send the request to the model through the Bedrock runtime client.
response = bedrock_runtime.invoke_model(**kwargs)

In [None]:
# The response 'body' is read once and parsed as JSON.
response_body = json.loads(response.get('body').read())
# The generated text is taken from the first entry of 'results'.
generation = response_body['results'][0]['outputText']

In [131]:
# Display the model's summary of the conversation.
print(generation)


sentiment: neutral
issues:
- topic: pre-authorization
- summary: The guest expresses concern about the pre-authorization amount of $1000, considering it excessive. The hotel apologizes and explains that it's a standard procedure to cover incidental expenses, but assures the guest that it's only a hold and not an actual charge. The guest raises a question about additional charges, specifically asking if there are any other fees besides the pre-authorization. The hotel confirms that there is a 10% service charge and a 7% fantasy tax applied to the room rate. The guest acknowledges the special occasion and decides to proceed with the booking.
