# Lesson 2: Summarize an audio file

### Import all needed packages

In [None]:
import os
from IPython.display import Audio
import boto3
import uuid
import time
import json
from jinja2 import Template

In [None]:
from dotenv import load_dotenv, find_dotenv

# Load variables from the .env file located by find_dotenv(); binding the
# result to `_` keeps the return value from being echoed as cell output.
# NOTE(review): presumably this is what supplies `BucketName`, read further down — confirm.
_ = load_dotenv(find_dotenv())

### Let's start with transcribing an audio file

In [None]:
# Wrap the local recording in an IPython Audio object and display it in the cell output.
audio = Audio(filename="dialog.mp3")
display(audio)

In [None]:
# S3 client used to upload the audio file and, later, to fetch the transcription output.
s3_client = boto3.client(service_name='s3', region_name='us-west-1')

In [None]:
# Name of the working S3 bucket, taken from the `BucketName` environment variable
# (None when the variable is not set).
bucket_name = os.environ.get('BucketName')
print(bucket_name)

In [None]:
# Local audio file to process; the upload step also uses this value as the S3 object key.
file_name = 'dialog.mp3'

In [None]:
# Upload the local audio file to the bucket, using the file name as the object key.
s3_client.upload_file(Filename=file_name, Bucket=bucket_name, Key=file_name)

In [None]:
# Amazon Transcribe client, created in the same region as the S3 client.
transcribe_client = boto3.client(service_name='transcribe', region_name='us-west-1')

In [None]:
# Build a unique transcription job name by appending a random UUID to a fixed prefix.
job_name = f'transcription-job-{uuid.uuid4()}'

In [None]:
# Echo the generated job name as the cell output.
job_name

In [None]:
# Describe the job first: which S3 object to transcribe, its format and language,
# the bucket that receives the result, and speaker labelling for up to 2 speakers.
transcription_request = dict(
    TranscriptionJobName=job_name,
    Media={'MediaFileUri': f's3://{bucket_name}/{file_name}'},
    MediaFormat='mp3',
    LanguageCode='en-US',
    OutputBucketName=bucket_name,
    Settings={'ShowSpeakerLabels': True, 'MaxSpeakerLabels': 2},
)

# Submit the job; the service's reply is kept in `response`.
response = transcribe_client.start_transcription_job(**transcription_request)

In [None]:
# Poll the job every 2 seconds until it reports COMPLETED or FAILED,
# then print the final status.
status = transcribe_client.get_transcription_job(TranscriptionJobName=job_name)
while status['TranscriptionJob']['TranscriptionJobStatus'] not in ('COMPLETED', 'FAILED'):
    time.sleep(2)
    status = transcribe_client.get_transcription_job(TranscriptionJobName=job_name)
print(status['TranscriptionJob']['TranscriptionJobStatus'])

In [None]:
def _format_transcript(items):
    """Render Amazon Transcribe result items as speaker-labelled text.

    Args:
        items: The ``results.items`` list of a Transcribe JSON document. Each
            item must provide ``type`` and ``alternatives[0].content``;
            ``speaker_label`` is optional.

    Returns:
        The conversation as one string. Every change of speaker starts a new
        line prefixed with ``<speaker_label>: ``; words are separated by single
        spaces and punctuation is attached to the preceding word.
    """
    output_text = ""
    current_speaker = None

    for item in items:
        speaker_label = item.get('speaker_label', None)
        content = item['alternatives'][0]['content']

        # Start a new line whenever a different speaker begins talking.
        if speaker_label is not None and speaker_label != current_speaker:
            current_speaker = speaker_label
            output_text += f"\n{current_speaker}: "

        # Punctuation belongs to the previous word, so drop the space before it.
        if item['type'] == 'punctuation':
            output_text = output_text.rstrip()

        output_text += f"{content} "

    return output_text


# Stop here with a clear message when the job did not complete. Silently
# skipping this step would leave no transcript file on disk, and the cell that
# reads it back would fail with an unrelated FileNotFoundError.
job_status = status['TranscriptionJob']['TranscriptionJobStatus']
if job_status != 'COMPLETED':
    failure_reason = status['TranscriptionJob'].get('FailureReason', 'no reason reported')
    raise RuntimeError(
        f"Transcription job {job_name} has status {job_status}: {failure_reason}"
    )

# Load the transcript from S3.
transcript_key = f"{job_name}.json"
transcript_obj = s3_client.get_object(Bucket=bucket_name, Key=transcript_key)
transcript_text = transcript_obj['Body'].read().decode('utf-8')
transcript_json = json.loads(transcript_text)

# Turn the word-level items into readable, speaker-labelled text.
items = transcript_json['results']['items']
output_text = _format_transcript(items)

# Save the transcript to a text file
with open(f'{job_name}.txt', 'w') as f:
    f.write(output_text)

In [None]:
# Bedrock runtime client used to invoke the model
# (note: us-west-2, unlike the S3 and Transcribe clients above).
bedrock_runtime = boto3.client(service_name='bedrock-runtime', region_name='us-west-2')

In [None]:
# Read the saved transcript text back from disk so it can be placed into the prompt.
transcript_path = f'{job_name}.txt'
with open(transcript_path) as file:
    transcript = file.read()

In [None]:
%%writefile prompt_template.txt
You are an advanced AI assistant tasked with analyzing a conversation transcript. Your response must be in JSON format and include:
1. The overall sentiment of the conversation (Positive, Negative, or Neutral), based on the tone, resolution, and emotions expressed.
2. A detailed list of all issues, problems, or points of friction raised during the conversation. For each issue, provide:
   - A topic that summarizes the issue.
   - A brief summary explaining the concern or friction, including the specific context from the conversation.

The conversation transcript is enclosed within <data> tags.

<data>
{{ transcript }}
</data>

Follow these additional guidelines:
- Carefully analyze the entire conversation for any potential concerns or disagreements, even if they seem minor.
- Ensure each issue is meaningful and adds value to the summary.
- Respond strictly in this JSON format:
{
    "sentiment": "<sentiment>",
    "issues": [
        {
            "topic": "<topic>",
            "summary": "<issue_summary>"
        },
        ...
    ]
}
Do not leave any fields empty. Replace <sentiment>, <topic>, and <issue_summary> with meaningful and specific values. Include all possible issues raised in the conversation.


In [None]:
# Read the prompt template text from prompt_template.txt.
template_path = 'prompt_template.txt'
with open(template_path) as file:
    template_string = file.read()

In [None]:
# Values for the template's placeholders: `transcript` fills `{{ transcript }}`.
data = dict(transcript=transcript)

In [None]:
# Echo the template variables as the cell output (this shows the whole transcript).
data

In [None]:
# Build a Jinja2 Template object from the template text read from file.
template = Template(template_string)

In [None]:
# Fill the template's placeholders with the prepared values to obtain the final prompt.
prompt = template.render(**data)

In [None]:
# Print the rendered prompt; this is the text later sent as the model's `inputText`.
print(prompt)

In [None]:
# Alternative way of prompting: define the same template inline as a string
# instead of loading it from a file, then render it with the transcript.

inline_template = Template("""
You are an advanced AI assistant tasked with analyzing a conversation transcript. Your response must be in JSON format and include:
1. The overall sentiment of the conversation (Positive, Negative, or Neutral), based on the tone, resolution, and emotions expressed.
2. A detailed list of all issues, problems, or points of friction raised during the conversation. For each issue, provide:
   - A topic that summarizes the issue.
   - A brief summary explaining the concern or friction, including the specific context from the conversation.

The conversation transcript is enclosed within <data> tags.

<data>
{{ transcript }}
</data>

Follow these additional guidelines:
- Carefully analyze the entire conversation for any potential concerns or disagreements, even if they seem minor.
- Ensure each issue is meaningful and adds value to the summary.
- Respond strictly in this JSON format:
{
    "sentiment": "<sentiment>",
    "issues": [
        {
            "topic": "<topic>",
            "summary": "<issue_summary>"
        },
        ...
    ]
}
Do not leave any fields empty. Replace <sentiment>, <topic>, and <issue_summary> with meaningful and specific values. Include all possible issues raised in the conversation.
""")

prompt1 = inline_template.render(transcript=data['transcript'])


In [None]:
# Request payload for the Titan text model: the rendered prompt plus generation settings.
titan_request = {
    "inputText": prompt,
    "textGenerationConfig": {
        "maxTokenCount": 2048,  # raised token limit to allow larger outputs
        "temperature": 0.8,     # slightly increased randomness
        "topP": 0.95,
    },
}

# Keyword arguments for bedrock_runtime.invoke_model; the payload is sent as a JSON string.
kwargs = dict(
    modelId="amazon.titan-text-express-v1",
    contentType="application/json",
    accept="*/*",
    body=json.dumps(titan_request),
)

In [None]:
# Invoke the model through the Bedrock runtime client with the prepared arguments.
# NOTE(review): this rebinds `response`, which earlier held the result of
# start_transcription_job — re-running cells out of order can mix the two up.
response = bedrock_runtime.invoke_model(**kwargs)

In [None]:
# Read the response body, parse it as JSON, and take the text of the first result.
response_payload = response.get('body').read()
response_body = json.loads(response_payload)
first_result = response_body['results'][0]
generation = first_result['outputText']

In [None]:
# Print the text generated by the model.
print(generation)