In [18]:
import boto3, os

In [19]:
from helpers.Lambda_Helper import Lambda_Helper
from helpers.S3_Helper import S3_Helper
from helpers.Display_Helper import Display_Helper

In [20]:
# Project helper used later in this notebook to deploy Lambda functions and attach S3 triggers.
lambda_helper = Lambda_Helper()
# Methods used below:
# deploy_function
# add_lambda_trigger

In [21]:
# Project helper used later in this notebook to upload, list and download S3 objects.
s3_helper = S3_Helper()
# Methods available (download_object, list_objects and upload_file are used below):
# upload_file
# download_object
# list_objects

In [22]:
# Project helper used at the end of the notebook to show the contents of results.txt.
display_helper = Display_Helper()
# Methods available:
# text_file
# json_file

In [23]:
# Replace the bracketed placeholders with your own resource names before running.
# NOTE(review): these assignments overwrite any value already present in the environment.
# NOTE(review): the captured outputs further down show two *different* buckets for text and
# audio, although both placeholders read '[YOURBUCKET]' — confirm they must be distinct.
os.environ['LEARNERS3BUCKETNAMETEXT'] = '[YOURBUCKET]'
# Not read anywhere in the visible notebook; presumably consumed by Lambda_Helper — TODO confirm.
os.environ['LAMBDALAYERVERSIONARN'] = '[YOURARN]'
os.environ['LEARNERS3BUCKETNAMEAUDIO'] = '[YOURBUCKET]'
# Bucket that receives the transcript JSON and, later, results.txt.
bucket_name_text = os.environ['LEARNERS3BUCKETNAMETEXT']
# Bucket that receives the uploaded audio file.
bucket_name_audio = os.environ['LEARNERS3BUCKETNAMEAUDIO']

In [24]:
%%writefile prompt_template.txt
I need to summarize a conversation. The transcript of the conversation is between the <data> XML like tags.
The conversation is in French, please translate it.
<data>
{{transcript}}
</data>

The summary must contain a one word sentiment analysis,
and a list of issues, problems or causes of friction during the conversation,
and a short resume of the call.

The output must be provided in JSON format shown in the following example.

Example output:
{
    "version": 0.1,
    "sentiment": <sentiment>,
    "issues": [
        {
            "topic": <topic>,
            "summary": <issue_summary>
        }
    ],
     "resume": <resume>
}

A `topic` must only be one of:
{%- for topic in topics %}
 - `{{topic}}`
{% endfor %}

Write the JSON output in French and nothing more.

Here is the JSON output:

Overwriting prompt_template.txt


In [25]:
#display_helper.text_file('prompt_template.txt')

### Create the Lambda function

In [26]:
%%writefile lambda_function_summarisation.py


#############################################################
#
# This Lambda function is written to a file by the notebook 
# It does not run in the notebook!
#
#############################################################

import boto3
import json 
from jinja2 import Template

# Module-level clients are created once per Lambda execution environment and reused by the handler.
s3_client = boto3.client('s3')
# The second positional argument of boto3.client is the region name.
bedrock_runtime = boto3.client('bedrock-runtime', 'us-east-1')

def lambda_handler(event, context):
    """Summarise a transcript that was just written to S3 and store the summary.

    Reads the bucket name and object key from the first record of the S3
    event, downloads the object, converts it to speaker-labelled text, asks
    Bedrock for a summary and writes that summary to ``results.txt`` in the
    same bucket.

    Args:
        event: S3 event notification; only ``event['Records'][0]`` is read.
        context: Lambda context object (unused).

    Returns:
        ``None`` when the key does not contain ``-transcript.json``;
        otherwise a dict with ``statusCode`` (200 or 500) and a JSON-encoded
        ``body`` message.
    """
    # Local import so this block does not depend on the module's import list.
    from urllib.parse import unquote_plus

    s3_record = event['Records'][0]['s3']
    bucket = s3_record['bucket']['name']
    # S3 event notifications deliver the object key URL-encoded (a space
    # arrives as '+'), so decode it before using it against the S3 API.
    key = unquote_plus(s3_record['object']['key'])

    # One of a few different checks to ensure we don't end up in a recursive loop.
    if "-transcript.json" not in key:
        print("This demo only works with *-transcript.json.")
        return

    try:
        response = s3_client.get_object(Bucket=bucket, Key=key)
        file_content = response['Body'].read().decode('utf-8')

        transcript = extract_transcript_from_textract(file_content)

        print(f"Successfully read file {key} from bucket {bucket}.")
        print(f"Transcript: {transcript}")

        summary = bedrock_summarisation(transcript)

        # A dict here is an error response rather than summary text; return
        # it as-is instead of attempting to upload it as the object body,
        # which would hide the real error behind a validation failure.
        if isinstance(summary, dict):
            return summary

        s3_client.put_object(
            Bucket=bucket,
            Key='results.txt',
            Body=summary,
            ContentType='text/plain'
        )

    except Exception as e:
        print(f"Error occurred: {e}")
        return {
            'statusCode': 500,
            'body': json.dumps(f"Error occurred: {e}")
        }

    return {
        'statusCode': 200,
        'body': json.dumps(f"Successfully summarized {key} from bucket {bucket}. Summary: {summary}")
    }
        
        
        
def extract_transcript_from_textract(file_content):
    """Turn a transcript JSON document into speaker-labelled plain text.

    Args:
        file_content: JSON text containing ``results.items``; each item
            provides ``type``, ``alternatives[0].content`` and, optionally,
            ``speaker_label``.

    Returns:
        The concatenated text. Every change of speaker starts a new line of
        the form ``"<speaker>: "``, each item is followed by a single space,
        and punctuation is attached directly to the preceding text.
    """
    parsed = json.loads(file_content)

    transcript = ""
    active_speaker = None

    for entry in parsed['results']['items']:
        label = entry.get('speaker_label')
        token = entry['alternatives'][0]['content']

        # A new, known speaker opens a fresh line prefixed with their label.
        speaker_changed = label is not None and label != active_speaker
        if speaker_changed:
            active_speaker = label
            transcript += f"\n{active_speaker}: "

        # Punctuation replaces the trailing whitespace left by the previous item.
        prefix = transcript.rstrip() if entry['type'] == 'punctuation' else transcript
        transcript = prefix + f"{token} "

    return transcript
        

def bedrock_summarisation(transcript):
    """Render the prompt template for ``transcript`` and ask Bedrock for a summary.

    The template is read from ``prompt_template.txt`` in the current working
    directory and rendered with the transcript and a fixed list of topics.

    Args:
        transcript: Conversation text inserted into the prompt.

    Returns:
        The ``text`` of the first content element of the model response.
        If the model invocation raises, the error is printed and a dict with
        ``statusCode`` 500 and a JSON-encoded ``body`` is returned instead.
    """
    with open('prompt_template.txt', "r") as template_file:
        raw_template = template_file.read()

    prompt = Template(raw_template).render({
        'transcript': transcript,
        'topics': ['charges', 'location', 'availability'],
    })

    print(prompt)

    # Messages-style request with a single user turn carrying the rendered prompt.
    user_message = {
        "role": "user",
        "content": [{"type": "text", "text": prompt}],
    }
    request_payload = {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": 1024,
        "messages": [user_message],
    }

    try:
        # Invoke Claude 3 with the text prompt
        response = bedrock_runtime.invoke_model(
            modelId="anthropic.claude-3-sonnet-20240229-v1:0",
            body=json.dumps(request_payload),
        )
    except Exception as e:
        print(f"Error occurred: {e}")
        return {
            'statusCode': 500,
            'body': json.dumps(f"Error occurred: {e}")
        }

    response_payload = json.loads(response.get("body").read())
    return response_payload.get("content", [])[0]["text"]


Overwriting lambda_function_summarisation.py


In [27]:
%%writefile lambda_function_transcript.py

#############################################################
#
# This Lambda function is written to a file by the notebook 
# It does not run in the notebook!
#
#############################################################

import json
import boto3
import uuid
import os

# Module-level clients are created once per Lambda execution environment.
# NOTE(review): s3_client is not used anywhere in the visible part of this module.
s3_client = boto3.client('s3')
transcribe_client = boto3.client('transcribe', region_name='us-east-1')

def lambda_handler(event, context):
    """Start a transcription job for ``dialog.wav`` when it is uploaded.

    Args:
        event: S3 event notification; only ``event['Records'][0]`` is read.
        context: Lambda context object (unused).

    Returns:
        ``None`` when the uploaded key is not ``dialog.wav``; otherwise a dict
        with ``statusCode`` (200 when the job was submitted, 500 on any
        error) and a JSON-encoded ``body`` message.
    """
    # Extract the bucket name and key from the incoming event
    s3_record = event['Records'][0]['s3']
    bucket = s3_record['bucket']['name']
    key = s3_record['object']['key']

    # One of a few different checks to ensure we don't end up in a recursive loop.
    if key != "dialog.wav":
        print("This demo only works with dialog.wav.")
        return

    try:
        # Job names need to be unique, hence the random UUID suffix.
        job_name = f"transcription-job-{uuid.uuid4()}"

        # NOTE(review): the prompt template states the conversation is in
        # French while the job is submitted as 'en-US' — confirm the language.
        transcribe_client.start_transcription_job(
            TranscriptionJobName=job_name,
            Media={'MediaFileUri': f's3://{bucket}/{key}'},
            MediaFormat='wav',
            LanguageCode='en-US',
            # The transcript is written to the bucket named by this env var.
            OutputBucketName=os.environ['S3BUCKETNAMETEXT'],
            OutputKey=f'{job_name}-transcript.json',
            Settings={'ShowSpeakerLabels': True, 'MaxSpeakerLabels': 2},
        )
    except Exception as e:
        print(f"Error occurred: {e}")
        return {
            'statusCode': 500,
            'body': json.dumps(f"Error occurred: {e}")
        }

    return {
        'statusCode': 200,
        'body': json.dumps(f"Submitted transcription job for {key} from bucket {bucket}.")
    }

Writing lambda_function_transcript.py


In [28]:
# Deploy the summarisation handler; the prompt template is listed alongside the module
# because the handler opens 'prompt_template.txt' at run time.
lambda_helper.deploy_function(
    ["lambda_function_summarisation.py", "prompt_template.txt"],
    function_name="LambdaFunctionSummarize", module_name="lambda_function_summarisation"
)

Zipping function...
Looking for existing function...
Function LambdaFunctionSummarize exists. Updating code...
Function LambdaFunctionSummarize code updated: 2024-04-09T15:48:08.000+0000
Done.


In [29]:
# Presumably restricts the trigger to object keys ending in "json" — confirm against Lambda_Helper.
lambda_helper.filter_rules_suffix = "json"
# Name the target function explicitly rather than relying on whichever function the helper
# deployed last, so this cell stays correct when cells are re-run out of order.
lambda_helper.add_lambda_trigger(bucket_name_text, function_name="LambdaFunctionSummarize")

Using function name of deployed function: LambdaFunctionSummarize
Removed existing permission: s3-trigger-permission
Permission added with Statement: {
    "Sid": "s3-trigger-permission",
    "Effect": "Allow",
    "Principal": {
        "Service": "s3.amazonaws.com"
    },
    "Action": "lambda:InvokeFunction",
    "Resource": "arn:aws:lambda:us-east-1:284474675936:function:LambdaFunctionSummarize",
    "Condition": {
        "ArnLike": {
            "AWS:SourceArn": "arn:aws:s3:::aaagius-6723reufiwdsyv"
        }
    }
}
Trigger added for aaagius-6723reufiwdsyv -> LambdaFunctionSummarize


### Deploy your lambda function

In [30]:
# The transcription handler reads S3BUCKETNAMETEXT to decide where the transcript is written.
lambda_helper.lambda_environ_variables = {'S3BUCKETNAMETEXT': bucket_name_text}
lambda_helper.deploy_function(
    ["lambda_function_transcript.py"],
    function_name="LambdaFunctionTranscribe",
    module_name="lambda_function_transcript",
)

Zipping function...
Looking for existing function...
Function LambdaFunctionTranscribe does not exist. Creating...
Function LambdaFunctionTranscribe created: arn:aws:lambda:us-east-1:284474675936:function:LambdaFunctionTranscribe
Done.


In [31]:
# Presumably restricts the trigger to object keys ending in "wav" — confirm against Lambda_Helper.
lambda_helper.filter_rules_suffix = "wav"
# Uploads to the audio bucket invoke the transcription function.
lambda_helper.add_lambda_trigger(bucket_name_audio, function_name="LambdaFunctionTranscribe")

Permission added with Statement: {
    "Sid": "s3-trigger-permission",
    "Effect": "Allow",
    "Principal": {
        "Service": "s3.amazonaws.com"
    },
    "Action": "lambda:InvokeFunction",
    "Resource": "arn:aws:lambda:us-east-1:284474675936:function:LambdaFunctionTranscribe",
    "Condition": {
        "ArnLike": {
            "AWS:SourceArn": "arn:aws:s3:::aaagius-audio-6723reufiwdsyv"
        }
    }
}
Trigger added for aaagius-audio-6723reufiwdsyv -> LambdaFunctionTranscribe


In [32]:
# Uploading dialog.wav to the audio bucket is what starts the pipeline (transcribe, then summarise).
s3_helper.upload_file(bucket_name_audio, 'dialog.wav')

Object 'dialog.wav' uploaded to bucket 'aaagius-audio-6723reufiwdsyv'


In [33]:
# Confirm that the audio file has arrived in the audio bucket.
s3_helper.list_objects(bucket_name_audio)

Object: dialog.wav, Created on: 2024-04-09 15:48:27+00:00


In [34]:
# The transcription job runs asynchronously, so the text bucket may still be empty right after
# the upload; re-run this cell until a *-transcript.json object and results.txt are listed.
s3_helper.list_objects(bucket_name_text)

No objects found in the bucket: aaagius-6723reufiwdsyv


In [37]:
# results.txt is written by LambdaFunctionSummarize after it has processed the transcript.
# Presumably this fails if run before the pipeline has finished — wait and re-run if so.
s3_helper.download_object(bucket_name_text, 'results.txt')

Object 'results.txt' from bucket 'aaagius-6723reufiwdsyv' to './results.txt'


In [38]:
# Show the downloaded summary.
display_helper.text_file('results.txt')

results.txt:
