# AWS NLP

[![Index](https://img.shields.io/badge/Index-blue)](../index.ipynb)
[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/digillia/Digillia-Colab/blob/main/tools/aws_nlp.ipynb)


Docs:
- https://aws.amazon.com/ai/services/

In [17]:
import os
import sys

# Supprimer les commentaires pour installer (requirements.txt)

# À installer dans tous les cas pour Google Colab et Github
if ('google.colab' in sys.modules) or ('CI' in os.environ):
    !pip3 install -q -U boto3

In [18]:
if 'google.colab' in sys.modules:
    from google.colab import userdata
    os.environ['AWS_ACCESS_KEY_ID'] = userdata.get('AWS_ACCESS_KEY_ID')
    os.environ['AWS_SECRET_ACCESS_KEY'] = userdata.get('AWS_SECRET_ACCESS_KEY')
    os.environ['AWS_DEFAULT_REGION'] = userdata.get('AWS_DEFAULT_REGION')

In [19]:
# Les variables python sont accessibles depuis les commandes shell
work_directory = './aws_nlp'

!mkdir -p $work_directory

In [33]:
import boto3
import json
from botocore.exceptions import ClientError

## Bedrock

In [34]:
bedrock_client = boto3.client('bedrock')
def list_available_models():
    try:
        response = bedrock_client.list_foundation_models()
        for model in response['modelSummaries']:
            print(f"Model ID: {model['modelId']}")
    except ClientError as error:
        print(f"Error listing models: {error}")

list_available_models()

Model ID: amazon.titan-tg1-large
Model ID: amazon.titan-image-generator-v1:0
Model ID: amazon.titan-image-generator-v1
Model ID: amazon.titan-image-generator-v2:0
Model ID: amazon.titan-text-premier-v1:0
Model ID: amazon.nova-pro-v1:0:300k
Model ID: amazon.nova-pro-v1:0
Model ID: amazon.nova-lite-v1:0:300k
Model ID: amazon.nova-lite-v1:0
Model ID: amazon.nova-canvas-v1:0
Model ID: amazon.nova-reel-v1:0
Model ID: amazon.nova-micro-v1:0:128k
Model ID: amazon.nova-micro-v1:0
Model ID: amazon.titan-embed-g1-text-02
Model ID: amazon.titan-text-lite-v1:0:4k
Model ID: amazon.titan-text-lite-v1
Model ID: amazon.titan-text-express-v1:0:8k
Model ID: amazon.titan-text-express-v1
Model ID: amazon.titan-embed-text-v1:2:8k
Model ID: amazon.titan-embed-text-v1
Model ID: amazon.titan-embed-text-v2:0:8k
Model ID: amazon.titan-embed-text-v2:0
Model ID: amazon.titan-embed-image-v1:0
Model ID: amazon.titan-embed-image-v1
Model ID: stability.stable-diffusion-xl-v1:0
Model ID: stability.stable-diffusion-xl-

In [1]:
model_id='amazon.nova-micro-v1:0'

# Grant access to the model
#!aws configure add-model --service-model file://bedrock-2023-04-20.normal.json
!aws bedrock grant-model-access --model-ids $model_id --principal-arn arn:aws:iam::$(aws sts get-caller-identity --query Account --output text):root


usage: aws [options] <command> <subcommand> [<subcommand> ...] [parameters]
To see help text, you can run:

  aws help
  aws <command> help
  aws <command> <subcommand> help

aws: error: argument operation: Invalid choice, valid choices are:

batch-delete-evaluation-job              | create-evaluation-job                   
create-guardrail                         | create-guardrail-version                
create-inference-profile                 | create-marketplace-model-endpoint       
create-model-copy-job                    | create-model-customization-job          
create-model-import-job                  | create-model-invocation-job             
create-provisioned-model-throughput      | delete-custom-model                     
delete-guardrail                         | delete-imported-model                   
delete-inference-profile                 | delete-marketplace-model-endpoint       
delete-model-invocation-logging-configuration | delete-provisioned-model-throughput 

In [None]:
bedrock_runtime = boto3.client('bedrock-runtime')

def generate_text(prompt):
    # Model ID for Amazon Titan Text Express
    model_id = "amazon.titan-text-express-v1"
    
    try:
        # Format the request payload for Titan model
        request_body = {
            "inputText": prompt,
            "textGenerationConfig": {
                "maxTokenCount": 512,
                "temperature": 0.7,
                "topP": 0.9,
            }
        }
        
        # Convert dictionary to JSON string
        body = json.dumps(request_body)
        
        # Call the model
        response = bedrock_runtime.invoke_model(
            modelId=model_id,
            body=body
        )
        
        # Parse the response
        response_body = json.loads(response.get('body').read())
        
        # Extract the generated text
        return response_body['results'][0]['outputText']
    
    except ClientError as error:
        print(f"Error generating text: {error}")
        return None

prompt = "Write a short poem about artificial intelligence."
result = generate_text(prompt)
if result:
    print("Generated text:")
    print(result)

Error generating text: An error occurred (AccessDeniedException) when calling the InvokeModel operation: You don't have access to the model with the specified model ID.


In [29]:
# Create an Amazon Bedrock client
bedrock = boto3.client('bedrock')
model_id = 'amazon.titan-1.0'
input_text = "Once upon a time in a faraway kingdom, there was a dragon."

# Prepare request body as JSON string
request_body = json.dumps({
    "inputText": input_text
})

# Create an async invocation job
response = bedrock.create_model_invocation_job(
    modelId=model_id,
    body=request_body,
    contentType='application/json'
)

# Get the job ID
job_id = response['jobId']

# Wait for job completion
waiter = bedrock.get_waiter('model_invocation_job_completed')
waiter.wait(jobId=job_id)

# Get the results
result = bedrock.get_model_invocation_job(jobId=job_id)
response_body = json.loads(result['responseBody'])
print("Model response:")
print(response_body['results'][0]['outputText'])


ParamValidationError: Parameter validation failed:
Missing required parameter in input: "jobName"
Missing required parameter in input: "roleArn"
Missing required parameter in input: "inputDataConfig"
Missing required parameter in input: "outputDataConfig"
Unknown parameter in input: "body", must be one of: jobName, roleArn, clientRequestToken, modelId, inputDataConfig, outputDataConfig, vpcConfig, timeoutDurationInHours, tags
Unknown parameter in input: "contentType", must be one of: jobName, roleArn, clientRequestToken, modelId, inputDataConfig, outputDataConfig, vpcConfig, timeoutDurationInHours, tags

## Comprehend

In [23]:
comprehend_client = boto3.client('comprehend')
text = 'J\'aime aller au cinéma.' 
sentiment = comprehend_client.detect_sentiment(Text=text, LanguageCode='fr')
print(sentiment)

{'Sentiment': 'POSITIVE', 'SentimentScore': {'Positive': 0.99946528673172, 'Negative': 3.780597762670368e-05, 'Neutral': 0.000492282269988209, 'Mixed': 4.617959802999394e-06}, 'ResponseMetadata': {'RequestId': '1c2f0875-4324-4a7a-a855-0ca8680acde5', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '1c2f0875-4324-4a7a-a855-0ca8680acde5', 'content-type': 'application/x-amz-json-1.1', 'content-length': '162', 'date': 'Fri, 13 Dec 2024 20:15:28 GMT'}, 'RetryAttempts': 0}}


## Polly

In [24]:
from IPython.display import Audio

polly_client = boto3.client('polly')
text = 'J\'aime aller au cinéma.'
# Check voices at https://docs.aws.amazon.com/polly/latest/dg/available-voices.html
result = polly_client.synthesize_speech(Text=text, OutputFormat='mp3', VoiceId='Lea', LanguageCode='fr-FR')
audio = result['AudioStream'].read()
with open(f'{work_directory}/audio.mp3', 'wb') as file:
    file.write(audio)
Audio(filename=f'{work_directory}/audio.mp3', autoplay=True)


## Transcribe

In [None]:
import boto3
import time
import os

# Initialize clients
s3_client = boto3.client('s3')
transcribe_client = boto3.client('transcribe')

# File details
local_file_path = "C:/Users/YourUsername/Desktop/audio.mp3"  # Update with the path to your MP3 file
bucket_name = "your-s3-bucket-name"  # Replace with your S3 bucket name
s3_object_name = "audio/audio.mp3"  # Path and name for the file in S3
transcription_job_name = "my-transcription-job"  # Unique name for your transcription job
language_code = 'fr-FR'

# Chargement du fichier dans s3
def upload_file_to_s3(local_path, bucket, s3_path):
    try:
        s3_client.upload_file(local_path, bucket, s3_path)
        print(f"File uploaded to S3: s3://{bucket}/{s3_path}")
        return f"s3://{bucket}/{s3_path}"
    except Exception as e:
        print(f"Error uploading file: {e}")
        return None

s3_uri = upload_file_to_s3(local_file_path, bucket_name, s3_object_name)
if not s3_uri:
    exit()

# Step 2: Start transcription job
def start_transcription(s3_uri, job_name, language):
    try:
        transcribe.start_transcription_job(
            TranscriptionJobName=job_name,
            Media={'MediaFileUri': s3_uri},
            MediaFormat='mp3',  # Update if your file has a different format
            LanguageCode=language,
            OutputBucketName=bucket_name  # Optional: store the result in S3
        )
        print(f"Transcription job '{job_name}' started.")
    except Exception as e:
        print(f"Error starting transcription job: {e}")
        return False
    return True

if not start_transcription(s3_uri, transcription_job_name, language_code):
    exit()

# Step 3: Wait for transcription job to complete
def wait_for_transcription(job_name):
    while True:
        status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
        job_status = status['TranscriptionJob']['TranscriptionJobStatus']
        if job_status in ['COMPLETED', 'FAILED']:
            print(f"Transcription job status: {job_status}")
            if job_status == 'COMPLETED':
                return status['TranscriptionJob']['Transcript']['TranscriptFileUri']
            else:
                print(f"Transcription failed: {status}")
                return None
        print("Waiting for transcription to complete...")
        time.sleep(10)

transcript_uri = wait_for_transcription(transcription_job_name)

# Step 4: Retrieve transcription result
if transcript_uri:
    print(f"Transcription completed. Transcript URL: {transcript_uri}")
    # Optional: Download and display the transcription
    import requests
    response = requests.get(transcript_uri)
    if response.status_code == 200:
        transcript = response.json()['results']['transcripts'][0]['transcript']
        print(f"Transcript: {transcript}")
    else:
        print(f"Failed to retrieve transcript from {transcript_uri}")
else:
    print("Transcription failed or job did not complete.")



## Translate

## Textract

In [None]:
# Ménage
!rm -rf $work_directory