In [1]:
!pip install jinja2 pdfplumber boto3==1.34.131


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import sys
import os

# Directory, relative to the notebook's working directory, that is added to
# the module search path.
new_package_root = "../"

# Place its absolute form at the very front of sys.path so imports are
# resolved against it before any other location.
sys.path[:0] = [os.path.abspath(new_package_root)]

In [3]:
from jinja2 import Environment, FileSystemLoader
import pdfplumber
from src.utils import *

# Import boto3 explicitly instead of relying on `from src.utils import *`
# to leak the name into the notebook namespace.
import boto3

# Bedrock runtime client used for the model calls further down. No explicit
# credentials or region are passed, so boto3's default configuration applies.
bedrock_client = boto3.client(service_name='bedrock-runtime')

INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials


In [4]:
# Root directory that template paths are resolved against.
template_dir = '..'

# Jinja environment that reads templates from the filesystem.
env = Environment(loader=FileSystemLoader(template_dir))

# Load the three prompt templates (system, user-pre, user-post) in one pass.
system_prompt_template, user_prompt_pre_template, user_prompt_post_template = [
    env.get_template(template_path)
    for template_path in (
        'template/system_prompt_template.jinja',
        'template/user_prompt_pre_template.jinja',
        'template/user_prompt_post_template.jinja',
    )
]

In [5]:
# Path of the PDF to process. The same string is later passed to the user
# prompt template as FILENAME.
document = "../data/User_Manual_W11_Acer_1.0_A_A_SHORT.pdf"
# Open the PDF with pdfplumber; `pdf_obj.pages` is what the following cell
# iterates over.
# NOTE(review): no matching close() is visible in this notebook — confirm the
# handle is released once processing is finished.
pdf_obj = pdfplumber.open(document)

In [6]:
# --- Loop-invariant settings, built once instead of on every page ---------

# Number of leading pages to send to the model.
MAX_PAGES = 3

# Bedrock model identifier used for every request.
model_id = "anthropic.claude-3-5-sonnet-20240620-v1:0"

# Image file handed to generate_conversation for each page.
input_image = './output_image.png'

# Both templates are rendered without variables, so their text does not
# depend on the page being processed.
system_text = system_prompt_template.render()
input_text_post = user_prompt_post_template.render()

for idx, page in enumerate(pdf_obj.pages[:MAX_PAGES]):

    # Extract the page text; layout=True asks pdfplumber to approximate the
    # on-page layout in the returned string.
    PDF_TEXT = page.extract_text(
        layout=True,
    )

    # NOTE(review): idx is 0-based here. If the template is meant to show a
    # human page number, pdfplumber's 1-based page.page_number may be the
    # intended value — confirm against the template.
    input_text_pre = user_prompt_pre_template.render(PDF_TEXT=PDF_TEXT, FILENAME=document, PAGE_NUMBER=idx)

    # NOTE(review): the return value is not used below; presumably the helper
    # also writes the page image to `input_image` — confirm in src.utils.
    suitable_image_size = find_suitable_image_size(page)

    response = generate_conversation(
        bedrock_client,
        model_id,
        system_text,
        input_text_pre,
        input_text_post,
        input_image
    )

    # Print the text of the first content item of the model's reply, followed
    # by a visual separator between pages.
    print(response['output']['message']['content'][0]['text'])
    print("\n\n ########################## \n\n")

<result>

# Acer 筆記型電腦導覽

P214-52 / P214-52G / P214-53 / P214-53G / P214-41 / P214-41-G2 / P214-41-G3

![Acer laptop diagram showing numbered components: 1. Microphone, 2. Webcam shutter, 5. Foldable hinge screen, 6. Webcam](image_description)

| #  | 項目             | 說明                                                                                     |
|----|------------------|------------------------------------------------------------------------------------------|
| 1  | 麥克風           | 內建數位麥克風，可用來錄音。                                                             |
| 2  | 網路攝影機快門   | 將快門往右邊滑動以蓋上網路攝影機。<br>往左邊滑動即可打開護蓋。                           |
| 3  | 網路攝影機       | 用於視訊通訊的網路攝影機，可偵測<br>用於臉部驗證的紅外線訊號。<br>網路攝影機旁的燈號顯示已啟用網路<br>攝影機。 |
| 4  | 紅外線 LED       | 發出用於網路攝影機和臉部驗證的紅<br>外線燈光。                                           |
| 5  | 平折轉軸螢幕     | 顯示電腦的輸出資訊。                                                                     |
| 6  | 網路攝影機       | 網路攝影機，用於進行視訊通訊。<br>網路攝影機旁的燈號顯示已啟用網路<br>攝影機。 

In [32]:
### SageMaker

In [72]:
%%writefile ../docker/Dockerfile

# Image that is built, tagged and pushed to ECR by the cells below.
FROM python:3.10-slim

# asyncio is part of the Python standard library; the PyPI package of the
# same name is an obsolete backport and is therefore not installed here.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir boto3==1.34.131 pdfplumber==0.11.2 Jinja2==3.1.4

# The interpreter is the entrypoint, so container arguments are passed to python3.
ENTRYPOINT ["python3"]

Overwriting ../docker/Dockerfile


In [73]:
import boto3

# AWS account that owns the ECR registry, taken from the caller's identity.
account_id = boto3.client('sts').get_caller_identity().get('Account')

# Default region of the current boto3 configuration.
region = boto3.Session().region_name
if region is None:
    # Without this guard the literal text "None" would end up inside the
    # registry host name and only fail later, at docker login/push time.
    raise RuntimeError(
        "No default AWS region is configured; set AWS_DEFAULT_REGION or run `aws configure`."
    )

ecr_repository = 'sagemaker-processing-async-custom-container'
# The tag keeps its leading colon because the shell cells concatenate
# `ecr_repository + tag` directly.
tag = ':latest'

# Full image reference: <account>.dkr.ecr.<region>.amazonaws.com/<repository>:<tag>
processing_repository_uri = f'{account_id}.dkr.ecr.{region}.amazonaws.com/{ecr_repository}{tag}'

In [21]:
#!whoami

In [3]:
# # SageMaker Studio Notebook
# !cd ../docker && docker build --network sagemaker -t {ecr_repository + tag} .
!cd ../docker && docker build -t {ecr_repository + tag} .

In [75]:
!aws ecr get-login-password --region {region} | docker login --username AWS --password-stdin {account_id}.dkr.ecr.{region}.amazonaws.com

Login Succeeded


In [1]:
!aws ecr create-repository --repository-name $ecr_repository

In [77]:
!docker tag {ecr_repository + tag} $processing_repository_uri

In [2]:
!docker push $processing_repository_uri

In [79]:
# # SageMaker Studio Notebook
# !docker run --network sagemaker --rm $processing_repository_uri
!docker run --rm $processing_repository_uri

In [None]:
## Processor/ScriptProcessor
### Processor: the script must first be packaged into the Docker image
### ScriptProcessor: a local script can be supplied as the program to run

In [80]:
import sagemaker
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput

# IAM role ARN that is passed to the ScriptProcessor as its `role`.
# `<accountId>` and `<roleName>` are placeholders to fill in before running.
role = "arn:aws:iam::<accountId>:role/service-role/<roleName>"

In [4]:
# S3 prefixes used by the job; `<s3BucketName>` is a placeholder to replace.
input_s3_path = "s3://<s3BucketName>/input"
script_s3_path = "s3://<s3BucketName>/script"
output_s3_path = "s3://<s3BucketName>/output"

# Processor that runs the entry script with python3 on a single instance.
# `<imageUri>` and `<jobName>` are placeholders as well.
processor = ScriptProcessor(
    role=role,
    image_uri="<imageUri>",
    command=['python3'],
    instance_type="ml.m5.xlarge",
    instance_count=1,
    base_job_name="<jobName>",
)

# Pair each S3 source prefix with the container directory it is delivered to.
job_inputs = [
    ProcessingInput(source=s3_source, destination=container_dir)
    for s3_source, container_dir in (
        (input_s3_path, '/opt/ml/processing/input'),
        (script_s3_path, '/opt/ml/processing/script'),
    )
]

# Container directory whose contents are sent to output_s3_path.
job_outputs = [
    ProcessingOutput(source='/opt/ml/processing/output', destination=output_s3_path),
]

# Submit the job with the local entry script.
processor.run(
    code="../src/start-async.py",
    inputs=job_inputs,
    outputs=job_outputs,
)