In [None]:
# !pip3 install -q pymupdf

---
# Inference Deployment 

In [1]:
import boto3
from sagemaker.huggingface.model import HuggingFaceModel
import sagemaker
import json
from pathlib import Path
import os

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [2]:
!rm  sources.tar.gz 
!tar -czvf sources.tar.gz ./inference.py ./requirements.txt

./inference.py
./requirements.txt


In [3]:
import base64
import io 
from PIL import Image

In [4]:
# img= Image.open("./data/input/google-sec-1.png")
# base64_str = base64.b64encode(img.tobytes()).decode('utf-8')
# Image.open(io.BytesIO(base64.decodebytes(bytes(base64_str, "utf-8"))))

In [5]:
s3 = boto3.client(
    's3'
)

In [6]:
project = "table-extraction"
bucket_name = 'cc-ai-bucket'
model_file = 'microsoft--table-transformer-detection.tar.gz'  

script_file = 'inference.py'
source_file = 'sources.tar.gz'

# Upload model and script
s3.upload_file(model_file, bucket_name, f'models/{project}/models/model.tar.gz')
s3.upload_file(script_file, bucket_name, f'models/{project}/scripts/inference.py')
s3.upload_file(source_file, bucket_name, f'models/{project}/scripts/sources.tar.gz')

# Sagemaker Role 
Needs SagemakerFullAccess policy to deploy get access to EC2 instances and deploy models

In [7]:
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

print(f"sagemaker role arn: {role}")

sagemaker role arn: arn:aws:iam::767397973834:role/service-role/AmazonSageMaker-ExecutionRole-20240809T105831


In [8]:
sagemaker_session = sagemaker.Session()

# Specify Artifact Location
Location where model and source artifacts are uploaded 

In [10]:
bucket_name = f"cc-ai-bucket/models/{project}"
model_artifact = f's3://{bucket_name}/models/model.tar.gz'
script_location = f's3://{bucket_name}/scripts/'
source_dir = script_location + 'sources.tar.gz',

# Deploy Hugging Face Model

In [11]:
hub= {
  'HF_MODEL_ID': 'microsoft/table-transformer-detection',  
  'HF_TASK': 'object-detection', 
}

In [12]:
# Initialize the Hugging Face model
huggingface_model = HuggingFaceModel(
    model_data=model_artifact,
    role=role,
    source_dir=f"s3://cc-ai-bucket/models/{project}/scripts/sources.tar.gz",
    entry_point="inference.py",  # Path to your custom inference script
    transformers_version="4.37.0",
    pytorch_version="2.1.0",
    py_version="py310",
    sagemaker_session=sagemaker_session, 
)

In [14]:


# Deploy the model
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.xlarge" # Can be changed g4 or g5 gpus 
)

------------!

In [15]:
print(predictor.endpoint_name)

huggingface-pytorch-inference-2024-09-27-06-45-57-996


# Test Inference from Client 

In [18]:
endpoint_name = predictor.endpoint_name

In [19]:
print(endpoint_name)

huggingface-pytorch-inference-2024-09-27-06-45-57-996


In [20]:
sagemaker_client = boto3.client('sagemaker')  # Correct client

In [21]:
endpoint_description = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
endpoint_config_name = endpoint_description['EndpointConfigName']

In [26]:
endpoint_config_description = sagemaker_client.describe_endpoint_config(
    EndpointConfigName=endpoint_config_name
)
production_variants = endpoint_config_description['ProductionVariants']
for variant in production_variants:
    model_name = variant['ModelName']
    print(f"Model Name: {model_name}")
    

Model Name: huggingface-pytorch-inference-2024-09-27-06-45-57-313


In [29]:

# print(f"Endpoint Config Name: {endpoint_config_name}")
# endpoint_config_description = sagemaker_client.describe_endpoint_config(
#     EndpointConfigName=endpoint_config_name
# )
# production_variants = endpoint_config_description['ProductionVariants']
# for variant in production_variants:
#     model_name = variant['ModelName']
#     print(f"Model Name: {model_name}")
    
    
production_variant = {
    'VariantName': 'AllTraffic',
    'InstanceType': 'ml.g5.xlarge',  # Choose the instance type
    'ModelName': model_name,
    'InitialInstanceCount': 1,
    'InitialVariantWeight': 1,
    'ContainerStartupHealthCheckTimeoutInSeconds': 300,  # Increase the startup timeout to 300 seconds (5 minutes)
    'ModelDataDownloadTimeoutInSeconds': 600  # Increase model data download timeout to 600 seconds (10 minutes)
}

# # Create the endpoint configuration without specifying the 'AcceleratorType'
sagemaker_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name + "-2",
    ProductionVariants=[production_variant]
)

{'EndpointConfigArn': 'arn:aws:sagemaker:us-east-1:767397973834:endpoint-config/huggingface-pytorch-inference-2024-09-27-06-45-57-996-2',
 'ResponseMetadata': {'RequestId': 'fd908a6b-503e-4555-a552-591f15958d62',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'fd908a6b-503e-4555-a552-591f15958d62',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '136',
   'date': 'Fri, 27 Sep 2024 07:24:37 GMT'},
  'RetryAttempts': 0}}

In [30]:
sagemaker_client.update_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name + "-2",
)

{'EndpointArn': 'arn:aws:sagemaker:us-east-1:767397973834:endpoint/huggingface-pytorch-inference-2024-09-27-06-45-57-996',
 'ResponseMetadata': {'RequestId': '9d417c50-a6ca-4446-9efa-71a3a01994c4',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '9d417c50-a6ca-4446-9efa-71a3a01994c4',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '121',
   'date': 'Fri, 27 Sep 2024 07:24:45 GMT'},
  'RetryAttempts': 0}}

# Test Inference

In [None]:
project = "table-extraction"
bucket_name = 'cc-ai-bucket'

In [None]:
pdf_file_path = './data/input/pdfs/01-Polaris-Credit-Agreement-Executed48024639.2.pdf'

In [None]:
s3.upload_file(pdf_file_path, bucket_name, f'data/{project}/{Path(pdf_file_path).name}')

In [None]:
s3_file_path = f"{bucket_name}/data/{project}/"
s3_file_path

In [None]:
# s3_file = s3.download_file("cc-ai-bucket", 
#                            "data/table-extraction/01-Polaris-Credit-Agreement-Executed48024639.2.pdf", 
#                            "test.pdf")

In [34]:

# Prepare the payload
payload = {
    "inputs": {'data': f"{bucket_name}/data/{project}/01-Polaris-Credit-Agreement-Executed48024639.2.pdf"}
}


In [35]:
# Make a prediction
response = predictor.predict(payload, {'ContentType': 'application/json'})
print(response)

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received client error (400) from primary with message "{
  "code": 400,
  "type": "InternalServerException",
  "message": "\u0027data\u0027"
}
". See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/huggingface-pytorch-inference-2024-09-27-06-45-57-996 in account 767397973834 for more information.

In [None]:
with open('./data/input/pdfs/01-Polaris-Credit-Agreement-Executed48024639.2.pdf', 'rb') as f:
    pdf_data = f.read()
    encoded_pdf = base64.b64encode(pdf_data).decode('utf-8')


In [None]:
pdf_data_base64 = payload['pdf_data']

    # Decode the base64-encoded string back to binary PDF data
pdf_data = base64.b64decode(pdf_data_base64)

In [None]:
import pandas as pd

In [None]:
df = pd.read_json("./data/cleaned_tags_data_na_tag_lt3.json", orient='records')

In [None]:
sample_df = df[df.tags != "NA"].sort_values(by="word_count")
sample_df.head()

In [None]:
test_data = {
    "inputs": sample_df.sort_values(by="word_count", ascending=False).iloc[0].section_content
}
print(test_data)

In [None]:
import time 

In [None]:
start_time = time.time()
for t in sample_df.sample(20).itertuples(): 
    test_data = {
        "inputs": t.section_content
    }
    result = predictor.predict(json.dumps(test_data))
    result = json.loads(result)
    print(result[0], t.tags, result[0] == t.tags)
print("Time taken ", time.time() - start_time)

In [None]:
7.87/20

# Test Inference from Client 

In [136]:
sagemaker_client = boto3.client('sagemaker')  # Correct client
endpoint_description = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
endpoint_config_name = endpoint_description['EndpointConfigName']
print(f"Endpoint Config Name: {endpoint_config_name}")
endpoint_config_description = sagemaker_client.describe_endpoint_config(
    EndpointConfigName=endpoint_config_name
)
production_variants = endpoint_config_description['ProductionVariants']
for variant in production_variants:
    model_name = variant['ModelName']
    print(f"Model Name: {model_name}")
    
    
production_variant = {
    'VariantName': 'AllTraffic',
    'ModelName': model_name,
    'InstanceType': 'ml.m5.xlarge',  # Choose the instance type
    'InitialInstanceCount': 1,
    'InitialVariantWeight': 1,
    'ContainerStartupHealthCheckTimeoutInSeconds': 300,  # Increase the startup timeout to 300 seconds (5 minutes)
    'ModelDataDownloadTimeoutInSeconds': 600  # Increase model data download timeout to 600 seconds (10 minutes)
}

# Create the endpoint configuration without specifying the 'AcceleratorType'
sagemaker_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name + "-2",
    ProductionVariants=[production_variant]
)

# Deploy the endpoint
sagemaker_client.update_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name + "-2",
)

In [None]:
runtime_client = boto3.client('sagemaker-runtime')
endpoint_name = "huggingface-pytorch-inference-2024-09-27-05-14-15-397"


In [132]:
# production_variant = {
#     'VariantName': 'AllTraffic',
#     'ModelName': model_name,
#     'InstanceType': 'ml.m5.xlarge',  # Choose the instance type
#     'InitialInstanceCount': 1,
#     'InitialVariantWeight': 1,
#     'ContainerStartupHealthCheckTimeoutInSeconds': 300,  # Increase the startup timeout to 300 seconds (5 minutes)
#     'ModelDataDownloadTimeoutInSeconds': 600  # Increase model data download timeout to 600 seconds (10 minutes)
# }

# # Create the endpoint configuration without specifying the 'AcceleratorType'
# sagemaker_client.create_endpoint_config(
#     EndpointConfigName=endpoint_config_name + "-2",
#     ProductionVariants=[production_variant]
# )

# # Deploy the endpoint
# sagemaker_client.update_endpoint(
#     EndpointName=endpoint_name,
#     EndpointConfigName=endpoint_config_name + "-2",
# )

{'EndpointArn': 'arn:aws:sagemaker:us-east-1:767397973834:endpoint/huggingface-pytorch-inference-2024-09-27-05-14-15-397',
 'ResponseMetadata': {'RequestId': 'a86415ed-b96d-44d2-a7c6-6b1e6e988aa9',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'a86415ed-b96d-44d2-a7c6-6b1e6e988aa9',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '121',
   'date': 'Fri, 27 Sep 2024 05:57:59 GMT'},
  'RetryAttempts': 0}}

In [134]:

# Initialize the SageMaker runtime client
runtime_client = boto3.client('sagemaker-runtime')

# Define the endpoint name
endpoint_name = "huggingface-pytorch-inference-2024-09-27-05-14-15-397"

# Convert input data to JSON string
payload = json.dumps(test_data)
# Invoke the endpoint
response = runtime_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType='application/json',
    Body=payload
)
# Parse the response
result = json.loads(response['Body'].read().decode())
print(f"Prediction: {result}")

NameError: name 'test_data' is not defined

In [135]:
print("Deleting Model")
predictor.delete_model()
print("Deleting Endpoint")
predictor.delete_endpoint()


Deleting Model
Deleting Endpoint


ClientError: An error occurred (ValidationException) when calling the DeleteEndpoint operation: Cannot update in-progress endpoint "arn:aws:sagemaker:us-east-1:767397973834:endpoint/huggingface-pytorch-inference-2024-09-27-05-14-15-397".