In [None]:
!pip install -r requirements.txt

In [None]:
import os
import sagemaker
import requests as req
from pathlib import Path 
from sagemaker.s3 import S3Uploader
from sagemaker.session import Session
from huggingface_hub import snapshot_download

In [None]:
# Global constants: print the contents of globals.py (syntax-highlighted by
# pygmentize) so the values used by this notebook are visible in its output.
!pygmentize globals.py

In [None]:
# Execute globals.py in this notebook's own namespace (-i) so the names it
# defines become available to the cells below.
# NOTE(review): HF_MODEL_ID, BUCKET_NAME, S3_MODEL_URI, MAX_SLIDES and
# SLIDE_IMAGE_URL_TEMPLATE are used later but never assigned in this notebook;
# presumably they are defined in globals.py — confirm.
%run -i globals.py

In [None]:
# Local download directory, named after the last "/"-separated component of
# the Hugging Face model id.
model_dir = Path(HF_MODEL_ID.rsplit("/", 1)[-1])
model_dir

In [None]:
# Absolute path (inside the current working directory) of the archive that
# will hold the packaged model files.
archive_name = f"model_{model_dir}.tar.gz"
model_tar_gz_path = os.path.join(os.getcwd(), archive_name)
model_tar_gz_path

In [None]:
%%time
model_dir.mkdir(exist_ok=True)
# Download model from Hugging Face into model_dir
snapshot_download(HF_MODEL_ID, local_dir=str(model_dir), local_dir_use_symlinks=False)

In [None]:
%%time
# Create SageMaker model.tar.gz artifact
!cd {model_dir.name};tar -cf {model_tar_gz_path} --use-compress-program=pigz *;cd -

In [None]:
%%time
# upload model.tar.gz to s3
# default_bucket = Session().default_bucket()
s3_model_uri = S3Uploader.upload(local_path=model_tar_gz_path, desired_s3_uri=f"s3://{BUCKET_NAME}/{os.path.basename(HF_MODEL_ID)}")

print(f"model uploaded to: {s3_model_uri}")

In [None]:
%%time
from sagemaker.huggingface.model import HuggingFaceModel
from sagemaker import get_execution_role

role = get_execution_role()
print(role)

hub = {
  'HF_TASK':'question-answering'
}
# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
   model_data=S3_MODEL_URI,      # path to your model and script
   role=role,                    # iam role with permissions to create an Endpoint
   transformers_version="4.28.1",  # transformers version used
   pytorch_version="2.0.0",       # pytorch version used
   py_version='py310',            # python version used
   model_server_workers=1,
   env=hub
)

# deploy the endpoint endpoint
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.xlarge",
    # container_startup_health_check_timeout=600, # increase timeout for large models
    # model_data_download_timeout=600, # increase timeout for large models
)
print(predictor)

In [None]:
# Persist the endpoint name to ./endpointname.txt (overwriting any previous
# content) so it can be read back from the file later.
# The trailing semicolon keeps the character count returned by write_text
# out of the cell output.
Path("./endpointname.txt").write_text(predictor.endpoint_name);

In [None]:
# Ask the deployed endpoint to describe each slide image in turn, stopping at
# the first slide whose URL does not answer a HEAD request with HTTP 200.
# `req` (requests) is imported in the notebook's import cell.

# Upper bound on the reachability check so an unresponsive image host raises a
# requests timeout error instead of blocking the notebook indefinitely.
HEAD_TIMEOUT_SECONDS = 10

# Slide numbers substituted into the URL template are 1-based.
for slide_number in range(1, MAX_SLIDES + 1):
    image = SLIDE_IMAGE_URL_TEMPLATE.format(slide_number)
    print(f"going to analyze image \"{image}\"")

    # Cheap existence check before invoking the endpoint; any non-200 status
    # ends the loop.
    status_code = req.head(image, timeout=HEAD_TIMEOUT_SECONDS).status_code
    if status_code != 200:
        print(status_code)
        break

    # Request payload: the image URL plus the prompt and sampling temperature.
    data = {
        "image" : image,
        "question" : "Describe this image in detail including all facts and figures in your description. Do not make up any information, only include information present in the image",
        # Alternative targeted question:
        #"question": "What is the net sales for Q3?",
        # Optional extra sentence for the prompt:
        #The image could contain charts, text, footnotes, captions, data sources etc. include everything in your description.",
        # Optional payload keys, currently disabled:
        # "max_new_tokens" : 1024,
        "temperature" : 0.1,
        # "stop_str" : "###"
    }

    # Invoke the endpoint and show its answer for this slide.
    output = predictor.predict(data)
    print(f"Image=\"{image}\"\nDescription: {output}\n\n")
    