## 1.使用Sagemaker部署VisualGLM模型

In [1]:
!pip install huggingface-hub -Uq
!pip install -Uq sagemaker 

In [2]:
%pip install sagemaker pip --upgrade  --quiet

Note: you may need to restart the kernel to use updated packages.


In [3]:
from huggingface_hub import snapshot_download
from pathlib import Path

local_model_path = Path("./LLM_visualglm_stream_model")
local_model_path.mkdir(exist_ok=True)
model_name = "THUDM/visualglm-6b"
commit_hash = "f4f759acde0926fefcd35e2c626e08adb452eff8"

In [4]:
snapshot_download(repo_id=model_name, cache_dir=local_model_path)

Fetching 18 files:   0%|          | 0/18 [00:00<?, ?it/s]

'LLM_visualglm_stream_model/models--THUDM--visualglm-6b/snapshots/f4f759acde0926fefcd35e2c626e08adb452eff8'

### 1.1 把模型拷贝到S3为后续部署做准备

In [5]:
import sagemaker
from sagemaker import image_uris
import jinja2
import boto3
import os
import time
import json

role = sagemaker.get_execution_role()  # execution role for the endpoint
sess = sagemaker.session.Session()  # sagemaker session for interacting with different AWS APIs
bucket = sess.default_bucket()  # bucket to house artifacts

region = sess._region_name
account_id = sess.account_id()

s3_client = boto3.client("s3")
sm_client = boto3.client("sagemaker")
smr_client = boto3.client("sagemaker-runtime")

jinja_env = jinja2.Environment()

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [6]:
s3_model_prefix = "LLM_visualglm_model"  # folder where model checkpoint will go
model_snapshot_path = list(local_model_path.glob("**/snapshots/*"))[0]
s3_code_prefix = "LLM_visualglm_deploy_code"

print(f"s3_code_prefix: {s3_code_prefix}")
print(f"model_snapshot_path: {model_snapshot_path}")



s3_code_prefix: LLM_visualglm_deploy_code
model_snapshot_path: LLM_visualglm_stream_model/models--THUDM--visualglm-6b/snapshots/f4f759acde0926fefcd35e2c626e08adb452eff8


In [7]:
!aws s3 cp --recursive {model_snapshot_path} s3://{bucket}/{s3_model_prefix}

upload: LLM_visualglm_stream_model/models--THUDM--visualglm-6b/snapshots/f4f759acde0926fefcd35e2c626e08adb452eff8/MODEL_LICENSE to s3://sagemaker-us-west-2-691188012938/LLM_visualglm_model/MODEL_LICENSE
upload: LLM_visualglm_stream_model/models--THUDM--visualglm-6b/snapshots/f4f759acde0926fefcd35e2c626e08adb452eff8/config.json to s3://sagemaker-us-west-2-691188012938/LLM_visualglm_model/config.json
upload: LLM_visualglm_stream_model/models--THUDM--visualglm-6b/snapshots/f4f759acde0926fefcd35e2c626e08adb452eff8/README.md to s3://sagemaker-us-west-2-691188012938/LLM_visualglm_model/README.md
upload: LLM_visualglm_stream_model/models--THUDM--visualglm-6b/snapshots/f4f759acde0926fefcd35e2c626e08adb452eff8/configuration_chatglm.py to s3://sagemaker-us-west-2-691188012938/LLM_visualglm_model/configuration_chatglm.py
upload: LLM_visualglm_stream_model/models--THUDM--visualglm-6b/snapshots/f4f759acde0926fefcd35e2c626e08adb452eff8/LICENSE to s3://sagemaker-us-west-2-691188012938/LLM_visualglm_m

In [63]:
!rm -rf {local_model_path}

### 1.2 模型部署准备（entrypoint脚本，容器镜像，服务配置)

In [8]:
inference_image_uri = image_uris.retrieve(
    framework="djl-deepspeed",
    region=sess.boto_session.region_name,
    version="0.22.1"
)
print(f"Image going to be used is ---- > {inference_image_uri}")

Image going to be used is ---- > 763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.22.1-deepspeed0.9.2-cu118


In [9]:
!mkdir -p LLM_visualglm_deploy_code

In [10]:
%%writefile LLM_visualglm_deploy_code/model.py
from djl_python import Input, Output
import requests
import torch
import logging
import math
import os
from PIL import Image
import base64
from io import BytesIO
import json


from transformers import AutoModel, AutoTokenizer
model = None
tokenizer = None

DEVICE = "cuda"
DEVICE_ID = "0"
CUDA_DEVICE = f"{DEVICE}:{DEVICE_ID}" if DEVICE_ID else DEVICE
def torch_gc():
    if torch.cuda.is_available():
        with torch.cuda.device(CUDA_DEVICE):
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()
            
            
def parse_text(text):
    """copy from https://github.com/GaiZhenbiao/ChuanhuChatGPT/"""
    lines = text.split("\n")
    lines = [line for line in lines if line != ""]
    count = 0
    for i, line in enumerate(lines):
        if "```" in line:
            count += 1
            items = line.split('`')
            if count % 2 == 1:
                lines[i] = f'<pre><code class="language-{items[-1]}">'
            else:
                lines[i] = f'<br></code></pre>'
        else:
            if i > 0:
                if count % 2 == 1:
                    line = line.replace("`", "\`")
                    line = line.replace("<", "&lt;")
                    line = line.replace(">", "&gt;")
                    line = line.replace(" ", "&nbsp;")
                    line = line.replace("*", "&ast;")
                    line = line.replace("_", "&lowbar;")
                    line = line.replace("-", "&#45;")
                    line = line.replace(".", "&#46;")
                    line = line.replace("!", "&#33;")
                    line = line.replace("(", "&#40;")
                    line = line.replace(")", "&#41;")
                    line = line.replace("$", "&#36;")
                lines[i] = "<br>"+line
    text = "".join(lines)
    return text

def load_model(properties):
    global tokenizer,model
    tensor_parallel = properties["tensor_parallel_degree"]
    model_location = properties['model_dir']
    if "model_id" in properties:
        model_location = properties['model_id']
    logging.info(f"Loading model in {model_location}")
    
    tokenizer = AutoTokenizer.from_pretrained(model_location, trust_remote_code=True)
   
    model = AutoModel.from_pretrained(model_location, trust_remote_code=True).quantize(8).half().cuda()
    
    model.eval()
    
    return model, tokenizer
            


def handle(inputs: Input):
    global model, tokenizer
    if not model:
        model, tokenizer = load_model(inputs.get_properties())
    
    if inputs.is_empty():
        return None
    data = inputs.get_as_json()
    
    input_sentences = data["inputs"]
    image_path=data["image"]
    if image_path is None:
        return  Output().add_as_json({ "outputs":"图片不能为空。请重新上传图片并重试。"})
    ##测试链接是否能下载
    if image_path.startswith("http"):
        try:
            requests.get(image_path, timeout=10)
        except:
            return  Output().add_as_json({ "outputs":f'cannot download file from image_path:{image_path}'})

    
    #try:
    #    requests.get(image_path, timeout=10)
    #except:
    #    return  Output().add_as_json({ "outputs":f'cannot download file from image_path:{image_path}'})

    params = data["parameters"]
    history = data["history"]
    print(f'input prompt:{input_sentences}')
    
    response, history = model.chat(tokenizer,image_path, input_sentences, history=history)
    result = {"outputs": parse_text(response)}
    return Output().add_as_json(result)



Overwriting LLM_visualglm_deploy_code/model.py


In [22]:
s3url=f's3://{bucket}/{s3_model_prefix}/'
print(s3url)

s3://sagemaker-us-west-2-691188012938/LLM_visualglm_model/


In [23]:
%%writefile LLM_visualglm_deploy_code/serving.properties
engine=Python
option.tensor_parallel_degree=1
option.s3url= {{s3url}}

Writing LLM_visualglm_deploy_code/serving.properties


In [24]:
%%writefile LLM_visualglm_deploy_code/requirements.txt
transformers==4.30.2
SwissArmyTransformer==0.3.6
accelerate==0.20.3
cpm_kernels==1.0.11
torchvision==0.15.1

Overwriting LLM_visualglm_deploy_code/requirements.txt


In [25]:
# we plug in the appropriate model location into our `serving.properties` file based on the region in which this notebook is running
template = jinja_env.from_string(Path("LLM_visualglm_deploy_code/serving.properties").open().read())
Path("LLM_visualglm_deploy_code/serving.properties").open("w").write(
    template.render(s3url=s3url)
)
!pygmentize LLM_visualglm_deploy_code/serving.properties | cat -n

     1	[36mengine[39;49;00m=[33mPython[39;49;00m[37m[39;49;00m
     2	[36moption.tensor_parallel_degree[39;49;00m=[33m1[39;49;00m[37m[39;49;00m
     3	[36moption.s3url[39;49;00m=[37m [39;49;00m[33ms3://sagemaker-us-west-2-691188012938/LLM_visualglm_model/[39;49;00m[37m[39;49;00m


In [26]:
!rm model.tar.gz
!cd LLM_visualglm_deploy_code && rm -rf ".ipynb_checkpoints"
!tar czvf model.tar.gz LLM_visualglm_deploy_code

LLM_visualglm_deploy_code/
LLM_visualglm_deploy_code/serving.properties
LLM_visualglm_deploy_code/requirements.txt
LLM_visualglm_deploy_code/model.py


In [27]:
s3_code_artifact = sess.upload_data("model.tar.gz", bucket, s3_code_prefix)
print(f"S3 Code or Model tar ball uploaded to --- > {s3_code_artifact}")

S3 Code or Model tar ball uploaded to --- > s3://sagemaker-us-west-2-691188012938/LLM_visualglm_deploy_code/model.tar.gz


### 1.3 创建模型 & 创建endpoint

In [28]:
from sagemaker.utils import name_from_base
import boto3

model_name = name_from_base(f"visualglm") # Append a timestamp to the provided string
print(model_name)
print(f"Image going to be used is ---- > {inference_image_uri}")

create_model_response = sm_client.create_model(
    ModelName=model_name,
    ExecutionRoleArn=role,
    PrimaryContainer={
        "Image": inference_image_uri,
        "ModelDataUrl": s3_code_artifact
    },
    
)
model_arn = create_model_response["ModelArn"]

print(f"Created Model: {model_arn}")

visualglm-2023-11-10-01-54-20-117
Image going to be used is ---- > 763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.22.1-deepspeed0.9.2-cu118
Created Model: arn:aws:sagemaker:us-west-2:691188012938:model/visualglm-2023-11-10-01-54-20-117


In [29]:
endpoint_config_name = f"{model_name}-config"
endpoint_name = f"{model_name}-endpoint"

#Note: ml.g4dn.2xlarge 也可以选择
endpoint_config_response = sm_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[
        {
            "VariantName": "variant1",
            "ModelName": model_name,
            "InstanceType": "ml.g5.2xlarge",
            "InitialInstanceCount": 1,
            "ContainerStartupHealthCheckTimeoutInSeconds": 10*60,
        },
    ],
)
endpoint_config_response

{'EndpointConfigArn': 'arn:aws:sagemaker:us-west-2:691188012938:endpoint-config/visualglm-2023-11-10-01-54-20-117-config',
 'ResponseMetadata': {'RequestId': 'f5917659-eb55-4f87-9d97-14826eaf8cb9',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'f5917659-eb55-4f87-9d97-14826eaf8cb9',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '121',
   'date': 'Fri, 10 Nov 2023 01:54:20 GMT'},
  'RetryAttempts': 0}}

In [30]:
create_endpoint_response = sm_client.create_endpoint(
    EndpointName=f"{endpoint_name}", EndpointConfigName=endpoint_config_name
)
print(f"Created Endpoint: {create_endpoint_response['EndpointArn']}")

Created Endpoint: arn:aws:sagemaker:us-west-2:691188012938:endpoint/visualglm-2023-11-10-01-54-20-117-endpoint


In [31]:
import time

resp = sm_client.describe_endpoint(EndpointName=endpoint_name)
status = resp["EndpointStatus"]
print("Status: " + status)

while status == "Creating":
    time.sleep(60)
    resp = sm_client.describe_endpoint(EndpointName=endpoint_name)
    status = resp["EndpointStatus"]
    print("Status: " + status)

print("Arn: " + resp["EndpointArn"])
print("Status: " + status)

Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: InService
Arn: arn:aws:sagemaker:us-west-2:691188012938:endpoint/visualglm-2023-11-10-01-54-20-117-endpoint
Status: InService


## 2.生成关键帧的内容描述

### 2.1 对视频抽帧并上传至S3

In [32]:
!pip install opencv-python



In [33]:
import cv2
import os
import time
import datetime
import sagemaker
from sagemaker import image_uris
import boto3
import json


In [64]:
s3_frame_prefix = 'videokeyframe'
video_name='little_kitten_playing_his_toy_mouse'
video_path='../video/5/'+video_name+'.mp4'
frame_path='../video_frame/5/'

In [65]:

def extract_and_upload_img(video_path,frame_path):
    frame_list=[]
    vc = cv2.VideoCapture(video_path)  # 读入视频文件
    c = 1
    if vc.isOpened():  # 判断是否正常打开
        rval, frame = vc.read()
    else:
        rval = False
    timeF = 35  # 视频帧计数间隔频率
    while rval:  # 循环读取视频帧
        rval, frame = vc.read()
        if (c % timeF == 0):  # 每隔timeF帧进行存储操作
            if frame is not None:
                #timestr = time.strftime("%Y%m%d-%H%M%S")
                filename = "Frame1"+str(c)+'.jpg'
                cv2.imwrite(frame_path + filename, frame)
                frame_list.append(filename)
        c = c + 1
        cv2.waitKey(10)
    vc.release()
    return frame_list

In [66]:
frame_list= extract_and_upload_img(video_path,frame_path)

In [67]:
print(frame_list)

['Frame135.jpg', 'Frame170.jpg', 'Frame1105.jpg', 'Frame1140.jpg', 'Frame1175.jpg', 'Frame1210.jpg', 'Frame1245.jpg', 'Frame1280.jpg', 'Frame1315.jpg']


In [68]:
!aws s3 cp --recursive {frame_path} s3://{bucket}/{s3_frame_prefix}

upload: ../video_frame/5/Frame1105.jpg to s3://sagemaker-us-west-2-691188012938/videokeyframe/Frame1105.jpg
upload: ../video_frame/5/Frame1245.jpg to s3://sagemaker-us-west-2-691188012938/videokeyframe/Frame1245.jpg
upload: ../video_frame/5/Frame1175.jpg to s3://sagemaker-us-west-2-691188012938/videokeyframe/Frame1175.jpg
upload: ../video_frame/5/Frame170.jpg to s3://sagemaker-us-west-2-691188012938/videokeyframe/Frame170.jpg
upload: ../video_frame/5/Frame1315.jpg to s3://sagemaker-us-west-2-691188012938/videokeyframe/Frame1315.jpg
upload: ../video_frame/5/Frame1280.jpg to s3://sagemaker-us-west-2-691188012938/videokeyframe/Frame1280.jpg
upload: ../video_frame/5/Frame1210.jpg to s3://sagemaker-us-west-2-691188012938/videokeyframe/Frame1210.jpg
upload: ../video_frame/5/Frame1140.jpg to s3://sagemaker-us-west-2-691188012938/videokeyframe/Frame1140.jpg
upload: ../video_frame/5/Frame135.jpg to s3://sagemaker-us-west-2-691188012938/videokeyframe/Frame135.jpg


### 2.2 利用VLM 产生内容描述

In [69]:
import boto3
def generate_s3_image_url(bucket_name, key, expiration=3600):
    s3_client = boto3.client('s3')
    url = s3_client.generate_presigned_url(
        'get_object',
         Params={'Bucket': bucket_name, 'Key': key},
         ExpiresIn=expiration
    )
    return url

In [70]:
history = []
prompt = "请描述这张图片的内容"
parameters = {
  "max_length": 2000,
  "temperature": 0.1,
  "top_p":1
}

In [71]:
content_result_list=[]
for item in frame_list:
    imgurl=str(bucket)+'/'+str(s3_frame_prefix)+'/'+item
    bucket,imgobj = imgurl.split('/',1)
    image_path = generate_s3_image_url(bucket,imgobj)
    #print(image_path)
    response_model = smr_client.invoke_endpoint(
            EndpointName=endpoint_name,
            Body=json.dumps(
            {
                "inputs": prompt,
                "image":image_path,
                "parameters": parameters,
                "history" : []
            }
            ),
            ContentType="application/json",
        )

    result=response_model['Body'].read().decode('utf8')
    content_result_list.append(result)




In [72]:
print(content_result_list)

['{\n  "outputs":"这张照片展示了一只橙色的小猫，它站在一张沙发上。这只猫正在玩一个玩具球或毛绒玩具。此外，它还有一只猫薄荷的气味。这使图像有趣而生动。小猫和它的玩具的存在表明这个场景是一个有趣的家庭环境的一部分。"\n}', '{\n  "outputs":"这张照片显示一只橙色的小猫坐在一张沙发上，手里拿着一个玩具。这只猫正在玩耍或探索沙发周围的空间。它似乎非常好奇和兴奋，可能是在玩球或其他游戏。这个场景展示了小猫的好奇心、活力和对周围物体的兴趣。"\n}', '{\n  "outputs":"这张照片描绘了一只橙色的小猫，它站在一张沙发上，手里拿着一个玩具。这只猫似乎正在玩耍或探索这个玩具。此外，在沙发旁边有一个枕头和一本杂志，这表明这个地方可能是一个舒适的休息场所。照片展示了猫咪玩耍、探索和与周围环境互动的场景。"\n}', '{\n  "outputs":"这张照片展示了一只橙色的小猫，它正在沙发上玩耍。这只猫似乎很活跃和好奇，在玩玩具或探索家具时表现出了活力。此外，它还表现出一种快乐的情绪，因为它被一个有趣的东西吸引了注意力。这种活泼和好奇心的场景为照片增添了一些趣味感。"\n}', '{\n  "outputs":"这张照片显示一只橙色的小猫，可能是长毛猫或虎斑猫，躺在一张沙发上。这只猫似乎正在玩耍或者移动，它的尾巴在沙发和枕头之间摆动着。背景中有一只枕头，表明有其他人的存在，可能包括主人或其他家庭成员。"\n}', '{\n  "outputs":"这张照片显示一只橙色的小猫，很可能是一只小猫咪或小橘猫，在一张沙发上玩耍。它似乎正在用爪子抓住沙发垫子上的一块布。这只猫可能正在探索家具表面、沙发垫子或其他物体，可能是寻找玩具或者与主人互动。"\n}', '{\n  "outputs":"这张照片展示了一只橙色的小猫，它站在一张沙发上，用爪子抓沙发。这只猫似乎正在玩耍或探索家具和物品。这场景非常有趣，因为小猫在玩球时表现出好奇、兴奋和活跃的精神。背景中的枕头为这个场景增添了温馨的氛围。"\n}', '{\n  "outputs":"这张照片展示了一只橙色的小猫，它站在一张沙发上。这只猫正在用爪子抓住沙发或家具上的一块布来玩耍。这个场景表明这只猫可能非常活跃和好奇，可能是在探索周围的环境或寻找玩具。此外，这只猫似乎很放松，因为它没有表现出任

## 3.使用Bedrock Claude进行不同任务

In [73]:
!pip install langchain
!pip install anthropic



In [74]:
from langchain.llms.bedrock import Bedrock
from langchain import LLMChain, PromptTemplate

In [75]:
import json
import os
import sys

import boto3
import botocore

from utils import bedrock

boto3_bedrock = bedrock.get_bedrock_client()

#boto3_bedrock.list_foundation_models()

Create new client
  Using region: None
boto3 Bedrock client successfully created!
bedrock-runtime(https://bedrock-runtime.us-west-2.amazonaws.com)


In [76]:
from langchain.llms.bedrock import Bedrock

inference_modifier = {'max_tokens_to_sample':4096, 
                      "temperature":0.5,
                      "top_k":250,
                      "top_p":1,
                      "stop_sequences": ["\n\nHuman"]
                     }

textgen_llm = Bedrock(model_id = "anthropic.claude-v2",
                    client = boto3_bedrock, 
                    model_kwargs = inference_modifier 
                    )

### 3.1 视频摘要（Video Summary）

In [77]:
with open('./utils/video_summary.txt', 'r', encoding="utf-8") as task:
    lines = task.readlines()
    multi_var_prompt = PromptTemplate(
    input_variables=["input"], 
    template="""

        Human: {}

        Assistant: ok """.format(lines)
        )
    prompt = multi_var_prompt.format(input=str(content_result_list))
    num_tokens = textgen_llm.get_num_tokens(prompt)
    print(f"Our prompt has {num_tokens} tokens")
    
    response = textgen_llm(prompt)
    content = response[response.index('\n')+1:]

Our prompt has 1371 tokens


In [78]:
print(content)


此视频显示一只橙色的小猫,它在一张沙发上玩耍。这只猫看起来很好奇和活跃,它在探索周围的环境和玩具。有时它站着,有时它躺着,但总是在动来动去。它用爪子抓住沙发垫或其他物体。有一个枕头和一本杂志放在沙发旁边。这只小猫的玩耍和探索为这个家庭场景增添了生机。整个视频描绘了一只活跃好动的小猫的日常生活片段。


### 3.2 社交圈文案生成

In [79]:
with open('./utils/social_media_post.txt', 'r', encoding="utf-8") as task:
    lines = task.readlines()
    multi_var_prompt = PromptTemplate(
    input_variables=["input"], 
    template="""

        Human: {}

        Assistant: ok """.format(lines)
        )
    prompt = multi_var_prompt.format(input=str(content_result_list))
    num_tokens = textgen_llm.get_num_tokens(prompt)
    print(f"Our prompt has {num_tokens} tokens")
    
    response = textgen_llm(prompt)
    content = response[response.index('\n')+1:]

Our prompt has 1577 tokens


In [80]:
print(content)


视频里有一只橙色的小猫咪,它看起来非常活泼可爱。😍它先是站在沙发上,一边玩着一个小球一边闻着猫薄荷。然后它坐在沙发上,两只小短腿耸拉着,一只爪子拿着一个玩具在玩。接着它站起来,用爪子抓着沙发玩耍。最后它躺在沙发上,抱着一个猫玩具滚来滚去。

这只小猫咪看起来非常开心和充满活力!🤩它好奇地探索着周围的环境,玩着各种玩具,样子非常可爱。我看到这个视频时,心里暖暖的,忍不住想逗它玩。😆

希望这个视频也能让你们开心一下!记得给我留言告诉我你们的感受哦!💬


### 3.3 基于图像的问答

In [81]:
with open('./utils/VQA.txt', 'r', encoding="utf-8") as task:
    lines = task.readlines()
    multi_var_prompt = PromptTemplate(
    input_variables=["input","question"], 
    template="""

        Human: {}

        Assistant:  """.format(lines)
        )
    prompt = multi_var_prompt.format(input=str(content_result_list),question="我的杂志放在了哪里?")
    num_tokens = textgen_llm.get_num_tokens(prompt)
    print(f"Our prompt has {num_tokens} tokens")
    
    response = textgen_llm(prompt)
    content = response[1:]

Our prompt has 1350 tokens


In [82]:
print(response)

 沙发旁边有一本杂志。
