# SVD model preparation

Let's start by installing some dependencies

In [1]:
!pip install "sagemaker==2.226.1" "huggingface_hub==0.24.2" --upgrade --quiet

In [None]:
import sagemaker
import boto3
# Create a first session only to discover the account's default bucket.
sess = sagemaker.Session()
# sagemaker session bucket -> used for uploading data, models and logs
# sagemaker will automatically create this bucket if it doesn't exist
sagemaker_session_bucket=None
if sagemaker_session_bucket is None and sess is not None:
    # set to default bucket if a bucket name is not given
    sagemaker_session_bucket = sess.default_bucket()

# Resolve the IAM role ARN. If get_execution_role() raises ValueError
# (presumably when running outside a SageMaker-managed environment -- confirm),
# fall back to looking up the role named 'sagemaker_execution_role' through IAM.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Re-create the session bound to the chosen bucket; later cells use this `sess`.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

We create a code folder which will contain our custom inference code, and the requirements.txt for additional dependencies

In [3]:
!mkdir code

Next, we create a requirements.txt file listing the additional libraries our inference code needs: diffusers (which provides the Stable Video Diffusion pipeline), transformers, and accelerate. The versions are pinned so that the exact releases we need are installed on top of the Hugging Face Inference DLC image.

In [None]:
%%writefile code/requirements.txt
diffusers==0.27.2
transformers==4.37.0
accelerate==0.27.0 

To use custom inference code, we need to create an inference.py script. In our example, we are going to overwrite:
- the model_fn to load our svd model correctly 
- the predict_fn to process incoming requests

In the model_fn, we use the StableVideoDiffusionPipeline class from diffusers to load the model from the local directory (model_dir).
In the predict_fn, we call the pipeline on the input image to generate video frames, which are returned as base64-encoded JPEG images.

In [None]:
%%writefile code/inference.py
import base64
import torch
from io import BytesIO
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image

def model_fn(model_dir):
    """Load the Stable Video Diffusion pipeline from ``model_dir``.

    The fp16 variant of the weights is loaded in half precision and model CPU
    offloading is switched on before the pipeline is handed back.

    Args:
        model_dir: Local directory containing the model files.

    Returns:
        The configured ``StableVideoDiffusionPipeline`` instance.
    """
    pipeline = StableVideoDiffusionPipeline.from_pretrained(
        model_dir,
        torch_dtype=torch.float16,
        variant="fp16",
    )
    pipeline.enable_model_cpu_offload()
    # Further memory reduction is possible but left disabled here, see
    # https://huggingface.co/docs/diffusers/using-diffusers/svd#reduce-memory-usage
    # pipeline.unet.enable_forward_chunking()
    return pipeline


def predict_fn(data, pipe):
    """Generate video frames from a conditioning image.

    Args:
        data: Request payload (dict). Recognised keys:
            - "inputs": image reference passed to ``load_image``
              (presumably a URL or local path -- confirm against the
              diffusers version in use).
            - "seed": RNG seed, defaults to 42.
            - "decode_chunk_size": defaults to 8.
            - "num_frames", "num_inference_steps", "fps",
              "motion_bucket_id", "noise_aug_strength",
              "min_guidance_scale", "max_guidance_scale": optional; each is
              forwarded to the pipeline only when present, so requests that
              omit them keep the pipeline defaults.
        pipe: The pipeline object returned by ``model_fn``.

    Returns:
        ``{"frames": [...]}`` where each entry is one generated frame encoded
        as a base64 JPEG string.
    """
    # get prompt & parameters
    prompt = data.pop("inputs", data)

    seed = data.pop("seed", 42)
    decode_chunk_size = data.pop("decode_chunk_size", 8)

    # Optional generation settings: only forwarded when the caller sent them.
    optional_keys = (
        "num_frames",
        "num_inference_steps",
        "fps",
        "motion_bucket_id",
        "noise_aug_strength",
        "min_guidance_scale",
        "max_guidance_scale",
    )
    extra_kwargs = {key: data.pop(key) for key in optional_keys if key in data}

    image = load_image(prompt)
    image = image.resize((1024, 576))

    generator = torch.manual_seed(seed)
    frames = pipe(
        image,
        decode_chunk_size=decode_chunk_size,
        generator=generator,
        **extra_kwargs,
    ).frames[0]

    # create response: one base64-encoded JPEG per generated frame
    encoded_frames = []
    for frame in frames:
        with BytesIO() as buffered:
            frame.save(buffered, format="JPEG")
            encoded_frames.append(base64.b64encode(buffered.getvalue()).decode())

    return {"frames": encoded_frames}

We now use the huggingface_hub SDK to easily download the stable-video-diffusion-img2vid-xt-1-1 model files from Hugging Face to a model folder. Make sure to replace the value of HF_TOKEN. The model is gated on Hugging Face. To get access, you need to create a user access token. The procedure is detailed here: https://huggingface.co/docs/hub/security-tokens
You will need to agree to share your contact information: the model deployed in this sample requires you to agree to share your information before you can access it. Once logged in, visit the [model page](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt-1-1) and click on the button 'Agree and access repository'.

In [None]:
from distutils.dir_util import copy_tree
from pathlib import Path
from huggingface_hub import snapshot_download
import random

HF_MODEL_ID="stabilityai/stable-video-diffusion-img2vid-xt-1-1"
HF_TOKEN="REPLACE_WITH_YOUR_TOKEN"
# Fail fast when the token is empty or still the placeholder above; a plain
# length check would let the placeholder through to the download call.
if not HF_TOKEN or HF_TOKEN == "REPLACE_WITH_YOUR_TOKEN":
    raise ValueError("Please set HF_TOKEN to your huggingface token. You can find it here: https://huggingface.co/settings/tokens")


# download snapshot (`token` replaces the deprecated `use_auth_token` argument)
snapshot_dir = snapshot_download(repo_id=HF_MODEL_ID, token=HF_TOKEN)

# create model dir (random suffix keeps separate runs in separate folders)
model_tar = Path(f"model-{random.getrandbits(16)}")
model_tar.mkdir(exist_ok=True)

# copy snapshot to model dir
copy_tree(snapshot_dir, str(model_tar))

We copy our custom files (inference.py and requirements.txt) to the model folder

In [None]:
from distutils.dir_util import copy_tree
# copy code/ (inference.py and requirements.txt) into the "code" subfolder of the model dir
copy_tree("code/", str(model_tar.joinpath("code")))

We create an archive which includes all our files to run inference

In [None]:
import tarfile
import os

# helper to create the model.tar.gz
def compress(tar_dir=None, output_file="model.tar.gz"):
    """Pack the contents of ``tar_dir`` into a gzip-compressed tar archive.

    Every top-level entry of ``tar_dir`` is added under its own name, so the
    archive has no leading directory component. The name of each entry is
    printed as it is added.

    Args:
        tar_dir: Directory whose contents are archived.
        output_file: Path of the archive to write. A relative path is resolved
            against the current working directory.

    Raises:
        TypeError: If ``tar_dir`` is None.
        OSError: If ``tar_dir`` cannot be listed or the archive cannot be
            written.
    """
    if tar_dir is None:
        raise TypeError("compress() requires tar_dir, the directory to archive")

    archive_path = os.path.join(os.getcwd(), output_file)
    # List before opening the archive so a bad tar_dir does not leave an
    # empty archive file behind.
    entries = os.listdir(tar_dir)
    # Entries are addressed through tar_dir instead of chdir-ing into it, so
    # a failure can never leave the process in a different working directory.
    with tarfile.open(archive_path, "w:gz") as tar:
        for item in entries:
            print(item)
            tar.add(os.path.join(tar_dir, item), arcname=item)

# archive the contents of the model dir; with the default output_file this
# writes model.tar.gz into the current working directory
compress(str(model_tar))

Finally, we upload the archive to an Amazon Simple Storage Service bucket

In [None]:
from sagemaker.s3 import S3Uploader

# upload model.tar.gz to the "svd-hf-1" prefix of the session's default bucket
s3_model_uri=S3Uploader.upload(local_path="model.tar.gz", desired_s3_uri=f"s3://{sess.default_bucket()}/svd-hf-1")

print(f"model uploaded to: {s3_model_uri}")
# Take note of the s3_model_uri value, this is what the construct will use to deploy the model
