## Install prerequisite dependencies for model archiving (restart the kernel after a first-time pip install)

In [2]:
!pip install -q -U torch-model-archiver huggingface_hub diffusers accelerate transformers


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


## Pull the SDXL base and refiner models from Hugging Face and save them locally

In [2]:
import torch
from diffusers import StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline

# Options shared by both downloads: request the fp16 weight variant and load it as float16.
fp16_options = {"variant": "fp16", "torch_dtype": torch.float16}

# SDXL base (text-to-image) pipeline -> ./downloaded_model
pipeline = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", **fp16_options
)
pipeline.save_pretrained("./downloaded_model")

# SDXL refiner (image-to-image) pipeline -> ./refiner_model
refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0", **fp16_options
)
refiner.save_pretrained("./refiner_model")

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

model_index.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Fetching 13 files:   0%|          | 0/13 [00:00<?, ?it/s]

tokenizer_2/tokenizer_config.json:   0%|          | 0.00/725 [00:00<?, ?B/s]

text_encoder_2/config.json:   0%|          | 0.00/575 [00:00<?, ?B/s]

tokenizer_2/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

tokenizer_2/special_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

tokenizer_2/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

scheduler/scheduler_config.json:   0%|          | 0.00/479 [00:00<?, ?B/s]

model.fp16.safetensors:   0%|          | 0.00/1.39G [00:00<?, ?B/s]

unet/config.json:   0%|          | 0.00/1.71k [00:00<?, ?B/s]

vae/config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

diffusion_pytorch_model.fp16.safetensors:   0%|          | 0.00/4.52G [00:00<?, ?B/s]

diffusion_pytorch_model.fp16.safetensors:   0%|          | 0.00/167M [00:00<?, ?B/s]

diffusion_pytorch_model.fp16.safetensors:   0%|          | 0.00/167M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

## Zip the downloaded models (files must be at the root of each zip file)

In [3]:
%cd downloaded_model
!zip -FSr ../model.zip *
%cd ../

%cd refiner_model
!zip -FSr ../refiner.zip *
%cd ../

  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


/opt/app-root/src/SDXL/downloaded_model
updating: model_index.json (deflated 59%)
updating: scheduler/scheduler_config.json (deflated 51%)
updating: text_encoder/config.json (deflated 43%)
updating: text_encoder/model.safetensors (deflated 8%)
updating: text_encoder_2/config.json (deflated 43%)
updating: text_encoder_2/model.safetensors (deflated 8%)
updating: tokenizer/special_tokens_map.json (deflated 73%)
updating: tokenizer/merges.txt (deflated 60%)
updating: tokenizer/vocab.json (deflated 71%)
updating: tokenizer/tokenizer_config.json (deflated 63%)
updating: tokenizer_2/special_tokens_map.json (deflated 72%)
updating: tokenizer_2/merges.txt (deflated 60%)
updating: tokenizer_2/vocab.json (deflated 71%)
updating: tokenizer_2/tokenizer_config.json (deflated 68%)
updating: unet/config.json (deflated 62%)
updating: unet/diffusion_pytorch_model.safetensors (deflated 8%)
updating: vae/config.json (deflated 54%)
updating: vae/diffusion_pytorch_model.safetensors (deflated 7%)
/opt/app-ro

## Archive the saved models, including the custom handler that uses the zips from model storage to seed the HF diffusers pipelines

In [5]:
!torch-model-archiver --model-name stable-diffusion \
    --version 1.0 \
    --handler stable_diffusion_handler.py \
    --extra-files model.zip,refiner.zip \
    --requirements-file ./requirements.txt \
    --force #replace if already exists



## Bucket subdirectory structure:

```
bucket
├── stable-diffusion/              # name here is not too important
│   ├── config/
│   │   ├── config.properties      # copied from this workbook
│   ├── model-store/
│   │   ├── stable-diffusion.mar   # filename must match marName in config.properties; copy this file from the workbook after completing the archive step above
```

## config.properties
```
inference_address=http://0.0.0.0:8085
management_address=http://0.0.0.0:8085
metrics_address=http://0.0.0.0:8082
grpc_inference_port=7070
grpc_management_port=7071
enable_envvars_config=true
install_py_dep_per_model=true
enable_metrics_api=true
metrics_mode=prometheus
NUM_WORKERS=1
number_of_netty_threads=4
job_queue_size=10
max_response_size=30000000
model_store=/mnt/models/model-store
model_snapshot={"name":"startup.cfg","modelCount":1,"models":{"stable-diffusion":{"1.0":{"defaultVersion":true,"marName":"stable-diffusion.mar","minWorkers":1,"maxWorkers":5,"batchSize":1,"maxBatchDelay":5000,"responseTimeout":900}}}}
```