Fix fp32 issues with DS fork wheel for stable diffusion, fix llm tests #390

Merged 1 commit on Dec 12, 2022
11 changes: 10 additions & 1 deletion .github/workflows/llm_integration.yml
@@ -162,7 +162,7 @@ jobs:
       with:
         python-version: '3.10.x'
     - name: Install pip dependencies
-      run: pip3 install requests
+      run: pip3 install requests pillow
     - name: Build container name
       run: ./serving/docker/scripts/docker_name_builder.sh deepspeed ${{ github.event.inputs.djl-version }}
     - name: Download models and dockers
@@ -178,6 +178,15 @@ jobs:
           serve
           python3 llm/client.py stable-diffusion stable-diffusion-v1-4
           docker rm -f $(docker ps -aq)
+      - name: Test stable-diffusion-v1-5
+        working-directory: tests/integration
+        run: |
+          rm -rf models
+          python3 llm/prepare.py stable-diffusion stable-diffusion-v1-5
+          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
+          serve
+          python3 llm/client.py stable-diffusion stable-diffusion-v1-5
+          docker rm -f $(docker ps -aq)
       - name: Test bloom-7b
         working-directory: tests/integration
         run: |
28 changes: 20 additions & 8 deletions engines/python/setup/djl_python/stable-diffusion.py
@@ -56,14 +56,7 @@ def initialize(self, properties: dict):
         self.device = int(os.getenv("LOCAL_RANK", "0"))
         self.tensor_parallel_degree = int(
             properties.get("tensor_parallel_degree", 1))
-        self.ds_config = {
-            "replace_with_kernel_inject": True,
-            # TODO: Figure out why cuda graph doesn't work for stable diffusion via DS
-            "enable_cuda_graph": False,
-            "replace_method": "auto",
-            "dtype": self.data_type,
-            "mp_size": self.tensor_parallel_degree
-        }
+        self.ds_config = self._get_ds_config_for_dtype(self.data_type)
 
         if not self.model_id:
             config_file = os.path.join(self.model_dir, "model_index.json")
@@ -91,6 +84,25 @@ def initialize(self, properties: dict):
         self.pipeline = pipeline
         self.initialized = True
 
+    def _get_ds_config_for_dtype(self, dtype):
+        # This is a workaround for 2 issues with DeepSpeed 0.7.5:
+        # 1. No kernel injection is available for stable diffusion with fp32 (kernels are only written for fp16)
+        # 2. Changes in our bf16 fork raise an error, but the original DeepSpeed codebase defaults to fp16
+        #    when dtype is not set explicitly, so we need to set the dtype explicitly in this config
+        ds_config = {
+            # TODO: Figure out why cuda graph doesn't work for stable diffusion via DS
+            "enable_cuda_graph": False,
+            "dtype": dtype,
+            "mp_size": self.tensor_parallel_degree
+        }
+        if dtype == torch.float16:
+            ds_config["replace_with_kernel_inject"] = True
+            ds_config["replace_method"] = "auto"
+        else:
+            ds_config["replace_with_kernel_inject"] = False
+            ds_config["replace_method"] = None
+        return ds_config
+
     def inference(self, inputs: Input):
         try:
             content_type = inputs.get_property("Content-Type")
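For context on how a config like this is consumed: DeepSpeed 0.7.x exposes these settings as keyword arguments to deepspeed.init_inference. The sketch below is illustrative only, not the handler's exact wiring; the model id, single-GPU mp_size, and the pipeline-level injection call are assumptions.

import torch
import deepspeed
from diffusers import StableDiffusionPipeline

# Load the pipeline in fp32; with no fp16 kernels available, kernel
# injection stays disabled, matching the else-branch of
# _get_ds_config_for_dtype above.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float32)
pipe = pipe.to("cuda")
ds_config = {
    "enable_cuda_graph": False,
    "dtype": torch.float32,
    "mp_size": 1,  # placeholder; the handler uses tensor_parallel_degree
    "replace_with_kernel_inject": False,
    "replace_method": None,
}
# DeepSpeed 0.7.x accepts these settings as keyword arguments
pipe = deepspeed.init_inference(pipe, **ds_config)
image = pipe("a photo of an astronaut riding a horse").images[0]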
23 changes: 18 additions & 5 deletions tests/integration/llm/client.py
@@ -3,6 +3,8 @@
 import subprocess as sp
 import logging
 import math
+from PIL import Image
+from io import BytesIO
 
 logging.basicConfig(level=logging.INFO)
 parser = argparse.ArgumentParser(description='Build the LLM configs')
@@ -79,6 +81,11 @@
         "steps": [1, 2],
         "worker": 2
     },
+    "stable-diffusion-v1-5": {
+        "max_memory_per_gpu": 16.0,
+        "size": [256, 512],
+        "steps": [1, 2]
+    },
 }
 
 
@@ -97,7 +104,7 @@ def check_worker_number(desired):
 def send_json(data):
     headers = {'content-type': 'application/json'}
     res = requests.post(endpoint, headers=headers, json=data)
-    return res.json(), res.status_code
+    return res
 
 
 def get_gpu_memory():
@@ -138,7 +145,8 @@ def test_handler(model, model_spec):
             params = {"max_length": seq_length}
             req["parameters"] = params
             logging.info(f"req {req}")
-            res, _ = send_json(req)
+            res = send_json(req)
+            res = res.json()
             logging.info(f"res {res}")
             result = [item[0]['generated_text'] for item in res]
             assert len(result) == batch_size
@@ -162,7 +170,8 @@ def test_ds_raw_model(model):
            "use_pipeline": spec["use_pipeline"]
        }
        logging.info(f"req: {req}")
-       res, _ = send_json(req)
+       res = send_json(req)
+       res = res.json()
        logging.info(f"res: {res}")
        assert len(res["outputs"]) == batch_size
        memory_usage = get_gpu_memory()
@@ -185,8 +194,12 @@ def test_sd_handler(model, model_spec):
            params = {"height": size, "width": size, "steps": step}
            req["parameters"] = params
            logging.info(f"req: {req}")
-           res, status_code = send_json(req)
-           assert status_code == 200
+           res = send_json(req)
+           assert res.status_code == 200
+           try:
+               img = Image.open(BytesIO(res.content)).convert("RGB")
+           except Exception as e:
+               raise IOError("failed to deserialize image from response") from e
    memory_usage = get_gpu_memory()
    logging.info(memory_usage)
    for memory in memory_usage:
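Note the shape change above: send_json now returns the raw requests.Response, because the stable diffusion handler replies with image bytes rather than JSON, and test_sd_handler validates the payload by decoding it with PIL. A hand-run equivalent might look like the following sketch; the endpoint URL and the "prompt" payload key are assumptions for illustration:

import requests
from io import BytesIO
from PIL import Image

res = requests.post(
    "http://127.0.0.1:8080/predictions/test",  # assumed test endpoint
    headers={"content-type": "application/json"},
    json={"prompt": "a photo of an astronaut riding a horse",
          "parameters": {"height": 256, "width": 256, "steps": 2}})
assert res.status_code == 200
# res.content holds raw image bytes; a failed decode means a bad response
img = Image.open(BytesIO(res.content)).convert("RGB")
img.save("astronaut.png")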
5 changes: 5 additions & 0 deletions tests/integration/llm/prepare.py
@@ -69,6 +69,11 @@
         "option.tensor_parallel_degree": 2,
         "option.dtype": "fp16"
     },
+    "stable-diffusion-v1-5": {
+        "option.s3url": "s3://djl-llm/stable-diffusion-v1-5/",
+        "option.tensor_parallel_degree": 4,
+        "option.dtype": "fp32"
+    },
 }
 
 
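These entries are consumed at test setup time. Below is a minimal sketch of what prepare.py presumably produces for this model, assuming it renders each spec into a serving.properties file per DJL Serving convention; the output path and the engine line are assumptions.

spec = {
    "option.s3url": "s3://djl-llm/stable-diffusion-v1-5/",
    "option.tensor_parallel_degree": 4,
    "option.dtype": "fp32",
}
# Hypothetical rendering step: write the spec into serving.properties so
# DJL Serving picks up the engine and per-model options on container start.
with open("models/test/serving.properties", "w") as f:
    f.write("engine=DeepSpeed\n")
    for key, value in spec.items():
        f.write(f"{key}={value}\n")

The choice of values is consistent with the fp32 workaround: fp32 weights are roughly twice the size of fp16 weights, which lines up with sharding across 4 GPUs here and the 16.0 GB per-GPU ceiling added to client.py.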