Skip to content

Commit

Permalink
fix(ray): fix model file extension (#453)
Browse files: browse the repository at this point in the history
  • Loading branch information
heiruwu committed Nov 14, 2023
1 parent 850aa90 commit 424d632
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 27 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number | Diff line number | Diff line change
Expand Up @@ -52,7 +52,7 @@ RUN pip install jsonschema pyyaml
RUN pip install --upgrade pip setuptools wheel
RUN pip install --no-cache-dir opencv-contrib-python-headless transformers pillow torch torchvision onnxruntime dvc[gs]==2.34.2
RUN pip install --no-cache-dir ray[serve] scikit-image
RUN pip install --no-cache-dir instill-sdk==0.3.1
RUN pip install --no-cache-dir instill-sdk==0.3.2rc1

# Need permission of /tmp folder for internal process such as store temporary files.
RUN chown -R nobody:nogroup /tmp
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.dev
Original file line number | Diff line number | Diff line change
Expand Up @@ -46,7 +46,7 @@ RUN pip install jsonschema pyyaml
RUN pip install --upgrade pip setuptools wheel
RUN pip install --no-cache-dir opencv-contrib-python-headless transformers pillow torch torchvision onnxruntime dvc[gs]==2.34.2
RUN pip install --no-cache-dir ray[serve] scikit-image
RUN pip install --no-cache-dir instill-sdk==0.3.1
RUN pip install --no-cache-dir instill-sdk==0.3.2rc1

# -- set up Go
COPY go.mod go.sum ./
Expand Down
37 changes: 12 additions & 25 deletions pkg/ray/ray.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -24,23 +24,6 @@ import (

// InferInput is the untyped carrier for task-specific inference input.
// ModelInferRequest type-asserts it to the concrete input struct that
// matches the requested task.
type InferInput interface{}

// TextToImageInput carries the parameters of a text-to-image inference
// request. ModelInferRequest asserts inferInput to *TextToImageInput for
// commonPB.Task_TASK_TEXT_TO_IMAGE.
type TextToImageInput struct {
Prompt string
// NOTE(review): presumably encoded into the 4-byte steps buffer allocated in
// ModelInferRequest; the encoding call is outside the visible code — confirm.
Steps int64
// NOTE(review): presumably the source of the guidanceScale raw input
// appended in ModelInferRequest — confirm.
CfgScale float32
Seed int64
// Samples is narrowed to uint32 and written as 4 little-endian bytes by
// ModelInferRequest, so values outside the uint32 range are truncated.
Samples int64
}

// TextGenerationInput carries the parameters of a text-generation inference
// request. ModelInferRequest asserts inferInput to *TextGenerationInput for
// commonPB.Task_TASK_TEXT_GENERATION and appends the encoded fields to
// RawInputContents.
type TextGenerationInput struct {
// Prompt is sent as a single-element bytes tensor built with
// triton.SerializeBytesTensor.
Prompt string
// OutputLen is narrowed to uint32 and written as 4 little-endian bytes, so
// values outside the uint32 range are truncated.
OutputLen int64
// BadWordsList is sent as a single-element bytes tensor.
BadWordsList string
// StopWordsList is sent as a single-element bytes tensor.
StopWordsList string
// TopK is narrowed to uint32 and written as 4 little-endian bytes.
TopK int64
// Seed is converted to uint64 and written as 8 little-endian bytes.
Seed int64
}

type Ray interface {
// grpc
ModelReadyRequest(ctx context.Context, modelName string, modelInstance string) *rayserver.ModelReadyResponse
Expand Down Expand Up @@ -248,7 +231,7 @@ func (r *ray) ModelInferRequest(ctx context.Context, task commonPB.Task, inferIn

switch task {
case commonPB.Task_TASK_TEXT_TO_IMAGE:
textToImageInput := inferInput.(*TextToImageInput)
textToImageInput := inferInput.(*triton.TextToImageInput)
samples := make([]byte, 4)
binary.LittleEndian.PutUint32(samples, uint32(textToImageInput.Samples))
steps := make([]byte, 4)
Expand All @@ -265,19 +248,23 @@ func (r *ray) ModelInferRequest(ctx context.Context, task commonPB.Task, inferIn
modelInferRequest.RawInputContents = append(modelInferRequest.RawInputContents, guidanceScale)
modelInferRequest.RawInputContents = append(modelInferRequest.RawInputContents, seed)
case commonPB.Task_TASK_TEXT_GENERATION:
textGenerationInput := inferInput.(*TextGenerationInput)
outputLen := make([]byte, 4)
binary.LittleEndian.PutUint32(outputLen, uint32(textGenerationInput.OutputLen))
textGenerationInput := inferInput.(*triton.TextGenerationInput)
maxNewToken := make([]byte, 4)
binary.LittleEndian.PutUint32(maxNewToken, uint32(textGenerationInput.MaxNewTokens))
temperature := make([]byte, 4)
binary.LittleEndian.PutUint32(temperature, math.Float32bits(textGenerationInput.Temperature))
topK := make([]byte, 4)
binary.LittleEndian.PutUint32(topK, uint32(textGenerationInput.TopK))
seed := make([]byte, 8)
binary.LittleEndian.PutUint64(seed, uint64(textGenerationInput.Seed))
modelInferRequest.RawInputContents = append(modelInferRequest.RawInputContents, triton.SerializeBytesTensor([][]byte{[]byte(textGenerationInput.Prompt)}))
modelInferRequest.RawInputContents = append(modelInferRequest.RawInputContents, outputLen)
modelInferRequest.RawInputContents = append(modelInferRequest.RawInputContents, triton.SerializeBytesTensor([][]byte{[]byte(textGenerationInput.BadWordsList)}))
modelInferRequest.RawInputContents = append(modelInferRequest.RawInputContents, triton.SerializeBytesTensor([][]byte{[]byte(textGenerationInput.PromptImage)}))
modelInferRequest.RawInputContents = append(modelInferRequest.RawInputContents, maxNewToken)
modelInferRequest.RawInputContents = append(modelInferRequest.RawInputContents, triton.SerializeBytesTensor([][]byte{[]byte(textGenerationInput.StopWordsList)}))
modelInferRequest.RawInputContents = append(modelInferRequest.RawInputContents, temperature)
modelInferRequest.RawInputContents = append(modelInferRequest.RawInputContents, topK)
modelInferRequest.RawInputContents = append(modelInferRequest.RawInputContents, seed)
modelInferRequest.RawInputContents = append(modelInferRequest.RawInputContents, triton.SerializeBytesTensor([][]byte{[]byte(textGenerationInput.ExtraParams)}))
case commonPB.Task_TASK_CLASSIFICATION,
commonPB.Task_TASK_DETECTION,
commonPB.Task_TASK_KEYPOINT,
Expand Down Expand Up @@ -388,7 +375,7 @@ func (r *ray) DeployModel(modelPath string) error {
modelPath = filepath.Join(config.Config.RayServer.ModelStore, modelPath)
cmd := exec.Command("python", "model.py",
"--func", "deploy",
"--model", filepath.Join(modelPath, "model.onnx"),
"--model", modelPath,
)
cmd.Dir = modelPath

Expand All @@ -405,7 +392,7 @@ func (r *ray) UndeployModel(modelPath string) error {
modelPath = filepath.Join(config.Config.RayServer.ModelStore, modelPath)
cmd := exec.Command("python", "model.py",
"--func", "undeploy",
"--model", filepath.Join(modelPath, "model.onnx"),
"--model", modelPath,
)
cmd.Dir = modelPath

Expand Down

0 comments on commit 424d632

Please sign in to comment.