# **Triton request example**

Start the Triton Inference Server before running the inference code below; the request cell expects the server's gRPC endpoint to be reachable.

In [None]:
# Example Docker command to start the Triton Inference Server. Run it in a terminal from the
# directory that contains model.onnx and config.pbtxt; do not execute this cell.
#
# docker run --rm -it \
#   -v ./model.onnx:/models/streaming_acoustic/1/model.onnx \
#   -v ./config.pbtxt:/models/streaming_acoustic/config.pbtxt \
#   -p 8000:8000 -p 8001:8001 -p 8002:8002 \
#   nvcr.io/nvidia/tritonserver:25.06-py3 \
#   tritonserver --model-repository=/models

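Send an inference request to the Triton Inference Server: stream the example audio chunk by chunk, then decode the collected log-probabilities into text.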

In [2]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import tritonclient.grpc as grpcclient
from typing import Iterator
from tone.demo.read_audio import read_stream_example_audio
from tone.decoder import GreedyCTCDecoder


def send_to_triton(
    audio_chunks: Iterator[np.ndarray],
    url: str = "localhost:8001",
    model_name: str = "streaming_acoustic",
) -> np.ndarray:
    """Stream audio chunks through a stateful Triton model and collect log-probabilities.

    Every chunk is sent as its own gRPC inference request together with the
    state tensor returned by the previous request; the first request uses an
    all-zero state. The client connection is closed when the function returns
    or raises.

    Args:
        audio_chunks: Audio chunks to send, in order. Each chunk is cast to
            int32 and gets a new leading and a new third axis before sending
            (presumably 1-D sample arrays, giving shape (1, samples, 1) --
            confirm against the model's config.pbtxt).
        url: ``host:port`` of the Triton gRPC endpoint.
        model_name: Name of the model inside the Triton model repository.

    Returns:
        The per-chunk ``logprobs`` outputs concatenated along axis 1. When no
        chunk produced an output, an empty float32 array of shape (1, 0, 35).

    Raises:
        Any error raised by the Triton client (e.g. the server is unreachable
        or rejects the request) propagates unchanged.
    """
    signal_input = "signal"
    state_input = "state"
    logprobs_output = "logprobs"
    state_output = "state_next"

    # Initial state fed with the first chunk; later chunks reuse the state
    # returned by the server. The width must match the model's "state" input.
    state = np.zeros((1, 219729), dtype=np.float16)
    all_logprobs = []

    # The requested outputs do not depend on the chunk, so build them once.
    outputs = [
        grpcclient.InferRequestedOutput(logprobs_output),
        grpcclient.InferRequestedOutput(state_output),
    ]

    # The context manager closes the gRPC channel even if a request fails,
    # instead of leaving it open until garbage collection.
    with grpcclient.InferenceServerClient(url=url) as triton_client:
        for chunk in audio_chunks:
            signal = np.expand_dims(chunk.astype(np.int32), axis=(0, 2))

            inputs = [
                grpcclient.InferInput(signal_input, signal.shape, "INT32"),
                grpcclient.InferInput(state_input, state.shape, "FP16"),
            ]
            inputs[0].set_data_from_numpy(signal)
            inputs[1].set_data_from_numpy(state)

            result = triton_client.infer(
                model_name=model_name, inputs=inputs, outputs=outputs
            )

            logprobs = result.as_numpy(logprobs_output)
            # Carry the updated state over to the next chunk.
            state = result.as_numpy(state_output)

            if logprobs is not None:
                all_logprobs.append(logprobs)

    if not all_logprobs:
        return np.empty((1, 0, 35), dtype=np.float32)

    return np.concatenate(all_logprobs, axis=1)


if __name__ == "__main__":
    # Stream the short example recording through Triton, chunk by chunk.
    audio_stream = read_stream_example_audio(long_audio=False)
    logprobs = send_to_triton(audio_stream)

    # Drop the leading axis so the decoder receives the array without it.
    logprobs_squeezed = logprobs.squeeze(axis=0)

    # Decode the collected log-probabilities and show the resulting text.
    decoder = GreedyCTCDecoder()
    text = decoder.forward(logprobs_squeezed)
    print(text)

ну сейчас к тебе приедет бригада давай давай я жду
