In [1]:
# https://docs.ray.io/en/latest/serve/getting_started.html

In [10]:
pip install "ray[serve]" transformers requests torch

Note: you may need to restart the kernel to use updated packages.


# Local version before Ray Serve

In [1]:
# File name: model.py
from transformers import pipeline


class Translator:
    def __init__(self):
        # Load model
        self.model = pipeline("translation_en_to_fr", model="t5-small")

    def translate(self, text: str) -> str:
        # Run inference
        model_output = self.model(text)

        # Post-process output to return only the translation text
        translation = model_output[0]["translation_text"]

        return translation


translator = Translator()

translation = translator.translate("Hello world!")
print(translation)

  from .autonotebook import tqdm as notebook_tqdm
Downloading: 100%|██████████| 242M/242M [00:06<00:00, 40.3MB/s] 
Downloading: 100%|██████████| 792k/792k [00:00<00:00, 1.78MB/s]
Downloading: 100%|██████████| 1.39M/1.39M [00:00<00:00, 3.18MB/s]


Bonjour monde!


For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


# Serve the model with Ray Serve

In [76]:
@serve.deployment(num_replicas=2, ray_actor_options={"num_cpus": 0.2, "num_gpus": 0})
class Translator:
    def __init__(self):
        # Load model
        self.model = pipeline("translation_en_to_fr", model="t5-small")

    def translate(self, text: str) -> str:
        # Run inference
        model_output = self.model(text)

        # Post-process output to return only the translation text
        translation = model_output[0]["translation_text"]

        return translation

    async def __call__(self, http_request: Request) -> str:
        english_text: str = await http_request.json()
        return self.translate(english_text)

In [78]:
#Run following command from CLI
#serve run serve_deployment:translator
translator = Translator.bind()
serve.run(translator)

(ServeController pid=334880) INFO 2022-12-13 19:06:53,199 controller 334880 deployment_state.py:1310 - Adding 1 replica to deployment 'Translator'.
(ServeController pid=334880) INFO 2022-12-13 19:06:57,666 controller 334880 deployment_state.py:1214 - Stopping 1 replicas of deployment 'Translator' with outdated versions.
(ServeController pid=334880) INFO 2022-12-13 19:06:59,831 controller 334880 deployment_state.py:1310 - Adding 1 replica to deployment 'Translator'.
(ServeController pid=334880) INFO 2022-12-13 19:07:04,261 controller 334880 deployment_state.py:1336 - Removing 1 replica from deployment 'Summarizer'.


RayServeSyncHandle(deployment='Translator')

In [79]:
import requests

english_text = "Hello world!"

response = requests.post("http://127.0.0.1:8000/", json=english_text)
french_text = response.text

print(french_text)

Bonjour monde!


(HTTPProxyActor pid=334924) INFO 2022-12-13 19:07:07,418 http_proxy 10.0.63.8 http_proxy.py:361 - POST / 200 212.8ms
(ServeReplica:Translator pid=336416) INFO 2022-12-13 19:07:07,417 Translator Translator#PLOWuY replica.py:505 - HANDLE __call__ OK 208.0ms


In [81]:
serve.shutdown()

(ServeController pid=334880) INFO 2022-12-13 19:08:24,250 controller 334880 deployment_state.py:1336 - Removing 2 replicas from deployment 'Translator'.


# Composing Machine Learning Models with Deployment Graphs

In [82]:
from transformers import pipeline
class Summarizer:
    def __init__(self):
        # Load model
        self.model = pipeline("summarization", model="t5-small")

    def summarize(self, text: str) -> str:
        # Run inference
        model_output = self.model(text, min_length=5, max_length=15)

        # Post-process output to return only the summary text
        summary = model_output[0]["summary_text"]

        return summary


summarizer = Summarizer()

summary = summarizer.summarize(
    "It was the best of times, it was the worst of times, it was the age "
    "of wisdom, it was the age of foolishness, it was the epoch of belief"
)
print(summary)

it was the best of times, it was worst of times .


In [83]:
@serve.deployment
class Translator:
    def __init__(self):
        # Load model
        self.model = pipeline("translation_en_to_fr", model="t5-small")

    def translate(self, text: str) -> str:
        # Run inference
        model_output = self.model(text)

        # Post-process output to return only the translation text
        translation = model_output[0]["translation_text"]

        return translation


@serve.deployment
class Summarizer:
    def __init__(self, translator):
        # Load model
        self.model = pipeline("summarization", model="t5-small")
        self.translator = translator

    def summarize(self, text: str) -> str:
        # Run inference
        model_output = self.model(text, min_length=5, max_length=15)

        # Post-process output to return only the summary text
        summary = model_output[0]["summary_text"]

        return summary

    async def __call__(self, http_request: Request) -> str:
        english_text: str = await http_request.json()
        summary = self.summarize(english_text)
        print(summary,type(summary))
        translation_ref = await self.translator.translate.remote(summary)
        #translation = ray.get(translation_ref)
        translation = await translation_ref
        return translation

In [84]:
# Run following CLI in term
#!serve run graph:deployment_graph
summarizer = Summarizer.bind(Translator.bind())
serve.run(summarizer)

(ServeController pid=337163) INFO 2022-12-13 19:09:56,631 controller 337163 http_state.py:129 - Starting HTTP proxy with name 'SERVE_CONTROLLER_ACTOR:SERVE_PROXY_ACTOR-ab3314cb20f8ef656d173f3786b63fec5ade73b79f63643f522bb198' on node 'ab3314cb20f8ef656d173f3786b63fec5ade73b79f63643f522bb198' listening on '127.0.0.1:8000'
(HTTPProxyActor pid=337207) INFO:     Started server process [337207]
(ServeController pid=337163) INFO 2022-12-13 19:09:58,283 controller 337163 deployment_state.py:1310 - Adding 1 replica to deployment 'Translator'.
(ServeController pid=337163) INFO 2022-12-13 19:09:58,301 controller 337163 deployment_state.py:1310 - Adding 1 replica to deployment 'Summarizer'.


RayServeSyncHandle(deployment='Summarizer')

In [85]:
import requests

english_text = (
    "It was the best of times, it was the worst of times, it was the age "
    "of wisdom, it was the age of foolishness, it was the epoch of belief"
)
response = requests.post("http://127.0.0.1:8000/", json=english_text)
french_text = response.text

print(french_text)

(ServeReplica:Summarizer pid=337257) it was the best of times, it was worst of times . <class 'str'>
c'était le meilleur des temps, c'était le pire des temps .


In [86]:
serve.shutdown()

(HTTPProxyActor pid=337207) INFO 2022-12-13 19:10:04,854 http_proxy 10.0.63.8 http_proxy.py:361 - POST / 200 1567.5ms
(ServeReplica:Translator pid=337256) INFO 2022-12-13 19:10:04,851 Translator Translator#leTUjQ replica.py:505 - HANDLE translate OK 914.5ms
(ServeReplica:Summarizer pid=337257) INFO 2022-12-13 19:10:04,852 Summarizer Summarizer#ZIxqhd replica.py:505 - HANDLE __call__ OK 1562.5ms
(ServeController pid=337163) INFO 2022-12-13 19:10:04,883 controller 337163 deployment_state.py:1336 - Removing 1 replica from deployment 'Translator'.
(ServeController pid=337163) INFO 2022-12-13 19:10:04,886 controller 337163 deployment_state.py:1336 - Removing 1 replica from deployment 'Summarizer'.


# To test .deploy() method

In [73]:
sss = Summarizer.bind(Translator.bind())

In [89]:
type(Translator.bind())

ray.dag.class_node.ClassNode

In [75]:
english_text = (
    "It was the best of times, it was the worst of times, it was the age "
    "of wisdom, it was the age of foolishness, it was the epoch of belief"
)
response = requests.post("http://127.0.0.1:8000/", json=english_text)
french_text = response.text

print(french_text)

(ServeReplica:Summarizer pid=335866) it was the best of times, it was worst of times . <class 'str'>
c'était le meilleur des temps, c'était le pire des temps .


(HTTPProxyActor pid=334924) INFO 2022-12-13 19:04:42,720 http_proxy 10.0.63.8 http_proxy.py:361 - POST / 200 1499.7ms
(ServeReplica:Translator pid=335823) INFO 2022-12-13 19:04:42,717 Translator Translator#jChlNT replica.py:505 - HANDLE translate OK 897.4ms
(ServeReplica:Summarizer pid=335866) INFO 2022-12-13 19:04:42,719 Summarizer Summarizer#sJqdGy replica.py:505 - HANDLE __call__ OK 1494.5ms
