Install deps

In [1]:
!pip install ray["default"]=="2.9.3"



Create our staging area for results

In [135]:
!mkdir -p /home/data/code /home/data/results
!touch /home/data/results/.placeholder

touch: setting times of '/home/data/results/.placeholder': Operation not permitted


This is our batch inference script that will be submitted to Ray Cluster

In [136]:
%%writefile /home/data/code/inference.py
import ray
import pandas as pd

s3_uri = "s3://anonymous@air-example-data-2/imagenette2/val/"


ds = ray.data.read_images(s3_uri)

from typing import Dict
import numpy as np

from transformers import pipeline
from PIL import Image

BATCH_SIZE = 16

class ImageClassifier:
    def __init__(self):
        # If doing CPU inference, set `device="cpu"` instead.
        self.classifier = pipeline("image-classification", model="google/vit-base-patch16-224", device=0)

    def __call__(self, batch: Dict[str, np.ndarray]):
        # Convert the numpy array of images into a list of PIL images which is the format the HF pipeline expects.
        outputs = self.classifier(
            [Image.fromarray(image_array) for image_array in batch["image"]], 
            top_k=1, 
            batch_size=BATCH_SIZE)
        
        # `outputs` is a list of length-one lists. For example:
        # [[{'score': '...', 'label': '...'}], ..., [{'score': '...', 'label': '...'}]]
        batch["score"] = [output[0]["score"] for output in outputs]
        batch["label"] = [output[0]["label"] for output in outputs]
        return batch

predictions = ds.map_batches(
    ImageClassifier,
    compute=ray.data.ActorPoolStrategy(size=4), # Change this number based on the number of GPUs in your cluster.
    num_gpus=1, # Specify 1 GPU per model replica.
    batch_size=BATCH_SIZE # Use the largest batch size that can fit on our GPUs
)

# Take some sample predictions
sample_predictions = predictions.take(5)

# Convert sample predictions to DataFrame
df = pd.DataFrame(sample_predictions)

# Write predictions DataFrame to csv file
df.to_csv('/data/results/sample_predictions.csv', index=False)

Writing /home/data/code/inference.py


Submit the Ray Job

In [137]:
train_deps = [
    "transformers",
    "torch==2.2.0"
]

from ray.job_submission import JobSubmissionClient
import os
client = JobSubmissionClient("http://"+os.environ["RAY_CLUSTER"]+":8265")
job_id = client.submit_job(
    entrypoint="python /data/code/inference.py",
    # Path to the local directory that contains the script.py file
    runtime_env={"pip": train_deps}
)
print(job_id)

raysubmit_jpsA3UygQNgspP3Q


Monitor Job status

In [138]:
from ray.job_submission import JobStatus
import time

def wait_until_status(job_id, status_to_wait_for, timeout_seconds=300):
    start = time.time()
    while time.time() - start <= timeout_seconds:
        status = client.get_job_status(job_id)
        print(f"status: {status}")
        if status in status_to_wait_for:
            break
        time.sleep(30)
wait_until_status(job_id, {JobStatus.SUCCEEDED, JobStatus.STOPPED, JobStatus.FAILED})
logs = client.get_job_logs(job_id)
print(logs)

status: RUNNING
status: RUNNING
status: RUNNING
status: RUNNING
status: RUNNING
status: RUNNING
status: RUNNING
status: RUNNING
status: RUNNING
status: RUNNING
2024-03-15 00:39:41,451	INFO worker.py:1405 -- Using address 192.168.186.125:6379 set in the environment variable RAY_ADDRESS
2024-03-15 00:39:41,451	INFO worker.py:1540 -- Connecting to existing Ray cluster at address: 192.168.186.125:6379...
2024-03-15 00:39:41,455	INFO worker.py:1715 -- Connected to Ray cluster. View the dashboard at [1m[32mhttp://192.168.186.125:8265 [39m[22m
2024-03-15 00:39:44,773	INFO dataset.py:2488 -- Tip: Use `take_batch()` instead of `take() / show()` to return records in pandas or numpy batch format.
2024-03-15 00:39:44,793	INFO set_read_parallelism.py:115 -- Using autodetected parallelism=200 for stage ReadImage to satisfy DataContext.get_current().min_parallelism=200.
2024-03-15 00:39:44,793	INFO streaming_executor.py:112 -- Executing DAG InputDataBuffer[Input] -> TaskPoolMapOperator[ReadImage]

Install deps for testing the inference

In [139]:
!pip install pandas



Read the CSV with sample predictions created by via batch inference

In [140]:
import pandas as pd


# Read in predictions csv
df = pd.read_csv("/home/data/results/sample_predictions.csv")

Let's see what it says

In [146]:
print("Label: ", df["label"].iloc[3])
print("Score: ", df["score"].iloc[0])

Label:  tench, Tinca tinca
Score:  0.9996908903121948
