In [None]:
!pip install -U "ray[air]"

# Training

In [None]:
import boto3
import pickle
import ray
from ray.air.config import RunConfig, ScalingConfig
from ray.train.xgboost import XGBoostTrainer

In [None]:
# Close any connection left over from an earlier run so the cell can be
# re-executed, then attach to the remote cluster through the Ray client URL.
ray.shutdown()
ray_client_address = (
    "ray://ray-cluster-kuberay-head-svc.ray-cluster.svc.cluster.local:10001"
)
ray.init(address=ray_client_address)

In [None]:
# Display the resources reported by the cluster we just connected to.
ray.cluster_resources()

In [None]:
# S3 bucket that holds the training CSV and receives the training output.
bucket = "fm-ops-datasets"
# Key prefix inside the bucket for the run storage and the pickled artifacts.
prefix = "model"

def prepare_dataset():
    """Read the demo CSV from S3 and derive the three datasets used below.

    Returns:
        A ``(train, valid, test)`` tuple. ``train`` and ``valid`` are the two
        parts produced by ``train_test_split(test_size=0.3)``; ``test`` is
        ``valid`` with the ``"Target"`` column dropped.
    """
    source_uri = f"s3://{bucket}/training/demo.csv"
    full_dataset = ray.data.read_csv(source_uri)

    splits = full_dataset.train_test_split(test_size=0.3)
    train_part, valid_part = splits

    # The test set is the validation data without its label column.
    unlabeled_part = valid_part.drop_columns(cols=["Target"])
    return train_part, valid_part, unlabeled_part

In [None]:
# Build the datasets once; the trainer cell below consumes train/valid.
train_dataset, valid_dataset, test_dataset = prepare_dataset()

In [None]:
# Worker layout for the training job: two CPU-only workers.
# NOTE(review): `_max_cpu_fraction_per_node` is an underscore-prefixed option;
# presumably it caps the share of each node's CPUs the trainer may reserve —
# confirm against the Ray documentation for the installed version.
xgb_scaling = ScalingConfig(
    num_workers=2,
    use_gpu=False,
    _max_cpu_fraction_per_node=0.9,
)

# Run name and the S3 location passed as the run's storage path.
xgb_run = RunConfig(
    name="training_demo", storage_path=f"s3://{bucket}/{prefix}"
)

# XGBoost settings: binary logistic objective, evaluated with log-loss and
# error.
xgb_params = {
    "objective": "binary:logistic",
    "eval_metric": ["logloss", "error"],
}

trainer = XGBoostTrainer(
    scaling_config=xgb_scaling,
    run_config=xgb_run,
    label_column="Target",
    num_boost_round=20,
    params=xgb_params,
    datasets={"train": train_dataset, "valid": valid_dataset},
)

In [None]:
# Run the training job. Despite its name, `model` holds whatever
# `trainer.fit()` returns; later cells read `.metrics` and `.checkpoint` from
# it and pass it to the XGB deployment.
model = trainer.fit()

In [None]:
# Show the metrics attached to the fit result.
model.metrics

In [None]:
# Keep the checkpoint attached to the fit result; it is pickled to S3 below.
checkpoint = model.checkpoint

In [None]:
# Pickle the checkpoint and store the bytes under <prefix>/checkpoint.pkl in
# the bucket; the Serving section downloads this object again.
checkpoint_bytes = pickle.dumps(checkpoint)
checkpoint_target = boto3.resource("s3").Object(
    bucket, f"{prefix}/checkpoint.pkl"
)
checkpoint_target.put(Body=checkpoint_bytes)

In [None]:
# Pickle the whole fit result (`model`) and store it under <prefix>/model.pkl
# in the same bucket.
model_bytes = pickle.dumps(model)
model_target = boto3.resource("s3").Object(bucket, f"{prefix}/model.pkl")
model_target.put(Body=model_bytes)

# Serving

In [None]:
# `serve` is otherwise first imported in a later cell, so on a fresh
# top-to-bottom run this cell would fail with NameError. Import it here.
from ray import serve

# Start Ray Serve in detached mode on the connected cluster.
serve.start(detached=True)

In [None]:
# S3 client used by the serving cells to download the pickled checkpoint.
s3 = boto3.client("s3", region_name="us-east-1")
# Same bucket name as in the Training section, repeated so the Serving
# section can be run without executing the training cells.
bucket = "fm-ops-datasets"
# Key prefix of the pickled artifacts (same value as `prefix` in Training).
model_prefix = "model"

## From checkpoint

In [None]:
import boto3
import pickle
from ray.train.xgboost import XGBoostPredictor
from ray import serve
from ray.serve import PredictorDeployment
from ray.serve.http_adapters import pandas_read_json

In [None]:
# Download the pickled checkpoint and rebuild the Python object from it.
# SECURITY: pickle.loads executes code embedded in the payload — only load
# objects from a bucket whose writers are trusted.
checkpoint_key = f"{model_prefix}/checkpoint.pkl"
response = s3.get_object(Bucket=bucket, Key=checkpoint_key)
model_checkpoint = pickle.loads(response["Body"].read())

In [None]:
# Display the restored checkpoint object as a quick sanity check.
model_checkpoint

In [None]:
# Build a deployment named "XGBoostService" that wraps the restored checkpoint
# in an XGBoostPredictor and parses request bodies with `pandas_read_json`,
# then hand it to Serve.
xgboost_service = PredictorDeployment.options(name="XGBoostService").bind(
    XGBoostPredictor, model_checkpoint, http_adapter=pandas_read_json
)
serve.run(xgboost_service)

## From model

In [None]:
import pickle
import ray
from ray import serve
import boto3

In [None]:
@serve.deployment(num_replicas=2, route_prefix="/predict")
class XGB:
    """Serve deployment that maps a JSON feature payload to one prediction.

    The request body must be a JSON object with one entry per name in
    ``FEATURE_NAMES``. Spaces in the incoming keys are ignored, so both
    ``"Blood Pressure"`` and ``"BloodPressure"`` are accepted.
    """

    # Order in which the feature values are handed to ``model.predict``.
    FEATURE_NAMES = (
        "Pregnancies",
        "Glucose",
        "BloodPressure",
        "SkinThickness",
        "Insulin",
        "BMI",
        "DiabetesPedigree",
        "Age",
    )

    def __init__(self, model):
        """Store the model; it must expose ``predict(rows)``."""
        self.model = model

    async def __call__(self, starlette_request):
        """Handle one HTTP request.

        Args:
            starlette_request: Incoming request whose body is the JSON
                feature object described on the class.

        Returns:
            ``{"result": <first element of model.predict([features])>}``.

        Raises:
            KeyError: If a required feature is missing from the payload.
        """
        payload = await starlette_request.json()
        print("Worker: received starlette request with data", payload)

        # Strip spaces from the keys so the spaced spelling this deployment
        # originally required and the compact spelling sent by the notebook's
        # sample request both resolve to the same feature.
        features = {
            name.replace(" ", ""): value for name, value in payload.items()
        }
        input_vector = [features[name] for name in self.FEATURE_NAMES]

        prediction = self.model.predict([input_vector])[0]
        return {"result": prediction}

In [None]:
# Deploy XGB with `model` as its constructor argument.
# NOTE(review): `model` here is the object returned by `trainer.fit()` in the
# Training section, and XGB calls `.predict(...)` on it — confirm that object
# actually provides `predict`; possibly the pickled model was meant to be
# loaded from S3 (model.pkl) first.
serve.run(XGB.bind(model))

# Predict

In [None]:
# List the deployments Serve currently knows about.
serve.list_deployments()

In [None]:
# Show the URL of the deployment registered as "default_XGB" (presumably the
# XGB class under the "default" application — check the listing above).
serve.get_deployment("default_XGB").url

In [None]:
import requests
import json

# One sample record; the keys must match the feature names the deployment
# behind /predict looks up.
sample_request_input = {
    "Pregnancies": 6,
    "Glucose": 148,
    "BloodPressure": 72,
    "SkinThickness": 35,
    "Insulin": 0,
    "BMI": 33.6,
    "DiabetesPedigree": 0.625,
    "Age": 50,
}

# A timeout keeps the cell from hanging forever when the Serve endpoint is
# unreachable (requests waits indefinitely by default).
# NOTE(review): the address is local while Ray is reached through a remote
# client URL — confirm port 8000 is forwarded to the Serve proxy.
response = requests.get(
    "http://127.0.0.1:8000/predict", json=sample_request_input, timeout=30
)

# Print the status code together with the body so the prediction (or the
# error text) is visible, not just the response object's repr.
print(response.status_code, response.text)

# Cleanup

In [None]:
# Remove what Serve has registered under the name "default" (presumably the
# application created by the `serve.run` calls above — confirm).
serve.delete("default")

In [None]:
# Shut Ray Serve down before disconnecting from the cluster.
serve.shutdown()

In [None]:
# Disconnect this notebook from the Ray cluster.
ray.shutdown()