## Model Serving

### Initialize Ray

In [1]:
import ray
from ray import serve

# Attach this notebook to the cluster head through its ray:// address (port 10001)
ray_head = "ray-head"
ray.init(address="ray://{}:10001".format(ray_head))

0,1
Python version:,3.8.13
Ray version:,2.2.0
Dashboard:,http://172.21.0.5:8265


### Start Application

In [2]:
# Bring up Serve with its HTTP proxy listening on every interface, port 5010.
# Non-detached mode is used here; pass detached=True instead to try the detached variant.
serve.start(detached=False, http_options=dict(host="0.0.0.0", port=5010))

[2m[36m(ServeController pid=1781)[0m INFO 2024-01-28 08:30:26,342 controller 1781 http_state.py:129 - Starting HTTP proxy with name 'SERVE_CONTROLLER_ACTOR:yZODkb:SERVE_PROXY_ACTOR-e20552446d9268018fcb0afaf6b674f46394178c0b97c6e0e6048a4a' on node 'e20552446d9268018fcb0afaf6b674f46394178c0b97c6e0e6048a4a' listening on '0.0.0.0:5010'


<ray.serve._private.client.ServeControllerClient at 0x7f7ae0de6130>

### Pipeline

In [3]:
import tempfile
from pathlib import Path

import mlflow
import pandas as pd
from fastapi import FastAPI, Query
from fastapi.responses import HTMLResponse
from fastapi.responses import RedirectResponse
from pydantic import BaseModel, Field


# Reference payload: supplies the per-field examples and the schema-level
# example of the request model.
example_values = dict(
    specimen_number=1,
    eccentricity=0.86224,
    aspect_ratio=2.0735,
    elongation=0.52269,
    solidity=0.98686,
    stochastic_convexity=0.99474,
    isoperimetric_factor=0.70529,
    maximal_indentation_depth=0.010097,
    lobedness=0.018554,
    average_intensity=0.041404,
    average_contrast=0.12163,
    smoothness=0.014579,
    third_moment=0.0048689,
    uniformity=0.00027608,
    entropy=0.9458,
)

class Input(BaseModel):
    """Request body accepted by the ``POST /predict`` endpoint.

    Every attribute is required (``...``) and must be strictly greater than
    zero (``gt=0``). Each one carries its own example taken from
    ``example_values``; the complete mapping is also published as the
    schema-level example through ``Config.schema_extra``.
    """

    # Constraints are declared with pydantic's ``Field``: ``fastapi.Query`` is
    # meant for query-string parameters of a path operation, not for the
    # attributes of a body model.
    specimen_number: float = Field(..., gt=0, example=example_values['specimen_number'])
    eccentricity: float = Field(..., gt=0, example=example_values['eccentricity'])
    aspect_ratio: float = Field(..., gt=0, example=example_values['aspect_ratio'])
    elongation: float = Field(..., gt=0, example=example_values['elongation'])
    solidity: float = Field(..., gt=0, example=example_values['solidity'])
    stochastic_convexity: float = Field(..., gt=0, example=example_values['stochastic_convexity'])
    isoperimetric_factor: float = Field(..., gt=0, example=example_values['isoperimetric_factor'])
    maximal_indentation_depth: float = Field(..., gt=0, example=example_values['maximal_indentation_depth'])
    lobedness: float = Field(..., gt=0, example=example_values['lobedness'])
    average_intensity: float = Field(..., gt=0, example=example_values['average_intensity'])
    average_contrast: float = Field(..., gt=0, example=example_values['average_contrast'])
    smoothness: float = Field(..., gt=0, example=example_values['smoothness'])
    third_moment: float = Field(..., gt=0, example=example_values['third_moment'])
    uniformity: float = Field(..., gt=0, example=example_values['uniformity'])
    entropy: float = Field(..., gt=0, example=example_values['entropy'])

    class Config:
        # Full-payload example attached to the model's JSON schema.
        schema_extra = {
            "example": example_values
        }


# FastAPI application whose routes are attached to the deployment class below
app = FastAPI(
    title="Predictor API",
    description="Pipeline online inference",
)


@serve.deployment()
@serve.ingress(app)
class LeafDeployment:
    """Ray Serve deployment that serves the registered model through ``app``.

    Routes:
        GET  /         redirects to ``/docs``.
        POST /predict  validates an ``Input`` body, preprocesses it and
                       returns the model's predictions.
    """

    def __init__(self):
        """Load the predictor from MLflow when the replica is constructed."""
        model_name = "extratree"
        # NOTE(review): "1" looks like a registry version number rather than a
        # stage name, hence the local name -- confirm against the registry.
        model_version = "1"
        self.predictor = mlflow.sklearn.load_model(
            model_uri=f"models:/{model_name}/{model_version}")

    def preprocessing(self, df):
        """Drop the specimen identifier, cast the features to float and log the step.

        Args:
            df: DataFrame holding the raw request columns.

        Returns:
            A new DataFrame without the ``specimen_number`` column whose
            remaining columns are all cast to ``float``.

        Side effects:
            Opens an MLflow run named ``preprocessing`` and logs the cleaned
            frame as the ``preprocessed_data.csv`` artifact together with the
            ``n_samples`` and ``n_features`` params.
        """
        with mlflow.start_run(run_name='preprocessing'):
            # errors='ignore' keeps the original tolerance for frames that
            # have no 'specimen_number' column.
            df_cleaned = df.drop(columns='specimen_number', errors='ignore').astype(float)

            # Write the artifact into a per-call temporary directory instead
            # of a fixed file in the working directory: nothing is left behind
            # on disk and overlapping requests cannot overwrite each other's
            # CSV. The artifact keeps its original file name.
            with tempfile.TemporaryDirectory() as tmp_dir:
                csv_path = Path(tmp_dir) / 'preprocessed_data.csv'
                df_cleaned.to_csv(csv_path, index=False)
                mlflow.log_artifact(str(csv_path))

            # logging
            mlflow.log_param(key='n_samples', value=len(df_cleaned))
            mlflow.log_param(key='n_features', value=len(df_cleaned.columns))

            return df_cleaned

    @app.get('/', include_in_schema=False)
    async def docs_redirect(self):
        """Send requests for the root path to ``/docs``."""
        return RedirectResponse(url='/docs')

    @app.post("/predict",
              tags=['Production model'],
              summary="Usar el modelo de produccion")
    def call(self, request: Input):
        """Run the preprocessing step and the predictor on one request body.

        Args:
            request: Validated ``Input`` payload.

        Returns:
            The predictions as a plain Python list.
        """
        # .dict() is the public pydantic accessor for the field values; the
        # single mapping becomes a one-row frame.
        features = pd.DataFrame([request.dict()])
        preprocessed = self.preprocessing(features)
        predictions = self.predictor.predict(preprocessed)

        # Hand back built-in Python types rather than the array object
        # (scikit-learn predictors return NumPy arrays), so the JSON body does
        # not depend on any array-specific encoder.
        return predictions.tolist()


# Bind the deployment and hand it to Serve; the value returned by serve.run is the cell output
leaf_node = LeafDeployment.bind()
serve.run(leaf_node)

[2m[36m(HTTPProxyActor pid=1845)[0m INFO:     Started server process [1845]
The new client HTTP config differs from the existing one in the following fields: ['host', 'port', 'location']. The new HTTP config is ignored.
[2m[36m(ServeController pid=1781)[0m INFO 2024-01-28 08:30:28,244 controller 1781 deployment_state.py:1310 - Adding 1 replica to deployment 'LeafDeployment'.


RayServeSyncHandle(deployment='LeafDeployment')

[2m[36m(HTTPProxyActor pid=1845)[0m INFO 2024-01-28 08:30:35,025 http_proxy 172.21.0.5 http_proxy.py:361 - GET / 200 7.1ms
[2m[36m(ServeReplica:LeafDeployment pid=1926)[0m INFO 2024-01-28 08:30:35,022 LeafDeployment LeafDeployment#COWEaJ replica.py:505 - HANDLE __call__ OK 0.5ms
[2m[36m(HTTPProxyActor pid=1845)[0m INFO 2024-01-28 08:30:35,616 http_proxy 172.21.0.5 http_proxy.py:361 - GET / 200 16.9ms
[2m[36m(ServeReplica:LeafDeployment pid=1926)[0m INFO 2024-01-28 08:30:35,612 LeafDeployment LeafDeployment#COWEaJ replica.py:505 - HANDLE __call__ OK 10.6ms
[2m[36m(HTTPProxyActor pid=1845)[0m INFO 2024-01-28 08:31:11,934 http_proxy 172.21.0.5 http_proxy.py:361 - POST / 200 519.3ms
[2m[36m(ServeReplica:LeafDeployment pid=1926)[0m INFO 2024-01-28 08:31:11,912 LeafDeployment LeafDeployment#COWEaJ replica.py:505 - HANDLE __call__ OK 495.0ms


Visit http://localhost:5010/docs to browse and try the API from its interactive documentation page.

![image.png](attachment:014b3805-043b-48e4-a66b-a019470b4fb1.png)

# Client

In [8]:
import pandas as pd
import requests

# Read the inference features and keep only the first row as a one-element list of records
filename = "X_inference.csv"
df = pd.read_csv(filename)
request = df.iloc[:1].to_dict(orient='records')
request

[{'specimen_number': 1,
  'eccentricity': 0.86224,
  'aspect_ratio': 2.0735,
  'elongation': 0.52269,
  'solidity': 0.98686,
  'stochastic_convexity': 0.99474,
  'isoperimetric_factor': 0.70529,
  'maximal_indentation_depth': 0.010097,
  'lobedness': 0.018554,
  'average_intensity': 0.041404,
  'average_contrast': 0.12163,
  'smoothness': 0.014579,
  'third_moment': 0.0048689,
  'uniformity': 0.00027608,
  'entropy': 0.9458}]

In [9]:
# Post the first record to the /predict endpoint and unwrap the single prediction.
# An explicit timeout is set because requests otherwise waits indefinitely for
# a server that never answers.
response = requests.post("http://ray-head:5010/predict", json=request[0], timeout=30)
# Fail with the HTTP error itself (e.g. a validation error) instead of a
# confusing indexing error on an unexpected response body.
response.raise_for_status()
result = response.json()[0]
result

32

[2m[36m(HTTPProxyActor pid=1537)[0m INFO 2023-12-13 21:12:00,051 http_proxy 172.18.0.4 http_proxy.py:361 - POST / 200 530.3ms
[2m[36m(ServeReplica:LeafDeployment pid=1619)[0m INFO 2023-12-13 21:12:00,049 LeafDeployment LeafDeployment#shwKgR replica.py:505 - HANDLE __call__ OK 526.1ms
