# Model servers: MLServer custom example

## 1. Write the models.py file

This file contains the logic of your model. It must define a class that inherits from `mlserver.MLModel` and implements the following methods:
- `load`: loads the model into memory
- `predict`: makes a prediction with the loaded model

The class can have any other methods or attributes you need.

In [None]:
%%writefile models.py

from mlserver import MLModel
from mlserver.codecs import decode_args
from mlserver.utils import get_model_uri
from mlserver.errors import InferenceError
from mlserver.types import InferenceRequest, InferenceResponse
from mlserver import types
from pydantic import BaseModel
from typing import List
import numpy as np
import joblib


class customModel(MLModel):
    """Custom MLServer runtime that serves a model serialised with joblib.

    The model artifact is read in ``load`` and kept on ``self._model``;
    ``predict`` forwards decoded request data to that model's ``predict``.
    """

    async def load(self) -> bool:
        """Load the serialised model into memory and mark the runtime ready.

        Returns:
            ``True`` once the model has been loaded.
        """
        # Location of the artifact, resolved from this model's settings.
        model_uri = await get_model_uri(self._settings)

        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code. Only point the model URI at artifacts you trust.
        with open(model_uri, 'rb') as f:
            self._model = joblib.load(f)

        self.ready = True
        return self.ready

    @decode_args
    async def predict(self, data: np.ndarray) -> np.ndarray:
        """Run the loaded model on ``data`` and return its predictions.

        Args:
            data: Input features. A 0-D or 1-D array is treated as a single
                sample; a 2-D array is treated as ``(n_samples, n_features)``;
                higher-rank arrays keep their first axis as the batch axis
                and have the remaining axes flattened into features.

        Returns:
            The model's predictions as a NumPy array.
        """
        if data.ndim < 2:
            # A lone sample: promote it to a one-row batch.
            data = data.reshape(1, -1)
        elif data.ndim > 2:
            # Keep the batch axis and flatten everything else. The feature
            # count is computed explicitly so empty batches reshape cleanly.
            n_features = int(np.prod(data.shape[1:]))
            data = data.reshape(data.shape[0], n_features)
        # 2-D input is left as-is so that multi-row batches are not collapsed
        # into a single row.

        data = data.astype(np.float32)

        predictions = self._model.predict(data)
        return np.asarray(predictions)

## 2. Write the settings and model-settings files

In [None]:
%%writefile settings.json
{
    "debug": "true"
}

In [None]:
%%writefile model-settings.json
{
    "name": "iris-rf-custom",
    "implementation": "models.customModel",
    "parameters": {
        "uri": "../model.pkl",
        "version": "v0.1.0"
    }
}

## 3. Start the model server

If running in Colab:

```bash
! nohup mlserver start . &
```

If running locally:

```bash
mlserver start .
```

In [None]:
# Check if the process is running
! ps | grep mlserver

## 4. Make a prediction

In [None]:
import requests
import numpy as np

from mlserver.types import InferenceRequest
from mlserver.codecs import NumpyCodec

# One sample with four feature values, shaped (1, 4).
x_0 = np.asarray([[5.9, 3. , 5.1, 1.8]])

# Encode the array as a request input named "data", then wrap it in the
# inference request.
encoded_input = NumpyCodec.encode_input(name="data", payload=x_0)
inference_request = InferenceRequest(inputs=[encoded_input])

# Inference endpoint for model "iris-rf-custom", version "v0.1.0".
endpoint = "http://localhost:8080/v2/models/iris-rf-custom/versions/v0.1.0/infer"
request_body = inference_request.dict()
response = requests.post(endpoint, json=request_body)

# Last expression of the cell: display the decoded JSON response.
response.json()