Import common libraries

In [None]:
import mlflow

Models with column-based inputs work as is (check out the sklearn example below). Models with tensor-based inputs work when they are logged as pyfunc models (check out the xgboost, pytorch, tensorflow and simple Python function examples below).

To send a request to these models, use the test data (for example, X_test for xgboost) or the first element of the test data. Once you register a model and create a model API in Domino, if you are using the tester in the UI, make sure the request is properly formatted and has the form: {"data": <your test data>}

Logging models using the following templates works with our model APIs. 

XGBOOST (array input)

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

# Load the iris dataset and hold out 20% of the rows for testing.
# A fixed random_state keeps the split reproducible across kernel restarts.
data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    data["data"], data["target"], test_size=0.2, random_state=123
)

xgb_classifier = XGBClassifier(
    n_estimators=10,
    max_depth=3,
    learning_rate=1,
    # iris has three target classes, so a multiclass objective is the one that applies
    objective="multi:softprob",
    random_state=123,
)

# train model
xgb_classifier.fit(X_train, y_train)

class SomeModel(mlflow.pyfunc.PythonModel):
    """Pyfunc wrapper around a fitted estimator that exposes ``predict``.

    MLflow's pyfunc flavor calls ``predict(context, model_input)`` on the
    logged object, so the wrapper must implement that method.
    """

    def __init__(self, model):
        """Store the fitted estimator that will produce the predictions."""
        self.model = model

    def nn_func(self, x):
        """Return ``self.model.predict(x)`` for the input ``x``."""
        return self.model.predict(x)

    def predict(self, context, model_input, params=None):
        """Entry point invoked by MLflow at inference time.

        Args:
            context: MLflow-provided context object (unused here).
            model_input: Input data forwarded unchanged to the wrapped model.
            params: Optional inference parameters (unused here).

        Returns:
            Whatever the wrapped model's ``predict`` returns for ``model_input``.
        """
        return self.nn_func(model_input)

# Wrap the trained classifier so it can be logged with the pyfunc flavor
model = SomeModel(xgb_classifier)

# Open a new MLflow run, log the wrapper and register it under the given name
with mlflow.start_run() as run:
    model_info = mlflow.pyfunc.log_model(
        python_model=model,
        artifact_path="test-model",
        registered_model_name="pyfunc-xgboost-model",
    )
print(model_info)

In [None]:
# Show the signature MLflow recorded for the model logged in the previous cell.
# NOTE(review): expected to be None, because log_model was given neither a
# signature nor an input example — confirm against your MLflow version.
model_info.signature

SKLEARN (column based inputs)

In [None]:
from mlflow.models import infer_signature
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split


# loading the California housing dataset
cali_housing = fetch_california_housing(as_frame=True)

# split the dataset into train and test partitions
X_train, X_test, y_train, y_test = train_test_split(
    cali_housing.data, cali_housing.target, test_size=0.2, random_state=123
)

# train the model
lin_reg = LinearRegression().fit(X_train, y_train)

# Infer model signature from the training frame and the model's predictions on it
predictions = lin_reg.predict(X_train)
signature = infer_signature(X_train, predictions)


# The `with` block ends the run on exit, so no explicit mlflow.end_run() is needed
with mlflow.start_run() as run:
    mlflow.sklearn.log_model(
        lin_reg,
        registered_model_name="sklearn-model",
        artifact_path="sklearn-model",
        signature=signature,
    )

In [None]:
# Display the signature inferred in the previous cell from X_train and the model's predictions
signature

TENSORFLOW (tensor based inputs)

In [None]:
import tensorflow as tf

# Load MNIST and rescale the pixel values by 1/255
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0

# Build the network layer by layer; the final Dense layer has no activation
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(128, activation="relu"))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10))

# from_logits=True because the last layer emits raw scores rather than probabilities
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(loss=loss_fn, optimizer="adam", metrics=["accuracy"])

# train model
model.fit(x_train, y_train, epochs=5)

class SomeModel(mlflow.pyfunc.PythonModel):
    """Pyfunc wrapper around a trained model that exposes ``predict``.

    MLflow's pyfunc flavor calls ``predict(context, model_input)`` on the
    logged object, so the wrapper must implement that method.
    """

    def __init__(self, model):
        """Store the trained model that will produce the predictions."""
        self.model = model

    def nn_func(self, x):
        """Return ``self.model.predict(x)`` for the input ``x``."""
        return self.model.predict(x)

    def predict(self, context, model_input, params=None):
        """Entry point invoked by MLflow at inference time.

        Args:
            context: MLflow-provided context object (unused here).
            model_input: Input data forwarded unchanged to the wrapped model.
            params: Optional inference parameters (unused here).

        Returns:
            Whatever the wrapped model's ``predict`` returns for ``model_input``.
        """
        return self.nn_func(model_input)

# Wrap the trained Keras network so it can be logged with the pyfunc flavor
model_tf = SomeModel(model)

# Open a new MLflow run, log the wrapper and register it under the given name
with mlflow.start_run() as run:
    model_info = mlflow.pyfunc.log_model(
        python_model=model_tf,
        artifact_path="test-model",
        pip_requirements=["tensorflow"],
        registered_model_name="pyfunc-tensorflow-model",
    )
print(model_info)

In [None]:
# Show the signature MLflow recorded for the model logged in the previous cell
# (a bare `signature` here would display the one inferred for the sklearn model).
# NOTE(review): expected to be None, because log_model was given neither a
# signature nor an input example — confirm against your MLflow version.
model_info.signature

PYTORCH (tensor based inputs)

In [None]:
import mlflow.pyfunc
import numpy as np
import torch
from torch import nn

# Seed torch so the random initial weights and the toy data are reproducible
torch.manual_seed(123)

net = nn.Linear(6, 1)
loss_function = nn.L1Loss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)

# A single random sample: 6 input values and 1 target value
X = torch.randn(6)
y = torch.randn(1)

# train model
epochs = 5
for epoch in range(epochs):
    # Clear gradients accumulated by the previous step
    optimizer.zero_grad()
    outputs = net(X)

    loss = loss_function(outputs, y)
    loss.backward()

    optimizer.step()

class SomeModel(mlflow.pyfunc.PythonModel):
    """Pyfunc wrapper around a torch module that exposes ``predict``.

    MLflow's pyfunc flavor calls ``predict(context, model_input)`` on the
    logged object, so the wrapper must implement that method.
    """

    def __init__(self, model):
        """Store the trained torch module that will produce the predictions."""
        self.model = model

    def nn_func(self, x):
        """Run the wrapped module on tensor ``x`` and return the result as a NumPy array."""
        return self.model(x).detach().numpy()

    def predict(self, context, model_input, params=None):
        """Entry point invoked by MLflow at inference time.

        Args:
            context: MLflow-provided context object (unused here).
            model_input: Input data; anything that is not already a tensor is
                converted to a float32 tensor before the forward pass.
            params: Optional inference parameters (unused here).

        Returns:
            The module's output as a NumPy array.
        """
        if not torch.is_tensor(model_input):
            # The wrapped module only accepts tensors, while requests arrive as
            # arrays/lists; float32 is torch's default parameter dtype.
            model_input = torch.as_tensor(np.asarray(model_input), dtype=torch.float32)
        return self.nn_func(model_input)

# Wrap the trained torch module so it can be logged with the pyfunc flavor
model = SomeModel(net)

# Open a new MLflow run, log the wrapper and register it under the given name
with mlflow.start_run() as run:
    model_info = mlflow.pyfunc.log_model(
        python_model=model,
        artifact_path="test-model-4",
        pip_requirements=["torch"],
        registered_model_name="pyfunc-torch-model-4",  # important
    )
print(model_info)

In [None]:
# Show the signature MLflow recorded for the model logged in the previous cell
# (a bare `signature` here would display the one inferred for the sklearn model).
# NOTE(review): expected to be None, because log_model was given neither a
# signature nor an input example — confirm against your MLflow version.
model_info.signature

CUSTOM PYTHON FUNCTION (tensor based inputs)

Simple python function with artifact

In [None]:
import mlflow.pyfunc
import numpy as np
import pandas as pd

class SomeModel(mlflow.pyfunc.PythonModel):
    """Pyfunc model that returns the sum of its input.

    MLflow's pyfunc flavor calls ``predict(context, model_input)`` on the
    logged object, so the model must implement that method.
    """

    # Static so it works both as SomeModel.sum_func(arr) and on an instance
    @staticmethod
    def sum_func(arr):
        """Return ``np.sum(arr)``."""
        return np.sum(arr)

    def predict(self, context, model_input, params=None):
        """Entry point invoked by MLflow at inference time.

        Args:
            context: MLflow-provided context object (unused here).
            model_input: Input data forwarded unchanged to ``sum_func``.
            params: Optional inference parameters (unused here).

        Returns:
            The result of ``np.sum(model_input)``.
        """
        return self.sum_func(model_input)

# Define a function that writes a small CSV artifact to the working directory
def generate_artifact():
    """Write a two-column table to ``data.csv`` and return ``to_csv``'s result.

    The file is created in the current working directory, without the index
    column. Because a path is passed to ``to_csv``, the returned value is None.
    """
    columns = {"a": [1, 2, 3], "b": [4, 5, 6]}
    frame = pd.DataFrame(columns)
    return frame.to_csv("data.csv", index=False)

# Write data.csv first so the file exists when it is attached as an artifact
generate_artifact()
model = SomeModel()

# Open a new MLflow run, log the model together with data.csv and register it
with mlflow.start_run() as run:
    model_info = mlflow.pyfunc.log_model(
        python_model=model,
        artifact_path="test-model",
        artifacts={"model_file": "data.csv"},
        registered_model_name="test",
    )
print(model_info)

Simple python function without artifact 

In [None]:
class SomeModel(mlflow.pyfunc.PythonModel):
    """Pyfunc model that returns the sum of its input.

    MLflow's pyfunc flavor calls ``predict(context, model_input)`` on the
    logged object, so the model must implement that method.
    """

    # Static so it works both as SomeModel.sum_func(arr) and on an instance
    @staticmethod
    def sum_func(arr):
        """Return ``np.sum(arr)``."""
        return np.sum(arr)

    def predict(self, context, model_input, params=None):
        """Entry point invoked by MLflow at inference time.

        Args:
            context: MLflow-provided context object (unused here).
            model_input: Input data forwarded unchanged to ``sum_func``.
            params: Optional inference parameters (unused here).

        Returns:
            The result of ``np.sum(model_input)``.
        """
        return self.sum_func(model_input)

# This variant attaches no artifact file, so nothing is generated beforehand
model = SomeModel()

# Open a new MLflow run, log the model (no `artifacts` argument) and register it
with mlflow.start_run() as run:
    model_info = mlflow.pyfunc.log_model(
        python_model=model,
        artifact_path="test-model-1",
        registered_model_name="test-wo-artifact",
    )
print(model_info)