In [5]:
import os
import sys

import mlflow
from mlflow.client import MlflowClient
from mlflow.models import infer_signature, ModelSignature
from mlflow.types import Schema, ColSpec

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import datasets
import pandas as pd



### Model Training Phase

In [6]:
# Loading data
data = datasets.load_breast_cancer()
# Splitting the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(data.data, 
                                                    data.target,
                                                    stratify=data.target)
# Instantiating and fitting the model
model = LogisticRegression(max_iter=1000)            
model.fit(X=X_train, y=y_train)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [7]:
# Converting train features into a DataFrame
X_train_df = pd.DataFrame(data=X_train, columns=data.feature_names)

# Inferring the input signature
signature = infer_signature(model_input=X_train_df, 
                           model_output=model.predict(X_test))

In [8]:
# Creating an input schema for the breast cancer dataset
input_schema = Schema(inputs=[ColSpec(type="double", name=feature_name) 
                              for feature_name in data.feature_names])

# Creating an output schema for the breast cancer dataset
output_schema = Schema(inputs=[ColSpec("double")])

# Creating a signature from our schemas
#signature = ModelSignature(inputs=input_schema, outputs=output_schema)

### Save the model locally to /mnt/mymodel

This is just to show how you can save and run the model locally

In [9]:
import os
import shutil
folder_path = "/tmp/mymodel"
if os.path.exists(folder_path):
        if os.path.isdir(folder_path):
            shutil.rmtree(folder_path)
            
# Saving the model. Note the path. This will save the model under /mnt/model
input_example = X_train_df.iloc[:1]
mlflow.sklearn.save_model(sk_model=model, 
                          path=folder_path, 
                          signature=signature,
                          input_example=input_example)
##Verify that is looks good
os.listdir(folder_path)

['model.pkl',
 'conda.yaml',
 'python_env.yaml',
 'input_example.json',
 'requirements.txt',
 'MLmodel']

### Review the output

Especially take a look at the `requirements.txt` and the yaml files and the `requirements.txt`

### Run the locally saved model

This is a way all models should be run if you want them to be portable. This is the industry
standard MLFLOW based mechanism to load and run models.

In [10]:
#Run the locally saved model
import os
import pandas as pd
def predict(model_uri,features):
    loaded_model = mlflow.pyfunc.load_model(model_uri)
    return loaded_model.predict(features)
cwd = os.getcwd()
d = pd.read_json(f'{cwd}/features.json', orient='records', lines=True)    
predict(folder_path,d)



array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,

### Now we register this model with Domino Experiment Manager

1. Create an experiment with a meaningful name
2. Create a registered model name
3. Finally register model

In [11]:
client = MlflowClient()
mlflow.set_experiment('foundry-export-example')
model_name = "foundry_model" 
try:
    client.create_registered_model(model_name)
except:
    print('Model already exists')

Model already exists


## The Most Important Part - Model Registration

Pay close attention to not just the models that are being registered. But also the additional files we are choosing to add to the model registry. We can add anything our final image in our 
final execution environment will need

In [19]:
# Saving the model as an artifact in a run
from mlflow.store.artifact.runs_artifact_repo import RunsArtifactRepository
mlflow.set_experiment('foundry-export-example')


run_id=''
##Specify Dependencies implicitly
with mlflow.start_run() as run:
    # Obtaining the ID of this run
    run_id = run.info.run_id
    # Logging our model
    model_folder = 'mymodel'
    model_client_folder = 'client'
    model_entry_point = 'python'
    model_command_line = 'client/execute_model.py'
    model_info = mlflow.sklearn.log_model(sk_model=model, 
                             artifact_path=model_folder,  
                             signature=signature,
                             input_example=input_example)
    ##Note these artifacts being logged.
    mlflow.log_artifact(f'{cwd}/client/features.json',model_client_folder)
    mlflow.log_artifact(f'{cwd}/client/example_predict.py',model_client_folder)
    mlflow.log_artifact(f'{cwd}/client/execute_model.py',model_client_folder)
    mlflow.log_artifact(f'{cwd}/Dockerfile.template')
    mlflow.log_artifact(f'{cwd}/create_docker_image.sh.template')

'''
Tags are a way of passing metadata to the model version client. In our case it will be the 
external program that will download these model versions and publish images to foundry

'''
my_tags={}
#The download client will ignore models which do not have this flag set
#This is how a Model Engineer tells the external program that this model is intended to be
#registered to foundry
my_tags['TARGET_PLTR_FOUNDRY']='True'
#Foundry URL goes here. Add additional tags as needed. DO NOT ADD TOKENS OR CREDENTIALS
#The client program is supposed to get them from some SECRET STORE
my_tags['FOUNDRY_URL']='quay.io/domino'
my_tags['MODEL_FOLDER']='mymodel'
my_tags['MODEL_CLIENT_FOLDER']='client'
my_tags['MODEL_ENTRY_POINT']='python'
my_tags['MODEL_EXECUTE_PATH']='client/execute_model.py'

model_src = RunsArtifactRepository.get_underlying_uri(f"runs:/{run_id}/")
mv = client.create_model_version(model_name, model_src, run_id,tags=my_tags)
print("Name: {}".format(mv.name))
print("Version: {}".format(mv.version))
print("Description: {}".format(mv.description))
print("Status: {}".format(mv.status))
print("Stage: {}".format(mv.current_stage))

2024/08/01 19:33:33 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: foundry_model, version 34


Name: foundry_model
Version: 34
Description: 
Status: READY
Stage: None


## Review the registered models
1. Review the model artifacts for each of the two models above
2. Specifically note the "requirements.txt"
3. Ideally unless you know exactly what you are doing you should let mlflow decide on the dependencies. It pretty much takes all of the packages based on the model specification (pickle file) and adds them. This is usually right


In [42]:
### This is the implicit version
print(model_src)
print(run_id)

mlflow-artifacts:/mlflow/7b60f02b5a1c47b1a5608c09a7330447/artifacts
7b60f02b5a1c47b1a5608c09a7330447


## Now imagine you are outside Domino and want to consume this model for prediction

1. First you need to know the model name and version
2. Use it to fetch the model version
3. From the model version get the mlflow run_id
4. Download the model locally

## How do I get all models which have been tagged with foundry

### This is the part where we download the model, install the dependencies and invoke the model

In [99]:
import os
import sys
import mlflow
from mlflow.tracking import MlflowClient
import pandas as pd
import mlflow
from mlflow.client import MlflowClient
# Create a Dockerfile 
import string

def create_file_from_template(template_string, context, output_file_path):
    # Create a Template object

    template = string.Template(template_string)
    
    # Substitute placeholders with actual values
    content = template.substitute(context)
    print(content)
    # Write the content to a file
    with open(output_file_path, 'w') as file:
        file.write(content)
    
    print(f"File created at {output_file_path}")

def predict(model_uri,features):
    loaded_model = mlflow.pyfunc.load_model(model_uri)
    return loaded_model.predict(features)
    
def download_and_test(model_versions_to_build,base_path='/tmp/local_models'):
    client = MlflowClient()
    for mv in model_versions_to_build:
        #mv = client.get_model_version(model_name, model_version)
        run_id = mv.run_id
        model_name = mv.name
        model_version = mv.version
        #An example path that exists on every machine. Modify as needed
        model_download_path=f'{base_path}/{model_name}/v{model_version}'
        os.makedirs(model_download_path,exist_ok=True)
    
        #Download artifacts and verify if they exist
        client.download_artifacts(run_id,f"",model_download_path)
        os.listdir(model_download_path)
        #Resolve files
        context = {
            'FOUNDRY_URL': mv.tags['FOUNDRY_URL'],
            'model_name': model_name,
            'model_version': model_version,
            'model_download_folder':  model_download_path,
            'model_folder': mv.tags['MODEL_FOLDER'],
            'model_client_folder':  mv.tags['MODEL_CLIENT_FOLDER'],
            'entry_point': mv.tags['MODEL_ENTRY_POINT'],
            'command_line': mv.tags['MODEL_EXECUTE_PATH']
        }

        with open(f"{model_download_path}/create_docker.sh.template", 'r') as file:
           content = file.read()
           output_file = f"{model_download_path}/Dockerfile"
           create_file_from_template(content, context, output_file)
        with open(f"{model_download_path}/create_docker.sh.template", 'r') as file:
           content = file.read()
           output_file = f"{model_download_path}/create_image.sh"
           create_file_from_template(content, context, output_file)
        client_folder = mv.tags['MODEL_CLIENT_FOLDER']
        #Load the model and predict
        d = pd.read_json(f'{model_download_path}/{client_folder}/features.json', orient='records', lines=True)
        model_uri_saved = f'{model_download_path}/mymodel'
        p = predict(model_uri_saved,d)
        print(f'Model name {mv.name}, and version {mv.version} made prediction {p} for data {d}')


client = MlflowClient()
lst = mlflow.search_registered_models()
model_versions_to_build=[]
for m in lst:
    name = m.name    
    versions = m.latest_versions
    total_versions = 0
    if versions and len(versions)>0:
        latest_version = m.latest_versions[0]
        total_versions = latest_version.version
        for i in range(1,int(total_versions)+1):
            v = client.get_model_version(name,i)
            if 'PUBLISH_TO_FOUNDRY' in v.tags and v.tags['PUBLISH_TO_FOUNDRY'] :                
                model_versions_to_build.append(v)
            #This if of type ModelVersion - See MLFLOW API



print(download_and_test(model_versions_to_build))



with open("/mnt/export-example/create_docker.sh.template", 'r') as file:
    content = file.read()
    
context = {
    'FOUNDRY_URL': 'quay.io',
    'model_name': 'test',
    'model_version': '1',
}

# Specify the output file path
output_file_path = 'create_image.sh'


Downloading artifacts:   0%|          | 0/15 [00:00<?, ?it/s]

2024/08/01 17:23:18 INFO mlflow.store.artifact.artifact_repo: The progress bar can be disabled by setting the environment variable MLFLOW_ENABLE_ARTIFACTS_PROGRESS_BAR to false


docker build --platform=linux/amd64 -f ./Dockerfile -t quay.io/foundry_model:15 .
docker push quay.io/foundry_model:15 
docker run quay.io/foundry_model:15


File created at /tmp/local_models/foundry_model/v15/Dockerfile
docker build --platform=linux/amd64 -f ./Dockerfile -t quay.io/foundry_model:15 .
docker push quay.io/foundry_model:15 
docker run quay.io/foundry_model:15


File created at /tmp/local_models/foundry_model/v15/create_image.sh
Model name foundry_model, and version 15 made prediction [0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 1 0 0 0 0 1 0 1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0 1 0 1
 1 0 1 0 0 1 1 1 0 0 1 0 0 0 1 1 1 1 0 1 0 0 1 1 1 0 0 1 1 1 1 0 1 1 0 1 1
 1 1 1 1 1 1 0 0 0 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 0 0 1 1 1 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 1 0 1 1 0 0 0 1 0
 1 0 1 1 1 0 1 1 0 1 1 0 0 0 0 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 1 1 1 0 0 1 1
 1 0 1 0 1 1 1 0 0 1 1 0 1 1 0 0 0 0 1 1 1 1 0



Downloading artifacts:   0%|          | 0/15 [00:00<?, ?it/s]

2024/08/01 17:23:19 INFO mlflow.store.artifact.artifact_repo: The progress bar can be disabled by setting the environment variable MLFLOW_ENABLE_ARTIFACTS_PROGRESS_BAR to false


docker build --platform=linux/amd64 -f ./Dockerfile -t quay.io/foundry_model:17 .
docker push quay.io/foundry_model:17 
docker run quay.io/foundry_model:17


File created at /tmp/local_models/foundry_model/v17/Dockerfile
docker build --platform=linux/amd64 -f ./Dockerfile -t quay.io/foundry_model:17 .
docker push quay.io/foundry_model:17 
docker run quay.io/foundry_model:17


File created at /tmp/local_models/foundry_model/v17/create_image.sh
Model name foundry_model, and version 17 made prediction [0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 1 0 0 0 0 1 0 1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0 1 0 1
 1 0 1 0 0 1 1 1 0 0 1 0 0 0 1 1 1 1 0 1 0 0 1 1 1 0 0 1 1 1 1 0 1 1 0 1 1
 1 1 1 1 1 1 0 0 0 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 0 0 1 1 1 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 1 0 1 1 0 0 0 1 0
 1 0 1 1 1 0 1 1 0 1 1 0 0 0 0 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 1 1 1 0 0 1 1
 1 0 1 0 1 1 1 0 0 1 1 0 1 1 0 0 0 0 1 1 1 1 0



### Or run from the command line

```
export MODEL_NAME=foundry_model
export MODEL_VERSION=7
/tmp/local_models/${MODEL_NAME}/v${MODEL_VERSION}/example-prediction-code
python example_predict.py foundry_model 7
```

In [None]:
# Create a Dockerfile 
import string

with open("/mnt/export-example/Dockerfile_Template", 'r') as file:
    content = file.read()


def create_file_from_template(template_string, context, output_file_path):
    # Create a Template object

    template = string.Template(template_string)
    
    # Substitute placeholders with actual values
    content = template.substitute(context)
    print(content)
    # Write the content to a file
    with open(output_file_path, 'w') as file:
        file.write(content)
    
    print(f"File created at {output_file_path}")

# Define the context for substitution
context = {
    'model_name': 'm_name',
    'model_version': '1',
    'model_download_folder': '/tmp/test',
    'model_folder': 'mymodel',
    'model_client_folder': 'client',
    'entry_point': 'python',
    'command_line': 'client/execute_model.py'
}
            'model_folder': mv.tags['MODEL_FOLDER'],
            'model_client_folder':  mv.tags['MODEL_CLIENT_FOLDER'],
            'entry_point': mv.tags['MODEL_ENTRY_POINT'],
            'command_line': mv.tags['MODEL_EXECUTE_PATH']
# Specify the output file path
output_file_path = 'Dockerfile'

# Create the file from the template
create_file_from_template(content, context, output_file_path)

In [68]:
# Create a Dockerfile 
import string

with open("/mnt/export-example/create_docker.sh.template", 'r') as file:
    content = file.read()
    
context = {
    'FOUNDRY_URL': 'quay.io',
    'model_name': 'test',
    'model_version': '1',
}

# Specify the output file path
output_file_path = 'create_image.sh'

# Create the file from the template
create_file_from_template(content, context, output_file_path)

FOUNDRY_URL=""
docker build --platform=linux/amd64 -f ./Dockerfile -t quay.io/test:1 .
docker push quay.io/test:1 
docker run quay.io/test:1


File created at create_image.sh
