# Neomaril Training

This notebook gives an example of how to use Neomaril to train an ML model.

### NeomarilTrainingClient

This is where you can manage your training experiments.

In [1]:
from neomaril_codex.training import NeomarilTrainingClient

In [2]:
# Start the client. The credentials are read from the NEOMARIL_TOKEN environment variable.
# NOTE(review): the repr echoed by the last line appears to include the access token
# (shown in its `version` field) - clear that output before sharing this notebook.

client = NeomarilTrainingClient()
client

2024-04-22 17:29:44.013 | INFO     | neomaril_codex.base:__init__:20 - Loading .env
2024-04-22 17:29:45.606 | INFO     | neomaril_codex.base:__init__:31 - Successfully connected to Neomaril


NeomarilTrainingClient(url="http://localhost:7070/api", version="<REDACTED: access token removed from the saved output>")

## NeomarilTrainingExperiment

It's where you can create a training experiment to find the best model

#### Custom training

With Custom training you have to create the training function.

In [3]:
# Create a new training experiment.
#   experiment_name - how you will find your model in MLflow
#   model_type      - Classification, Regression or Unsupervised
#   group           - 'groupname' is the default group; create a new one for a new project
training = client.create_training_experiment(
    experiment_name="Teste notebook Training custom",
    model_type="Classification",
    group="groupname",
)

2024-04-22 17:29:46.182 | INFO     | neomaril_codex.training:create_training_experiment:1335 - New Training 'Teste notebook Training custom' inserted.
2024-04-22 17:29:46.183 | INFO     | neomaril_codex.base:__init__:20 - Loading .env
2024-04-22 17:29:46.188 | INFO     | neomaril_codex.base:__init__:31 - Successfully connected to Neomaril


In [4]:
# Display the experiment object's repr (name, group, training_id and model_type)
training

NeomarilTrainingExperiment(name="Teste notebook Training custom", 
                                                        group="groupname", 
                                                        training_id="T508b769c7ca49a4a5e60d96fab003cace7533032858439f9b584cc026be02e1",
                                                        model_type=Classification
                                                        )

In [5]:
# A single experiment can hold several model runs; this one uses a custom training script.
PATH = "./samples/train/"

run = training.run_training(
    run_name="First test",
    train_data=f"{PATH}dados.csv",  # file with the training data
    source_file=f"{PATH}app.py",  # source file that holds the training code
    requirements_file=f"{PATH}requirements.txt",  # requirements file
    # Optional arguments (uncomment to use):
    # env=f"{PATH}.env",  # file with env variables (this will be encrypted in the server)
    # extra_files=[f"{PATH}utils.py"],  # extra files to upload along (they will all be in the same folder)
    training_reference="train_model",  # name of the entrypoint function called inside the source file
    training_type="Custom",
    python_version="3.9",  # can be 3.8 to 3.10
    wait_complete=True,
)

2024-04-22 17:29:46.496 | INFO     | neomaril_codex.training:__upload_training:915 - {"ExecutionId":1,"Message":"Training files have been uploaded! Use the id \u00271\u0027 to execute the train experiment."}
2024-04-22 17:29:46.791 | INFO     | neomaril_codex.training:__execute_training:939 - Model training starting - Hash: T508b769c7ca49a4a5e60d96fab003cace7533032858439f9b584cc026be02e1
2024-04-22 17:29:46.813 | INFO     | neomaril_codex.base:__init__:20 - Loading .env
2024-04-22 17:29:46.817 | INFO     | neomaril_codex.base:__init__:31 - Successfully connected to Neomaril
2024-04-22 17:29:46.818 | INFO     | neomaril_codex.base:__init__:279 - Loading .env


Waiting the training run.......

In [6]:
# Check the current status of the training run
run.get_status()

{'ExecutionId': '1',
 'Status': 'Succeeded',
 'Message': 'wasbs://mlflow-dev@datariskmlops.blob.core.windows.net/artifacts/1/d0c77671b8684fdcbd2276d31b8ec27c/artifacts'}

In [7]:
# Inspect the execution data of the run (the output includes the run's state and its logged metrics)
run.execution_data

{'TrainingHash': 'T508b769c7ca49a4a5e60d96fab003cace7533032858439f9b584cc026be02e1',
 'ExperimentName': 'Teste notebook Training custom',
 'GroupName': 'groupname',
 'ModelType': 'Classification',
 'TrainingType': 'Custom',
 'ExecutionId': 1,
 'RunName': 'First test',
 'ExecutionState': 'Succeeded',
 'TimeElapsed': 164390,
 'Description': '',
 'Deployable': True,
 'RunData': {'metrics': [{'key': 'training_precision_score',
    'value': 1.0,
    'timestamp': 1713817886520,
    'step': 0},
   {'key': 'training_recall_score',
    'value': 1.0,
    'timestamp': 1713817886520,
    'step': 0},
   {'key': 'training_f1_score',
    'value': 1.0,
    'timestamp': 1713817886520,
    'step': 0},
   {'key': 'training_accuracy_score',
    'value': 1.0,
    'timestamp': 1713817886520,
    'step': 0},
   {'key': 'training_log_loss',
    'value': 0.00047178298806016674,
    'timestamp': 1713817886520,
    'step': 0},
   {'key': 'training_roc_auc',
    'value': 1.0,
    'timestamp': 1713817886520,
    '

In [8]:
# When the run is finished you can download the model file
# (the log line printed by this call reports where the output was saved).
run.download_result()

2024-04-22 17:32:50.098 | INFO     | neomaril_codex.base:download_result:408 - Output saved in ./output.zip


In [9]:
# ...or promote it to a deployed model

PATH = "./samples/syncModel/"

model = run.promote_model(
    model_name="Teste notebook promoted custom",
    model_reference="score",  # name of the scoring function
    source_file=f"{PATH}app.py",  # source file
    schema=f"{PATH}schema.json",  # schema file path (it could also be a dict)
    # Optional arguments (uncomment to use):
    # env=f"{PATH}.env",  # file with env variables (this will be encrypted in the server)
    # extra_files=[f"{PATH}utils.py"],  # extra files to upload along (they will all be in the same folder)
    operation="Sync",  # can be Sync or Async
)

2024-04-22 17:32:50.290 | INFO     | neomaril_codex.training:__upload_model:492 - Model 'Teste notebook promoted custom' promoted from T508b769c7ca49a4a5e60d96fab003cace7533032858439f9b584cc026be02e1 - Hash: "Md68e3180ce2498cbfb88ca59b6c94ddec7eebc47dac4c9591e34ea88d191b04"
2024-04-22 17:32:52.204 | INFO     | neomaril_codex.training:__host_model:557 - Model host in process - Hash: Md68e3180ce2498cbfb88ca59b6c94ddec7eebc47dac4c9591e34ea88d191b04
2024-04-22 17:32:52.206 | INFO     | neomaril_codex.base:__init__:20 - Loading .env
2024-04-22 17:32:52.211 | INFO     | neomaril_codex.base:__init__:31 - Successfully connected to Neomaril


In [10]:
# Display the promoted model object (its repr shows name, group, status, model_id and operation)
model

NeomarilModel(name="Teste notebook promoted custom", group="groupname", 
                                status="Building",
                                model_id="Md68e3180ce2498cbfb88ca59b6c94ddec7eebc47dac4c9591e34ea88d191b04",
                                operation="Sync",
                                )

#### AutoML

With AutoML you just need to upload the data and some configuration

In [11]:
PATH = "./samples/autoML/"

# AutoML run: only the training data and a configuration file are passed.
# Note that this rebinds `run`, replacing the custom-training run created earlier.
run = training.run_training(
    run_name="First test",
    training_type="AutoML",
    train_data=f"{PATH}dados.csv",  # file with the training data
    conf_dict=f"{PATH}conf.json",  # configuration file
    wait_complete=True,
)

2024-04-22 17:33:37.256 | INFO     | neomaril_codex.training:__upload_training:915 - {"ExecutionId":2,"Message":"Training files have been uploaded! Use the id \u00272\u0027 to execute the train experiment."}
2024-04-22 17:33:37.419 | INFO     | neomaril_codex.training:__execute_training:939 - Model training starting - Hash: T508b769c7ca49a4a5e60d96fab003cace7533032858439f9b584cc026be02e1
2024-04-22 17:33:37.440 | INFO     | neomaril_codex.base:__init__:20 - Loading .env
2024-04-22 17:33:37.445 | INFO     | neomaril_codex.base:__init__:31 - Successfully connected to Neomaril
2024-04-22 17:33:37.447 | INFO     | neomaril_codex.base:__init__:279 - Loading .env


Waiting the training run................

In [12]:
# Display the training execution object (its repr shows name, exec_id and status)
run

NeomarilTrainingExecution(name="First test",
                                        exec_id="2", status="Succeeded")

In [13]:
# Check the status of the AutoML run
run.get_status()

{'ExecutionId': '2',
 'Status': 'Succeeded',
 'Message': 'wasbs://mlflow-dev@datariskmlops.blob.core.windows.net/artifacts/1/250f70714e5d4a6f9fa3b55c6b9aaf43/artifacts'}

In [14]:
# Promoting an AutoML model is a lot easier

PATH = "./samples/autoML/"
MODEL_PATH = "./samples/syncModel/"  # NOTE(review): not used in this cell - confirm whether it is still needed

model = run.promote_model(
    model_name="Teste notebook promoted autoML",
    operation="Async",  # can be Sync or Async
    input_type="json",
    schema=f"{PATH}schema.json",
)

2024-04-22 17:41:28.830 | INFO     | neomaril_codex.training:__upload_model:492 - Model 'Teste notebook promoted autoML' promoted from T508b769c7ca49a4a5e60d96fab003cace7533032858439f9b584cc026be02e1 - Hash: "M0fa3683553e41c4a1a99290c2451ff190785d06b90646e6afe7ffa352c00193"
2024-04-22 17:41:29.135 | INFO     | neomaril_codex.training:__host_model:557 - Model host in process - Hash: M0fa3683553e41c4a1a99290c2451ff190785d06b90646e6afe7ffa352c00193
2024-04-22 17:41:29.137 | INFO     | neomaril_codex.base:__init__:20 - Loading .env
2024-04-22 17:41:29.140 | INFO     | neomaril_codex.base:__init__:31 - Successfully connected to Neomaril


In [15]:
# Display the promoted AutoML model object (its repr shows name, group, status, model_id and operation)
model

NeomarilModel(name="Teste notebook promoted autoML", group="groupname", 
                                status="Building",
                                model_id="M0fa3683553e41c4a1a99290c2451ff190785d06b90646e6afe7ffa352c00193",
                                operation="Async",
                                )