In [1]:
import mlrun
# Load the 'mlops' project (created first if it does not exist yet), using the
# current directory as the project context. user_project=True is why the runs
# below are reported under a user-suffixed project name.
project = mlrun.get_or_create_project(
    name='mlops',
    user_project=True,
    context='./',
)

> 2023-01-08 11:20:46,016 [info] loaded project mlops from MLRun DB


In [2]:
import os
# Wrap src/get_data.py as an MLRun job called 'gen_dataset'; the job's entry
# point is the `get_data` handler and it runs on the mlrun/mlrun image.
get_data = mlrun.code_to_function(
    name='gen_dataset',
    kind='job',
    image='mlrun/mlrun',
    handler='get_data',
    filename='src/get_data.py',
)

# Attach storage to the function via MLRun's automatic mount selection.
get_data.apply(mlrun.auto_mount())

# When V3IO_ACCESS_KEY is unset (or literally the string 'False'), explicitly
# leave auto-mount enabled on the function spec.
# NOTE(review): False is presumably already the default for this flag, which
# would make this branch a no-op - confirm the intended value.
if os.getenv('V3IO_ACCESS_KEY', 'False') == 'False':
    get_data.spec.disable_auto_mount = False

# Register the function on the project. Kept as the last expression so the
# returned runtime object is shown as the cell output.
project.set_function(get_data)

<mlrun.runtimes.kubejob.KubejobRuntime at 0x7f5f1c4bd8b0>

# Pipeline

In [3]:
%%writefile src/trainer_baseline.py
import mlrun
from kfp import dsl
import sklearn

@dsl.pipeline(
    name="Automatic Pipeline",
    description="Train & Evaluate"
)
def kfpipeline(
    dataset: str = 'housing',
    path: str = '/home/jovyan/data/src/housing.csv',
    label_column: str = 'MEDV',
):
    """Two-step workflow: produce a dataset, then train a regressor on it.

    Args:
        dataset: Dataset identifier. It is passed to the data step, used as
            the key of that step's output, and reused as the model name.
        path: Location of the source CSV handed to the data step.
        label_column: Name of the target column given to the trainer.
    """
    # Resolve the active project. The value is not referenced again in this
    # function; the lookup is kept as in the original workflow.
    project = mlrun.get_current_project()

    # Step 1: run the registered 'gen-dataset' function and expose its result
    # under an output key equal to the dataset name.
    fetch_params = {'dataset': dataset, 'path': path}
    ingest_step = mlrun.run_function(
        name='get_data',
        function='gen-dataset',
        params=fetch_params,
        outputs=[dataset],
    )

    # Step 2: train an XGBoost regressor with the auto_trainer hub function,
    # feeding it the dataset produced by step 1.
    training_params = {
        "model_class": "xgboost.XGBRegressor",
        "label_columns": label_column,
        "model_name": dataset,
        "random_state": 50,
    }
    trained_step = mlrun.run_function(
        "hub://auto_trainer",
        handler='train',
        inputs={"dataset": ingest_step.outputs[dataset]},
        params=training_params,
        outputs=["model"],
    )

Overwriting src/trainer_baseline.py


In [4]:
# Register the pipeline script on the project under the name that the
# following cells pass to project.run().
workflow_name = "trainer_baseline"
project.set_workflow(
    name=workflow_name,
    workflow_path="src/trainer_baseline.py",
)

# Persist the project definition. Kept as the last expression so the project
# object is shown as the cell output.
project.save()

<mlrun.projects.project.MlrunProject at 0x7f5f6d2b89a0>

In [5]:
# Run the registered workflow with the pipeline's default arguments, using
# the local engine (local=True) and without watching (watch=False).
run = project.run(
    name=workflow_name,
    watch=False,
    local=True,
    overwrite=True,
)



> 2023-01-08 11:20:46,635 [info] starting run get_data uid=7d1b9af1ddba45c38d78a0eec3ef6d76 DB=http://mlrun-api:8080


project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
mlops-jovyan,...c3ef6d76,0,Jan 08 11:20:46,completed,get_data,workflow=ce3bdeb480424b1594bb89f255eef7a9kind=owner=jovyanhost=mlrun-jupyter-5cd9c659c-2dpxf,,dataset=housingpath=/home/jovyan/data/src/housing.csv,,housing





> 2023-01-08 11:20:47,743 [info] run executed, status=completed
> 2023-01-08 11:20:48,382 [info] starting run auto-trainer-train uid=2428126736894c1bbbc3259939c7f931 DB=http://mlrun-api:8080
> 2023-01-08 11:20:48,624 [info] test_set or train_test_split_size are not provided, setting train_test_split_size to 0.2
> 2023-01-08 11:20:48,684 [info] Sample set not given, using the whole training set as the sample set
> 2023-01-08 11:20:49,563 [info] training 'housing'


project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
mlops-jovyan,...39c7f931,0,Jan 08 11:20:48,completed,auto-trainer-train,workflow=ce3bdeb480424b1594bb89f255eef7a9kind=owner=jovyanhost=mlrun-jupyter-5cd9c659c-2dpxf,dataset,model_class=xgboost.XGBRegressorlabel_columns=MEDVmodel_name=housingrandom_state=50,mean_absolute_error=1.7996130775002872r2_score=0.9218963689612159root_mean_squared_error=2.450679172306786mean_squared_error=6.005828405578273,feature-importancetest_setmodel





> 2023-01-08 11:20:50,487 [info] run executed, status=completed


uid,start,state,name,parameters,results
...c3ef6d76,Jan 08 11:20:46,completed,get_data,dataset=housingpath=/home/jovyan/data/src/housing.csv,
...39c7f931,Jan 08 11:20:48,completed,auto-trainer-train,model_class=xgboost.XGBRegressorlabel_columns=MEDVmodel_name=housingrandom_state=50,mean_absolute_error=1.7996130775002872r2_score=0.9218963689612159root_mean_squared_error=2.450679172306786mean_squared_error=6.005828405578273


> 2023-01-08 11:20:50,528 [info] started run workflow mlops-jovyan-trainer_baseline with run id = 'ce3bdeb480424b1594bb89f255eef7a9' by local engine


In [6]:
# Save the project again after the baseline workflow run above.
project.save()

<mlrun.projects.project.MlrunProject at 0x7f5f6d2b89a0>

In [7]:
# Re-run the same workflow, overriding all three pipeline arguments to train
# on the 'motor' dataset with 'ClaimNb' as the label column.
run = project.run(
    name=workflow_name,
    watch=False,
    local=True,
    overwrite=True,
    arguments={
        'dataset': 'motor',
        'path': '/home/jovyan/data/src/freMTPL2freq.csv',
        'label_column': 'ClaimNb',
    },
)

> 2023-01-08 11:20:50,660 [info] starting run get_data uid=117e880359904701bd99574ea1fb2611 DB=http://mlrun-api:8080


project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
mlops-jovyan,...a1fb2611,0,Jan 08 11:20:50,completed,get_data,workflow=892ea43c87bc4e2ab73d1a39b8a1003dkind=owner=jovyanhost=mlrun-jupyter-5cd9c659c-2dpxf,,dataset=motorpath=/home/jovyan/data/src/freMTPL2freq.csv,,motor





> 2023-01-08 11:20:57,402 [info] run executed, status=completed
> 2023-01-08 11:20:57,665 [info] starting run auto-trainer-train uid=be1f3621c6b046efaa3ec87237c801a1 DB=http://mlrun-api:8080
> 2023-01-08 11:20:57,800 [info] test_set or train_test_split_size are not provided, setting train_test_split_size to 0.2
> 2023-01-08 11:20:58,580 [info] Sample set not given, using the whole training set as the sample set
> 2023-01-08 11:20:58,848 [info] training 'motor'


project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
mlops-jovyan,...37c801a1,0,Jan 08 11:20:57,completed,auto-trainer-train,workflow=892ea43c87bc4e2ab73d1a39b8a1003dkind=owner=jovyanhost=mlrun-jupyter-5cd9c659c-2dpxf,dataset,model_class=xgboost.XGBRegressorlabel_columns=ClaimNbmodel_name=motorrandom_state=50,mean_absolute_error=0.0969508139257626r2_score=0.03861524283885853root_mean_squared_error=0.23244593029610103mean_squared_error=0.05403111051121986,feature-importancetest_setmodel





> 2023-01-08 11:21:29,853 [info] run executed, status=completed


uid,start,state,name,parameters,results
...a1fb2611,Jan 08 11:20:50,completed,get_data,dataset=motorpath=/home/jovyan/data/src/freMTPL2freq.csv,
...37c801a1,Jan 08 11:20:57,completed,auto-trainer-train,model_class=xgboost.XGBRegressorlabel_columns=ClaimNbmodel_name=motorrandom_state=50,mean_absolute_error=0.0969508139257626r2_score=0.03861524283885853root_mean_squared_error=0.23244593029610103mean_squared_error=0.05403111051121986


> 2023-01-08 11:21:29,875 [info] started run workflow mlops-jovyan-trainer_baseline with run id = '892ea43c87bc4e2ab73d1a39b8a1003d' by local engine


In [1]:
import mlrun

In [2]:
# Create a new 'boston' project in the current directory with git
# initialisation requested. overwrite=True replaces an existing project of
# the same name, as the deletion message in the cell output shows.
project_name = 'boston'
project = mlrun.new_project(
    name=project_name,
    init_git=True,
    user_project=True,
    context='./',
    overwrite=True,
)

> 2023-01-10 08:18:31,525 [info] Deleting project boston-jovyan from MLRun DB due to overwrite
> 2023-01-10 08:18:31,784 [info] Created and saved project boston-jovyan: {'from_template': None, 'overwrite': True, 'context': './', 'save': True}
