In [1]:
import mlrun
import os

project_name = "test-notebooks"

# Load the MLRun project, creating it when it does not exist yet
project = mlrun.get_or_create_project(
    project_name,
    context="/v3io/users/dani/test-notebooks",
    user_project=True,
)

# Artifacts logged by runs of this project are stored under this S3 prefix
project.artifact_path = "s3://igz-app-lab/my_project"

> 2022-12-11 15:10:26,400 [info] loaded project test-notebooks from MLRun DB


In [3]:
#mlrun: start-code

import mlrun
import pandas as pd 
import json
from xgboost import XGBClassifier
import pickle
from mlrun.artifacts.base import DirArtifact

def get_artifact_target_path(context, key):
    """Return a data item for the artifact logged under ``key`` in ``context``.

    Scans ``context.artifacts`` in order and stops at the first entry whose
    metadata key equals ``key``. For an entry of kind ``'model'`` the data item
    points at the model file inside the artifact's target path; for any other
    kind it points at the target path itself, which is also written to the log.

    :param context: object exposing ``artifacts`` (an iterable of artifact
                    dicts with 'metadata' and 'spec' entries) and ``logger``
    :param key:     artifact key to look up
    :return: the result of ``mlrun.get_dataitem`` for the resolved URL, or
             ``None`` (after logging 'Artifact not found') when nothing matches
    :raises KeyError: if the matching artifact has no 'spec'/'target_path'
                      (or no 'model_file' for a model artifact)
    """
    for artifact in context.artifacts:
        # .get() so an entry lacking 'metadata' or 'kind' is skipped or treated
        # as a plain artifact instead of aborting the whole lookup with KeyError
        if artifact.get('metadata', {}).get('key') != key:
            continue

        spec = artifact['spec']
        if artifact.get('kind') == 'model':
            model_dir = spec['target_path']
            # Make sure exactly one separator sits between the directory and
            # the file name, whether or not the stored path ends with '/'
            if not model_dir.endswith('/'):
                model_dir += '/'
            return mlrun.get_dataitem(model_dir + spec['model_file'])

        context.logger.info(spec['target_path'])
        return mlrun.get_dataitem(spec['target_path'])

    context.logger.info('Artifact not found')
    return None
    
def log_transactions(context,
                     target_path='s3://igz-app-lab/transactions_cut.csv',
                     local_path='encoded_transactions.csv'):
    """Log several kinds of artifacts on ``context`` and read each one back.

    In order: a link artifact pointing at ``target_path``, a JSON-encoded copy
    of that table, a pickled XGBClassifier model, a directory artifact for the
    artifact path, and a dataset. After every log call (except the directory)
    the artifact is fetched again through ``get_artifact_target_path`` and a
    short summary is written to the run log.

    :param context:     MLRun run context used for logging artifacts/messages
    :param target_path: URL of the transactions CSV file
    :param local_path:  local path passed along when logging the encoded copy
    """
    # The first two characters of the artifact path (e.g. 's3' or 'gs') are
    # appended to the artifact keys
    store_tag = context.artifact_path[:2]
    raw_key = 'transactions-' + store_tag
    encoded_key = 'encoded_trans-' + store_tag

    # Link artifact for the existing CSV, then read it back and encode it
    context.log_artifact(raw_key, target_path=target_path)
    raw_item = get_artifact_target_path(context, raw_key)
    encoded_body = raw_item.as_df().to_json().encode()

    # Upload the encoded table as a new artifact and load it again
    context.logger.info(f'logging encoded artifact to {local_path}')
    context.log_artifact(encoded_key, body=encoded_body, local_path=local_path)
    encoded_item = get_artifact_target_path(context, encoded_key)
    trans_df = pd.DataFrame(json.loads(encoded_item.get()))
    context.logger.info(f'dataframe shape : {trans_df.shape}')

    # Model round trip: pickle an (unfitted) classifier, log it, unpickle it
    classifier = XGBClassifier(n_estimators=2, max_depth=2, learning_rate=1, objective='binary:logistic')
    context.log_model('bst_model', body=pickle.dumps(classifier), model_file='bst.pkl')
    model_item = get_artifact_target_path(context, 'bst_model')
    model = pickle.loads(model_item.get())
    context.logger.info(f'logged model : {model}')

    # Directory artifact covering the whole artifact path
    context.log_artifact(DirArtifact(key='my_project', target_path=context.artifact_path + '/'))

    # Dataset round trip
    context.log_dataset('transaction_dataset', pd.read_csv(target_path), local_path='trans_dataset.csv')
    dataset_item = get_artifact_target_path(context, 'transaction_dataset')
    context.logger.info(f'logged dataset {dataset_item.as_df().shape}')
            
#mlrun: end-code

In [4]:
# Build an MLRun job function whose handler is log_transactions
fn = mlrun.code_to_function(
    name="log_transactions",
    kind="job",
    image="mlrun/ml-models",
    handler="log_transactions",
)
fn.apply(mlrun.platforms.mount_s3())

# local=True: the run executes on the notebook host instead of a separate pod
fn.run(local=True)

> 2022-12-11 15:11:28,733 [info] starting run log-transactions-log_transactions uid=93acd6bc36ca455e8936cdcb483c9079 DB=http://mlrun-api:8080
> 2022-12-11 15:11:28,904 [info] s3://igz-app-lab/transactions_cut.csv
> 2022-12-11 15:11:31,681 [info] logging encoded artifact to encoded_transactions.csv
> 2022-12-11 15:11:32,125 [info] s3://igz-app-lab/my_project/log-transactions-log_transactions/0/encoded_trans-s3.csv
> 2022-12-11 15:11:32,217 [info] dataframe shape : (500, 4)
> 2022-12-11 15:11:32,483 [info] logged model : XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None, gamma=None,
              gpu_id=None, importance_type='gain', interaction_constraints=None,
              learning_rate=1, max_delta_step=None, max_depth=2,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=2, n_jobs=None, num_parallel_tree=None,
              random_state=None, reg_alpha=N

project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
test-notebooks-dani,...483c9079,0,Dec 11 15:11:28,completed,log-transactions-log_transactions,v3io_user=danikind=owner=danihost=jupyter-dani-86ccdd8c48-7gkl9,,,,transactions-s3encoded_trans-s3bst_modelmy_projecttransaction_dataset





> 2022-12-11 15:11:34,748 [info] run executed, status=completed


<mlrun.model.RunObject at 0x7f86b966ec50>

In [5]:
# Point the project's artifact path at a Google Cloud Storage prefix
project.artifact_path = "gs://test_bucket-igz/dani"

In [7]:
# Rebuild the job function so the run uses the current project settings
fn = mlrun.code_to_function(
    name="log_transactions",
    kind="job",
    image="mlrun/ml-models",
    handler="log_transactions",
)
fn.apply(mlrun.platforms.mount_s3())

# local=False: the run is submitted as a job pod; the handler reads the CSV
# from the GCS location passed in params
fn.run(
    local=False,
    params={"target_path": "gs://test_bucket-igz/dani/transactions_cut.csv"},
)

> 2022-12-11 15:12:53,546 [info] starting run log-transactions-log_transactions uid=47c0a731b393405c8e722f34b1d94efb DB=http://mlrun-api:8080
> 2022-12-11 15:12:53,713 [info] Job is running in the background, pod: log-transactions-log-transactions-ljrpl
> 2022-12-11 15:12:58,641 [info] gs://test_bucket-igz/dani/transactions_cut.csv
> 2022-12-11 15:12:58,642 [info] No GCS credentials available - auth will rely on auto-discovery of credentials
> 2022-12-11 15:12:58,878 [info] logging encoded artifact to encoded_transactions.csv
> 2022-12-11 15:12:59,237 [info] gs://test_bucket-igz/dani/log-transactions-log_transactions/0/encoded_trans-gs.csv
> 2022-12-11 15:12:59,269 [info] dataframe shape : (500, 4)
> 2022-12-11 15:12:59,912 [info] logged model : XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None,

project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
test-notebooks-dani,...b1d94efb,0,Dec 11 15:12:58,completed,log-transactions-log_transactions,v3io_user=danikind=jobowner=danimlrun/client_version=1.2.1-rc2host=log-transactions-log-transactions-ljrpl,,target_path=gs://test_bucket-igz/dani/transactions_cut.csv,,transactions-gsencoded_trans-gsbst_modelmy_projecttransaction_dataset





> 2022-12-11 15:13:03,061 [info] run executed, status=completed


<mlrun.model.RunObject at 0x7f86d6fa2910>