# XGBoost
Training a classifier using XGBoost on Dask.

In [7]:
import mlrun

# Address of the mlrun API service; the run log further down shows runs
# being started against this same URL.
mlrun.mlconf.dbpath = 'http://mlrun-api:8080'

## parameters


In [8]:
# --- function identity ---
# Handler name: passed as `handler=` when the task is created.
FUNCTION = 'clf_xgboost_dask'
DESCRIPTION = 'train a XGBoost classifier on a Dask cluster'

# --- runtime ---
# Custom container image, used both for the function and as its build image.
IMAGE = 'yjbds/mlrun-dask:dev'
# Passed as `kind=` to mlrun.new_function.
JOB_KIND = 'job'

# --- task ---
# Free-form name given to the run.
TASK_NAME = 'user-task-train-xgboost-dask'

# Forwarded to the handler as `target_path`.
ARTIFACTS_PATH = '/User/repos/demos/dask/artifacts'

# --- dask inputs ---
# Forwarded to the handler as `dask_client`.
DASK_CLIENT = 'scheduler.json'
# (features, labels) names under which the datasets are published in the cluster.
TRAIN_SET = ('xtrain', 'ytrain')
VALID_SET = ('xvalid', 'yvalid')

## load and configure function

In [9]:
# Source file handed to mlrun.new_function as `command=`, and the YAML file
# the resulting function spec is exported to.
FUNC_PY, FUNC_YAML = (
    '/User/repos/demos/dask/code/clf_xgboost_dask.py',
    '/User/repos/demos/dask/yaml/clf_xgboost_dask.yaml',
)

In [13]:
# Create the mlrun function from the local source file; the same image is
# set as the function image and as the build image.
clf_xgb = mlrun.new_function(command=FUNC_PY, image=IMAGE, kind=JOB_KIND)
clf_xgb.spec.build.image = IMAGE

# Write the function spec to YAML. A saved spec can be loaded back with
# mlrun.import_function(FUNC_YAML) instead of rebuilding it here.
clf_xgb.export(FUNC_YAML)

# Attach the v3io mount, then deploy the function.
clf_xgb.apply(mlrun.mount_v3io())
clf_xgb.deploy(skip_deployed=True, with_mlrun=False)

# Settings nested under the handler's 'params' argument
# (presumably forwarded to XGBoost by the handler -- confirm in FUNC_PY).
booster_params = {
    'max_depth': 3,
    'num_boost_round': 3,
    'eta': 1,
    'objective': 'binary:logistic',
    'eval_metric': ['auc', 'ams@0'],
    'evals': [VALID_SET],
    'silent': False,
    'verbose_eval': True,
    # Disabled options (they look like LightGBM settings -- confirm before
    # re-enabling):
    #   'categorical_feature': ['UniqueCarrier','Origin','Dest'],
    #   'is_pre_partition'   : True
}

# Arguments passed to the handler: where to find the Dask scheduler and the
# published datasets, and how/where to store the trained model artifact.
handler_params = {
    'dask_client': DASK_CLIENT,
    'train_set': TRAIN_SET,
    'valid_set': VALID_SET,
    'target_path': ARTIFACTS_PATH,
    'name': 'xgb-model.pkl',
    'key': 'xgb-model',
    'params': booster_params,
}

# Describe the task ...
train_clf_xgb_task = mlrun.NewTask(TASK_NAME, handler=FUNCTION, params=handler_params)

# ... and submit it; the returned run object is kept in `rn`.
rn = clf_xgb.run(train_clf_xgb_task)

[mlrun] 2020-02-12 12:10:21,273 function spec saved to path: /User/repos/demos/dask/yaml/clf_xgboost_dask.yaml
[mlrun] 2020-02-12 12:10:21,281 starting run user-task-train-xgboost-dask uid=a72640ca63ce4983be89fdda911d38a5  -> http://mlrun-api:8080
[mlrun] 2020-02-12 12:10:21,351 Job is running in the background, pod: user-task-train-xgboost-dask-krpzv
[mlrun] 2020-02-12 12:14:41,199 log artifact xgb-model at /User/repos/demos/dask/artifacts/xgb-model.pkl, size: None, db: Y

[mlrun] 2020-02-12 12:14:41,500 run executed, status=completed
Intel(R) Data Analytics Acceleration Library (Intel(R) DAAL) solvers for sklearn enabled: https://intelpython.github.io/daal4py/sklearn.html
final state: succeeded


uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...1d38a5,0,Feb 12 12:10:32,completed,clf_xgboost_dask,host=user-task-train-xgboost-dask-krpzvkind=jobowner=admin,,"dask_client=scheduler.jsonkey=xgb-modelname=xgb-model.pklparams={'eta': 1, 'eval_metric': ['auc', 'ams@0'], 'evals': [['xvalid', 'yvalid']], 'max_depth': 3, 'num_boost_round': 3, 'objective': 'binary:logistic', 'silent': False, 'verbose_eval': True}target_path=/User/repos/demos/dask/artifactstrain_set=['xtrain', 'ytrain']valid_set=['xvalid', 'yvalid']",,xgb-model


to track results use .show() or .logs() or in CLI: 
!mlrun get run a72640ca63ce4983be89fdda911d38a5  , !mlrun logs a72640ca63ce4983be89fdda911d38a5 
[mlrun] 2020-02-12 12:14:44,981 run executed, status=completed
