# XGBoost
Train a classifier with XGBoost on a Dask cluster, submitted as an MLRun job.

In [1]:
import mlrun

# Point the client at the MLRun API service. Keep an endpoint that is already
# configured (MLRun populates its config from MLRUN_* environment variables,
# e.g. MLRUN_DBPATH) and only fall back to the in-cluster default when
# nothing has been set, so the notebook also runs outside that cluster.
mlrun.mlconf.dbpath = getattr(mlrun.mlconf, 'dbpath', '') or 'http://mlrun-api:8080'

## parameters


In [2]:
# Short human-readable summary of this job.
DESCRIPTION = 'train a XGBoost classifier on a Dask cluster'

# Runtime settings: the custom container image and the MLRun function kind.
IMAGE = 'yjbds/mlrun-dask:dev'
JOB_KIND = 'job'

# Name under which the task is created (free-form label).
TASK_NAME = 'user-task-train-xgboost-dask'

# Handed to the handler as its 'target_path' parameter.
ARTIFACTS_PATH = '/User/repos/demos/dask/artifacts'

# Handed to the handler as its 'dask_client' parameter
# (presumably a Dask scheduler file -- confirm against the handler).
DASK_CLIENT = 'scheduler.json'

# Names under which the datasets are published in the cluster; each pair is
# presumably (features, labels) -- confirm against the handler.
TRAIN_SET = 'xtrain', 'ytrain'
VALID_SET = 'xvalid', 'yvalid'

## load and configure function

In [3]:
# Handler name passed to the task when it is created.
HANDLER = 'clf_xgboost_dask'

# Script used as the function's command, and the YAML file the function
# spec is exported to.
FUNC_PY = '/User/repos/demos/dask/code/clf_xgboost_dask.py'
FUNC_YAML = '/User/repos/demos/dask/yaml/clf_xgboost_dask.yaml'

In [None]:
# Wrap the training script in an MLRun function that uses the custom image
# and the configured runtime kind.
clf_xgb = mlrun.new_function(
    command=FUNC_PY,
    image=IMAGE,
    kind=JOB_KIND,
)
# Record the same image on the function's build spec as well.
clf_xgb.spec.build.image = IMAGE

# Save the function spec to YAML. To reload it later instead of rebuilding:
#   clf_xgb = mlrun.import_function(FUNC_YAML)
clf_xgb.export(FUNC_YAML)

In [None]:
# Attach the v3io mount to the function, then deploy it.
clf_xgb.apply(mlrun.mount_v3io())
clf_xgb.deploy(skip_deployed=True, with_mlrun=False)

# Training settings nested under the handler's 'params' argument
# (presumably forwarded to XGBoost by the handler -- confirm).
booster_params = {
    'max_depth': 3,
    'num_boost_round': 3,
    'eta': 1,
    'objective': 'binary:logistic',
    'eval_metric': ['auc', 'ams@0'],
    'evals': [VALID_SET],
    'silent': False,
    'verbose_eval': True,
    # Options currently disabled:
    #   'categorical_feature': ['UniqueCarrier', 'Origin', 'Dest'],
    #   'is_pre_partition': True,
}

# Arguments delivered to the handler: cluster connection, dataset names,
# where and under which name/key the model is stored, and the settings above.
handler_params = {
    'dask_client': DASK_CLIENT,
    'train_set': TRAIN_SET,
    'valid_set': VALID_SET,
    'target_path': ARTIFACTS_PATH,
    'name': 'xgb-model.pkl',
    'key': 'xgb-model',
    'params': booster_params,
}

# Create the task, then submit it on the deployed function.
train_clf_xgb_task = mlrun.NewTask(
    TASK_NAME,
    handler=HANDLER,
    params=handler_params,
)

rn = clf_xgb.run(train_clf_xgb_task)

[mlrun] 2020-02-17 12:10:58,507 function spec saved to path: /User/repos/demos/dask/yaml/clf_xgboost_dask.yaml
[mlrun] 2020-02-17 12:10:58,518 starting run user-task-train-xgboost-dask uid=f03662a35af74ba4abaf0b9e9e161c5f  -> http://mlrun-api:8080
[mlrun] 2020-02-17 12:10:58,595 Job is running in the background, pod: user-task-train-xgboost-dask-lrtdv


## tests

* artifact exists
* ...
* model can be loaded
* model has correct properties
* model can predict