In [None]:
import getpass
import os

import oml
import xgboost as xgb
import numpy as np
import pandas as pd


Connect to database

In [None]:
# Credentials are read from the environment rather than written into the
# notebook. OML_USER / OML_PASSWORD are the variable names looked up here;
# when OML_PASSWORD is unset the password is requested interactively so the
# secret is neither stored in the file nor echoed in the cell output.
db_user = os.environ.get("OML_USER", "user")
db_password = os.environ.get("OML_PASSWORD") or getpass.getpass("Database password: ")
oml.connect(db_user, db_password, dsn='(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST=localhost)(PORT=1521))(CONNECT_DATA=(service_name=pdb1)))')

Load data from files to database tables

In [None]:
from sklearn.datasets import load_svmlight_file

# The DMatrix is loaded only to obtain xgboost's column labels for the file.
dtrain = xgb.DMatrix('agaricus.txt.train')
col_names = dtrain.feature_names
if col_names is None:
    # Newer xgboost releases return None when no feature names were set
    # explicitly. Fall back to positional "f<i>" labels (the form older
    # releases generated by default) so the slices below, and the one in the
    # test-data cell, keep working instead of raising TypeError.
    col_names = ['f{0}'.format(i) for i in range(dtrain.num_col())]

# load_svmlight_file returns a (sparse feature matrix, label vector) pair.
train_data = load_svmlight_file('agaricus.txt.train')

train_X = train_data[0].toarray()
train_y = train_data[1]
# The first label (index 0) is skipped; the following names (at most 126)
# become the DataFrame column names.
train_df = pd.DataFrame(data=train_X, columns=col_names[1:127])
train_target_df = pd.DataFrame(data=train_y)

# Create one database table for the features and one for the labels.
oml.create(train_df, table='TRAIN_DF')
oml.create(train_target_df, table='TARGET_DF')

In [None]:
# Parse the held-out file into a (sparse feature matrix, label vector) pair.
test_data = load_svmlight_file('agaricus.txt.test')

# Densify the features and keep the labels as returned by the parser.
test_X, test_y = test_data[0].toarray(), test_data[1]

# Reuse the column labels computed in the training-data cell.
test_df = pd.DataFrame(test_X, columns=col_names[1:127])
test_target_df = pd.DataFrame(test_y)

# Create one database table for the test features and one for the test labels.
oml.create(test_df, table='TEST_DF')
oml.create(test_target_df, table='TEST_TARGET_DF')

Create functions to train and use the model. Store them in the script repository

In [None]:
def train_xgboost():
    """Fit an XGBoost binary classifier on TRAIN_DF / TARGET_DF and store it.

    The serialized booster is saved under the key 'xgboost_raw' in the
    datastore "MED" and is also returned to the caller.
    """
    # Imports stay inside the body; presumably so the function is
    # self-contained when it is run from the script repository.
    import oml
    import xgboost as xgb

    # NOTE(review): features and labels are pulled from two separate tables;
    # this presumes both come back in the same row order -- confirm.
    features = oml.sync(table="TRAIN_DF").pull()
    labels = oml.sync(table="TARGET_DF").pull()

    training_matrix = xgb.DMatrix(features, label=labels)
    booster_params = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
    boosting_rounds = 2
    booster = xgb.train(booster_params, training_matrix, boosting_rounds)

    raw_model = booster.save_raw()
    oml.ds.save(objs={'xgboost_raw': raw_model}, name="MED", append=True)
    return raw_model


# Register the function in the script repository, replacing any earlier version.
oml.script.create("train_xgboost", train_xgboost, overwrite=True)

In [None]:
def use_xgboost():
    """Score TEST_DF with the booster stored in the datastore "MED".

    Loads the object saved under 'xgboost_raw', rebuilds a Booster from it and
    returns the result of ``Booster.predict`` on the test matrix.
    """
    # Imports stay inside the body; presumably so the function is
    # self-contained when it is run from the script repository.
    import oml
    import xgboost as xgb

    stored_objects = oml.ds.load(name="MED", to_globals=False)
    booster = xgb.Booster({'nthread': '4'})
    booster.load_model(stored_objects['xgboost_raw'])

    features = oml.sync(table="TEST_DF").pull()
    labels = oml.sync(table="TEST_TARGET_DF").pull()

    # NOTE(review): the labels are attached to the matrix as in the original;
    # prediction itself presumably does not need them -- confirm before removing.
    scoring_matrix = xgb.DMatrix(features, label=labels)
    return booster.predict(scoring_matrix)


# Register the function in the script repository, replacing any earlier version.
oml.script.create("use_xgboost", use_xgboost, overwrite=True)

In [None]:
# List the stored scripts whose name matches the regex "xgboost$"
# (i.e. names ending in "xgboost").
oml.script.dir(name="xgboost$", regex_match=True)

Run embedded execution for the xgboost training

In [None]:
# Run the stored 'train_xgboost' script by name; oml_connect=True is passed
# because the function itself calls oml.sync and oml.ds.save.
res = oml.do_eval(func='train_xgboost', oml_connect=True)

Look for the model in the database's datastore

In [None]:
# Show the datastore entry named "MED" (written by train_xgboost).
oml.ds.dir(name="MED")

In [None]:
# Describe the contents of the "MED" datastore.
oml.ds.describe(name="MED")

Use the trained xgboost model to score the test data

In [None]:
# Run the stored 'use_xgboost' script by name and display what it returns.
res = oml.do_eval(func='use_xgboost', oml_connect=True)
res