In [None]:
import os
import pickle
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb

In [None]:
# Locations of the training CSVs. The root is home-relative ('~') and the
# joined paths are handed to pandas unchanged.
TEST_DATA_ROOT = '~/workspace/datarobot-user-models/tests/testdata'
BINARY_DATA = os.path.join(TEST_DATA_ROOT, 'iris_binary_training.csv')
REGRESSION_DATA = os.path.join(TEST_DATA_ROOT, 'juniors_3_year_stats_regression.csv')
MULTICLASS_DATA = os.path.join(TEST_DATA_ROOT, 'skyserver_sql2_27_2018_6_51_39_pm.csv')


def _read_features_and_target(csv_path, target_column):
    """Read ``csv_path`` and split it into ``(features, target)``.

    The target column is removed from the returned feature frame.
    """
    features = pd.read_csv(csv_path)
    target = features.pop(target_column)
    return features, target


# One (features, target) pair per problem type.
bin_X, bin_y = _read_features_and_target(BINARY_DATA, 'Species')
reg_X, reg_y = _read_features_and_target(REGRESSION_DATA, 'Grade 2014')
multi_X, multi_y = _read_features_and_target(MULTICLASS_DATA, 'class')

In [None]:
# --- Binary classification -------------------------------------------------
# The target is run through a LabelEncoder so the booster is trained on
# integer class codes; the fitted encoder is kept for later use.
bin_target_encoder = LabelEncoder().fit(bin_y)
bin_dtrain = xgb.DMatrix(bin_X, label=bin_target_encoder.transform(bin_y))
bin_params = {'objective': 'binary:logistic'}
bin_model = xgb.train(bin_params, bin_dtrain)

# --- Regression ------------------------------------------------------------
# The target is passed to the DMatrix as-is (no encoding step).
reg_dtrain = xgb.DMatrix(reg_X, label=reg_y)
reg_params = {'objective': 'reg:squarederror'}
reg_model = xgb.train(reg_params, reg_dtrain)

# --- Multiclass classification ----------------------------------------------
# Same encoding step as the binary case; num_class is taken from the number of
# distinct classes the encoder discovered.
multi_target_encoder = LabelEncoder().fit(multi_y)
multi_dtrain = xgb.DMatrix(multi_X, label=multi_target_encoder.transform(multi_y))
multi_params = {
    'objective': 'multi:softprob',
    'num_class': len(multi_target_encoder.classes_),
}
multi_model = xgb.train(multi_params, multi_dtrain)

In [None]:
# Quick sanity check: score every model on the same feature frame it was
# trained on and print the raw predictions (binary, regression, multiclass).
bin_dtest = xgb.DMatrix(bin_X)
reg_dtest = xgb.DMatrix(reg_X)
multi_dtest = xgb.DMatrix(multi_X)

for booster, dmatrix in (
    (bin_model, bin_dtest),
    (reg_model, reg_dtest),
    (multi_model, multi_dtest),
):
    print(booster.predict(dmatrix))

In [None]:
FIXTURE_ROOT = '~/workspace/datarobot-user-models/tests/fixtures/drop_in_model_artifacts'

# Pickle each trained model into the fixture directory. The file name is joined
# onto FIXTURE_ROOT first and '~' is expanded on the resulting path.
for artifact_name, booster in (
    ('xgb_bin.pkl', bin_model),
    ('xgb_reg.pkl', reg_model),
    ('xgb_multi.pkl', multi_model),
):
    artifact_path = os.path.expanduser(os.path.join(FIXTURE_ROOT, artifact_name))
    with open(artifact_path, 'wb') as picklefile:
        pickle.dump(booster, picklefile)