# ML Trainer Tests

In [1]:
# NOTE(review): presumably this startup module puts `pd` (pandas) into the
# namespace, since later cells use `pd` without importing it - confirm.
%run -m ipy_startup

In [2]:
from sklearn.datasets import load_boston

In [3]:
# Load the Boston housing data and wrap it in pandas containers.
d_raw = load_boston()
X = pd.DataFrame(data=d_raw['data'], columns=d_raw['feature_names'])
y = pd.Series(data=d_raw['target'], name='price')

# Two-column target for the multi-task runs: both columns are copies of y.
Y = pd.DataFrame({'y1': y, 'y2': y}, columns=['y1', 'y2'])

# Evaluation subset: the first 250 rows of the features and of each target.
X_eval, y_eval, Y_eval = (obj.iloc[:250] for obj in (X, y, Y))

X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 13 columns):
CRIM       506 non-null float64
ZN         506 non-null float64
INDUS      506 non-null float64
CHAS       506 non-null float64
NOX        506 non-null float64
RM         506 non-null float64
AGE        506 non-null float64
DIS        506 non-null float64
RAD        506 non-null float64
TAX        506 non-null float64
PTRATIO    506 non-null float64
B          506 non-null float64
LSTAT      506 non-null float64
dtypes: float64(13)
memory usage: 51.5 KB


In [4]:
from ml.api import trainer
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import KFold
from collections import OrderedDict

# Estimators to compare, keyed by the short name reported in the "Model"
# column of the extracted results. Built from a list of pairs: wrapping a
# dict literal in OrderedDict only inherits whatever order the dict already
# had, which is not guaranteed before Python 3.7.
ests = OrderedDict([
  ('enet', ElasticNet(random_state=1)),
  ('rf', RandomForestRegressor(random_state=1)),
])

# Unshuffled 3-fold split. random_state is omitted on purpose: it has no
# effect unless shuffle=True, and newer scikit-learn releases raise a
# ValueError when it is set without shuffling. The folds are unchanged.
cv = KFold(n_splits=3)
cv_trainer = trainer.Trainer(trainer.TrainerConfig())

# Train on single outcome
cv_res_st = cv_trainer.train_regressors(X, y, ests, cv.split(X))

# Train on multiple outcomes
cv_res_mt = cv_trainer.train_regressors(X, Y, ests, cv.split(X))

Beginning cross validation (see /tmp/ml-models-exec.log for progress updates)
Beginning model refitting
Training complete
Beginning cross validation (see /tmp/ml-models-exec.log for progress updates)
Beginning model refitting
Training complete


### Single Task

In [5]:
from ml.api.results import predictions
# Extract the predictions from the single-outcome training result; the preview
# below shows fold, model, actual and predicted price for each row.
d_pred_st = predictions.extract(cv_res_st)
d_pred_st.head()

Category,Metadata,Metadata,Predictions,Predictions
Task,Fold,Model,Value:Actual:price,Value:Predicted:price
0,1,enet,24.0,31.747575
1,1,enet,21.6,25.669645
2,1,enet,34.7,29.801338
3,1,enet,33.4,29.468843
4,1,enet,36.2,27.860452


In [6]:
from ml.api.results import predictions
# Predict on the evaluation rows twice: once from the features alone and once
# with the true outcome passed as well.
pred_res_st1 = cv_trainer.predict(cv_res_st, X_eval)
pred_res_st2 = cv_trainer.predict(cv_res_st, X_eval, y_eval)

# Extract both results for inspection and for the length checks further down.
d_eval_st1 = predictions.extract(pred_res_st1)
d_eval_st2 = predictions.extract(pred_res_st2)

In [11]:
# Preview of the single-outcome predictions made without y_eval.
d_eval_st1.head()

Category,Metadata,Metadata,Predictions
Task,Fold,Model,Value:Predicted:price
0,0,enet,31.04918
1,0,enet,25.661981
2,0,enet,29.850903
3,0,enet,29.386711
4,0,enet,27.901432


In [12]:
# Preview of the single-outcome predictions made with y_eval supplied.
d_eval_st2.head()

Category,Metadata,Metadata,Predictions,Predictions
Task,Fold,Model,Value:Actual:price,Value:Predicted:price
0,0,enet,24.0,31.04918
1,0,enet,21.6,25.661981
2,0,enet,34.7,29.850903
3,0,enet,33.4,29.386711
4,0,enet,36.2,27.901432


In [13]:
# Each extracted table should have (number of samples) x (number of estimators)
# rows; the messages report the expected and actual counts on failure.
assert len(d_pred_st) == len(X) * len(ests), \
  'd_pred_st: expected {} rows, found {}'.format(len(X) * len(ests), len(d_pred_st))
assert len(d_eval_st1) == len(X_eval) * len(ests), \
  'd_eval_st1: expected {} rows, found {}'.format(len(X_eval) * len(ests), len(d_eval_st1))
assert len(d_eval_st2) == len(X_eval) * len(ests), \
  'd_eval_st2: expected {} rows, found {}'.format(len(X_eval) * len(ests), len(d_eval_st2))

### Multi Task

In [15]:
from ml.api.results import predictions
# Extract the predictions from the two-outcome training result; the preview
# below shows actual and predicted values for both y1 and y2.
d_pred_mt = predictions.extract(cv_res_mt)
d_pred_mt.head()

Category,Metadata,Metadata,Predictions,Predictions,Predictions,Predictions
Task,Fold,Model,Value:Actual:y1,Value:Actual:y2,Value:Predicted:y1,Value:Predicted:y2
0,1,enet,24.0,24.0,31.747575,31.747575
1,1,enet,21.6,21.6,25.669645,25.669645
2,1,enet,34.7,34.7,29.801338,29.801338
3,1,enet,33.4,33.4,29.468843,29.468843
4,1,enet,36.2,36.2,27.860452,27.860452


In [16]:
from ml.api.results import predictions
# Predict on the evaluation rows twice: once from the features alone and once
# with the two-column true outcomes passed as well.
pred_res_mt1 = cv_trainer.predict(cv_res_mt, X_eval)
pred_res_mt2 = cv_trainer.predict(cv_res_mt, X_eval, Y_eval)

# Extract both results for inspection and for the length checks further down.
d_eval_mt1 = predictions.extract(pred_res_mt1)
d_eval_mt2 = predictions.extract(pred_res_mt2)

In [18]:
# Preview of the two-outcome predictions made without Y_eval.
d_eval_mt1.head()

Category,Metadata,Metadata,Predictions,Predictions
Task,Fold,Model,Value:Predicted:y1,Value:Predicted:y2
0,0,enet,31.04918,31.04918
1,0,enet,25.661981,25.661981
2,0,enet,29.850903,29.850903
3,0,enet,29.386711,29.386711
4,0,enet,27.901432,27.901432


In [19]:
# Preview of the two-outcome predictions made with Y_eval supplied.
d_eval_mt2.head()

Category,Metadata,Metadata,Predictions,Predictions,Predictions,Predictions
Task,Fold,Model,Value:Actual:y1,Value:Actual:y2,Value:Predicted:y1,Value:Predicted:y2
0,0,enet,24.0,24.0,31.04918,31.04918
1,0,enet,21.6,21.6,25.661981,25.661981
2,0,enet,34.7,34.7,29.850903,29.850903
3,0,enet,33.4,33.4,29.386711,29.386711
4,0,enet,36.2,36.2,27.901432,27.901432


In [17]:
# Each extracted table should have (number of samples) x (number of estimators)
# rows; the messages report the expected and actual counts on failure.
assert len(d_pred_mt) == len(X) * len(ests), \
  'd_pred_mt: expected {} rows, found {}'.format(len(X) * len(ests), len(d_pred_mt))
assert len(d_eval_mt1) == len(X_eval) * len(ests), \
  'd_eval_mt1: expected {} rows, found {}'.format(len(X_eval) * len(ests), len(d_eval_mt1))
assert len(d_eval_mt2) == len(X_eval) * len(ests), \
  'd_eval_mt2: expected {} rows, found {}'.format(len(X_eval) * len(ests), len(d_eval_mt2))