In [3]:
# Notebook setup: enable autoreload, import dependencies, configure printing.
# `%autoreload 2` asks IPython to reload imported modules before each cell
# runs, so edits to local source files are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2
from functools import partial

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, balanced_accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import resample

from vflow import Vset, init_args  # must install vflow first. NOTE(review): this comment previously said `pip install pcsp`, which does not match the imported package name; the PyPI name appears to be `veridical-flow` -- confirm

np.set_printoptions(threshold=5)  # summarize arrays with more than 5 elements to keep printed output short

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# End-to-end example: synthetic data -> subsampling -> model fitting ->
# prediction -> metric evaluation, with each stage wrapped in a vflow Vset.
np.random.seed(13)  # seed NumPy's global RNG before generating the data

# Synthetic classification data: 50 samples, 5 features.
X, y = make_classification(n_samples=50, n_features=5)

# Pass the four splits through init_args together with a name for each one.
# train_test_split(X, y) returns X_train, X_test, y_train, y_test in that
# order, so the names must follow the same order (the label splits were
# previously mislabeled as 'X_train' / 'X_test').
X_train, X_test, y_train, y_test = init_args(
    train_test_split(X, y, random_state=42),
    names=['X_train', 'X_test', 'y_train', 'y_test'])

# Three resampling functions, each drawing 20 samples with its own seed.
subsampling_funcs = [partial(resample,
                             n_samples=20,
                             random_state=i)
                     for i in range(3)]

subsampling_set = Vset(name='subsampling', modules=subsampling_funcs)
X_trains, y_trains = subsampling_set(X_train, y_train)

# fit models
modeling_set = Vset(name='modeling',
                    modules=[LogisticRegression(C=1, max_iter=1000, tol=0.1),
                             DecisionTreeClassifier(min_samples_leaf=1)],
                    module_keys=["LR", "DT"])

_ = modeling_set.fit(X_trains, y_trains)

# predict now returns modeling_set.output rather than the result of sep_dicts(output_dict)
preds_test = modeling_set.predict(X_test)

# Metrics Vset; tracking_dir points at the local './mlruns' directory.
# NOTE(review): the saved output of this cell ends with
# "AttributeError: 'str' object has no attribute 'origin'". The traceback is
# not shown, so the failing call is unknown -- presumably raised inside vflow
# (possibly on the tracking path); re-run on a fresh kernel to confirm.
hard_metrics_set = Vset(name='hard_metrics',
                        modules=[accuracy_score, balanced_accuracy_score],
                        module_keys=["Acc", "Bal_Acc"],
                        tracking_dir='./mlruns')

hard_metrics = hard_metrics_set.evaluate(y_test, preds_test)

AttributeError: 'str' object has no attribute 'origin'

In [None]:
# Run the `mlflow ui` command through the notebook's shell escape.
# NOTE(review): presumably used to browse the runs tracked under './mlruns'
# by the metrics Vset -- confirm it is launched from the same working directory.
!mlflow ui