In [5]:
import warnings
import time
from timeit import default_timer as timer

warnings.filterwarnings("ignore")

In [6]:
from collections import OrderedDict
from functools import partial
from timeit import default_timer as timer
from dataclasses import dataclass
import typing
from flytekit import Resources, task, workflow, dynamic
from flytekit.types.file import FlyteFile
from flytekit.types.schema import FlyteSchema
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import xgboost

### Common part: global variables

In [7]:
DATASET_PATH = "santander_train.csv"
ETL_KEYS = ["t_readcsv", "t_etl", "t_connect"]
ML_KEYS = ["t_train_test_split", "t_dmatrix", "t_training", "t_infer", "t_ml"]
ML_SCORE_KEYS = ["mse_mean", "cod_mean", "mse_dev"]

VAR_COLS = ["var_%s" % i for i in range(200)]
COLUMNS_NAMES = ["ID_code", "target"] + VAR_COLS
COLUMNS_TYPES = ["object", "int64"] + ["float64" for _ in range(200)]

### Workflow consisting from 2 tasks

In [4]:
# from utils

def load_data_pandas(
    filename,
    columns_names=None,
    columns_types=None,
    header=None,
    nrows=None,
    use_gzip=False,
    parse_dates=None
):
    types = None
    if columns_types:
        types = {columns_names[i]: columns_types[i] for i in range(len(columns_names))}
    return pd.read_csv(
        filename,
        names=columns_names,
        nrows=nrows,
        header=header,
        dtype=types,
        compression="gzip" if use_gzip else None,
        parse_dates=parse_dates,
    )

In [5]:
def mse(y_test, y_pred):
    return ((y_test - y_pred) ** 2).mean()


def cod(y_test, y_pred):
    y_bar = y_test.mean()
    total = ((y_test - y_bar) ** 2).sum()
    residuals = ((y_test - y_pred) ** 2).sum()
    return 1 - (residuals / total)

In [6]:
def split_step(data, target):
    t0 = timer()
    train, valid = data[:-10000], data[-10000:]
    split_time = timer() - t0

    x_train = train.drop([target], axis=1)

    y_train = train[target]

    x_test = valid.drop([target], axis=1)

    y_test = valid[target]

    return (x_train, y_train, x_test, y_test), split_time

In [7]:
@task
def etl_pandas(
    filename: str,
    columns_names: typing.List[str],
    columns_types: typing.List[str],
    etl_keys: typing.List[str]
) -> (
    pd.DataFrame,
    typing.Dict[str, float]
):
    etl_times = {key: 0.0 for key in etl_keys}

    t0 = timer()
    train_pd = load_data_pandas(
        filename=filename,
        columns_names=columns_names,
        columns_types=columns_types,
        header=0,
        use_gzip=filename.endswith(".gz")
    )
    etl_times["t_readcsv"] = timer() - t0

    t_etl_begin = timer()

    for i in range(200):
        col = "var_%d" % i
        var_count = train_pd.groupby(col).agg({col: "count"})

        var_count.columns = ["%s_count" % col]
        var_count = var_count.reset_index()

        train_pd = train_pd.merge(var_count, on=col, how="left")

    for i in range(200):
        col = "var_%d" % i

        mask = train_pd["%s_count" % col] > 1
        train_pd.loc[mask, "%s_gt1" % col] = train_pd.loc[mask, col]

    train_pd = train_pd.drop(["ID_code"], axis=1)
    etl_times["t_etl"] = timer() - t_etl_begin

    return train_pd, etl_times

In [8]:
@task
def ml(
    ml_data: pd.DataFrame,
    target: str,
    ml_keys: typing.List[str],
    ml_score_keys: typing.List[str] 
) -> (
    typing.Dict[str, float],
     typing.Dict[str, float]
):

    ml_times = {key: 0.0 for key in ml_keys}
    ml_scores = {key: 0.0 for key in ml_score_keys}

    (x_train, y_train, x_test, y_test), ml_times["t_train_test_split"] = split_step(
        ml_data, target
    )

    t0 = timer()
    training_dmat_part = xgboost.DMatrix(data=x_train, label=y_train)
    testing_dmat_part = xgboost.DMatrix(data=x_test, label=y_test)
    ml_times["t_dmatrix"] = timer() - t0

    watchlist = [(testing_dmat_part, "eval"), (training_dmat_part, "train")]
    #     hard_code: cpu_params cannot be an input, cause values are not homogeneous
    xgb_params = {
        "objective": "binary:logistic",
        "tree_method": "hist",
        "max_depth": 1,
        "nthread": 56,
        "eta": 0.1,
        "silent": 1,
        "subsample": 0.5,
        "colsample_bytree": 0.05,
        "eval_metric": "auc",
    }

    t0 = timer()
    model = xgboost.train(
        xgb_params,
        dtrain=training_dmat_part,
        num_boost_round=10000,
        evals=watchlist,
        early_stopping_rounds=30,
        maximize=True,
        verbose_eval=1000,
    )
    ml_times["t_train"] = timer() - t0

    t0 = timer()
    yp = model.predict(testing_dmat_part)
    ml_times["t_inference"] = timer() - t0

    ml_scores["mse"] = mse(y_test, yp)
    ml_scores["cod"] = cod(y_test, yp)

    ml_times["t_ml"] += ml_times["t_train"] + ml_times["t_inference"]

    return ml_scores, ml_times

In [9]:
@workflow
def santander_ml_wf(
    filename: str = DATASET_PATH,
    columns_names: typing.List[str] = COLUMNS_NAMES,
    columns_types: typing.List[str] = COLUMNS_TYPES,
    etl_keys: typing.List[str] = ETL_KEYS,
    target: str = 'target',
    ml_keys: typing.List[str] = ML_KEYS,
    ml_score_keys: typing.List[str] = ML_SCORE_KEYS
) -> (
    typing.Dict[str, float],
     typing.Dict[str, float]
):
    df, etl_times = etl_pandas(filename=filename, columns_names=columns_names,
                               columns_types=columns_types, etl_keys=etl_keys)
    return ml(ml_data=df, target=target, ml_keys=ml_keys, ml_score_keys=ml_score_keys)

In [10]:
if __name__ == "__main__":
    start = time.time()
    print(santander_ml_wf())
    print("--- %s seconds ---" % (time.time() - start))

{"asctime": "2021-08-10 11:49:06,773", "name": "flytekit", "levelname": "INFO", "message": "Invoking __main__.etl_pandas with inputs: {'filename': '/localdisk/tvlasova/datasets/santander_train.csv', 'columns_names': ['ID_code', 'target', 'var_0', 'var_1', 'var_2', 'var_3', 'var_4', 'var_5', 'var_6', 'var_7', 'var_8', 'var_9', 'var_10', 'var_11', 'var_12', 'var_13', 'var_14', 'var_15', 'var_16', 'var_17', 'var_18', 'var_19', 'var_20', 'var_21', 'var_22', 'var_23', 'var_24', 'var_25', 'var_26', 'var_27', 'var_28', 'var_29', 'var_30', 'var_31', 'var_32', 'var_33', 'var_34', 'var_35', 'var_36', 'var_37', 'var_38', 'var_39', 'var_40', 'var_41', 'var_42', 'var_43', 'var_44', 'var_45', 'var_46', 'var_47', 'var_48', 'var_49', 'var_50', 'var_51', 'var_52', 'var_53', 'var_54', 'var_55', 'var_56', 'var_57', 'var_58', 'var_59', 'var_60', 'var_61', 'var_62', 'var_63', 'var_64', 'var_65', 'var_66', 'var_67', 'var_68', 'var_69', 'var_70', 'var_71', 'var_72', 'var_73', 'var_74', 'var_75', 'var_76', 'v

{"asctime": "2021-08-10 11:50:19,118", "name": "flytekit", "levelname": "INFO", "message": "Task executed successfully in user level, outputs: (        target    var_0   var_1    var_2   var_3    var_4    var_5   var_6  \\\n0            0   8.9255 -6.7863  11.9081  5.0930  11.4607  -9.2834  5.1187   \n1            0  11.5006 -4.1473  13.8588  5.3890  12.3622   7.0433  5.6208   \n2            0   8.6093 -2.7457  12.0805  7.8928  10.5825  -9.0837  6.9427   \n3            0  11.0604 -2.1518   8.9522  7.1957  12.5846  -1.8361  5.8428   \n4            0   9.8369 -1.4834  12.8746  6.6375  12.2772   2.4486  5.9405   \n...        ...      ...     ...      ...     ...      ...      ...     ...   \n199995       0  11.4880 -0.4956   8.2622  3.5142  10.3404  11.6081  5.6709   \n199996       0   4.9149 -2.4484  16.7052  6.6345   8.3096 -10.5628  5.8802   \n199997       0  11.2232 -5.0518  10.5127  5.6456   9.3410  -5.4086  4.5555   \n199998       0   9.7148 -8.6098  13.6104  5.7930  12.5173   0.533

INFO:flytekit:Invoking __main__.ml with inputs: {'ml_data':         target    var_0   var_1    var_2   var_3    var_4    var_5   var_6  \
0            0   8.9255 -6.7863  11.9081  5.0930  11.4607  -9.2834  5.1187   
1            0  11.5006 -4.1473  13.8588  5.3890  12.3622   7.0433  5.6208   
2            0   8.6093 -2.7457  12.0805  7.8928  10.5825  -9.0837  6.9427   
3            0  11.0604 -2.1518   8.9522  7.1957  12.5846  -1.8361  5.8428   
4            0   9.8369 -1.4834  12.8746  6.6375  12.2772   2.4486  5.9405   
...        ...      ...     ...      ...     ...      ...      ...     ...   
199995       0  11.4880 -0.4956   8.2622  3.5142  10.3404  11.6081  5.6709   
199996       0   4.9149 -2.4484  16.7052  6.6345   8.3096 -10.5628  5.8802   
199997       0  11.2232 -5.0518  10.5127  5.6456   9.3410  -5.4086  4.5555   
199998       0   9.7148 -8.6098  13.6104  5.7930  12.5173   0.5339  6.0479   
199999       0  10.8762 -5.7105  12.1183  8.0328  11.5577   0.3488  5.2839   

   

Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[0]	eval-auc:0.53415	train-auc:0.52877
[1000]	eval-auc:0.89019	train-auc:0.90135
[2000]	eval-auc:0.90305	train-auc:0.91705
[3000]	eval-auc:0.90846	train-auc:0.92279
[4000]	eval-auc:0.91038	train-auc:0.92592
[4347]	eval-auc:0.91130	train-auc:0.92684


{"asctime": "2021-08-10 11:52:49,566", "name": "flytekit", "levelname": "INFO", "message": "Task executed successfully in user level, outputs: ({'mse_mean': 0.0, 'cod_mean': 0.0, 'mse_dev': 0.0, 'mse': 0.054204980425112737, 'cod': 0.40511252848379153}, {'t_train_test_split': 0.00017330492846667767, 't_dmatrix': 1.4182632009033114, 't_training': 0.0, 't_infer': 0.0, 't_ml': 142.77928009605967, 't_train': 142.75993236806244, 't_inference': 0.019347727997228503})"}
INFO:flytekit:Task executed successfully in user level, outputs: ({'mse_mean': 0.0, 'cod_mean': 0.0, 'mse_dev': 0.0, 'mse': 0.054204980425112737, 'cod': 0.40511252848379153}, {'t_train_test_split': 0.00017330492846667767, 't_dmatrix': 1.4182632009033114, 't_training': 0.0, 't_infer': 0.0, 't_ml': 142.77928009605967, 't_train': 142.75993236806244, 't_inference': 0.019347727997228503})


DefaultNamedTupleOutput(o0={'mse_mean': 0.0, 'cod_mean': 0.0, 'mse_dev': 0.0, 'mse': 0.054204980425112737, 'cod': 0.40511252848379153}, o1={'t_train_test_split': 0.00017330492846667767, 't_dmatrix': 1.4182632009033114, 't_training': 0.0, 't_infer': 0.0, 't_ml': 142.77928009605967, 't_train': 142.75993236806244, 't_inference': 0.019347727997228503})
--- 222.80751156806946 seconds ---


## Workflow consisting from more detalized tasks

In [8]:
@task
def load_data_pandas(
    filename: str,
    columns_names: typing.List[str],
    columns_types: typing.List[str],
    use_gzip: bool = False
) -> pd.DataFrame:
    types = None
    if columns_types:
        types = {columns_names[i]: columns_types[i] for i in range(len(columns_names))}
    return pd.read_csv(
        filename,
        names=columns_names,
        dtype=types,
        header=0,
        compression="gzip" if use_gzip else None
    )

{"asctime": "2021-08-13 11:28:42,878", "name": "flytekit", "levelname": "DEBUG", "message": "Task returns unnamed native tuple <class 'pandas.core.frame.DataFrame'>"}


In [15]:
@task
def mse(
    y_test: pd.DataFrame,
    y_pred: pd.DataFrame
) -> float:
    return ((y_test - y_pred) ** 2).mean()


@task
def cod(
    y_test: pd.DataFrame,
    y_pred: pd.DataFrame
) -> float:
    y_bar = y_test.mean()
    total = ((y_test - y_bar) ** 2).sum()
    residuals = ((y_test - y_pred) ** 2).sum()
    return 1 - (residuals / total)

{"asctime": "2021-08-13 11:47:01,431", "name": "flytekit", "levelname": "DEBUG", "message": "Task returns unnamed native tuple <class 'float'>"}
DEBUG:flytekit:Task returns unnamed native tuple <class 'float'>
{"asctime": "2021-08-13 11:47:01,435", "name": "flytekit", "levelname": "DEBUG", "message": "Task returns unnamed native tuple <class 'float'>"}
DEBUG:flytekit:Task returns unnamed native tuple <class 'float'>


In [10]:
@task
def split_step(
    data: pd.DataFrame,
    target: str
) -> (
    pd.DataFrame,
    pd.DataFrame,
    pd.DataFrame,
    pd.DataFrame,
    float
):
    t0 = timer()
    train, valid = data[:-10000], data[-10000:]
    split_time = timer() - t0

    x_train = train.drop([target], axis=1)

    y_train = train[target]

    x_test = valid.drop([target], axis=1)

    y_test = valid[target]

    return x_train, y_train, x_test, y_test, split_time

In [11]:
@dynamic
def etl_pandas(
    filename: str,
    columns_names: typing.List[str],
    columns_types: typing.List[str],
    etl_keys: typing.List[str]
) -> (
    pd.DataFrame,
    typing.Dict[str, float]
):
    etl_times = {key: 0.0 for key in etl_keys}

    t0 = timer()
    train_pd = load_data_pandas(
        filename=filename,
        columns_names=columns_names,
        columns_types=columns_types,
        use_gzip=filename.endswith(".gz")
    )
    etl_times["t_readcsv"] = timer() - t0

    t_etl_begin = timer()

    for i in range(200):
        col = "var_%d" % i
        var_count = train_pd.groupby(col).agg({col: "count"})

        var_count.columns = ["%s_count" % col]
        var_count = var_count.reset_index()

        train_pd = train_pd.merge(var_count, on=col, how="left")

    for i in range(200):
        col = "var_%d" % i

        mask = train_pd["%s_count" % col] > 1
        train_pd.loc[mask, "%s_gt1" % col] = train_pd.loc[mask, col]

    train_pd = train_pd.drop(["ID_code"], axis=1)
    etl_times["t_etl"] = timer() - t_etl_begin

    return train_pd, etl_times

In [16]:
@dynamic
def ml(
    ml_data: pd.DataFrame,
    target: str,
    ml_keys: typing.List[str],
    ml_score_keys: typing.List[str] 
) -> (
    typing.Dict[str, float],
     typing.Dict[str, float]
):

    ml_times = {key: 0.0 for key in ml_keys}
    ml_scores = {key: 0.0 for key in ml_score_keys}

    x_train, y_train, x_test, y_test, ml_times["t_train_test_split"] = split_step(
        data=ml_data, target=target
    )

    t0 = timer()
    training_dmat_part = xgboost.DMatrix(data=x_train, label=y_train)
    testing_dmat_part = xgboost.DMatrix(data=x_test, label=y_test)
    ml_times["t_dmatrix"] = timer() - t0

    watchlist = [(testing_dmat_part, "eval"), (training_dmat_part, "train")]
    #     hard_code: cpu_params cannot be an input, cause values are not homogeneous
    xgb_params = {
        "objective": "binary:logistic",
        "tree_method": "hist",
        "max_depth": 1,
        "nthread": 56,
        "eta": 0.1,
        "silent": 1,
        "subsample": 0.5,
        "colsample_bytree": 0.05,
        "eval_metric": "auc",
    }

    t0 = timer()
    model = xgboost.train(
        xgb_params,
        dtrain=training_dmat_part,
        num_boost_round=10000,
        evals=watchlist,
        early_stopping_rounds=30,
        maximize=True,
        verbose_eval=1000,
    )
    ml_times["t_train"] = timer() - t0

    t0 = timer()
    yp = pd.DataFrame({"preds": model.predict(testing_dmat_part)})
    ml_times["t_inference"] = timer() - t0

    ml_scores["mse"] = mse(y_test=y_test, y_pred=yp)
    ml_scores["cod"] = cod(y_test=y_test, y_pred=yp)

    ml_times["t_ml"] += ml_times["t_train"] + ml_times["t_inference"]

    return ml_scores, ml_times

In [17]:
@workflow
def santander_ml_wf(
    filename: str = DATASET_PATH,
    columns_names: typing.List[str] = COLUMNS_NAMES,
    columns_types: typing.List[str] = COLUMNS_TYPES,
    etl_keys: typing.List[str] = ETL_KEYS,
    target: str = 'target',
    ml_keys: typing.List[str] = ML_KEYS,
    ml_score_keys: typing.List[str] = ML_SCORE_KEYS
) -> (
    typing.Dict[str, float],
     typing.Dict[str, float]
):
    df, etl_times = etl_pandas(filename=filename, columns_names=columns_names,
                               columns_types=columns_types, etl_keys=etl_keys)
    return ml(ml_data=df, target=target, ml_keys=ml_keys, ml_score_keys=ml_score_keys)

In [18]:
if __name__ == "__main__":
    start = time.time()
    print(santander_ml_wf())
    print("--- %s seconds ---" % (time.time() - start))

{"asctime": "2021-08-13 11:48:45,320", "name": "flytekit", "levelname": "INFO", "message": "Invoking __main__.etl_pandas with inputs: {'filename': '/localdisk/tvlasova/datasets/santander_train.csv', 'columns_names': ['ID_code', 'target', 'var_0', 'var_1', 'var_2', 'var_3', 'var_4', 'var_5', 'var_6', 'var_7', 'var_8', 'var_9', 'var_10', 'var_11', 'var_12', 'var_13', 'var_14', 'var_15', 'var_16', 'var_17', 'var_18', 'var_19', 'var_20', 'var_21', 'var_22', 'var_23', 'var_24', 'var_25', 'var_26', 'var_27', 'var_28', 'var_29', 'var_30', 'var_31', 'var_32', 'var_33', 'var_34', 'var_35', 'var_36', 'var_37', 'var_38', 'var_39', 'var_40', 'var_41', 'var_42', 'var_43', 'var_44', 'var_45', 'var_46', 'var_47', 'var_48', 'var_49', 'var_50', 'var_51', 'var_52', 'var_53', 'var_54', 'var_55', 'var_56', 'var_57', 'var_58', 'var_59', 'var_60', 'var_61', 'var_62', 'var_63', 'var_64', 'var_65', 'var_66', 'var_67', 'var_68', 'var_69', 'var_70', 'var_71', 'var_72', 'var_73', 'var_74', 'var_75', 'var_76', 'v

{"asctime": "2021-08-13 11:48:45,323", "name": "flytekit", "levelname": "INFO", "message": "Executing Dynamic workflow, using raw inputs"}
INFO:flytekit:Executing Dynamic workflow, using raw inputs
{"asctime": "2021-08-13 11:49:57,910", "name": "flytekit", "levelname": "INFO", "message": "Task executed successfully in user level, outputs: (        target    var_0   var_1    var_2   var_3    var_4    var_5   var_6  \\\n0            0   8.9255 -6.7863  11.9081  5.0930  11.4607  -9.2834  5.1187   \n1            0  11.5006 -4.1473  13.8588  5.3890  12.3622   7.0433  5.6208   \n2            0   8.6093 -2.7457  12.0805  7.8928  10.5825  -9.0837  6.9427   \n3            0  11.0604 -2.1518   8.9522  7.1957  12.5846  -1.8361  5.8428   \n4            0   9.8369 -1.4834  12.8746  6.6375  12.2772   2.4486  5.9405   \n...        ...      ...     ...      ...     ...      ...      ...     ...   \n199995       0  11.4880 -0.4956   8.2622  3.5142  10.3404  11.6081  5.6709   \n199996       0   4.9149 -

INFO:flytekit:Invoking __main__.ml with inputs: {'ml_data':         target    var_0   var_1    var_2   var_3    var_4    var_5   var_6  \
0            0   8.9255 -6.7863  11.9081  5.0930  11.4607  -9.2834  5.1187   
1            0  11.5006 -4.1473  13.8588  5.3890  12.3622   7.0433  5.6208   
2            0   8.6093 -2.7457  12.0805  7.8928  10.5825  -9.0837  6.9427   
3            0  11.0604 -2.1518   8.9522  7.1957  12.5846  -1.8361  5.8428   
4            0   9.8369 -1.4834  12.8746  6.6375  12.2772   2.4486  5.9405   
...        ...      ...     ...      ...     ...      ...      ...     ...   
199995       0  11.4880 -0.4956   8.2622  3.5142  10.3404  11.6081  5.6709   
199996       0   4.9149 -2.4484  16.7052  6.6345   8.3096 -10.5628  5.8802   
199997       0  11.2232 -5.0518  10.5127  5.6456   9.3410  -5.4086  4.5555   
199998       0   9.7148 -8.6098  13.6104  5.7930  12.5173   0.5339  6.0479   
199999       0  10.8762 -5.7105  12.1183  8.0328  11.5577   0.3488  5.2839   

   

Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[0]	eval-auc:0.53415	train-auc:0.52877
[1000]	eval-auc:0.89019	train-auc:0.90135
[2000]	eval-auc:0.90305	train-auc:0.91705
[3000]	eval-auc:0.90846	train-auc:0.92279
[4000]	eval-auc:0.91038	train-auc:0.92592
[4348]	eval-auc:0.91127	train-auc:0.92682


{"asctime": "2021-08-13 11:52:22,860", "name": "flytekit", "levelname": "INFO", "message": "Task executed successfully in user level, outputs: ({'mse_mean': 0.0, 'cod_mean': 0.0, 'mse_dev': 0.0, 'mse': preds    NaN\n190000   NaN\n190001   NaN\n190002   NaN\n190003   NaN\n          ..\n199995   NaN\n199996   NaN\n199997   NaN\n199998   NaN\n199999   NaN\nLength: 10001, dtype: float64, 'cod': preds     1.0\n190000    1.0\n190001    1.0\n190002    1.0\n190003    1.0\n         ... \n199995    1.0\n199996    1.0\n199997    1.0\n199998    1.0\n199999    1.0\nLength: 10001, dtype: float64}, {'t_train_test_split': 8.55959951877594e-05, 't_dmatrix': 1.1444270720239729, 't_training': 0.0, 't_infer': 0.0, 't_ml': 134.85849454486743, 't_train': 134.8375682339538, 't_inference': 0.02092631091363728})"}
INFO:flytekit:Task executed successfully in user level, outputs: ({'mse_mean': 0.0, 'cod_mean': 0.0, 'mse_dev': 0.0, 'mse': preds    NaN
190000   NaN
190001   NaN
190002   NaN
190003   NaN
          

DefaultNamedTupleOutput(o0={'mse_mean': 0.0, 'cod_mean': 0.0, 'mse_dev': 0.0, 'mse': preds    NaN
190000   NaN
190001   NaN
190002   NaN
190003   NaN
          ..
199995   NaN
199996   NaN
199997   NaN
199998   NaN
199999   NaN
Length: 10001, dtype: float64, 'cod': preds     1.0
190000    1.0
190001    1.0
190002    1.0
190003    1.0
         ... 
199995    1.0
199996    1.0
199997    1.0
199998    1.0
199999    1.0
Length: 10001, dtype: float64}, o1={'t_train_test_split': 8.55959951877594e-05, 't_dmatrix': 1.1444270720239729, 't_training': 0.0, 't_infer': 0.0, 't_ml': 134.85849454486743, 't_train': 134.8375682339538, 't_inference': 0.02092631091363728})
--- 217.5518991947174 seconds ---
