In [1]:
import time
from timeit import default_timer as timer

import pandas as pd
from collections import OrderedDict
from dataclasses import dataclass
import typing
from flytekit import Resources, task, workflow, dynamic
from flytekit.types.file import FlyteFile
from flytekit.types.schema import FlyteSchema
import numpy as np
import sklearn.linear_model as lm
from sklearn.model_selection import train_test_split
from sklearn import config_context

In [2]:
import warnings

warnings.filterwarnings("ignore")

In [13]:
# utils

def mse(y_test, y_pred):
    return ((y_test - y_pred) ** 2).mean()


def cod(y_test, y_pred):
    y_bar = y_test.mean()
    total = ((y_test - y_bar) ** 2).sum()
    residuals = ((y_test - y_pred) ** 2).sum()
    return 1 - (residuals / total)


def split(X, y, test_size=0.1, stratify=None, random_state=None):
    t0 = timer()
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, stratify=stratify, random_state=random_state
    )
    split_time = timer() - t0

    return (X_train, y_train, X_test, y_test), split_time

In [3]:
DATASET_PATH = "/localdisk/tvlasova/datasets/census.csv"

COLS = [
        "YEAR",
        "DATANUM",
        "SERIAL",
        "CBSERIAL",
        "HHWT",
        "CPI99",
        "GQ",
        "PERNUM",
        "SEX",
        "AGE",
        "INCTOT",
        "EDUC",
        "EDUCD",
        "EDUC_HEAD",
        "EDUC_POP",
        "EDUC_MOM",
        "EDUCD_MOM2",
        "EDUCD_POP2",
        "INCTOT_MOM",
        "INCTOT_POP",
        "INCTOT_MOM2",
        "INCTOT_POP2",
        "INCTOT_HEAD",
        "SEX_HEAD",
    ]

COLUMNS_TYPES = [
        "int",
        "int",
        "int",
        "float",
        "int",
        "float",
        "int",
        "float",
        "int",
        "int",
        "int",
        "int",
        "int",
        "int",
        "int",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
        "float",
    ]

# X = OrderedDict((zip(COLS, list(map(eval, COLUMNS_TYPES)))))
# Y = OrderedDict({"EDUC": X.pop("EDUC")})
# Y

In [4]:
# ML specific
N_RUNS = 50
TEST_SIZE = 0.1
RANDOM_STATE = 777

ML_KEYS = ["t_train_test_split", "t_train", "t_inference", "t_ml"]
ML_SCORE_KEYS = ["mse_mean", "cod_mean", "mse_dev"]

In [8]:
@task
def feature_eng_task(
    data: FlyteFile[typing.TypeVar("csv")],
    cols: typing.List[str]
) -> (
    pd.DataFrame
):

    df =  pd.read_csv(data)[cols]

    df = df[df["INCTOT"] != 9999999]
    df = df[df["EDUC"] != -1]
    df = df[df["EDUCD"] != -1]

    df["INCTOT"] = df["INCTOT"] * df["CPI99"]

    for column in cols:
        df[column] = df[column].fillna(-1)
        df[column] = df[column].astype("float64")

    return df

{"asctime": "2021-08-11 14:06:52,456", "name": "flytekit", "levelname": "DEBUG", "message": "Task returns unnamed native tuple <class 'pandas.core.frame.DataFrame'>"}


In [9]:
@task
def ml_task(
    df: pd.DataFrame,
    random_state: int,
    n_runs: int,
    test_size: float,
    ml_keys: typing.List[str],
    ml_score_keys: typing.List[str]
) -> (
    typing.Dict[str, float],
    typing.Dict[str, float]
):

    # Fetch the input and output data from train dataset
    y = np.ascontiguousarray(df["EDUC"], dtype=np.float64)
    X = np.ascontiguousarray(df.drop(columns=["EDUC", "CPI99"]), dtype=np.float64)
    
    clf = lm.Ridge()
    
    mse_values, cod_values = [], []
    ml_times = {key: 0.0 for key in ml_keys}
    ml_scores = {key: 0.0 for key in ml_score_keys}

    print("ML runs: ", n_runs)
    for i in range(n_runs):
        (X_train, y_train, X_test, y_test), split_time = split(
            X, y, test_size=test_size, random_state=random_state)
        ml_times["t_train_test_split"] += split_time
        random_state += 777

        t0 = timer()
        with config_context(assume_finite=True):
            model = clf.fit(X_train, y_train)
        ml_times["t_train"] += timer() - t0

        t0 = timer()
        y_pred = model.predict(X_test)
        ml_times["t_inference"] += timer() - t0

        mse_values.append(mse(y_test, y_pred))
        cod_values.append(cod(y_test, y_pred))

    ml_times["t_ml"] += ml_times["t_train"] + ml_times["t_inference"]

    ml_scores["mse_mean"] = sum(mse_values) / len(mse_values)
    ml_scores["cod_mean"] = sum(cod_values) / len(cod_values)
    ml_scores["mse_dev"] = pow(
        sum([(mse_value - ml_scores["mse_mean"]) ** 2 for mse_value in mse_values])
        / (len(mse_values) - 1),
        0.5,
    )
    ml_scores["cod_dev"] = pow(
        sum([(cod_value - ml_scores["cod_mean"]) ** 2 for cod_value in cod_values])
        / (len(cod_values) - 1),
        0.5,
    )

    return ml_scores, ml_times

In [10]:
@workflow
def census_bench_wf(
    dataset: FlyteFile["csv"] = DATASET_PATH,
    cols: typing.List[str] = COLS,
    random_state: int = RANDOM_STATE,
    n_runs: int = N_RUNS,
    test_size: float = TEST_SIZE,
    ml_keys: typing.List[str] = ML_KEYS,
    ml_score_keys: typing.List[str] = ML_SCORE_KEYS
) -> (
    typing.Dict[str, float],
    typing.Dict[str, float]
):
    df = feature_eng_task(data=dataset, cols=cols)
    ml_scores, ml_times = ml_task(df=df, random_state=random_state, n_runs=n_runs, test_size=test_size,
                                  ml_keys=ml_keys, ml_score_keys=ml_score_keys)
    return ml_scores, ml_times

In [18]:
if __name__ == "__main__":
    start = time.time()
    print(census_bench_wf())
    print("--- %s seconds ---" % (time.time() - start))

{"asctime": "2021-08-10 11:01:51,102", "name": "flytekit", "levelname": "INFO", "message": "Invoking __main__.feature_eng_task with inputs: {'data': /tmp/flyte/20210810_104954/mock_remote/fd6269515537e60ac9321275db9c9722/census.csv, 'cols': ['YEAR', 'DATANUM', 'SERIAL', 'CBSERIAL', 'HHWT', 'CPI99', 'GQ', 'PERNUM', 'SEX', 'AGE', 'INCTOT', 'EDUC', 'EDUCD', 'EDUC_HEAD', 'EDUC_POP', 'EDUC_MOM', 'EDUCD_MOM2', 'EDUCD_POP2', 'INCTOT_MOM', 'INCTOT_POP', 'INCTOT_MOM2', 'INCTOT_POP2', 'INCTOT_HEAD', 'SEX_HEAD']}"}
INFO:flytekit:Invoking __main__.feature_eng_task with inputs: {'data': /tmp/flyte/20210810_104954/mock_remote/fd6269515537e60ac9321275db9c9722/census.csv, 'cols': ['YEAR', 'DATANUM', 'SERIAL', 'CBSERIAL', 'HHWT', 'CPI99', 'GQ', 'PERNUM', 'SEX', 'AGE', 'INCTOT', 'EDUC', 'EDUCD', 'EDUC_HEAD', 'EDUC_POP', 'EDUC_MOM', 'EDUCD_MOM2', 'EDUCD_POP2', 'INCTOT_MOM', 'INCTOT_POP', 'INCTOT_MOM2', 'INCTOT_POP2', 'INCTOT_HEAD', 'SEX_HEAD']}
{"asctime": "2021-08-10 11:02:46,183", "name": "flytekit", "

INFO:flytekit:Invoking __main__.ml_task with inputs: {'df':             YEAR  DATANUM     SERIAL   CBSERIAL   HHWT  CPI99   GQ  PERNUM  \
0         1970.0      2.0        1.0       -1.0  100.0  4.540  1.0     1.0   
1         1970.0      2.0        1.0       -1.0  100.0  4.540  1.0     2.0   
2         1970.0      2.0        2.0       -1.0  100.0  4.540  1.0     1.0   
3         1970.0      2.0        2.0       -1.0  100.0  4.540  1.0     2.0   
4         1970.0      2.0        4.0       -1.0  100.0  4.540  1.0     1.0   
...          ...      ...        ...        ...    ...    ...  ...     ...   
21721915  2010.0      1.0  1397787.0  1413806.0   18.0  0.764  1.0     1.0   
21721916  2010.0      1.0  1397788.0  1414099.0   57.0  0.764  1.0     1.0   
21721917  2010.0      1.0  1397788.0  1414099.0   57.0  0.764  1.0     2.0   
21721920  2010.0      1.0  1397789.0  1414268.0  101.0  0.764  1.0     1.0   
21721921  2010.0      1.0  1397789.0  1414268.0  101.0  0.764  1.0     2.0   

   

ML runs:  50


{"asctime": "2021-08-10 11:07:39,080", "name": "flytekit", "levelname": "INFO", "message": "Task executed successfully in user level, outputs: ({'mse_mean': 0.03256456908804994, 'cod_mean': 0.9953675334603814, 'mse_dev': 4.179940420229173e-05, 'cod_dev': 5.869227912341005e-06}, {'t_train_test_split': 141.7956369665917, 't_train': 137.82118896697648, 't_inference': 2.0071465999353677, 't_ml': 139.82833556691185})"}
INFO:flytekit:Task executed successfully in user level, outputs: ({'mse_mean': 0.03256456908804994, 'cod_mean': 0.9953675334603814, 'mse_dev': 4.179940420229173e-05, 'cod_dev': 5.869227912341005e-06}, {'t_train_test_split': 141.7956369665917, 't_train': 137.82118896697648, 't_inference': 2.0071465999353677, 't_ml': 139.82833556691185})


{'mse_mean': 0.03256456908804994, 'cod_mean': 0.9953675334603814, 'mse_dev': 4.179940420229173e-05, 'cod_dev': 5.869227912341005e-06}
{'t_train_test_split': 141.7956369665917, 't_train': 137.82118896697648, 't_inference': 2.0071465999353677, 't_ml': 139.82833556691185}
--- 349.6951377391815 seconds ---


## Workflow consisting from more detalized tasks

In [5]:
FEATURES = OrderedDict((zip(COLS, list(map(eval, COLUMNS_TYPES)))))
TARGET = OrderedDict({"EDUC": FEATURES.pop("EDUC")})

In [14]:
# utils

@task
def mse(
    y_test: pd.DataFrame,
    y_pred: pd.DataFrame
) -> float:
    return ((y_test['EDUC'] - y_pred['EDUC']) ** 2).mean()


# @workflow
# def mse_wf(
#     y_test: pd.DataFrame,
#     y_pred: pd.DataFrame
# ) -> float:
#     return mse(y_test=y_test, y_pred=y_pred)

{"asctime": "2021-08-12 12:33:43,558", "name": "flytekit", "levelname": "DEBUG", "message": "Task returns unnamed native tuple <class 'float'>"}
DEBUG:flytekit:Task returns unnamed native tuple <class 'float'>


In [15]:
@task
def cod(
    y_test: pd.DataFrame,
    y_pred: pd.DataFrame
) -> float:
    y_bar = y_test.mean()
    total = ((y_test - y_bar) ** 2).sum()
    residuals = ((y_test - y_pred) ** 2).sum()
    return 1 - (residuals / total)


# @workflow
# def cod_wf(
#     y_test: pd.DataFrame,
#     y_pred: pd.DataFrame
# ) -> float:
#     return cod(y_test=y_test, y_pred=y_pred)

{"asctime": "2021-08-12 12:33:47,751", "name": "flytekit", "levelname": "DEBUG", "message": "Task returns unnamed native tuple <class 'float'>"}
DEBUG:flytekit:Task returns unnamed native tuple <class 'float'>


In [8]:
@task
def feature_eng_task(
    data: FlyteFile[typing.TypeVar("csv")],
    cols: typing.List[str]
) -> (
    pd.DataFrame
):

    df =  pd.read_csv(data)[cols]

    df = df[df["INCTOT"] != 9999999]
    df = df[df["EDUC"] != -1]
    df = df[df["EDUCD"] != -1]

    df["INCTOT"] = df["INCTOT"] * df["CPI99"]

    for column in cols:
        df[column] = df[column].fillna(-1)
        df[column] = df[column].astype("float64")

    return df

{"asctime": "2021-08-12 12:30:08,179", "name": "flytekit", "levelname": "DEBUG", "message": "Task returns unnamed native tuple <class 'pandas.core.frame.DataFrame'>"}


In [16]:
# @task
# @dynamic
@task
def ml_task(
    df: pd.DataFrame,
    random_state: int,
    n_runs: int,
    test_size: float,
    ml_keys: typing.List[str],
    ml_score_keys: typing.List[str]
) -> (
    typing.Dict[str, float],
    typing.Dict[str, float]
):

    # Fetch the input and output data from train dataset
#     y = np.ascontiguousarray(df["EDUC"], dtype=np.float64)
#     X = np.ascontiguousarray(df.drop(columns=["EDUC", "CPI99"]), dtype=np.float64)
    y = df["EDUC"]
    X = df.drop(columns=["EDUC", "CPI99"])
    
    clf = lm.Ridge()
    
    mse_values, cod_values = [], []
    ml_times = {key: 0.0 for key in ml_keys}
    ml_scores = {key: 0.0 for key in ml_score_keys}

    print("ML runs: ", n_runs)
    for i in range(n_runs):
        (X_train, y_train, X_test, y_test), split_time = split(X=X, y=y)
        y_test = pd.DataFrame({"EDUC": y_test})
        ml_times["t_train_test_split"] += split_time
        random_state += 777

        t0 = timer()
        with config_context(assume_finite=True):
            model = clf.fit(X_train, y_train)
        ml_times["t_train"] += timer() - t0

        t0 = timer()
        y_pred = pd.DataFrame({"EDUC": model.predict(X_test)})
        ml_times["t_inference"] += timer() - t0

        mse_values.append(mse(y_test=y_test, y_pred=y_pred))
        cod_values.append(cod(y_test=y_test, y_pred=y_pred))

    ml_times["t_ml"] += ml_times["t_train"] + ml_times["t_inference"]

    ml_scores["mse_mean"] = sum(mse_values) / len(mse_values)
    ml_scores["cod_mean"] = sum(cod_values) / len(cod_values)
    ml_scores["mse_dev"] = pow(
        sum([(mse_value - ml_scores["mse_mean"]) ** 2 for mse_value in mse_values])
        / (len(mse_values) - 1),
        0.5,
    )
    ml_scores["cod_dev"] = pow(
        sum([(cod_value - ml_scores["cod_mean"]) ** 2 for cod_value in cod_values])
        / (len(cod_values) - 1),
        0.5,
    )

    return ml_scores, ml_times

In [11]:
@workflow
def census_bench_wf(
    dataset: FlyteFile["csv"] = DATASET_PATH,
    cols: typing.List[str] = COLS,
    random_state: int = RANDOM_STATE,
    n_runs: int = N_RUNS,
    test_size: float = TEST_SIZE,
    ml_keys: typing.List[str] = ML_KEYS,
    ml_score_keys: typing.List[str] = ML_SCORE_KEYS
) -> (
    typing.Dict[str, float],
    typing.Dict[str, float]
):
    df = feature_eng_task(data=dataset, cols=cols)
    ml_scores, ml_times = ml_task(df=df, random_state=random_state, n_runs=n_runs, test_size=test_size,
                                  ml_keys=ml_keys, ml_score_keys=ml_score_keys)
    return ml_scores, ml_times

In [17]:
# how workflow output looks like if ml function is decorated as @task
if __name__ == "__main__":
    start = time.time()
    print(census_bench_wf())
    print("--- %s seconds ---" % (time.time() - start))

{"asctime": "2021-08-12 12:34:09,372", "name": "flytekit", "levelname": "INFO", "message": "Invoking __main__.feature_eng_task with inputs: {'data': /tmp/flyte/20210812_122914/mock_remote/5ee1fabb5a7f09c14da3cf4a9257aed8/census.csv, 'cols': ['YEAR', 'DATANUM', 'SERIAL', 'CBSERIAL', 'HHWT', 'CPI99', 'GQ', 'PERNUM', 'SEX', 'AGE', 'INCTOT', 'EDUC', 'EDUCD', 'EDUC_HEAD', 'EDUC_POP', 'EDUC_MOM', 'EDUCD_MOM2', 'EDUCD_POP2', 'INCTOT_MOM', 'INCTOT_POP', 'INCTOT_MOM2', 'INCTOT_POP2', 'INCTOT_HEAD', 'SEX_HEAD']}"}
INFO:flytekit:Invoking __main__.feature_eng_task with inputs: {'data': /tmp/flyte/20210812_122914/mock_remote/5ee1fabb5a7f09c14da3cf4a9257aed8/census.csv, 'cols': ['YEAR', 'DATANUM', 'SERIAL', 'CBSERIAL', 'HHWT', 'CPI99', 'GQ', 'PERNUM', 'SEX', 'AGE', 'INCTOT', 'EDUC', 'EDUCD', 'EDUC_HEAD', 'EDUC_POP', 'EDUC_MOM', 'EDUCD_MOM2', 'EDUCD_POP2', 'INCTOT_MOM', 'INCTOT_POP', 'INCTOT_MOM2', 'INCTOT_POP2', 'INCTOT_HEAD', 'SEX_HEAD']}
{"asctime": "2021-08-12 12:35:05,913", "name": "flytekit", "

INFO:flytekit:Invoking __main__.ml_task with inputs: {'df':             YEAR  DATANUM     SERIAL   CBSERIAL   HHWT  CPI99   GQ  PERNUM  \
0         1970.0      2.0        1.0       -1.0  100.0  4.540  1.0     1.0   
1         1970.0      2.0        1.0       -1.0  100.0  4.540  1.0     2.0   
2         1970.0      2.0        2.0       -1.0  100.0  4.540  1.0     1.0   
3         1970.0      2.0        2.0       -1.0  100.0  4.540  1.0     2.0   
4         1970.0      2.0        4.0       -1.0  100.0  4.540  1.0     1.0   
...          ...      ...        ...        ...    ...    ...  ...     ...   
21721915  2010.0      1.0  1397787.0  1413806.0   18.0  0.764  1.0     1.0   
21721916  2010.0      1.0  1397788.0  1414099.0   57.0  0.764  1.0     1.0   
21721917  2010.0      1.0  1397788.0  1414099.0   57.0  0.764  1.0     2.0   
21721920  2010.0      1.0  1397789.0  1414268.0  101.0  0.764  1.0     1.0   
21721921  2010.0      1.0  1397789.0  1414268.0  101.0  0.764  1.0     2.0   

   

ML runs:  3


{"asctime": "2021-08-12 12:35:25,899", "name": "flytekit", "levelname": "INFO", "message": "Invoking __main__.mse with inputs: {'y_test':           EDUC\n13002284  10.0\n13374354   6.0\n8634381    6.0\n5533081    5.0\n9306618    3.0\n...        ...\n16329469   3.0\n9627701   11.0\n5984924    2.0\n13312613   7.0\n5466884    2.0\n\n[1683360 rows x 1 columns], 'y_pred':               EDUC\n0        10.013471\n1         6.073498\n2         5.880829\n3         4.936109\n4         2.829866\n...            ...\n1683355   2.817439\n1683356  11.216817\n1683357   2.281044\n1683358   6.881670\n1683359   2.376068\n\n[1683360 rows x 1 columns]}"}
INFO:flytekit:Invoking __main__.mse with inputs: {'y_test':           EDUC
13002284  10.0
13374354   6.0
8634381    6.0
5533081    5.0
9306618    3.0
...        ...
16329469   3.0
9627701   11.0
5984924    2.0
13312613   7.0
5466884    2.0

[1683360 rows x 1 columns], 'y_pred':               EDUC
0        10.013471
1         6.073498
2         5.880829
3  

{"asctime": "2021-08-12 12:35:51,117", "name": "flytekit", "levelname": "INFO", "message": "Task executed successfully in user level, outputs: EDUC    0.842034\ndtype: float64"}
INFO:flytekit:Task executed successfully in user level, outputs: EDUC    0.842034
dtype: float64
{"asctime": "2021-08-12 12:35:51,119", "name": "flytekit", "levelname": "ERROR", "message": "Exception when executing unsupported operand type(s) for +: 'int' and 'Promise'", "exc_info": "Traceback (most recent call last):\n  File \"/nfs/site/home/tvlasova/.local/lib/python3.8/site-packages/flytekit/core/base_task.py\", line 483, in dispatch_execute\n    native_outputs = self.execute(**native_inputs)\n  File \"/nfs/site/home/tvlasova/.local/lib/python3.8/site-packages/flytekit/core/python_function_task.py\", line 159, in execute\n    return self._task_function(**kwargs)\n  File \"/tmp/ipykernel_87396/4183838655.py\", line 48, in ml_task\n    ml_scores[\"mse_mean\"] = sum(mse_values) / len(mse_values)\nTypeError: uns

TypeError: unsupported operand type(s) for +: 'int' and 'Promise'

In [17]:
#  how workflow output looks like if ml function is decorated as @dynamic
if __name__ == "__main__":
    start = time.time()
    print(census_bench_wf())
    print("--- %s seconds ---" % (time.time() - start))

{"asctime": "2021-08-11 14:07:58,197", "name": "flytekit", "levelname": "INFO", "message": "Invoking __main__.feature_eng_task with inputs: {'data': /tmp/flyte/20210811_140620/mock_remote/e638fe476fa2abd3272f00e014acedfd/census.csv, 'cols': ['YEAR', 'DATANUM', 'SERIAL', 'CBSERIAL', 'HHWT', 'CPI99', 'GQ', 'PERNUM', 'SEX', 'AGE', 'INCTOT', 'EDUC', 'EDUCD', 'EDUC_HEAD', 'EDUC_POP', 'EDUC_MOM', 'EDUCD_MOM2', 'EDUCD_POP2', 'INCTOT_MOM', 'INCTOT_POP', 'INCTOT_MOM2', 'INCTOT_POP2', 'INCTOT_HEAD', 'SEX_HEAD']}"}
INFO:flytekit:Invoking __main__.feature_eng_task with inputs: {'data': /tmp/flyte/20210811_140620/mock_remote/e638fe476fa2abd3272f00e014acedfd/census.csv, 'cols': ['YEAR', 'DATANUM', 'SERIAL', 'CBSERIAL', 'HHWT', 'CPI99', 'GQ', 'PERNUM', 'SEX', 'AGE', 'INCTOT', 'EDUC', 'EDUCD', 'EDUC_HEAD', 'EDUC_POP', 'EDUC_MOM', 'EDUCD_MOM2', 'EDUCD_POP2', 'INCTOT_MOM', 'INCTOT_POP', 'INCTOT_MOM2', 'INCTOT_POP2', 'INCTOT_HEAD', 'SEX_HEAD']}
{"asctime": "2021-08-11 14:08:58,051", "name": "flytekit", "

INFO:flytekit:Invoking __main__.ml_task with inputs: {'df':             YEAR  DATANUM     SERIAL   CBSERIAL   HHWT  CPI99   GQ  PERNUM  \
0         1970.0      2.0        1.0       -1.0  100.0  4.540  1.0     1.0   
1         1970.0      2.0        1.0       -1.0  100.0  4.540  1.0     2.0   
2         1970.0      2.0        2.0       -1.0  100.0  4.540  1.0     1.0   
3         1970.0      2.0        2.0       -1.0  100.0  4.540  1.0     2.0   
4         1970.0      2.0        4.0       -1.0  100.0  4.540  1.0     1.0   
...          ...      ...        ...        ...    ...    ...  ...     ...   
21721915  2010.0      1.0  1397787.0  1413806.0   18.0  0.764  1.0     1.0   
21721916  2010.0      1.0  1397788.0  1414099.0   57.0  0.764  1.0     1.0   
21721917  2010.0      1.0  1397788.0  1414099.0   57.0  0.764  1.0     2.0   
21721920  2010.0      1.0  1397789.0  1414268.0  101.0  0.764  1.0     1.0   
21721921  2010.0      1.0  1397789.0  1414268.0  101.0  0.764  1.0     2.0   

   

ML runs:  50






{"asctime": "2021-08-11 14:18:45,195", "name": "flytekit", "levelname": "INFO", "message": "Task executed successfully in user level, outputs: ({'mse_mean': 15.166442318379705, 'cod_mean': EDUC    0.841875\ndtype: float64, 'mse_dev': 0.05035051027266792, 'cod_dev': EDUC    0.000635\ndtype: float64}, {'t_train_test_split': 393.2775212880224, 't_train': 110.75327800540254, 't_inference': 2.285789751447737, 't_ml': 113.03906775685027})"}
INFO:flytekit:Task executed successfully in user level, outputs: ({'mse_mean': 15.166442318379705, 'cod_mean': EDUC    0.841875
dtype: float64, 'mse_dev': 0.05035051027266792, 'cod_dev': EDUC    0.000635
dtype: float64}, {'t_train_test_split': 393.2775212880224, 't_train': 110.75327800540254, 't_inference': 2.285789751447737, 't_ml': 113.03906775685027})


DefaultNamedTupleOutput(o0={'mse_mean': 15.166442318379705, 'cod_mean': EDUC    0.841875
dtype: float64, 'mse_dev': 0.05035051027266792, 'cod_dev': EDUC    0.000635
dtype: float64}, o1={'t_train_test_split': 393.2775212880224, 't_train': 110.75327800540254, 't_inference': 2.285789751447737, 't_ml': 113.03906775685027})
--- 649.1684489250183 seconds ---
