In [1]:
# One-time environment setup: run these in a shell (or uncomment and prefix with %pip).
# pip install frozendict
# pip install pyarrow
# pip install -e ./streamline_package
# pip install "ray[default]"

In [2]:
# Enable IPython's autoreload extension in mode 2: all imported modules are
# reloaded before each cell runs, so edits to local source files (e.g. a
# package installed in editable mode) take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm

In [4]:
from streamline.delayed import delay_lib, step, Delayed, eval_delay
from streamline import RunEnv, load_runenv, Pipeline, Function, Var
from streamline.utilities import mainify, generate_random_variable_name
import streamline as sl

In [5]:
from sklearn.datasets import load_diabetes

# Load scikit-learn's diabetes regression dataset; return_X_y=True yields the
# (features, target) pair directly instead of a Bunch object.
X, y = load_diabetes(return_X_y=True)

In [6]:
# Single-model pipeline around scikit-learn's LinearRegression. The fit step
# receives the pipeline variables named 'X' and 'y', referenced via Var(...).
pipe = Pipeline()
pipe.add_model(LinearRegression, out_var='mXR7lUyS', fit_args=[Var('X'), Var('y')])

# Run the pipeline with the diabetes arrays bound to those variable names.
# `kw` is deliberately empty; presumably it carries per-step keyword overrides
# such as 'model_fit.cov_type': 'HC1' (TODO confirm against streamline docs).
res_env = pipe.run({'X': X, 'y': y}, kw={})

# The run environment exposes an object under '<out_var>_fit' that supports
# .predict(); display its in-sample predictions.
res_env['mXR7lUyS_fit'].predict(X)

array([206.11667725,  68.07103297, 176.88279035, 166.91445843,
       128.46225834, 106.35191443,  73.89134662, 118.85423042,
       158.80889721, 213.58462442,  97.07481511,  95.10108423,
       115.06915952, 164.67656842, 103.07814257, 177.17487964,
       211.7570922 , 182.84134823, 148.00326937, 124.01754066,
       120.33362197,  85.80068961, 113.1134589 , 252.45225837,
       165.48779206, 147.71997564,  97.12871541, 179.09358468,
       129.05345958, 184.7811403 , 158.71516713,  69.47575778,
       261.50385365, 112.82234716,  78.37318279,  87.66360785,
       207.92114668, 157.87641942, 240.84708073, 136.93257456,
       153.48044608,  74.15426666, 145.62742227,  77.82978811,
       221.07832768, 125.21957584, 142.6029986 , 109.49562511,
        73.14181818, 189.87117754, 157.9350104 , 169.55699526,
       134.1851441 , 157.72539008, 139.11104979,  72.73116856,
       207.82676612,  80.11171342, 104.08335958, 134.57871054,
       114.23552012, 180.67628279,  61.12935368,  98.72

In [7]:
# Single-model pipeline around statsmodels OLS. Note the argument order:
# sm.OLS takes (endog, exog), hence [y, X] rather than [X, y].
pipe = Pipeline()
pipe.add_model(
    sm.OLS, out_var='mXR7lUyS', args=[Var('y'), Var('X')],
    # Also register a predict step that is evaluated on the variable 'X'.
    predict_run=True, predict_args=[Var('X')],
    # Optional score step, currently disabled:
    # score_run=True, score_args=[Var('y')],
)

# statsmodels does not add an intercept on its own, so the design matrix is
# extended with a constant column before being bound to 'X'.
# `kw` is deliberately empty; an example of a possible override would be
# 'model_fit.cov_type': 'HC1' (presumably forwarded to the fit step; verify).
res_env2 = pipe.run({'X': sm.add_constant(X), 'y': y}, kw={})

In [8]:
# Inspect which input variable names the pipeline requires; for the OLS
# pipeline built in the previous cell this displays {'X', 'y'}.
pipe.get_dependencies()

{'X', 'y'}

In [9]:
# Build a predictor object for the 'mXR7lUyS_predict' output, handing it the
# environment returned by the earlier run (presumably so the already-fitted
# model is reused rather than refit; TODO confirm).
mdl = pipe.gen_predictor('mXR7lUyS_predict', run_env=res_env2)
# The pipeline was run with a constant column added to X, so the same
# transformation is applied to the data passed in at prediction time.
mdl.predict(X=sm.add_constant(X))

array([206.11667725,  68.07103297, 176.88279035, 166.91445843,
       128.46225834, 106.35191443,  73.89134662, 118.85423042,
       158.80889721, 213.58462442,  97.07481511,  95.10108423,
       115.06915952, 164.67656842, 103.07814257, 177.17487964,
       211.7570922 , 182.84134823, 148.00326937, 124.01754066,
       120.33362197,  85.80068961, 113.1134589 , 252.45225837,
       165.48779206, 147.71997564,  97.12871541, 179.09358468,
       129.05345958, 184.7811403 , 158.71516713,  69.47575778,
       261.50385365, 112.82234716,  78.37318279,  87.66360785,
       207.92114668, 157.87641942, 240.84708073, 136.93257456,
       153.48044608,  74.15426666, 145.62742227,  77.82978811,
       221.07832768, 125.21957584, 142.6029986 , 109.49562511,
        73.14181818, 189.87117754, 157.9350104 , 169.55699526,
       134.1851441 , 157.72539008, 139.11104979,  72.73116856,
       207.82676612,  80.11171342, 104.08335958, 134.57871054,
       114.23552012, 180.67628279,  61.12935368,  98.72