In [52]:
import os
import typing

import pandas as pd
from typing_extensions import Annotated

from flytekit import task, workflow, LaunchPlan
from flytekit.configuration import Config
from flytekit.core.artifact import Artifact
from flytekit.core.context_manager import FlyteContextManager
from flytekit.extend import TypeEngine
from flytekit.remote.remote import FlyteRemote
from flytekit.types.structured.structured_dataset import StructuredDataset

In [74]:
# Build the remote client used by every cell below, defaulting to the
# flytesnacks / development project and domain.
# The config path is resolved under the current user's home directory rather
# than a hardcoded /Users/<name>/... path, so the notebook is not tied to one
# machine account.
# NOTE(review): assumes the admin config is ~/.flyte/local_admin.yaml for
# whoever runs this notebook — confirm.
r = FlyteRemote(
    Config.auto(config_file=os.path.expanduser("~/.flyte/local_admin.yaml")),
    default_project="flytesnacks",
    default_domain="development",
)

### Existing Interaction Model

Ran the `run_gather_data` workflow. Fetch its execution and traverse the nodes.

In [75]:
# Fetch the execution by name; project and domain are not passed here
# (presumably the defaults configured on `r` apply — confirm).
e1 = r.fetch_execution(name="ab7g4qhpvz5c7r5lgh6w")

In [76]:
# Sync e1 with sync_nodes=True; presumably this is what populates
# e1.node_executions, which the next cell indexes. The call's return value is
# left as the last expression so it is shown as the cell output.
r.sync_execution(e1, sync_nodes=True)

<FlyteLiteral id { project: "flytesnacks" domain: "development" name: "ab7g4qhpvz5c7r5lgh6w" } spec { launch_plan { resource_type: LAUNCH_PLAN project: "flytesnacks" domain: "development" name: "ml_demo.run_gather_data" version: "vzCR5rgllTnWtQEuWHLkgQ==" } metadata { system_metadata { } } labels { } annotations { } auth_role { } } closure { outputs { uri: "s3://my-s3-bucket/metadata/propeller/flytesnacks-development-ab7g4qhpvz5c7r5lgh6w/end-node/data/0/outputs.pb" } phase: SUCCEEDED started_at { seconds: 1691685440 nanos: 803249000 } duration { seconds: 36 nanos: 252519000 } created_at { seconds: 1691685440 nanos: 788673000 } updated_at { seconds: 1691685477 nanos: 55768000 } }>

In [77]:
# Look up node "n0" of the synced execution, then read its "o0" output.
n0_execution = e1.node_executions["n0"]
v = n0_execution.outputs.get("o0")

In [78]:
# Open the node output as a pandas DataFrame and load all of it.
opened_dataset = v.open(pd.DataFrame)
df = opened_dataset.all()

In [79]:
# Show the DataFrame loaded from the n0/o0 output as the cell output.
df

Unnamed: 0,sectors,rides
0,SEA,148
1,SAE,370
2,ESA,268
3,EAS,589
4,ASE,206
5,AES,853


### Union Artifact Model

#### Data Access

In [81]:
# Execution name interpolated into the artifact URI in the next cell
# (same value as the execution fetched into e1 earlier).
execid = "ab7g4qhpvz5c7r5lgh6w"

In [82]:
# Build the flyte:// URI for the n0/0/o/o0 path under this execution, then
# fetch the artifact it refers to.
artifact_uri = f"flyte://av0.1/flytesnacks/development/{execid}/n0/0/o/o0"
a = r.get_artifact(artifact_uri)

In [83]:
# Show the fetched artifact (its repr lists name, partitions, tags, literal
# type and literal).
a

Artifact: project=flytesnacks, domain=development, name=ab7g4qhpvz5c7r5lgh6w/n0/0/o/o0, version=ab7g4qhpvz5c7r5lgh6w
  name=ab7g4qhpvz5c7r5lgh6w/n0/0/o/o0
  partitions=None
  tags=None
  literal_type=<FlyteLiteral structured_dataset_type { }>, literal=<FlyteLiteral scalar { structured_dataset { uri: "s3://my-s3-bucket/data/u7/ab7g4qhpvz5c7r5lgh6w-n0-0/eacafaf086db564849d0715d395a8cc6" metadata { structured_dataset_type { format: "parquet" } } } }>)

In [84]:
# Convert the artifact's literal into a pandas DataFrame via the type engine,
# using the current Flyte context. Note that this rebinds `v`.
artifact_literal = a.literal
ctx = FlyteContextManager.current_context()
v = TypeEngine.to_python_value(ctx, artifact_literal, pd.DataFrame)

In [85]:
# Show the value decoded from the artifact literal. `v` was rebound in the
# previous cell, so it no longer refers to the node output fetched earlier.
v

Unnamed: 0,sectors,rides
0,SEA,148
1,SAE,370
2,ESA,268
3,EAS,589
4,ASE,206
5,AES,853


#### Ability to Launch

Kick off a new execution with the fetched artifact, and confirm that it can be used and that querying doesn't fail.
Then kick it off again without any artifact at all.

In [86]:
# Fetch the workflow to launch; the positional arguments are, in order,
# project, domain, workflow name and version.
run_train_model_wf = r.fetch_workflow(
    "flytesnacks",
    "development",
    "ml_demo.run_train_model",
    "b3uzko1W7QFyAcBJjOOKmg==",
)

In [87]:
# Launch the workflow, passing the fetched artifact explicitly as "data".
# The execute call stays the last expression so its result is the cell output.
inputs_with_data = {"region": "SEA", "data": a}
r.execute(run_train_model_wf, inputs=inputs_with_data)

<FlyteLiteral id { project: "flytesnacks" domain: "development" name: "f5ce2704393d0431c974" } spec { launch_plan { resource_type: LAUNCH_PLAN project: "flytesnacks" domain: "development" name: "ml_demo.run_train_model" version: "b3uzko1W7QFyAcBJjOOKmg==" } metadata { system_metadata { } } notifications { } labels { } annotations { } auth_role { } } closure { started_at { } duration { } created_at { seconds: 1691789538 nanos: 712338000 } updated_at { seconds: 1691789538 nanos: 712338000 } }>

Execute without specifying the data. Note that the artifact fetched below should match what the query picked up when this execution was kicked off.

In [88]:
# Launch the same workflow again, this time supplying only "region" and no
# "data" input. The execute call stays the last expression so its result is
# the cell output.
inputs_region_only = {"region": "SEA"}
r.execute(run_train_model_wf, inputs=inputs_region_only)

<FlyteLiteral id { project: "flytesnacks" domain: "development" name: "f8856078e763943e28f2" } spec { launch_plan { resource_type: LAUNCH_PLAN project: "flytesnacks" domain: "development" name: "ml_demo.run_train_model" version: "b3uzko1W7QFyAcBJjOOKmg==" } metadata { system_metadata { } } notifications { } labels { } annotations { } auth_role { } } closure { started_at { } duration { } created_at { seconds: 1691789662 nanos: 61454000 } updated_at { seconds: 1691789662 nanos: 61454000 } }>

In [89]:
# Fetch the ride_count_data artifact by its region and ds query parameters.
# The URI has nothing to interpolate, so it is a plain string literal rather
# than an f-string.
# NOTE(review): ds=2023-08-58 is not a calendar date, but it matches the
# partition value reported for this artifact in the output below, so it is
# kept unchanged.
queried_artifact = r.get_artifact("flyte://av0.1/flytesnacks/development/ride_count_data?region=SEA&ds=2023-08-58")

In [90]:
# Show the artifact returned for the ride_count_data query.
queried_artifact

Artifact: project=flytesnacks, domain=development, name=ride_count_data, version=a9njnpfrmjcvcbtdl5qk
  name=ride_count_data
  partitions={'ds': '2023-08-58', 'region': 'SEA'}
  tags=None
  literal_type=<FlyteLiteral structured_dataset_type { }>, literal=<FlyteLiteral scalar { structured_dataset { uri: "s3://my-s3-bucket/data/nh/a9njnpfrmjcvcbtdl5qk-n0-0/8097b91bed1dadbec5b2edde1164a2ef" metadata { structured_dataset_type { format: "parquet" } } } }>)

In [91]:
# Fetch the my-model artifact; the ":SEA" suffix appears to select it by tag
# (the returned artifact lists tags=['SEA']) — confirm.
# The URI has nothing to interpolate, so it is a plain string literal rather
# than an f-string.
model_artifact = r.get_artifact("flyte://av0.1/flytesnacks/development/my-model:SEA")

In [92]:
# Show the fetched model artifact.
model_artifact

Artifact: project=flytesnacks, domain=development, name=my-model, version=f8856078e763943e28f2
  name=my-model
  partitions=None
  tags=['SEA']
  literal_type=<FlyteLiteral blob { }>, literal=<FlyteLiteral scalar { blob { metadata { type { } } uri: "s3://my-s3-bucket/data/i5/f8856078e763943e28f2-n0-0/15c549ee029c8389db34c3f987109045/ml_demo.py" } }>)