# Setup

In [5]:
import boto3
import sagemaker

# SageMaker SDK session; used here to look up the default S3 bucket.
sess = sagemaker.Session()
bucket = sess.default_bucket()

# Execution role of the environment this notebook is running in.
role = sagemaker.get_execution_role()

# Region and account id are interpolated into console links further down.
region = boto3.Session().region_name
account_id = boto3.client("sts").get_caller_identity().get("Account")

# Low-level SageMaker client. The Experiment/Trial cells below pass it as
# `sagemaker_boto_client=sm`, so it has to exist before they run.
sm = boto3.client("sagemaker", region_name=region)

ModuleNotFoundError: No module named 'sagemaker'

In [2]:
# Flag persisted with `%store` in the next cell so other notebooks can read it.
# NOTE(review): presumably toggles deploying resources inside a VPC — confirm against the consumers.
vpc_deployment = True

In [3]:
%store vpc_deployment

Stored 'vpc_deployment' (bool)


In [4]:
%store

Stored variables and their in-db values:
vpc_deployment             -> True


# Track the Pipeline as an `Experiment`

In [None]:
import time

# Current Unix time truncated to whole seconds; reused as a suffix for the
# pipeline name here and for the trial name in a later cell.
timestamp = int(time.time())

# The experiment created below is registered under this same name.
pipeline_name = "BERT-pipeline-{}".format(timestamp)

In [None]:
%store pipeline_name

In [None]:
from smexperiments.experiment import Experiment

# Create the experiment under the pipeline's name.
# `sm` is passed as the SageMaker boto3 client and must be defined before this cell runs.
pipeline_experiment = Experiment.create(
    sagemaker_boto_client=sm,
    experiment_name=pipeline_name,
    description="Amazon Customer Reviews BERT Pipeline Experiment",
)

# Keep the name as a plain variable so it can be persisted with `%store`.
pipeline_experiment_name = pipeline_experiment.experiment_name
print("Pipeline experiment name: {}".format(pipeline_experiment_name))

In [None]:
%store pipeline_experiment_name

# Create the `Trial`

In [None]:
from smexperiments.trial import Trial

# Create a trial inside the pipeline experiment, suffixed with the same
# timestamp that was used for the pipeline name.
# `sm` is passed as the SageMaker boto3 client and must be defined before this cell runs.
pipeline_trial = Trial.create(
    sagemaker_boto_client=sm,
    experiment_name=pipeline_experiment_name,
    trial_name="trial-{}".format(timestamp),
)

# Keep the name as a plain variable so it can be persisted with `%store`.
pipeline_trial_name = pipeline_trial.trial_name
print("Trial name: {}".format(pipeline_trial_name))

In [None]:
%store pipeline_trial_name

# List All Artifacts Generated By The Pipeline

Amazon SageMaker ML Lineage Tracking creates and stores information about the steps of a machine learning (ML) workflow from data preparation to model deployment. 

Amazon SageMaker Lineage enables events that happen within SageMaker to be traced via a graph structure. The data simplifies generating reports, making comparisons, or discovering relationships between events. For example, you can easily trace both how a model was generated and where the model was deployed.

SageMaker creates the lineage graph automatically, and you can also create or modify your own graphs directly.

## Key Concepts

* **Lineage Graph** - A connected graph tracing your machine learning workflow end to end.

* **Artifacts** - Represent URI-addressable objects or data. Artifacts are typically inputs or outputs to Actions.

* **Actions** - Represent actions taken, such as a computation, transformation, or job.

* **Contexts** - Provide a method to logically group other entities.

* **Associations** - A directed edge in the lineage graph that links two entities.

* **Lineage Traversal** - Starting from an arbitrary point, trace the lineage graph to discover and analyze relationships between steps in your workflow.

# Embedding Links

In [6]:
# Import from the public `IPython.display` module; importing `display` from
# `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Render a link to the `amazon_reviews_parquet` table in the AWS Glue console.
# The two `{}` placeholders are filled with the region (URL query) and the
# account id (catalog), both of which are set in the setup cell.
display(
    HTML(
        '<b>Review <a target="top" href="https://console.aws.amazon.com/glue/home?region={}#table:catalog={};name=amazon_reviews_parquet;namespace=default">AWS Glue Catalog</a></b>'.format(
            region, account_id
        )
    )
)

NameError: name 'region' is not defined

# Resources

https://docs.aws.amazon.com/sagemaker/latest/dg/pipelines-caching.html

# Release Resources

In [1]:
%%html

<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>

<script>
// Best-effort: programmatically click the hidden button above. Any failure
// (for example, no matching element) is deliberately ignored.
try {
    // Block-scoped so no implicit global is left on `window`.
    const shutdownButtons = document.getElementsByClassName("sm-command-button");
    shutdownButtons[0].click();
}
catch(err) {
    // NoOp
}
</script>