## 1. Set Up Your Environment


In [None]:
import os
import sys

# Directory where you want to clone the repository
repo_dir = 'gretel-mlops'

# Check if the directory exists
if not os.path.exists(repo_dir):
    # Directory does not exist, clone the repository
    !git clone https://github.com/gretelai/gretel-mlops.git
else:
    print(f"The directory '{repo_dir}' already exists.")

# Import Gretel MLOps modules
gretel_mlops_path = os.getcwd() + "/gretel-mlops/src/"
if gretel_mlops_path not in sys.path:
    sys.path.append(gretel_mlops_path)

In [None]:
import boto3
import sagemaker

# SageMaker settings shared by the pipeline cells below

# Region configured on the default boto3 session
boto_session = boto3.Session()
region = boto_session.region_name

# Execution role as reported by the SageMaker SDK
role = sagemaker.get_execution_role()

# Default bucket of a fresh SageMaker session
sagemaker_session = sagemaker.session.Session()
default_bucket = sagemaker_session.default_bucket()

## 2. Read in a dataset config

You can access and download the template configuration files from the [Gretel MLOps GitHub repository](https://github.com/gretelai/gretel-mlops/tree/main/src/gretel_mlops/aws/sagemaker/configs/).


In [None]:
import yaml

# Path of the local YAML dataset config (relative to the working directory)
config_path = "configs/config_stroke.yaml"

# Read the file as UTF-8 explicitly so parsing does not depend on the
# platform's default encoding
with open(config_path, 'r', encoding='utf-8') as file:
    config = yaml.safe_load(file)

# An empty or non-mapping file would otherwise only fail later, when the
# pipeline cells index into `config`; fail here with a clear message instead
if not isinstance(config, dict):
    raise ValueError(
        f"Expected a YAML mapping in '{config_path}', "
        f"got {type(config).__name__}"
    )

# NOTE: uncomment the lines below for Gretel Hybrid usage
# config['gretel']['mode'] = 'hybrid'
# config['gretel']['sink_bucket'] = 'gretel-hybrid-sandbox-sink' # your sink bucket name

# Print the loaded config in block style, preserving the file's key order
yaml.dump(config, sys.stdout, default_flow_style=False, sort_keys=False)

## 3. ML pipeline


### 3.1 Initiate the pipeline

In [None]:
from gretel_mlops.aws.sagemaker.pipeline import get_pipeline

# Both resource names are derived from the dataset name in the config
dataset_name = config['dataset']['name']
model_package_group_name = f"GretelModelPackageGroup-{dataset_name}"
pipeline_name = f"GretelPipeline-{dataset_name}"

# Passed to get_pipeline as `gretel_secret`; presumably the name of the
# secret that stores the Gretel API key -- confirm against get_pipeline
gretel_secret_name = "prod/Gretel/ApiKey"

print(f"Initiating {pipeline_name}")

# Collect the keyword arguments in one place, then build the pipeline object
pipeline_kwargs = {
    "region": region,
    "role": role,
    "default_bucket": default_bucket,
    "model_package_group_name": model_package_group_name,
    "pipeline_name": pipeline_name,
    "gretel_secret": gretel_secret_name,
    "config": config,
}
pipeline = get_pipeline(**pipeline_kwargs)

### 3.2 Start the pipeline

In [None]:
# Create the pipeline, or update it if it already exists, passing the
# execution role as `role_arn`

pipeline.upsert(role_arn=role)

# Start a pipeline execution and keep the returned handle so it can be
# waited on below
train_execution = pipeline.start()

# Wait for the pipeline execution to complete before moving on to the
# results cell
train_execution.wait()

## 4. Inspect results

In [None]:
import json

s3_client = boto3.client("s3")

# Location of the evaluation report: the first processing output of the
# pipeline step at index 3, plus the report file name.
# NOTE(review): index 3 presumably selects the evaluation step -- confirm
# against the pipeline definition, since a reordered pipeline would break this.
s3_path_report = f"{pipeline.steps[3].arguments['ProcessingOutputConfig']['Outputs'][0]['S3Output']['S3Uri']}/evaluation.json"

# Split "s3://<bucket>/<key>" exactly once. Only the leading scheme is
# stripped, so a key that happens to contain "s3://" is left intact.
s3_scheme = "s3://"
if not s3_path_report.startswith(s3_scheme):
    raise ValueError(
        f"Expected an S3 URI starting with '{s3_scheme}', got: {s3_path_report}"
    )
bucket_name, file_key = s3_path_report[len(s3_scheme):].split("/", 1)

# Fetch the file from S3
response = s3_client.get_object(Bucket=bucket_name, Key=file_key)
content = response["Body"].read()

# Parse the JSON content
data = json.loads(content)

# Pretty print the JSON data
pretty_json = json.dumps(data, indent=4)
print(pretty_json)

## 5. Clean up the pipeline

In [None]:
# Delete the pipeline by name through a SageMaker API client.
# `pipeline_name` is the name set in the pipeline-initiation cell.
client = boto3.client("sagemaker")
client.delete_pipeline(PipelineName=pipeline_name)