### CI/CD Pipeline

In [2]:
!pip install -U sagemaker -q

[0m

In [109]:
import sys

import boto3
import sagemaker
from sagemaker.workflow.pipeline_context import PipelineSession

sagemaker_session = sagemaker.session.Session()
region = sagemaker_session.boto_region_name
role = sagemaker.get_execution_role()
pipeline_session = PipelineSession()
default_bucket = sagemaker_session.default_bucket()
model_package_group_name = f"FinalProjectHomePrices"

In [110]:
local_path = "Data/cleaned_data.csv"

s3 = boto3.resource("s3")

base_uri = f"s3://{default_bucket}/ci_cd/Final_Project"
input_data_uri = sagemaker.s3.S3Uploader.upload(
    local_path=local_path,
    desired_s3_uri=base_uri,
)
print(input_data_uri)

s3://sagemaker-us-east-1-004608622582/ci_cd/Final_Project/cleaned_data.csv


In [111]:
local_path_2 = "Data/cleaned_data_no_header.csv"

s3 = boto3.resource("s3")

base_uri = f"s3://{default_bucket}/ci_cd/Final_Project"
input_data_uri_2 = sagemaker.s3.S3Uploader.upload(
    local_path=local_path_2,
    desired_s3_uri=base_uri,
)
print(input_data_uri_2)

s3://sagemaker-us-east-1-004608622582/ci_cd/Final_Project/cleaned_data_no_header.csv


In [112]:
# Define Parameters for Pipeline execution
from sagemaker.workflow.parameters import (
    ParameterInteger,
    ParameterString,
    ParameterFloat,
)

processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)
instance_type = ParameterString(name="TrainingInstanceType", default_value="ml.m5.xlarge")
model_approval_status = ParameterString(
    name="ModelApprovalStatus", default_value="PendingManualApproval"
)
input_data = ParameterString(
    name="InputData",
    default_value=input_data_uri_2, # no header
)
batch_data = ParameterString(
    name="BatchData",
    default_value=input_data_uri_2, # currently the same for now
)
mse_threshold = ParameterFloat(name="MseThreshold", default_value=1000.0)

In [113]:
!mkdir -p Code

### Training Code

In [114]:
%%writefile Code/preprocessing.py
import argparse
import os
import requests
import tempfile

import numpy as np
import pandas as pd

from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder


# Since we get a headerless CSV file, we specify the column names here.
feature_columns_names = [
    "AdjSquareFeet",
    "DistancetoCoast",
    "DistancetoSinkhole",
    "DistancetoFireDepartment",
    "LocationWindSpeed",
    "Terrain",
    "NumberOfBuildings",
    "NumberOfUnits",
    "Age",
]
label_column = "ValueofHome"

feature_columns_dtype = {
    "AdjSquareFeet": np.float64,
    "DistancetoCoast": np.float64,
    "DistancetoSinkhole": np.float64,
    "DistancetoFireDepartment": np.float64,
    "LocationWindSpeed": np.float64,
    "Terrain": str,
    "NumberOfBuildings": np.float64,
    "NumberOfUnits": np.float64,
    "Age": np.float64,
}
label_column_dtype = {"ValueofHome": np.float64}


def merge_two_dicts(x, y):
    z = x.copy()
    z.update(y)
    return z


if __name__ == "__main__":
    base_dir = "/opt/ml/processing"

    df = pd.read_csv(
        f"{base_dir}/input/cleaned_data_no_header.csv",
        header=None,
        names=feature_columns_names + [label_column],
        dtype=merge_two_dicts(feature_columns_dtype, label_column_dtype),
    )
    numeric_features = list(feature_columns_names)
    numeric_features.remove("Terrain") #since this is a string
    numeric_transformer = Pipeline(
        steps=[("imputer", SimpleImputer(strategy="median")), ("scaler", StandardScaler())]
    )

    categorical_features = ["Terrain"]
    categorical_transformer = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="constant", fill_value="missing")),
            ("onehot", OneHotEncoder(handle_unknown="ignore")),
        ]
    )

    preprocess = ColumnTransformer(
        transformers=[
            ("num", numeric_transformer, numeric_features),
            ("cat", categorical_transformer, categorical_features),
        ]
    )

    y = df.pop("ValueofHome")
    X_pre = preprocess.fit_transform(df)
    y_pre = y.to_numpy().reshape(len(y), 1)

    X = np.concatenate((y_pre, X_pre), axis=1)

    np.random.shuffle(X)
    train, validation, test = np.split(X, [int(0.7 * len(X)), int(0.85 * len(X))])

    pd.DataFrame(train).to_csv(f"{base_dir}/train/train.csv", header=False, index=False)
    pd.DataFrame(validation).to_csv(
        f"{base_dir}/validation/validation.csv", header=False, index=False
    )
    pd.DataFrame(test).to_csv(f"{base_dir}/test/test.csv", header=False, index=False)

Overwriting Code/preprocessing.py


In [115]:
from sagemaker.sklearn.processing import SKLearnProcessor


framework_version = "1.2-1"

sklearn_processor = SKLearnProcessor(
    framework_version=framework_version,
    instance_type="ml.m5.xlarge",
    instance_count=processing_instance_count,
    base_job_name="sklearn-abalone-process",
    role=role,
    sagemaker_session=pipeline_session,
)

INFO:sagemaker.image_uris:Defaulting to only available Python version: py3


In [116]:
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep

processor_args = sklearn_processor.run(
    inputs=[
        ProcessingInput(source=input_data, destination="/opt/ml/processing/input"),
    ],
    outputs=[
        ProcessingOutput(output_name="train", source="/opt/ml/processing/train"),
        ProcessingOutput(output_name="validation", source="/opt/ml/processing/validation"),
        ProcessingOutput(output_name="test", source="/opt/ml/processing/test"),
    ],
    code="Code/preprocessing.py",
)

step_process = ProcessingStep(name="Final_Project_Houses", step_args=processor_args)

In [117]:
# Define training step to train a model
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput

model_path = f"s3://{default_bucket}/FinalProjectTrain"
image_uri = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.0-1",
    py_version="py3",
    instance_type="ml.m5.xlarge",
)
xgb_train = Estimator(
    image_uri=image_uri,
    instance_type=instance_type,
    instance_count=1,
    output_path=model_path,
    role=role,
    sagemaker_session=pipeline_session,
)
xgb_train.set_hyperparameters(
    objective="reg:linear",
    num_round=50,
    max_depth=5,
    eta=0.2,
    gamma=4,
    min_child_weight=6,
    subsample=0.7,
)

train_args = xgb_train.fit(
    inputs={
        "train": TrainingInput(
            s3_data=step_process.properties.ProcessingOutputConfig.Outputs["train"].S3Output.S3Uri,
            content_type="text/csv",
        ),
        "validation": TrainingInput(
            s3_data=step_process.properties.ProcessingOutputConfig.Outputs[
                "validation"
            ].S3Output.S3Uri,
            content_type="text/csv",
        ),
    }
)

In [118]:
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep


step_train = TrainingStep(
    name="FinalProjectTrain",
    step_args=train_args,
)

### Define Model Evaluation Step

In [119]:
%%writefile Code/evaluation.py
import json
import pathlib
import pickle
import tarfile

import joblib
import numpy as np
import pandas as pd
import xgboost

from sklearn.metrics import mean_squared_error


if __name__ == "__main__":
    model_path = f"/opt/ml/processing/model/model.tar.gz"
    with tarfile.open(model_path) as tar:
        tar.extractall(path=".")

    model = pickle.load(open("xgboost-model", "rb"))

    test_path = "/opt/ml/processing/test/test.csv"
    df = pd.read_csv(test_path, header=None)

    y_test = df.iloc[:, 0].to_numpy()
    df.drop(df.columns[0], axis=1, inplace=True)

    X_test = xgboost.DMatrix(df.values)

    predictions = model.predict(X_test)

    mse = mean_squared_error(y_test, predictions)
    std = np.std(y_test - predictions)
    report_dict = {
        "regression_metrics": {
            "mse": {"value": mse, "standard_deviation": std},
        },
    }

    output_dir = "/opt/ml/processing/evaluation"
    pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

    evaluation_path = f"{output_dir}/evaluation.json"
    with open(evaluation_path, "w") as f:
        f.write(json.dumps(report_dict))

Overwriting Code/evaluation.py


In [120]:
# Create an instance of a ScriptProcessor processor and use it in the Processing Step
from sagemaker.processing import ScriptProcessor


script_eval = ScriptProcessor(
    image_uri=image_uri,
    command=["python3"],
    instance_type="ml.m5.xlarge",
    instance_count=1,
    base_job_name="script-final_project-eval",
    role=role,
    sagemaker_session=pipeline_session,
)

eval_args = script_eval.run(
    inputs=[
        ProcessingInput(
            source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
            destination="/opt/ml/processing/model",
        ),
        ProcessingInput(
            source=step_process.properties.ProcessingOutputConfig.Outputs["test"].S3Output.S3Uri,
            destination="/opt/ml/processing/test",
        ),
    ],
    outputs=[
        ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"),
    ],
    code="Code/evaluation.py",
)

In [121]:
from sagemaker.workflow.properties import PropertyFile


evaluation_report = PropertyFile(
    name="EvaluationReport", output_name="evaluation", path="evaluation.json"
)
step_eval = ProcessingStep(
    name="FinalProjectEval",
    step_args=eval_args,
    property_files=[evaluation_report],
)

In [122]:
# Define a create model step to create a model
from sagemaker.model import Model

model = Model(
    image_uri=image_uri,
    model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
    sagemaker_session=pipeline_session,
    role=role,
)

In [123]:
# Define ModelStep by providing return values from model.create() as the step arguements
from sagemaker.inputs import CreateModelInput
from sagemaker.workflow.model_step import ModelStep

step_create_model = ModelStep(
    name="FinalProjectCreateModel",
    step_args=model.create(instance_type="ml.m5.large", accelerator_type="ml.eia1.medium"),
)

In [124]:
# Define Transform Step -> Perform batch transform
from sagemaker.transformer import Transformer


transformer = Transformer(
    model_name=step_create_model.properties.ModelName,
    instance_type="ml.m5.xlarge",
    instance_count=1,
    output_path=f"s3://{default_bucket}/FinalProjectTransform",
)

In [125]:
from sagemaker.inputs import TransformInput
from sagemaker.workflow.steps import TransformStep


step_transform = TransformStep(
    name="FinalProjectTransform", transformer=transformer, inputs=TransformInput(data=batch_data)
)

In [126]:
# Define a Register Model Step to Create a Model Package
from sagemaker.model_metrics import MetricsSource, ModelMetrics

model_metrics = ModelMetrics(
    model_statistics=MetricsSource(
        s3_uri="{}/evaluation.json".format(
            step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]
        ),
        content_type="application/json",
    )
)

register_args = model.register(
    content_types=["text/csv"],
    response_types=["text/csv"],
    inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
    transform_instances=["ml.m5.xlarge"],
    model_package_group_name=model_package_group_name,
    approval_status=model_approval_status,
    model_metrics=model_metrics,
)
step_register = ModelStep(name="FinalProjectRegisterModel", step_args=register_args)



In [127]:
# Define a fail step
from sagemaker.workflow.fail_step import FailStep
from sagemaker.workflow.functions import Join

step_fail = FailStep(
    name="FinalProjectMSEFail",
    error_message=Join(on=" ", values=["Execution failed due to MSE >", mse_threshold]),
)

In [128]:
# Define a condition step
from sagemaker.workflow.conditions import ConditionLessThanOrEqualTo
from sagemaker.workflow.condition_step import ConditionStep
from sagemaker.workflow.functions import JsonGet


cond_lte = ConditionLessThanOrEqualTo(
    left=JsonGet(
        step_name=step_eval.name,
        property_file=evaluation_report,
        json_path="regression_metrics.mse.value",
    ),
    right=mse_threshold,
)

step_cond = ConditionStep(
    name="FinalProjectMSECond",
    conditions=[cond_lte],
    if_steps=[step_register, step_create_model, step_transform],
    else_steps=[step_fail],
)

In [129]:
# Define the Pipeline
from sagemaker.workflow.pipeline import Pipeline


pipeline_name = f"FinalProjectPipeline"
pipeline = Pipeline(
    name=pipeline_name,
    parameters=[
        processing_instance_count,
        instance_type,
        model_approval_status,
        input_data,
        batch_data,
        mse_threshold,
    ],
    steps=[step_process, step_train, step_eval, step_cond],
)

In [130]:
# Examine pipeline definition
import json


definition = json.loads(pipeline.definition())
definition



{'Version': '2020-12-01',
 'Metadata': {},
 'Parameters': [{'Name': 'ProcessingInstanceCount',
   'Type': 'Integer',
   'DefaultValue': 1},
  {'Name': 'TrainingInstanceType',
   'Type': 'String',
   'DefaultValue': 'ml.m5.xlarge'},
  {'Name': 'ModelApprovalStatus',
   'Type': 'String',
   'DefaultValue': 'PendingManualApproval'},
  {'Name': 'InputData',
   'Type': 'String',
   'DefaultValue': 's3://sagemaker-us-east-1-004608622582/ci_cd/Final_Project/cleaned_data_no_header.csv'},
  {'Name': 'BatchData',
   'Type': 'String',
   'DefaultValue': 's3://sagemaker-us-east-1-004608622582/ci_cd/Final_Project/cleaned_data_no_header.csv'},
  {'Name': 'MseThreshold', 'Type': 'Float', 'DefaultValue': 1000.0}],
 'PipelineExperimentConfig': {'ExperimentName': {'Get': 'Execution.PipelineName'},
  'TrialName': {'Get': 'Execution.PipelineExecutionId'}},
 'Steps': [{'Name': 'Final_Project_Houses',
   'Type': 'Processing',
   'Arguments': {'ProcessingResources': {'ClusterConfig': {'InstanceType': 'ml.m5.

In [131]:
# Submit pipeline to sagemaker to start execution
pipeline.upsert(role_arn=role)



{'PipelineArn': 'arn:aws:sagemaker:us-east-1:004608622582:pipeline/FinalProjectPipeline',
 'ResponseMetadata': {'RequestId': '1c88fe04-5a92-40eb-b0c1-8c7d75ca0e45',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '1c88fe04-5a92-40eb-b0c1-8c7d75ca0e45',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '88',
   'date': 'Fri, 21 Jun 2024 04:28:17 GMT'},
  'RetryAttempts': 0}}

In [132]:
# Start Pipeline
execution = pipeline.start()

In [133]:
execution.describe()

{'PipelineArn': 'arn:aws:sagemaker:us-east-1:004608622582:pipeline/FinalProjectPipeline',
 'PipelineExecutionArn': 'arn:aws:sagemaker:us-east-1:004608622582:pipeline/FinalProjectPipeline/execution/n6jf1wk3cfe2',
 'PipelineExecutionDisplayName': 'execution-1718944098199',
 'PipelineExecutionStatus': 'Executing',
 'CreationTime': datetime.datetime(2024, 6, 21, 4, 28, 18, 147000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2024, 6, 21, 4, 28, 18, 147000, tzinfo=tzlocal()),
 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:us-east-1:004608622582:user-profile/d-ot3x26nvt9y2/pthai',
  'UserProfileName': 'pthai',
  'DomainId': 'd-ot3x26nvt9y2',
  'IamIdentity': {'Arn': 'arn:aws:sts::004608622582:assumed-role/LabRole/SageMaker',
   'PrincipalId': 'AROAQCEVR773FGX7Y4SZW:SageMaker'}},
 'LastModifiedBy': {'UserProfileArn': 'arn:aws:sagemaker:us-east-1:004608622582:user-profile/d-ot3x26nvt9y2/pthai',
  'UserProfileName': 'pthai',
  'DomainId': 'd-ot3x26nvt9y2',
  'IamIdentity': {'A

In [134]:
execution.wait()

WaiterError: Waiter PipelineExecutionComplete failed: Waiter encountered a terminal failure state: For expression "PipelineExecutionStatus" we matched expected path: "Failed"

In [135]:
execution.list_steps()

[{'StepName': 'FinalProjectTransform',
  'StartTime': datetime.datetime(2024, 6, 21, 4, 35, 49, 326000, tzinfo=tzlocal()),
  'EndTime': datetime.datetime(2024, 6, 21, 4, 41, 28, 527000, tzinfo=tzlocal()),
  'StepStatus': 'Failed',
  'FailureReason': 'ClientError: ClientError: See job logs for more information',
  'Metadata': {'TransformJob': {'Arn': 'arn:aws:sagemaker:us-east-1:004608622582:transform-job/pipelines-n6jf1wk3cfe2-FinalProjectTransfor-EQdoYGn8qn'}},
  'AttemptCount': 1},
 {'StepName': 'FinalProjectCreateModel-CreateModel',
  'StartTime': datetime.datetime(2024, 6, 21, 4, 35, 47, 94000, tzinfo=tzlocal()),
  'EndTime': datetime.datetime(2024, 6, 21, 4, 35, 48, 483000, tzinfo=tzlocal()),
  'StepStatus': 'Succeeded',
  'Metadata': {'Model': {'Arn': 'arn:aws:sagemaker:us-east-1:004608622582:model/pipelines-n6jf1wk3cfe2-FinalProjectCreateMo-aETsFUSAgt'}},
  'AttemptCount': 1},
 {'StepName': 'FinalProjectRegisterModel-RegisterModel',
  'StartTime': datetime.datetime(2024, 6, 21, 

In [136]:
from pprint import pprint


evaluation_json = sagemaker.s3.S3Downloader.read_file(
    "{}/evaluation.json".format(
        step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]
    )
)
pprint(json.loads(evaluation_json))



{'regression_metrics': {'mse': {'standard_deviation': 13.261308849389797,
                                'value': 175.8758338967943}}}


In [137]:
import time
from sagemaker.lineage.visualizer import LineageTableVisualizer


viz = LineageTableVisualizer(sagemaker.session.Session())
for execution_step in reversed(execution.list_steps()):
    print(execution_step)
    display(viz.show(pipeline_execution_step=execution_step))
    time.sleep(5)

{'StepName': 'Final_Project_Houses', 'StartTime': datetime.datetime(2024, 6, 21, 4, 28, 19, 404000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2024, 6, 21, 4, 30, 52, 18000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:004608622582:processing-job/pipelines-n6jf1wk3cfe2-Final-Project-Houses-Bumwv7oWMU'}}, 'AttemptCount': 1}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...340f247e2d7b697875fa269/preprocessing.py,Input,DataSet,ContributedTo,artifact
1,s3://...Final_Project/cleaned_data_no_header.csv,Input,DataSet,ContributedTo,artifact
2,68331...com/sagemaker-scikit-learn:1.2-1-cpu-py3,Input,Image,ContributedTo,artifact
3,s3://...wk3cfe2/Final_Project_Houses/output/test,Output,DataSet,Produced,artifact
4,s3://...2/Final_Project_Houses/output/validation,Output,DataSet,Produced,artifact
5,s3://...k3cfe2/Final_Project_Houses/output/train,Output,DataSet,Produced,artifact


{'StepName': 'FinalProjectTrain', 'StartTime': datetime.datetime(2024, 6, 21, 4, 30, 53, 200000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2024, 6, 21, 4, 33, 12, 139000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:004608622582:training-job/pipelines-n6jf1wk3cfe2-FinalProjectTrain-tcF4cNbO8U'}}, 'AttemptCount': 1}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...2/Final_Project_Houses/output/validation,Input,DataSet,ContributedTo,artifact
1,s3://...k3cfe2/Final_Project_Houses/output/train,Input,DataSet,ContributedTo,artifact
2,68331...naws.com/sagemaker-xgboost:1.0-1-cpu-py3,Input,Image,ContributedTo,artifact
3,s3://...jectTrain-tcF4cNbO8U/output/model.tar.gz,Output,Model,Produced,artifact


{'StepName': 'FinalProjectEval', 'StartTime': datetime.datetime(2024, 6, 21, 4, 33, 13, 6000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2024, 6, 21, 4, 35, 45, 688000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:004608622582:processing-job/pipelines-n6jf1wk3cfe2-FinalProjectEval-cosNOfb7c4'}}, 'AttemptCount': 1}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...37f7d242f2100401d0a282db0d/evaluation.py,Input,DataSet,ContributedTo,artifact
1,s3://...wk3cfe2/Final_Project_Houses/output/test,Input,DataSet,ContributedTo,artifact
2,s3://...jectTrain-tcF4cNbO8U/output/model.tar.gz,Input,Model,ContributedTo,artifact
3,68331...naws.com/sagemaker-xgboost:1.0-1-cpu-py3,Input,Image,ContributedTo,artifact
4,s3://...024-06-21-04-28-16-146/output/evaluation,Output,DataSet,Produced,artifact


{'StepName': 'FinalProjectMSECond', 'StartTime': datetime.datetime(2024, 6, 21, 4, 35, 46, 157000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2024, 6, 21, 4, 35, 46, 420000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'Condition': {'Outcome': 'True'}}, 'AttemptCount': 1}


None

{'StepName': 'FinalProjectRegisterModel-RegisterModel', 'StartTime': datetime.datetime(2024, 6, 21, 4, 35, 47, 93000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2024, 6, 21, 4, 35, 48, 402000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'RegisterModel': {'Arn': 'arn:aws:sagemaker:us-east-1:004608622582:model-package/FinalProjectHomePrices/1'}}, 'AttemptCount': 1}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...jectTrain-tcF4cNbO8U/output/model.tar.gz,Input,Model,ContributedTo,artifact
1,68331...naws.com/sagemaker-xgboost:1.0-1-cpu-py3,Input,Image,ContributedTo,artifact
2,FinalProjectHomePrices-1-PendingManualApproval...,Input,Approval,ContributedTo,action
3,FinalProjectHomePrices-1718944547-aws-model-pa...,Output,ModelGroup,AssociatedWith,context


{'StepName': 'FinalProjectCreateModel-CreateModel', 'StartTime': datetime.datetime(2024, 6, 21, 4, 35, 47, 94000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2024, 6, 21, 4, 35, 48, 483000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'Model': {'Arn': 'arn:aws:sagemaker:us-east-1:004608622582:model/pipelines-n6jf1wk3cfe2-FinalProjectCreateMo-aETsFUSAgt'}}, 'AttemptCount': 1}


None

{'StepName': 'FinalProjectTransform', 'StartTime': datetime.datetime(2024, 6, 21, 4, 35, 49, 326000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2024, 6, 21, 4, 41, 28, 527000, tzinfo=tzlocal()), 'StepStatus': 'Failed', 'FailureReason': 'ClientError: ClientError: See job logs for more information', 'Metadata': {'TransformJob': {'Arn': 'arn:aws:sagemaker:us-east-1:004608622582:transform-job/pipelines-n6jf1wk3cfe2-FinalProjectTransfor-EQdoYGn8qn'}}, 'AttemptCount': 1}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...jectTrain-tcF4cNbO8U/output/model.tar.gz,Input,Model,ContributedTo,artifact
1,68331...naws.com/sagemaker-xgboost:1.0-1-cpu-py3,Input,Image,ContributedTo,artifact
2,s3://...Final_Project/cleaned_data_no_header.csv,Input,DataSet,ContributedTo,artifact
3,s3://...ast-1-004608622582/FinalProjectTransform,Output,DataSet,Produced,artifact


In [138]:
%%html

<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
try {
    els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp
}    
</script>