In [16]:
# Load necessary extensions
%load_ext autoreload
%autoreload 2
%load_ext dotenv
%dotenv


# Standard library imports
import sys
import logging
from pathlib import Path

# Third-party library imports
import ipytest
import json

# Local imports
from src.paths import CODE_FOLDER, DATA_DIR, INFERENCE_CODE_FOLDER, PARENT_DIR

sys.path.append(str(PARENT_DIR / CODE_FOLDER))
sys.path.append(str(PARENT_DIR / INFERENCE_CODE_FOLDER))

DATA_FILE_PATH = DATA_DIR / "penguins.csv"

ipytest.autoconfig(raise_on_error=True)

# Prevent SageMaker SDK to log events related to the default
# configuration using the INFO level
logging.getLogger("sagemaker.config").setLevel(logging.ERROR)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [2]:
LOCAL_MODE = True

In [3]:
import os

bucket = os.getenv("BUCKET")
role = os.getenv("ROLE")

S3_LOCATION = f"s3://{bucket}/penguins"

In [20]:
import sagemaker
from sagemaker.workflow.pipeline_context import PipelineSession, LocalPipelineSession

pipeline_session = PipelineSession(default_bucket=bucket) if not LOCAL_MODE else None

if LOCAL_MODE:
    config = {
        "session": LocalPipelineSession(default_bucket=bucket),
        "instance_type": "local",
        "image": None
    }
else:
    config = {
        "session": pipeline_session,
        "instance_type": "ml.m5.xlarge",
        "image": None
    }

config["framework_version"] = "2.11"
config["py_version"] = "py39"

In [5]:
import boto3

sagemaker_session = sagemaker.Session()
sagemaker_client = boto3.client("sagemaker")
iam_client = boto3.client("iam")
region = boto3.Session().region_name

By default, only the specified columns in transformers are transformed and combined in the output, and the non-specified columns are dropped. (default of 'drop')

## Creating the Preprocessing Script

In [6]:
%%writefile {CODE_FOLDER}/preprocessor.py
"""
This module preprocesses data for machine learning tasks. It includes functions to read data from CSV files,
split the data into training, validation, and test sets, save baseline data, transform data, and save the
processed data and models.
"""

import os
import tarfile
import tempfile
from pathlib import Path

# Import statements...
from typing import Tuple

import joblib
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler


def preprocess(base_directory: str) -> None:
    """
    Preprocess the data by loading, splitting, transforming, saving the splits, and saving the model.

    Args:
        base_directory: The base directory where the input data and outputs will be managed.
    """
    # 1. Load supplied data
    df = _read_data_from_csv_files(base_directory)

    # 2. Split data into train and test sets
    df_train, df_validation, df_test = _split_data(df)

    # 3. Save baseline data
    _save_baselines(base_directory, df_train, df_test)

    # 3. Transform the train and test sets
    target_transformer = ColumnTransformer(
        transformers=[("species", OrdinalEncoder(), [0])]
    )

    numeric_transformer = make_pipeline(
        SimpleImputer(strategy="mean"), StandardScaler()
    )

    categorical_transformer = make_pipeline(
        SimpleImputer(strategy="most_frequent"), OneHotEncoder()
    )

    features_transformer = ColumnTransformer(
        transformers=[
            (
                "numeric",
                numeric_transformer,
                make_column_selector(dtype_exclude="object"),
            ),
            ("categorical", categorical_transformer, ["island"]),
        ]
    )

    y_train = target_transformer.fit_transform(
        np.array(df_train.species.values).reshape(-1, 1)
    )
    y_validation = target_transformer.transform(
        np.array(df_validation.species.values).reshape(-1, 1)
    )
    y_test = target_transformer.transform(
        np.array(df_test.species.values).reshape(-1, 1)
    )

    df_train = df_train.drop(columns=["species"], axis=1)
    df_validation = df_validation.drop(columns=["species"], axis=1)
    df_test = df_test.drop(columns=["species"], axis=1)

    X_train = features_transformer.fit_transform(df_train)
    X_validation = features_transformer.transform(df_validation)
    X_test = features_transformer.transform(df_test)

    # 4. Save the train and test splits
    _save_splits(
        base_directory, X_train, y_train, X_validation, y_validation, X_test, y_test
    )

    # 5. Save the model (transformers) in tar.gz format
    _save_model(base_directory, target_transformer, features_transformer)


def _read_data_from_csv_files(base_directory: str) -> pd.DataFrame:
    """
    Read and concatenate data from CSV files located in the input directory.

    Args:
        base_directory: The directory where CSV files are located.

    Returns:
        A DataFrame containing the concatenated data.
    """
    input_directory = Path(base_directory) / "input"
    files = [file for file in input_directory.glob("*.csv")]

    if len(files) == 0:
        raise ValueError(f"No csv files found in {input_directory}")

    raw_data = [pd.read_csv(file) for file in files]
    df = pd.concat(raw_data)

    # Shuffle the data
    return df.sample(frac=1, random_state=42)


def _split_data(df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Split the DataFrame into training, validation, and test sets.

    Args:
        df: The DataFrame to be split.

    Returns:
        A tuple containing the training, validation, and test DataFrames.
    """
    df_train, temp = train_test_split(df, test_size=0.3)
    df_validation, df_test = train_test_split(temp, test_size=0.5)

    return df_train, df_validation, df_test


def _save_baselines(
    base_directory: str, df_train: pd.DataFrame = None, df_test: pd.DataFrame = None
) -> None:
    """
    Save baseline versions of the training and test data sets.

    Args:
        base_directory: Directory where the baseline data will be saved.
        df_train: Training data DataFrame.
        df_test: Test data DataFrame.
    """
    for split, data in [("train", df_train), ("test", df_test)]:
        baseline_path = Path(base_directory) / f"{split}-baseline"
        baseline_path.mkdir(parents=True, exist_ok=True)

        df = data.copy().dropna()

        # Save header only for the train baseline
        header = True if split == "train" else False
        df.to_csv(baseline_path / f"{split}-baseline.csv", index=False, header=header)


def _save_splits(
    base_directory: str,
    X_train: np.ndarray,
    y_train: np.ndarray,
    X_validation: np.ndarray,
    y_validation: np.ndarray,
    X_test: np.ndarray,
    y_test: np.ndarray,
) -> None:
    """
    Save the training, validation, and test sets after concatenating features with their respective targets.

    Args:
        base_directory: Directory where the data splits will be saved.
        X_train: Features of the training set.
        y_train: Target of the training set.
        X_validation: Features of the validation set.
        y_validation: Target of the validation set.
        X_test: Features of the test set.
        y_test: Target of the test set.
    """
    train = np.concatenate((X_train, y_train), axis=1)
    validation = np.concatenate((X_validation, y_validation), axis=1)
    test = np.concatenate((X_test, y_test), axis=1)

    train_path = Path(base_directory) / "train"
    validation_path = Path(base_directory) / "validation"
    test_path = Path(base_directory) / "test"

    train_path.mkdir(parents=True, exist_ok=True)
    validation_path.mkdir(parents=True, exist_ok=True)
    test_path.mkdir(parents=True, exist_ok=True)

    pd.DataFrame(train).to_csv(train_path / "train.csv", index=False, header=False)
    pd.DataFrame(validation).to_csv(
        validation_path / "validation.csv", index=False, header=False
    )
    pd.DataFrame(test).to_csv(test_path / "test.csv", index=False, header=False)


def _save_model(base_directory: str, target_transformer, features_transformers) -> None:
    """
    Save the preprocessing model (transformers) in tar.gz format.

    Args:
        base_directory: Directory where the model will be saved.
        target_transformer: The transformer used for the target variable.
        features_transformers: The transformers used for the feature variables.
    """
    with tempfile.TemporaryDirectory() as directory:
        joblib.dump(target_transformer, os.path.join(directory, "target.joblib"))
        joblib.dump(features_transformers, os.path.join(directory, "features.joblib"))

        model_path = Path(base_directory) / "model"
        model_path.mkdir(parents=True, exist_ok=True)

        with tarfile.open(f"{str(model_path / 'model.tar.gz')}", "w:gz") as tar:
            tar.add(os.path.join(directory, "target.joblib"), arcname="target.joblib")
            tar.add(
                os.path.join(directory, "features.joblib"), arcname="features.joblib"
            )


if __name__ == "__main__":
    preprocess(base_directory="/opt/ml/processing")


Overwriting /Users/carlos/Projects/penguin-classifier/src/preprocessor.py


In [7]:
%%ipytest -s

import pytest
import pandas as pd
import tempfile
from pathlib import Path
import shutil
import tarfile
from preprocessor import preprocess


@pytest.fixture(scope="function", autouse=False)
def directory():
    directory = tempfile.mkdtemp()
    
    input_directory = Path(directory) / "input"
    input_directory.mkdir(parents=True, exist_ok=True)

    shutil.copy2(DATA_FILE_PATH, input_directory / "data.csv")

    directory = Path(directory)
    preprocess(directory)

    yield directory
    
    shutil.rmtree(directory)


def test_preprocess_generate_baselines(directory):
    output_directories = os.listdir(directory)

    assert "train-baseline" in output_directories
    assert "test-baseline" in output_directories


def test_preprocess_generate_data_splits(directory):
    output_directories = os.listdir(directory)

    assert "train" in output_directories
    assert "validation" in output_directories
    assert "test" in output_directories


def test_preprocess_creates_two_models(directory):
    model_path = directory / "model"

    tar = tarfile.open(model_path / "model.tar.gz", "r:gz")

    assert "target.joblib" in tar.getnames()
    assert "features.joblib" in tar.getnames()


def tests_splits_are_transformed(directory):
    train = pd.read_csv(directory / "train" / "train.csv", header=None)
    validation = pd.read_csv(directory / "validation" / "validation.csv", header=None)
    test = pd.read_csv(directory / "test" / "test.csv", header=None)

    # The number of features should be 7
    # * 3 - island (one-hot encoded)
    # * 1 - culmen_length_mm
    # * 1 - culmen_depth_mm
    # * 1 - flipper_length_mm
    # * 1 - body_mass_g
    num_features = 7

    # The number of targets should be 1
    assert train.shape[1] == num_features + 1
    assert validation.shape[1] == num_features + 1
    assert test.shape[1] == num_features + 1
    

[32m.[0m[32m.[0m[32m.[0m[32m.[0m
[32m[32m[1m4 passed[0m[32m in 0.17s[0m[0m


### Setting up the Processing Step

In [8]:
from sagemaker.workflow.steps import CacheConfig

cache_config = CacheConfig(enable_caching=True, expire_after="P15D") # type: ignore

In [9]:
from sagemaker.workflow.parameters import ParameterString

dataset_location = ParameterString(
    name="dataset_location",
    default_value=f"{S3_LOCATION}/data",
)

In [10]:
from sagemaker.sklearn.processing import SKLearnProcessor

processor = SKLearnProcessor(
    framework_version="1.2-1",
    base_job_name="preprocess-data",
    instance_type=config["instance_type"],
    instance_count=1,
    role=role,
    sagemaker_session=config["session"],
)

In [11]:
from sagemaker.workflow.steps import ProcessingStep
from sagemaker.processing import ProcessingInput, ProcessingOutput

preprocessing_step = ProcessingStep(
    name="preprocess-data",
    step_args=processor.run(
        code=f"{CODE_FOLDER}/preprocessor.py",
        inputs=[
            ProcessingInput(
                source=dataset_location,
                destination="/opt/ml/processing/input"
            ),
        ],
        outputs=[
            ProcessingOutput(
                output_name="train",
                source="/opt/ml/processing/train",
                destination=f"{S3_LOCATION}/preprocessing/train",
            ),
            ProcessingOutput(
                output_name="validation",
                source="/opt/ml/processing/validation",
                destination=f"{S3_LOCATION}/preprocessing/validation",
            ),
            ProcessingOutput(
                output_name="test",
                source="/opt/ml/processing/test",
                destination=f"{S3_LOCATION}/preprocessing/test",
            ),
            ProcessingOutput(
                output_name="model",
                source="/opt/ml/processing/model",
                destination=f"{S3_LOCATION}/preprocessing/model",
            ),
            ProcessingOutput(
                output_name="train-baseline",
                source="/opt/ml/processing/train-baseline",
                destination=f"{S3_LOCATION}/preprocessing/train-baseline",
            ),
            ProcessingOutput(
                output_name="test-baseline",
                source="/opt/ml/processing/test-baseline",
                destination=f"{S3_LOCATION}/preprocessing/test-baseline",
            ),
        ],
    ),
    cache_config=cache_config,
)



### Creating the Pipeline

In [12]:
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.pipeline_definition_config import PipelineDefinitionConfig

pipeline_definition_config = PipelineDefinitionConfig(
    use_custom_job_prefix=True
)

preprocessing_pipeline = Pipeline(
    name="preprocessing-pipeline",
    parameters=[dataset_location],
    steps=[preprocessing_step],
    sagemaker_session=config["session"],
    pipeline_definition_config=pipeline_definition_config,
)

preprocessing_pipeline.upsert(role_arn=role)

{'PipelineArn': 'preprocessing-pipeline'}

In [13]:
# %%script false --no-raise-error

preprocessing_pipeline.start()



Starting execution for pipeline preprocessing-pipeline. Execution ID is b79c70ff-11bb-4273-b1d8-316854a38b0f




Starting pipeline step: 'preprocess-data'


INFO:sagemaker.local.image:'Docker Compose' found using Docker CLI.
INFO:sagemaker.local.local_session:Starting processing job
INFO:sagemaker.local.image:Using the long-lived AWS credentials found in session
INFO:sagemaker.local.image:docker compose file: 
networks:
  sagemaker-local:
    name: sagemaker-local
services:
  algo-1-7gha9:
    container_name: ej2dydfzed-algo-1-7gha9
    entrypoint:
    - python3
    - /opt/ml/processing/input/code/preprocessor.py
    environment:
    - '[Masked]'
    - '[Masked]'
    image: 257758044811.dkr.ecr.us-east-2.amazonaws.com/sagemaker-scikit-learn:1.2-1-cpu-py3
    networks:
      sagemaker-local:
        aliases:
        - algo-1-7gha9
    stdin_open: true
    tty: true
    volumes:
    - /private/var/folders/ft/p0zj8zms6wldqq4t_fyn6w300000gn/T/tmpbqijv945/algo-1-7gha9/output:/opt/ml/output
    - /private/var/folders/ft/p0zj8zms6wldqq4t_fyn6w300000gn/T/tmpbqijv945/algo-1-7gha9/config:/opt/ml/config
    - /private/var/folders/ft/p0zj8zms6wldqq4t_

 Container ej2dydfzed-algo-1-7gha9  Creating
 Container ej2dydfzed-algo-1-7gha9  Created
Attaching to ej2dydfzed-algo-1-7gha9
ej2dydfzed-algo-1-7gha9 exited with code 0
Aborting on container exit...
 Container ej2dydfzed-algo-1-7gha9  Stopping
 Container ej2dydfzed-algo-1-7gha9  Stopped
===== Job Complete =====
Pipeline step 'preprocess-data' SUCCEEDED.
Pipeline execution b79c70ff-11bb-4273-b1d8-316854a38b0f SUCCEEDED


<sagemaker.local.entities._LocalPipelineExecution at 0x148ae2210>

## Building Models and the Training Pipeline

In [14]:
%%writefile {CODE_FOLDER}/train.py

import argparse
import os

import numpy as np
import pandas as pd

from pathlib import Path
from sklearn.metrics import accuracy_score

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD


def train(model_directory: str, train_path: str, validation_path: str, epochs: int =50, batch_size: int=32) -> None:
    """
    Train a model using the training and validation data sets.

    Args:
        model_directory: Directory where the model will be saved.
        train_path: Path to the training data set.
        validation_path: Path to the validation data set.
        epochs: Number of epochs to train the model.
        batch_size: Batch size used during training.
    """
    # Load training and validation data sets
    X_train = pd.read_csv(Path(train_path) / "train.csv")
    y_train = X_train[X_train.columns[-1]]
    X_train.drop(X_train.columns[-1], axis=1, inplace=True)

    X_validation = pd.read_csv(Path(validation_path) / "validation.csv")
    y_validation = X_validation[X_validation.columns[-1]]
    X_validation.drop(X_validation.columns[-1], axis=1, inplace=True)

    # Build a Sequential model
    model = Sequential([
        Dense(10, activation="relu", input_shape=(X_train.shape[1],)),
        Dense(8, activation="relu"),
        Dense(3, activation="softmax")
    ])

    model.compile(
        optimizer=SGD(learning_rate=0.01),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )
    
    model.fit(
        X_train,
        y_train,
        validation_data=(X_validation, y_validation),
        epochs=epochs,
        batch_size=batch_size,
        verbose=2
    )

    # Make predictions
    predictions = model.predict(X_validation)
    predictions = np.argmax(predictions, axis=-1)
    print(f"Validation accuracy: {accuracy_score(y_validation, predictions)}")

    # Save the model
    model_filepath = Path(model_directory) / "001"
    model.save(model_filepath)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--epochs", type=int, default=50)
    parser.add_argument("--batch_size", type=int, default=32)

    args, _ = parser.parse_known_args()

    train(
        model_directory=os.environ["SM_MODEL_DIR"],
        train_path=os.environ["SM_CHANNEL_TRAIN"],
        validation_path=os.environ["SM_CHANNEL_VALIDATION"],

        epochs=args.epochs,
        batch_size=args.batch_size
    )

Writing /Users/carlos/Projects/penguin-classifier/src/train.py


In [17]:
%%ipytest -s

import os
import shutil
import tarfile
import pytest
import tempfile
import joblib

from preprocessor import preprocess
from train import train


@pytest.fixture(scope="function", autouse=False)
def directory():
    directory = tempfile.mkdtemp()
    input_directory = Path(directory) / "input"
    input_directory.mkdir(parents=True, exist_ok=True)
    shutil.copy2(DATA_FILE_PATH, input_directory / "data.csv")
    
    directory = Path(directory)
    
    preprocess(base_directory=directory)
    train(
        model_directory=directory / "model",
        train_path=directory / "train", 
        validation_path=directory / "validation",
        epochs=1
    )
    
    yield directory
    
    shutil.rmtree(directory)


def test_train_saves_a_folder_with_model_assets(directory):
    output = os.listdir(directory / "model")
    assert "001" in output
    
    assets = os.listdir(directory / "model" / "001")
    assert "saved_model.pb" in assets

8/8 - 1s - loss: 1.1733 - accuracy: 0.3849 - val_loss: 1.1068 - val_accuracy: 0.4314 - 894ms/epoch - 112ms/step
Validation accuracy: 0.43137254901960786


INFO:tensorflow:Assets written to: /var/folders/ft/p0zj8zms6wldqq4t_fyn6w300000gn/T/tmp3lvhjb76/model/001/assets


[32m.[0m
[32m[32m[1m1 passed[0m[32m in 1.88s[0m[0m


### Setting up the Training Step

In [21]:
from sagemaker.tensorflow import TensorFlow

estimator = TensorFlow(
    base_job_name="training",
    entry_point=f"{CODE_FOLDER}/train.py",
    hyperparameters={
        "epochs": 50,
        "batch_size": 32
    },
    metrics_definitions=[
        {"Name": "loss", "Regex": "loss: ([0-9\\.]+)"},
        {"Name": "accuracy", "Regex": "accuracy: ([0-9\\.]+)"},
        {"Name": "val_loss", "Regex": "val_loss: ([0-9\\.]+)"},
        {"Name": "val_accuracy", "Regex": "val_accuracy: ([0-9\\.]+)"}
    ],
    image_uri=config["image"],
    framework_version=config["framework_version"],
    py_version=config["py_version"],
    instance_type=config["instance_type"],
    instance_count=1,
    disable_profiler=True,
    sagemaker_session=config["session"],
    role=role,
)

In [38]:
from sagemaker.workflow.steps import TrainingStep
from sagemaker.inputs import TrainingInput

train_model_step = TrainingStep(
    name="train-model",
    step_args=estimator.fit(
        inputs={
            "train": TrainingInput(
                s3_data=preprocessing_step.properties.ProcessingOutputConfig.Outputs[
                    "train"
                ].S3Output.S3Uri,
                content_type="text/csv"
            ),
            "validation": TrainingInput(
                s3_data=preprocessing_step.properties.ProcessingOutputConfig.Outputs[
                    "validation"
                ].S3Output.S3Uri,
                content_type="text/csv"
            ),
        }
    ),
    cache_config=cache_config,
)



In [39]:
USE_TUNING_STEP = False and not LOCAL_MODE

In [40]:
from sagemaker.tuner import HyperparameterTuner
from sagemaker.parameter import IntegerParameter

tuner = HyperparameterTuner(
    estimator,
    objective_metric_name="val_accuracy",
    objective_type="Maximize",
    hyperparameter_ranges={
        "epochs": IntegerParameter(10, 50),
    },
    metric_definitions=[{"Name": "val_accuracy", "Regex": "val_accuracy: ([0-9\\.]+)"}],
    max_jobs=3,
    max_parallel_jobs=3,
)

In [41]:
from sagemaker.workflow.steps import TuningStep

tune_model_step = TuningStep(
    name="tune-model",
    step_args=tuner.fit(
        inputs={
            "train": TrainingInput(
                s3_data=preprocessing_step.properties.ProcessingOutputConfig.Outputs[
                    "train"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=preprocessing_step.properties.ProcessingOutputConfig.Outputs[
                    "validation"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    ),
    cache_config=cache_config,
)

In [43]:
training_pipeline = Pipeline(
    name="training-pipeline",
    parameters=[dataset_location],
    steps=[
        preprocessing_step,
        tune_model_step if USE_TUNING_STEP else train_model_step,
    ],
    pipeline_definition_config=pipeline_definition_config,
    sagemaker_session=config["session"],
)

training_pipeline.upsert(role_arn=role)

{'PipelineArn': 'training-pipeline'}

In [44]:
# %%script false --no-raise-error

training_pipeline.start()

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.


Starting execution for pipeline training-pipeline. Execution ID is 74ff83f6-3a19-494c-873c-75b6285d5977


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.


Starting pipeline step: 'preprocess-data'


INFO:sagemaker.local.image:'Docker Compose' found using Docker CLI.
INFO:sagemaker.local.local_session:Starting processing job
INFO:sagemaker.local.image:Using the long-lived AWS credentials found in session
INFO:sagemaker.local.image:docker compose file: 
networks:
  sagemaker-local:
    name: sagemaker-local
services:
  algo-1-sm9z4:
    container_name: 7sl65b6h3l-algo-1-sm9z4
    entrypoint:
    - python3
    - /opt/ml/processing/input/code/preprocessor.py
    environment:
    - '[Masked]'
    - '[Masked]'
    image: 257758044811.dkr.ecr.us-east-2.amazonaws.com/sagemaker-scikit-learn:1.2-1-cpu-py3
    networks:
      sagemaker-local:
        aliases:
        - algo-1-sm9z4
    stdin_open: true
    tty: true
    volumes:
    - /private/var/folders/ft/p0zj8zms6wldqq4t_fyn6w300000gn/T/tmpek9y26g5/algo-1-sm9z4/output:/opt/ml/output
    - /private/var/folders/ft/p0zj8zms6wldqq4t_fyn6w300000gn/T/tmpek9y26g5/algo-1-sm9z4/config:/opt/ml/config
    - /private/var/folders/ft/p0zj8zms6wldqq4t_

 Container 7sl65b6h3l-algo-1-sm9z4  Creating
 Container 7sl65b6h3l-algo-1-sm9z4  Created
Attaching to 7sl65b6h3l-algo-1-sm9z4
7sl65b6h3l-algo-1-sm9z4 exited with code 0
Aborting on container exit...
 Container 7sl65b6h3l-algo-1-sm9z4  Stopping
 Container 7sl65b6h3l-algo-1-sm9z4  Stopped
===== Job Complete =====
Pipeline step 'preprocess-data' SUCCEEDED.
Starting pipeline step: 'train-model'


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker.local.image:'Docker Compose' found using Docker CLI.
INFO:sagemaker.local.local_session:Starting training job
INFO:sagemaker.local.image:Using the long-lived AWS credentials found in session
INFO:sagemaker.local.image:docker compose file: 
networks:
  sagemaker-local:
    name: sagemaker-local
services:
  algo-1-6qyoe:
    command: train
    container_name: k7nk09vkpw-algo-1-6qyoe
    environment:
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    image: 763104351884.dkr.ecr.us-east-2.amazonaws.com/tensorflow-training:2.11-cpu-py39
    networks:
      sagemaker-local:
        aliases:
        - algo-1-6qyoe
    stdin_open: true
    tty: true
    volumes:
    - /private/var/folders/ft/p0zj8zms6wldqq4t_fyn6w300000gn/T/tmpsvwwmy6a/algo-1-6qyoe/output:/opt/ml/output
    - /private/var/folders/ft/p0zj8zms6wldqq4t_fyn6w300000gn/T/tmpsvwwmy6a/al

Login Succeeded
