In [None]:
# Install required libraries into the environment of the running kernel.
# NOTE(review): only sagemaker is version-pinned here; boto3 and pandas are
# installed at whatever version pip resolves — consider pinning them too so
# the notebook is reproducible.
%pip -q install sagemaker==2.219.0 boto3 pandas

import boto3
import sagemaker
import os
import pandas as pd
from sagemaker.estimator import Estimator

In [None]:
# SageMaker session: the source of the region here and of the default bucket
# and upload helper used further down.
session = sagemaker.Session()
region = session.boto_region_name

# AWS account ID of the caller; it is part of the ECR registry hostname.
sts_client = boto3.client("sts")
caller_identity = sts_client.get_caller_identity()
account = caller_identity["Account"]

# IAM role the training job will run under.
role = sagemaker.get_execution_role()

In [None]:
# ECR repository that holds the custom training image.
repository = "bank-marketing-custom"
ecr = boto3.client("ecr", region_name=region)

# Create the repository on first run. Re-running the cell is harmless:
# an already-existing repository is the expected steady state.
try:
    ecr.create_repository(repositoryName=repository)
except ecr.exceptions.RepositoryAlreadyExistsException:
    pass

# Fully qualified image reference:
#   <account>.dkr.ecr.<region>.amazonaws.com/<repository>:latest
registry_host = f"{account}.dkr.ecr.{region}.amazonaws.com"
ecr_uri = f"{registry_host}/{repository}:latest"
print("ECR URI:", ecr_uri)

In [None]:
!aws ecr get-login-password --region {region} | docker login --username AWS --password-stdin {account}.dkr.ecr.{region}.amazonaws.com

In [None]:
!cd sagemaker/custom_container && docker build -t {repository}:latest .
!docker tag {repository}:latest {ecr_uri}
!docker push {ecr_uri}

In [None]:
# --- Load -------------------------------------------------------------------
# The source file is semicolon-delimited.
DATA_PATH = "data/bank-additional.csv"
LABEL_MAP = {'yes': 1, 'no': 0}

df = pd.read_csv(DATA_PATH, sep=";")

# --- Encode the target ------------------------------------------------------
# Series.map yields NaN for any value missing from LABEL_MAP. Validate before
# overwriting the column so a malformed label fails loudly here instead of
# being written to train.csv and shipped to the training job.
encoded_y = df['y'].map(LABEL_MAP)
unmapped_labels = df.loc[encoded_y.isna(), 'y'].unique().tolist()
if unmapped_labels:
    raise ValueError(
        f"Unexpected values in target column 'y': {unmapped_labels}; "
        f"expected one of {sorted(LABEL_MAP)}"
    )
df['y'] = encoded_y

# --- Persist and upload -----------------------------------------------------
# Written to the working directory without the pandas index column.
df.to_csv("train.csv", index=False)

bucket = session.default_bucket()
prefix = "bank-marketing-custom"

# Upload under <prefix>/input/train in the session's default bucket; the
# returned S3 URI is what the training job is pointed at.
s3_train = session.upload_data("train.csv", bucket=bucket, key_prefix=f"{prefix}/input/train")
print("S3 Train URI:", s3_train)

In [None]:
# S3 location passed to the estimator as its output path.
output_s3_uri = f"s3://{bucket}/{prefix}/output"

# Environment variables handed to the training container.
# NOTE(review): presumably the container's entry point reads these to find the
# model directory and the "train" channel — confirm against the image's code.
container_env = {
    "SM_MODEL_DIR": "/opt/ml/model",
    "SM_CHANNEL_TRAIN": "/opt/ml/input/data/train",
}

# Estimator backed by the custom image pushed to ECR.
est = Estimator(
    image_uri=ecr_uri,
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    sagemaker_session=session,
    output_path=output_s3_uri,
    environment=container_env,
)

# Start training, exposing the uploaded CSV as the "train" input channel.
train_channels = {"train": s3_train}
est.fit(train_channels)
