In [33]:
import boto3
import sagemaker
import time
from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.sklearn.processing import SKLearnProcessor


# One boto3 session is shared by the SageMaker SDK session and both low-level
# clients, so everything below resolves the same credentials and region
# instead of building a fresh default session for every call.
boto_session = boto3.Session()
region = boto_session.region_name

sess = sagemaker.Session(boto_session=boto_session)
role = sagemaker.get_execution_role()

# Compute settings consumed by the SKLearnProcessor created later in the notebook.
processing_instance_type = "ml.t3.medium"
processing_instance_count = 1

print(f"{role} {region}")

# Low-level clients: `sm` is handed to the Experiment/Trial helpers below.
sm = boto_session.client(service_name="sagemaker", region_name=region)
s3 = boto_session.client(service_name="s3", region_name=region)

arn:aws:iam::354918397522:role/service-role/AmazonSageMaker-ExecutionRole-20241215T191004 eu-north-1


In [34]:
!pip install sagemaker-experiments



In [35]:
# List the bucket to confirm the raw input object (historical.csv, used as the
# processing job's input below) is present before launching the job.
!aws s3 ls s3://dkohlsdorf-experiments

2024-12-15 19:51:43   51253380 historical.csv


In [36]:
# Epoch seconds are appended to the experiment name so that runs started in
# different seconds get different names.
timestamp = int(time.time())

# Register the experiment through the SageMaker client created earlier.
experiment = Experiment.create(
    sagemaker_boto_client=sm,
    description="Demand Prediction",
    experiment_name="Daniels-Demand-{}".format(timestamp),
)

# Keep the generated name around; the trial and the job config refer to it.
experiment_name = experiment.experiment_name
print("Experiment name: {}".format(experiment_name))


Experiment name: Daniels-Demand-1734294252


In [37]:
# Re-sample the clock so the trial name carries its own epoch-second suffix.
timestamp = int(time.time())

# Create the trial inside the experiment registered in the previous cell.
trial = Trial.create(
    sagemaker_boto_client=sm,
    experiment_name=experiment_name,
    trial_name="trial-{}".format(timestamp),
)

# The name is reused when building the experiment config for the job.
trial_name = trial.trial_name
print("Trial name: {}".format(trial_name))

Trial name: trial-1734294253


In [38]:
# Keyword arguments for the scikit-learn processor, gathered in one place so
# the configuration can be read (and tweaked) at a glance.
processor_settings = {
    "framework_version": "0.23-1",
    "role": role,
    "instance_type": processing_instance_type,
    "instance_count": processing_instance_count,
    # Expose the notebook's region to the code running inside the container.
    "env": {"AWS_DEFAULT_REGION": region},
    "max_runtime_in_seconds": 7200,
}

processor = SKLearnProcessor(**processor_settings)

INFO:sagemaker.image_uris:Defaulting to only available Python version: py3


In [39]:
# Links the processing job to the experiment and trial created above; this
# mapping is passed to `processor.run(...)` as `experiment_config`.
experiment_config = dict(
    ExperimentName=experiment_name,
    TrialName=trial_name,
    TrialComponentDisplayName="prepare",
)

In [40]:
# Launch the preprocessing script as a SageMaker processing job.
# The raw CSV is downloaded from S3 into the container's input directory and
# everything written under the output directory is uploaded when the job ends.
processor.run(
    code="preprocessing.py",
    inputs=[
        ProcessingInput(
            input_name="raw-input-data",
            source='s3://dkohlsdorf-experiments/historical.csv',
            destination="/opt/ml/processing/input/data/",
            # NOTE(review): the source is a single object and instance_count is 1,
            # so sharding by S3 key presumably has no effect here — confirm.
            s3_data_distribution_type="ShardedByS3Key",
        )
    ],
    outputs=[
        ProcessingOutput(
            output_name="output-data", s3_upload_mode="EndOfJob", source="/opt/ml/processing/output/"
        ),
    ],
    # Each flag and its value must be separate list elements. The original was
    # missing the comma after "--output", so Python concatenated the two adjacent
    # literals into the single argument "--output/opt/ml/processing/output/x_notebook.csv".
    arguments=[
        "--input",
        "/opt/ml/processing/input/data/historical.csv",
        "--output",
        "/opt/ml/processing/output/x_notebook.csv",
    ],
    experiment_config=experiment_config,
    logs=True,
    # Return immediately after the job is submitted instead of blocking the cell.
    wait=False,
)


INFO:sagemaker:Creating processing-job with name sagemaker-scikit-learn-2024-12-15-20-24-13-996


ClientError: An error occurred (ValidationException) when calling the CreateProcessingJob operation: 