In [8]:
import time
import uuid
import boto3
import sagemaker

# One SageMaker session drives everything below, so the region, the default
# bucket, and the execution role all come from the same AWS configuration.
sess = sagemaker.Session()
# Use the region the SageMaker session actually resolved; a fresh
# boto3.Session() can report None (or a different region) when no default
# region is configured for it.
region = sess.boto_region_name
bucket = sess.default_bucket()
# Reuse the existing session instead of letting the helper build a second one.
role = sagemaker.get_execution_role(sagemaker_session=sess)

In [9]:
# S3 prefix holding the training data for this run (the "all years" layout,
# labelled 1TB in the prefix name).
# Alternative per-year layout, kept for reference:
#   s3://dsoaws/nyc-taxi-orig-cleaned-dropped-parquet-per-year-multiple-files/
model_training_s3_uri = (
    "s3://dsoaws/nyc-taxi-orig-cleaned-dropped-parquet-all-years-multiple-files-1TB/"
)

### Configure the algorithm and training job

The next cell selects the XGBoost container image, defines the training input channel, and sets the
Training Job hyperparameters. For more information on XGBoost hyperparameters,
see https://xgboost.readthedocs.io/en/latest/parameter.html.

In [10]:
# Look up the XGBoost 1.5-1 container image URI for the current region.
container = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.5-1",
)

# Format of the training files; it selects the MIME type declared on the channel.
output_content_type = "parquet"

if output_content_type.upper() == "PARQUET":
    train_content_type = "application/x-parquet"
else:
    train_content_type = "text/csv"

# Training channel: objects under the S3 prefix are sharded by key across the
# training instances, and read using FastFile input mode.
train_input = sagemaker.inputs.TrainingInput(
    s3_data=model_training_s3_uri,
    content_type=train_content_type,
    distribution="ShardedByS3Key",
    input_mode="FastFile",
)

# XGBoost hyperparameters, supplied as strings.
hyperparameters = dict(
    eta="0.2",
    gamma="4",
    max_depth="5",
    min_child_weight="6",
    num_round="50",
    objective="reg:squarederror",
    subsample="0.7",
)

INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


### Start the Training Job

The Training Job configuration is set using the SageMaker Python SDK Estimator, which is then fit using
the training data from the Processing Job that was run earlier.

In [11]:
# Estimator for the training job: six ml.m5.24xlarge instances, each with a
# 200 GB storage volume, running the XGBoost container with the
# hyperparameters defined above.
estimator = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    instance_count=6,
    instance_type="ml.m5.24xlarge",
    volume_size=200,
    hyperparameters=hyperparameters,
)

In [12]:
import time
import sagemaker

# Reference timing: m5.24xlarge (6 incl leader node) - 12.85 minutes
# fit() blocks until the job completes and streams its logs, but it returns
# None, so the job details are read back from the estimator afterwards.
estimator.fit({"train": train_input})
training_job_results = estimator.latest_training_job.describe()
print(training_job_results["TrainingJobName"], training_job_results["TrainingJobStatus"])

INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2023-03-08-01-26-56-469


2023-03-08 01:26:56 Starting - Starting the training job...
2023-03-08 01:27:13 Starting - Preparing the instances for training......
2023-03-08 01:28:26 Downloading - Downloading input data......
2023-03-08 01:29:02 Training - Training image download completed. Training in progress...[36m[2023-03-08 01:29:28.748 ip-10-0-234-157.ec2.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[36m[2023-03-08 01:29:28.806 ip-10-0-234-157.ec2.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[36m[2023-03-08:01:29:29:INFO] Imported framework sagemaker_xgboost_container.training[0m
[36m[2023-03-08:01:29:29:INFO] Failed to parse hyperparameter objective value reg:squarederror to Json.[0m
[36mReturning the value itself[0m
[36m[2023-03-08:01:29:29:INFO] No GPUs detected (normal if no gpus installed)[0m
[36m[2023-03-08:01:29:29:INFO] Running XGBoost Sagemaker in algorithm mode[0m
[36m[2023-03-08:01:29:29:INFO] Determined 0 GPU(s) available on t