# Dependencies

## Imports

In [2]:
import sagemaker
from sagemaker import image_uris
from sagemaker.session import Session
from sagemaker.inputs import TrainingInput

## Entities

In [9]:
region = 'us-east-1'
bucket_name = 'jpx-stock-prediction'
prefix = 'xgboost-builtin'

In [12]:
role=sagemaker.get_execution_role()
print(role)

arn:aws:iam::949672723150:role/qte4288_SageMakerExecutionRole


# Model

## Find an XGBoost image URI and build an XGBoost Container

In [6]:
xgboost_container = sagemaker.image_uris.retrieve("xgboost", region, "1.7-1")
display(xgboost_container)

'683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1'

## Initialize Parameters

In [7]:
hyperparameters = {
        "max_depth":"5",
        "eta":"0.2",
        "gamma":"4",
        "min_child_weight":"6",
        "subsample":"0.7",
        "objective":"reg:squarederror",
        "num_round":"50"}

## Output path to save the trained model

In [10]:
output_path = 's3://{}/{}/{}/'.format(bucket_name,prefix,'output')
print(output_path)

s3://jpx-stock-prediction/xgboost-builtin/output/


## Sagemaker estimator to call the XGBoost container

In [13]:
estimator = sagemaker.estimator.Estimator(image_uri=xgboost_container,
                                          hyperparameters=hyperparameters,
                                          role=role,
                                          instance_count=1,
                                          instance_type='ml.g4dn.2xlarge',
                                          volume_size=5,
                                          output_path=output_path,
                                          use_spot_instances = True,
                                          max_run = 300,
                                          max_wait = 600)

## Training and Validation Set Path

In [14]:
content_type = "csv"
train_input = TrainingInput("s3://{}/{}/{}/".format(bucket_name,prefix,'train'),content_type=content_type)
test_input = TrainingInput("s3://{}/{}/{}/".format(bucket_name,prefix,'test'),content_type=content_type)

## Fit

In [15]:
estimator.fit({'train':train_input,'validation':test_input})

INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2024-08-24-09-36-23-747


2024-08-24 09:36:24 Starting - Starting the training job...
2024-08-24 09:36:39 Starting - Preparing the instances for training...
2024-08-24 09:37:13 Downloading - Downloading input data.........
2024-08-24 09:38:45 Training - Training image download completed. Training in progress.[34m[2024-08-24 09:38:48.237 ip-10-2-73-133.ec2.internal:14 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2024-08-24 09:38:48.258 ip-10-2-73-133.ec2.internal:14 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2024-08-24:09:38:48:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2024-08-24:09:38:48:INFO] Failed to parse hyperparameter objective value reg:squarederror to Json.[0m
[34mReturning the value itself[0m
[34m[2024-08-24:09:38:48:INFO] Running XGBoost Sagemaker in algorithm mode[0m
[34m[2024-08-24:09:38:48:INFO] Determined 1 GPU(s) available on the instance.[0m
[34m[2024-08-24:09:38:48:INFO] Determined delimiter of CSV input is