# Creación de Experimentos y Trials.

In [None]:
import datetime

import pandas as pd
import sklearn

In [3]:
import sagemaker
import json
import boto3

# Execution role and session for this notebook; region and default bucket
# are read from the session object.
role = sagemaker.get_execution_role()
sess = sagemaker.Session()
region = sess.boto_region_name

# S3 location (default session bucket + key prefix) used by the cells below.
bucket = sess.default_bucket()
prefix = "module_4/part_1"

# Echo the resolved settings, one per line.
print(role, sess, region, bucket, prefix, sep="\n")

arn:aws:iam::467432373215:role/service-role/AmazonSageMaker-ExecutionRole-20221206T164397
<sagemaker.session.Session object at 0x7fda165985d0>
eu-west-1
sagemaker-eu-west-1-467432373215
module_4/part_1


In [None]:
pip install sagemaker-experiments

In [4]:
from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from botocore.exceptions import ClientError

#### Preparación de los datos

- https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html
- *Many Amazon SageMaker algorithms support training with data in CSV format. To use data in CSV format for training, in the input data channel specification, specify text/csv as the ContentType. Amazon SageMaker requires that a CSV file does not have a header record and that the target variable is in the first column.*


#### Entrenamiento del modelo linear learner
- https://docs.aws.amazon.com/sagemaker/latest/dg/linear-learner.html

In [5]:
# Look up the linear-learner container image URI for the session's region.
image = sagemaker.image_uris.retrieve(
    framework="linear-learner",
    region=region,
)
print(image)

INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: 1.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


438346466558.dkr.ecr.eu-west-1.amazonaws.com/linear-learner:1


In [6]:
# S3 URIs of the train and validation CSV files under <bucket>/<prefix>/data.
s3_train_data = 's3://{}/{}/data/train.csv'.format(bucket, prefix)
s3_validation_data = 's3://{}/{}/data/validation.csv'.format(bucket, prefix)

print(s3_train_data, s3_validation_data, sep='\n')

s3://sagemaker-eu-west-1-467432373215/module_4/part_1/data/train.csv
s3://sagemaker-eu-west-1-467432373215/module_4/part_1/data/validation.csv


In [7]:
# Wrap each S3 URI in a TrainingInput declared as CSV content.
train_input = sagemaker.TrainingInput(s3_data=s3_train_data, content_type="text/csv")
validation_input = sagemaker.TrainingInput(s3_data=s3_validation_data, content_type="text/csv")

# Channel name -> input mapping handed to the estimator's fit() call.
data_channels = dict(train=train_input, validation=validation_input)

In [8]:
experiment_name = 'module-4-part-2-linear-learner'

# Create the experiment, or reuse it when it already exists.
# A ClientError from create() is not necessarily "already exists" (it can also
# be a permissions or throttling problem), so we confirm by loading the
# experiment. If loading fails too, the original creation error is re-raised
# instead of being silently swallowed. Either way `experiment` ends up bound.
try:
    experiment = Experiment.create(
        experiment_name=experiment_name,
        description='Training a linear-learner model using boston dataset.'
    )
except ClientError as create_error:
    try:
        experiment = Experiment.load(experiment_name=experiment_name)
    except ClientError:
        # The experiment cannot be loaded either: surface the real failure.
        raise create_error
    print(f'{experiment_name} experiment already exists! Reusing the existing experiment.')

In [9]:
# Job name = experiment name + current local timestamp (second resolution).
now = format(datetime.datetime.now(), '%Y-%m-%d-%H-%M-%S')
jobname = '-'.join((experiment_name, now))
jobname

'module-4-part-2-linear-learner-2022-12-12-12-16-56'

In [10]:
# Output location passed to the estimator as output_path.
s3_output_location = 's3://{}/{}/output'.format(bucket, prefix)

# Generic Estimator configured with the linear-learner image retrieved earlier,
# running on a single ml.c4.xlarge instance.
linear = sagemaker.estimator.Estimator(
    image_uri=image,
    role=role,
    sagemaker_session=sess,
    instance_type="ml.c4.xlarge",
    instance_count=1,
    output_path=s3_output_location,
)

#### Seleccionamos los hiperparámetros
- https://docs.aws.amazon.com/sagemaker/latest/dg/ll_hyperparameters.html

In [11]:
# Regression task; mini_batch_size is set explicitly to 50 (the training log
# reports an algorithm default of 1000).
linear.set_hyperparameters(
    mini_batch_size=50,
    predictor_type="regressor",
)

In [12]:
# Register a new trial in the experiment, named after the training job.
exp_trial = Trial.create(trial_name=jobname, experiment_name=experiment_name)

# Experiment settings passed to fit(): experiment name, trial name and the
# display name for the trial component.
experiment_config = dict(
    ExperimentName=experiment_name,
    TrialName=exp_trial.trial_name,
    TrialComponentDisplayName='Training',
)

In [13]:
# Launch the training job with the train/validation channels, attach it to the
# trial created above, and stream the job logs into the cell output.
linear.fit(
    job_name=jobname,
    inputs=data_channels,
    logs=True,
    experiment_config=experiment_config,
)

INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: latest.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: module-4-part-2-linear-learner-2022-12-12-12-16-56


2022-12-12 12:17:08 Starting - Starting the training job...
2022-12-12 12:17:31 Starting - Preparing the instances for trainingProfilerReport-1670847427: InProgress
...............
2022-12-12 12:20:04 Downloading - Downloading input data...
2022-12-12 12:20:35 Training - Training image download completed. Training in progress..[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[12/12/2022 12:20:38 INFO 140639283193664] Reading default configuration from /opt/amazon/lib/python3.7/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_sigma': '0.01', 'init_bias': '0.0', 'optimizer': 'auto', 'loss':