In [1]:
# --- Celda 1: Setup y Configuración ---
import sagemaker
from sagemaker.pytorch import PyTorch
from sagemaker.tuner import HyperparameterTuner, IntegerParameter, ContinuousParameter, CategoricalParameter

sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()
bucket = 'preauth-challenge-ai-20252' 

input_s3_path = f's3://{bucket}/final_data/credir_risk_reto_classified.csv'
output_s3_path = f's3://{bucket}/hpo_outputs/'

print(f"Input data: {input_s3_path}")
print(f"HPO output prefix: {output_s3_path}")

max_jobs = 6 
max_parallel_jobs = 3 

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
Input data: s3://preauth-challenge-ai-20252/final_data/credir_risk_reto_classified.csv
HPO output prefix: s3://preauth-challenge-ai-20252/hpo_outputs/


In [2]:
# Cell 2: Estimator Definition 
estimator = PyTorch(
    entry_point='train.py',
    source_dir='./',
    role=role,
    instance_count=1,
    instance_type='ml.m5.large',
    framework_version='2.0.0',
    py_version='py310',
    hyperparameters={'max-len': 64}
)

In [3]:
# Cell 3: Hyperparameter Tuner Definition (VERSIÓN REFINADA)

hyperparameter_ranges = {
    'learning-rate': ContinuousParameter(1e-5, 8e-5),  
    'epochs': IntegerParameter(3, 5),                  
    'train-batch-size': CategoricalParameter([8, 16]),
    'warmup-steps': IntegerParameter(0, 50)            
}

objective_metric_name = 'validation:accuracy'

metric_definitions = [{
    'Name': objective_metric_name,
    'Regex': r"Validation Accuracy: (\S+)"
}]

tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    max_jobs=max_jobs,
    max_parallel_jobs=max_parallel_jobs,
    objective_type='Maximize'
)


In [4]:
# Cell 4: Launch HPO and Monitor Progress
import boto3
from time import sleep
from tqdm import tqdm

sm_client = boto3.client('sagemaker')

job_name = 'credit-risk-hpo-v7-cpu-final' 
tuner.fit({'training': input_s3_path}, job_name=job_name, wait=False, logs=False)
print(f"HPO job '{job_name}' launched. Supervising progress...")

completed = 0
bar = tqdm(total=max_jobs, desc='Completed trials')

while True:
    hpo_desc = sm_client.describe_hyper_parameter_tuning_job(
        HyperParameterTuningJobName=job_name
    )
    status = hpo_desc['HyperParameterTuningJobStatus']
    
    job_counters = hpo_desc['TrainingJobStatusCounters']
    new_completed = job_counters.get('Completed', 0)

    if new_completed > completed:
        bar.update(new_completed - completed)
        completed = new_completed

    if status in ['Completed', 'Failed', 'Stopped']:
        if bar.n < max_jobs:
             bar.update(max_jobs - bar.n)
        break
        
    sleep(60)

bar.close()
print(f'\nHyperparameter tuning finished with status: {status}')

if status == 'Completed':
    best_job = tuner.best_training_job()
    print(f"Best training job: {best_job}")
    print(f"Artifacts for the best job are in: s3://{bucket}/{best_job}/output/model.tar.gz")
else:
    print("HPO did not complete successfully. No best job to show.")

No finished training job found associated with this estimator. Please make sure this estimator is only used for building workflow config


HPO job 'credit-risk-hpo-v7-cpu-final' launched. Supervising progress...


Completed trials: 100%|██████████| 6/6 [31:04<00:00, 310.72s/it]


Hyperparameter tuning finished with status: Completed
Best training job: credit-risk-hpo-v7-cpu-final-002-048de319
Artifacts for the best job are in: s3://preauth-challenge-ai-20252/credit-risk-hpo-v7-cpu-final-002-048de319/output/model.tar.gz





In [5]:
# Cell 5: Obtener artefactos del mejor modelo
import boto3

sm_client = boto3.client('sagemaker')

try:
    tuning_job_name = tuner.latest_tuning_job.job_name if 'tuner' in locals() and tuner.latest_tuning_job else job_name
    tuner_description = sm_client.describe_hyper_parameter_tuning_job(
        HyperParameterTuningJobName=tuning_job_name
    )
    
    if tuner_description['HyperParameterTuningJobStatus'] == 'Completed':
        best_training_job_name = tuner_description['BestTrainingJob']['TrainingJobName']
        print(f"Mejor job de entrenamiento: {best_training_job_name}")

        job_description = sm_client.describe_training_job(TrainingJobName=best_training_job_name)
        model_artifacts_s3_path = job_description['ModelArtifacts']['S3ModelArtifacts']

        print(f"\nEl MEJOR MODELO se encuentra en: {model_artifacts_s3_path}")

        %store model_artifacts_s3_path
        print("\nLa ruta a los artefactos ha sido guardada para el siguiente notebook.")
    else:
        print("El trabajo de HPO no se completó. No se puede obtener el mejor modelo.")

except Exception as e:
    print(f"Error al obtener los artefactos: {e}")

Mejor job de entrenamiento: credit-risk-hpo-v7-cpu-final-002-048de319

El MEJOR MODELO se encuentra en: s3://sagemaker-us-east-1-318399884747/credit-risk-hpo-v7-cpu-final-002-048de319/output/model.tar.gz
Stored 'model_artifacts_s3_path' (str)

La ruta a los artefactos ha sido guardada para el siguiente notebook.
