In [1]:
import sys
print(sys.executable)

/mnt/d/AWS/fraud-detection/.env/bin/python3


### Prepare data

In [2]:
import boto3
import os

from sagemaker.local import LocalSession

sagemaker_session = LocalSession()
sagemaker_session.config = {'local': {'local_code': True}}


os.makedirs("local_output", exist_ok=True)




sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/alber/.config/sagemaker/config.yaml


In [5]:
from sagemaker import image_uris

region = boto3.Session().region_name
version = "1.3-1"

image_uri = image_uris.retrieve("xgboost", region, version)
image_uri

'683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.3-1'

In [6]:
# demo create data

import pandas as pd
import numpy as np

# Crear datos de ejemplo
data = np.random.rand(100, 10)  # 100 filas, 10 características
labels = np.random.randint(2, size=100)  # 100 valores binarios

df = pd.DataFrame(data, columns=[f"feature_{i}" for i in range(10)])
df["label"] = labels

# Guardar los datos como CSV
os.makedirs("data", exist_ok=True)
df.to_csv("data/train.csv", index=False, header=False)
df.to_csv("data/validation.csv", index=False, header=False)

### Initializate model

In [8]:
from sagemaker.estimator import Estimator

role = "arn:aws:iam::390844780979:role/SageMakerRole"

hyperparameters = {
    'max_depth': 5,
    'eta': 0.2,
    'objective': 'reg:squarederror',
    'num_round': 50
}

estimator = Estimator(
    image_uri=image_uri,
    role=role,
    instance_count=1,
    instance_type="local",
    output_path="file://local_output",
    sagemaker_session=sagemaker_session,
    hyperparameters=hyperparameters
    
)



In [11]:
from urllib.parse import urlparse
import os

train_uri = 'file:///mnt/d/AWS/fraud-detection/src/train/data/train.csv'
valid_uri = 'file:///mnt/d/AWS/fraud-detection/src/train/data/validation.csv'

print(os.path.exists(urlparse(train_uri).path))
print(os.path.exists(urlparse(valid_uri).path))

True
True


### Training model

In [3]:
from sagemaker.mxnet import MXNet

# Configure an MXNet Estimator (no training happens yet)
mxnet_estimator = MXNet('train.py',
                        role='SageMakerRole',
                        instance_type='local',
                        instance_count=1,
                        framework_version='1.2.1')

# In Local Mode, fit will pull the MXNet container Docker image and run it locally
mxnet_estimator.fit("s3://fraud-detection-data-ars/train_data/train.csv")

# Alternatively, you can train using data in your local file system. This is only supported in Local mode.
#mxnet_estimator.fit('file:///tmp/my_training_data')

# Deploys the model that was generated by fit() to local endpoint in a container
#mxnet_predictor = mxnet_estimator.deploy(initial_instance_count=1, instance_type='local')

# Serializes data and makes a prediction request to the local endpoint
#response = mxnet_predictor.predict(data)

# Tears down the endpoint container and deletes the corresponding endpoint configuration
#mxnet_predictor.delete_endpoint()

# Deletes the model
#mxnet_predictor.delete_model()

In [9]:
from sagemaker.inputs import TrainingInput

estimator.fit(
    {
        "train": TrainingInput(
            "s3://fraud-detection-data-ars/train_data/train.csv",
            content_type="csv"
        ),
        "validation": TrainingInput(
            "s3://fraud-detection-data-ars/train_data/validation.csv",
            content_type="csv"
            
        )
    }
)

 Container dhf9etdhc3-algo-1-9kaf0  Creating
 Container dhf9etdhc3-algo-1-9kaf0  Created
Attaching to dhf9etdhc3-algo-1-9kaf0
dhf9etdhc3-algo-1-9kaf0  | [2025-04-04 15:08:39.039 b8294b5138bd:1 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None
dhf9etdhc3-algo-1-9kaf0  | [2025-04-04 15:08:39.086 b8294b5138bd:1 INFO profiler_config_parser.py:111] Unable to find config at /opt/ml/input/config/profilerconfig.json. Profiler is disabled.
dhf9etdhc3-algo-1-9kaf0  | [2025-04-04:15:08:39:INFO] Imported framework sagemaker_xgboost_container.training
dhf9etdhc3-algo-1-9kaf0  | [2025-04-04:15:08:39:INFO] Failed to parse hyperparameter objective value reg:squarederror to Json.
dhf9etdhc3-algo-1-9kaf0  | Returning the value itself
dhf9etdhc3-algo-1-9kaf0  | [2025-04-04:15:08:39:INFO] No GPUs detected (normal if no gpus installed)
dhf9etdhc3-algo-1-9kaf0  | [2025-04-04:15:08:39:INFO] Running XGBoost Sagemaker in algorithm mode
dhf9etdhc3-algo-1-9kaf0  | [2025-04-04:15:08:39:ERROR] Reporting traini