In [4]:
import sagemaker
from sagemaker.session import Session
from sagemaker import get_execution_role
from sagemaker import LinearLearner
import numpy as np
import pandas as pd

# Configuración inicial
role = get_execution_role()
bucket = 'trabajofinallaboratorio'
data_key = 'synthetic_customer_data.csv'
data_location = f's3://{bucket}/{data_key}'



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [5]:
# Cargar el dataset desde S3
df = pd.read_csv(data_location)


severe performance issues, see also https://github.com/dask/dask/issues/10276

To fix, you should specify a lower version bound on s3fs, or
update the current installation.



In [9]:
!pip install --upgrade s3fs



In [6]:
# Preprocesamiento del dataset
# 1. Eliminar la columna 'customer_id'
df = df.drop(columns=['customer_id'])

# 2. Transformar la columna 'gender' en valores binarios
df['gender'] = df['gender'].apply(lambda x: 1 if x == 'Male' else 0)

# 3. Codificar la columna 'customer_segment' como numérica
df['customer_segment'] = df['customer_segment'].astype('category')
customer_segment_map = dict(enumerate(df['customer_segment'].cat.categories))
df['customer_segment'] = df['customer_segment'].cat.codes

# Imprimir el significado de los códigos asignados
print("Significado de los valores en 'customer_segment':")
for code, category in customer_segment_map.items():
    print(f"{code}: {category}")


# Dividir el dataset en conjuntos de entrenamiento y prueba
customer_data = df.values
num_train = int(customer_data.shape[0] * 0.80)  # 80% para entrenamiento

X_train = customer_data[:num_train, :-1]  # Todas las columnas excepto la última
y_train = customer_data[:num_train, -1]   # Última columna (target)

X_test = customer_data[num_train:, :-1]
y_test = customer_data[num_train:, -1]


Significado de los valores en 'customer_segment':
0: high_value
1: low_value
2: medium_value


In [13]:
# Crear la ruta en S3 para guardar el modelo
s3_prefix = 'customer-segmentation'
output_path = f's3://{bucket}/{s3_prefix}/'

In [14]:
# Instanciar el modelo Linear Learner
session = sagemaker.Session()
linear = LinearLearner(
    role=role,
    train_instance_count=1,
    train_instance_type='ml.m4.xlarge',
    predictor_type='multiclass_classifier',  # Cambiado a clasificación multiclase
    num_classes=len(df['customer_segment'].unique()),  # Número de clases
    output_path=output_path,
    sagemaker_session=session,
    epochs=20
)

train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [15]:
# Convertir el conjunto de datos en RecordSet
training_recordset = linear.record_set(train=X_train.astype('float32'), labels=y_train.astype('float32'))

In [16]:
# Entrenar el modelo
linear.fit(training_recordset)

INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: linear-learner-2024-12-08-15-40-04-463


2024-12-08 15:40:05 Starting - Starting the training job...
2024-12-08 15:40:20 Starting - Preparing the instances for training...
2024-12-08 15:40:50 Downloading - Downloading input data...
2024-12-08 15:41:20 Downloading - Downloading the training image......
2024-12-08 15:42:31 Training - Training image download completed. Training in progress....
2024-12-08 15:43:02 Uploading - Uploading generated training model[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[12/08/2024 15:42:54 INFO 140469236098880] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'i

In [17]:
# Desplegar el modelo en un endpoint
linear_predictor = linear.deploy(initial_instance_count=1, instance_type='ml.t2.medium')


INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating model with name: linear-learner-2024-12-08-15-44-17-941
INFO:sagemaker:Creating endpoint-config with name linear-learner-2024-12-08-15-44-17-941
INFO:sagemaker:Creating endpoint with name linear-learner-2024-12-08-15-44-17-941


------------------!

AttributeError: 'str' object has no attribute 'shape'

In [18]:
# Imprimir el nombre del endpoint
print(f"Endpoint name: {linear_predictor.endpoint_name}")

Endpoint name: linear-learner-2024-12-08-15-44-17-941


In [20]:
import boto3

# Crear cliente de SageMaker
sagemaker_client = boto3.client('sagemaker')

# Listar todos los endpoints
endpoints = sagemaker_client.list_endpoints()

# Imprimir detalles de los endpoints
for endpoint in endpoints['Endpoints']:
    print(f"Nombre: {endpoint['EndpointName']}")
    print(f"Estado: {endpoint['EndpointStatus']}")
    print(f"Creado: {endpoint['CreationTime']}")
    print("-------------------")



Nombre: linear-learner-2024-12-08-15-44-17-941
Estado: InService
Creado: 2024-12-08 15:44:19.242000+00:00
-------------------


In [1]:
import boto3
import csv
import io

# Configuración
runtime = boto3.client('sagemaker-runtime')
endpoint_name = 'linear-learner-2024-12-08-15-44-17-941'

# Datos de prueba
test_sample = [30, 50090, 20000, 50, 400, 70, 1, 10, 30, 5]

# Convertir a CSV
buf = io.StringIO()
writer = csv.writer(buf)
writer.writerow(test_sample)
payload = buf.getvalue()

# Realizar predicción
try:
    response = runtime.invoke_endpoint(
        EndpointName=endpoint_name,
        ContentType='text/csv',
        Body=payload
    )
    result = response['Body'].read().decode()
    print(f"Input: {test_sample}")
    print(f"Predicción: {result}")
except Exception as e:
    print(f"Error: {str(e)}")

Input: [30, 50090, 20000, 50, 400, 70, 1, 10, 30, 5]
Predicción: {"predictions": [{"score": [1.000000013351432e-10, 1.0, 1.000000013351432e-10], "predicted_label": 1}]}


In [26]:
import boto3
sagemaker_client = boto3.client('sagemaker')
response = sagemaker_client.list_endpoints()
for endpoint in response['Endpoints']:
    print(endpoint['EndpointName'], endpoint['EndpointStatus'])

linear-learner-2024-12-08-15-44-17-941 InService


In [29]:
response = sagemaker_client.describe_endpoint(EndpointName='linear-learner-2024-12-08-15-44-17-941')
print(response['EndpointStatus'])

InService


In [7]:
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)


X_test shape: (200, 10)
y_test shape: (200,)


In [9]:
# Evaluación del modelo con el conjunto de prueba
import json
import boto3
from sklearn.metrics import classification_report, f1_score, accuracy_score
import numpy as np

# Convertir el conjunto de prueba a CSV para enviar al endpoint
def get_predictions_from_endpoint(endpoint_name, X_test):
    predictions = []
    runtime = boto3.client('sagemaker-runtime')
    
    for row in X_test:
        buf = io.StringIO()
        writer = csv.writer(buf)
        writer.writerow(row)
        payload = buf.getvalue()
        
        response = runtime.invoke_endpoint(
            EndpointName=endpoint_name,
            ContentType='text/csv',
            Body=payload
        )
        result = json.loads(response['Body'].read().decode())
        # Suponiendo que 'predicted_label' contiene la clase predicha
        predictions.append(int(result['predictions'][0]['predicted_label']))
    
    return predictions

# Obtener las predicciones del endpoint
y_pred = get_predictions_from_endpoint(endpoint_name, X_test)

# Calcular métricas de evaluación
print("Reporte de clasificación:")
print(classification_report(y_test, y_pred, target_names=list(customer_segment_map.values())))

# F1-score general
f1 = f1_score(y_test, y_pred, average='weighted')
print(f"F1-score (ponderado): {f1}")

# Precisión global
accuracy = accuracy_score(y_test, y_pred)
print(f"Precisión: {accuracy}")


Reporte de clasificación:
              precision    recall  f1-score   support

  high_value       1.00      0.48      0.65        25
   low_value       0.00      0.00      0.00         2
medium_value       0.92      1.00      0.96       173

    accuracy                           0.93       200
   macro avg       0.64      0.49      0.54       200
weighted avg       0.92      0.93      0.91       200

F1-score (ponderado): 0.9101392528262334
Precisión: 0.925


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
