In [1]:
!pip install ipywidgets
!jupyter nbextension enable --py widgetsnbextension --sys-prefix

usage: jupyter [-h] [--version] [--config-dir] [--data-dir] [--runtime-dir]
               [--paths] [--json] [--debug]
               [subcommand]

Jupyter: Interactive Computing

positional arguments:
  subcommand     the subcommand to launch

options:
  -h, --help     show this help message and exit
  --version      show the versions of core jupyter packages and exit
  --config-dir   show Jupyter config dir
  --data-dir     show Jupyter data dir
  --runtime-dir  show Jupyter runtime dir
  --paths        show all Jupyter paths. Add --json for machine-readable
                 format.
  --json         output paths as machine-readable json
  --debug        output debug information about paths

Available subcommands: console dejavu events execute fileid kernel kernelspec
lab labextension labhub migrate nbconvert notebook run server troubleshoot
trust

Jupyter command `jupyter-nbextension` not found.


In [2]:
# Celda 1: Imports, configuración de cliente Bedrock y variables S3
import boto3
import pandas as pd
import json
import os
from tqdm.auto import tqdm  # Detecta automáticamente si usar widgets o consola de texto

# S3 locations; the bucket can be overridden through the S3_BUCKET_NAME env var
default_bucket = "preauth-challenge-ai-20252"
S3_BUCKET_NAME = os.environ.get("S3_BUCKET_NAME", default_bucket)
S3_INPUT_PATH = f"s3://{S3_BUCKET_NAME}/training_data/credir_risk_reto.csv"
S3_OUTPUT_PATH = f"s3://{S3_BUCKET_NAME}/generated_data/credir_risk_reto_generated.csv"

# Bedrock Runtime client and the model id used for generation
print("Inicializando cliente de AWS Bedrock (Bedrock Runtime)...")
bedrock_runtime = boto3.client("bedrock-runtime", region_name="us-east-1")
MODEL_ID = "amazon.nova-micro-v1:0"
print(f"Configuración completada. Bucket S3: {S3_BUCKET_NAME}, Modelo: {MODEL_ID}")

# Readable labels for the numeric employment codes (position in the tuple == code)
job_map = dict(
    enumerate(
        (
            "unskilled and non-resident",
            "unskilled and resident",
            "skilled",
            "highly skilled",
        )
    )
)

Inicializando cliente de AWS Bedrock (Bedrock Runtime)...
Configuración completada. Bucket S3: preauth-challenge-ai-20252, Modelo: amazon.nova-micro-v1:0


In [3]:
# Celda 2: Carga de datos desde S3
def load_dataset(path):
    """Read a CSV file into a DataFrame and print how many records it contains.

    Args:
        path: Location passed straight to ``pd.read_csv`` (the notebook passes
            an ``s3://`` URI).

    Returns:
        pandas.DataFrame: The loaded data.

    Raises:
        RuntimeError: If reading fails for any reason. The underlying exception
            is attached as ``__cause__`` so the original traceback is kept.
    """
    print(f"Intentando cargar el dataset desde: {path}")
    # Keep the try block limited to the read so only load failures are wrapped.
    try:
        df = pd.read_csv(path)
    except Exception as e:
        raise RuntimeError(
            f"No se pudo cargar el archivo desde S3. Verifica bucket, ruta y permisos. Detalle: {e}"
        ) from e
    print(f"Dataset original cargado exitosamente. Registros: {df.shape[0]}")
    return df

# Load the input CSV into a DataFrame and preview its first rows
df_original = load_dataset(S3_INPUT_PATH)
display(df_original.head())

Intentando cargar el dataset desde: s3://preauth-challenge-ai-20252/training_data/credir_risk_reto.csv
Dataset original cargado exitosamente. Registros: 1000


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose
0,67,male,2,own,,little,1169,6,radio/TV
1,22,female,2,own,little,moderate,5951,48,radio/TV
2,49,male,1,own,little,,2096,12,education
3,45,male,2,free,little,little,7882,42,furniture/equipment
4,53,male,2,free,little,little,4870,24,car


In [6]:
# Celda 3: Definición de la función de generación 
def _prompt_value(value, missing="unknown"):
    """Return ``value`` as prompt text, or ``missing`` when it is NaN/None."""
    return missing if pd.isna(value) else str(value)


def generate_credit_description(row, client, model_id, job_labels=None):
    """Generate a short credit-risk description for one customer row.

    Builds an English prompt from the row's fields and sends it as a single
    user message through ``client.converse``.

    Args:
        row: Row exposing ``Age``, ``Sex``, ``Job``, ``Housing``,
            ``Saving accounts``, ``Checking account``, ``Credit amount``,
            ``Duration`` and ``Purpose`` (as produced by ``DataFrame.iterrows``).
        client: Object with a ``converse`` method (the notebook passes a
            Bedrock Runtime client).
        model_id: Model identifier forwarded as ``modelId``.
        job_labels: Optional mapping from job code to readable label. Defaults
            to the module-level ``job_map``.

    Returns:
        str: The text of the first content item of the response, stripped.

    Raises:
        KeyError: If ``row.Job`` is not a key of the job-label mapping.
        RuntimeError: If the ``converse`` call or response parsing fails; the
            original exception is attached as ``__cause__``.
    """
    labels = job_map if job_labels is None else job_labels

    # Missing cells arrive as NaN: without this they would be rendered as the
    # literal 'nan' in the prompt, and ``.lower()`` would fail on a float.
    # NOTE(review): "unknown" is a neutral placeholder; confirm whether a
    # missing account value should instead read as "no account".
    sex = _prompt_value(row.Sex).lower()
    housing = _prompt_value(row.Housing).lower()
    savings = _prompt_value(row['Saving accounts'])
    checking = _prompt_value(row['Checking account'])

    user_text = (
        "Provide a brief description of the credit risk profile in two short sentences. "
        + f"A {row.Age}-year-old {sex} customer, employed as a {labels[row.Job]}, "
        + f"lives in a {housing} housing situation. They have a savings account categorized as "
        + f"'{savings}' and a checking account listed as '{checking}'. "
        + f"The customer has applied for a credit amount of {row['Credit amount']} to be repaid over "
        + f"{row['Duration']} months for the purpose of '{row['Purpose']}'."
    )
    user_message = {"role": "user", "content": [{"text": user_text}]}

    try:
        response = client.converse(
            modelId=model_id,
            messages=[user_message],
            inferenceConfig={
                "maxTokens": 64,
                "temperature": 1.0,
                "topP": 0.9
            }
        )
        return response["output"]["message"]["content"][0]["text"].strip()
    except Exception as e:
        # Chain explicitly so the underlying service/parsing error stays visible.
        raise RuntimeError(f"Error al invocar Converse: {e}") from e

In [9]:
# Celda 4: Generación y guardado de descripciones
def generate_and_save(df):
    """Generate a description per row, store it in ``df`` and write the CSV.

    Calls ``generate_credit_description`` for every row using the module-level
    ``bedrock_runtime`` client and ``MODEL_ID``. Generation stops at the first
    failing row; that row and all rows after it get ``None`` as description.
    The frame is then written to ``S3_OUTPUT_PATH`` without the index.

    Args:
        df: DataFrame to process. It is modified in place: a ``Description``
            column is added (or overwritten).

    Returns:
        None
    """
    total = df.shape[0]
    print(f"Iniciando la generación de descripciones para {total} registros...")

    # Pre-fill with None so the column always has exactly one entry per row.
    # A list cut short by an early stop would make the column assignment raise
    # a length-mismatch error and lose everything generated so far.
    descriptions = [None] * total
    generated = 0
    progress = tqdm(df.iterrows(), total=total, desc="Generando Descripciones")
    for position, (idx, row) in enumerate(progress):
        try:
            descriptions[position] = generate_credit_description(row, bedrock_runtime, MODEL_ID)
        except Exception as e:
            print(f"Error en fila {idx}: {e}")
            break
        generated += 1

    df['Description'] = descriptions
    # Only report completion when every row actually received a description.
    if generated == total:
        print("Generación completada.")
    else:
        print(f"Generación interrumpida: {generated} de {total} descripciones generadas.")
    df.to_csv(S3_OUTPUT_PATH, index=False)
    print(f"Archivo guardado en: {S3_OUTPUT_PATH}")

# Run the generation over the loaded dataset. generate_and_save adds the
# 'Description' column to df_original in place and writes the CSV to S3_OUTPUT_PATH.
generate_and_save(df_original)

Iniciando la generación de descripciones para 1000 registros...


Generando Descripciones:   0%|          | 0/1000 [00:00<?, ?it/s]

Generación completada.
Archivo guardado en: s3://preauth-challenge-ai-20252/generated_data/credir_risk_reto_generated.csv


In [13]:
# Preview the generated text. This relies on the 'Description' column that
# generate_and_save added to df_original in place, so that cell must run first.
display(df_original["Description"])

0      The 67-year-old male customer, employed and re...
1      The credit risk profile for this 22-year-old f...
2      The credit risk profile for this 49-year-old m...
3      The 45-year-old male customer, employed in a s...
4      The 53-year-old skilled male, with limited sav...
                             ...                        
995    The credit risk profile for this 31-year-old f...
996    The credit risk profile for this 40-year-old m...
997    The credit risk profile for this 38-year-old s...
998    The credit risk profile for this 23-year-old m...
999    The credit risk profile for this 27-year-old m...
Name: Description, Length: 1000, dtype: object