Este pipeline toma historias de usuario del dataset `salony_train.csv` y las descompone
en tareas de desarrollo.

In [5]:
import argparse
from pathlib import Path
from typing import Dict, Optional

import pandas as pd

from simple_pipeline import SimplePipeline
from simple_pipeline.steps import LoadDataFrame, OllamaLLMStep, OllamaJudgeStep, AddColumn

In [6]:
def create_task_generation_prompt(row: Dict) -> str:
    """
    Build the task-generation prompt for one Salony user story.

    Args:
        row: Mapping (e.g. a DataFrame row) whose 'input' entry holds the
            user-story text. The value must be a string; surrounding
            whitespace is stripped before it is embedded in the prompt.

    Returns:
        The fully formatted prompt, ending with the "Response:" marker.
    """
    user_story = row['input'].strip()

    # The placeholder delimiters are the single angle quotes ‹ and ›. They were
    # previously stored as mis-decoded text, which garbled the format example
    # shown to the model.
    prompt = f"""Below is an instruction that describes a task, paired with an input that provides a user story.

Write a response that appropriately completes the request.


Instruction:

Break this user story into smaller development tasks to help the developers implement it efficiently. You can divide this user story into as many tasks as needed, depending on its complexity. Each task must be unique, actionable, and non-overlapping.

Use the following format for the response:

1. summary: ‹task summary 1›
description: ‹task description 1›
2. summary: ‹task summary 2›
description: ‹task description 2›

N. summary: ‹task summary N›
description: ‹task description N›


Input:

{user_story}


Response:"""

    return prompt

In [7]:
def _load_salony_stories(input_csv: Path, sample_size: Optional[int]) -> pd.DataFrame:
    """Read the user-story CSV and return the cleaned, optionally sampled rows."""
    print(f"📥 Cargando datos desde: {input_csv}")

    if not input_csv.exists():
        raise FileNotFoundError(f"No se encontró el archivo: {input_csv}")

    df = pd.read_csv(input_csv)

    # Drop the first column when it is just a saved index.
    if len(df.columns) > 0 and df.columns[0] in ('Unnamed: 0', ''):
        df = df.iloc[:, 1:]

    print(f"   ✓ {len(df)} historias cargadas")

    if 'input' not in df.columns:
        raise ValueError("El CSV debe tener una columna 'input' con las historias de usuario")

    # Clean BEFORE sampling so that sample_size=N yields N usable stories even
    # when some of the leading rows have a missing 'input'.
    df = df.dropna(subset=['input'])
    # assign() builds a new frame instead of writing into a derived one, and
    # astype(str) keeps .str usable when the column was not parsed as text.
    df = df.assign(input=df['input'].astype(str).str.strip())

    # A sample_size of None or 0 means "process everything".
    if sample_size:
        df = df.head(sample_size)
        print(f"   ℹ️  Procesando solo {sample_size} historias (modo muestra)")

    # Hand downstream steps a contiguous 0..n-1 index.
    return df.reset_index(drop=True)


def run_salony_pipeline(
    output_csv: str,
    model_name: str = "llama3.1:8b",
    judge_model_name: str = "llama3.1:8b",
    batch_size: int = 2,
    temperature: float = 0.3,
    num_predict: int = 1000,
    sample_size: Optional[int] = None,
    use_judge: bool = True,
    judge_threshold: float = 35.0,
    input_csv: str = "../data/salony_train.csv",
):
    """
    Run the task-generation pipeline over the Salony user stories.

    The stories are read from ``input_csv``, cleaned, optionally truncated to
    the first ``sample_size`` rows, sent to the generator model and, when
    ``use_judge`` is set, scored by a judge model. The resulting frame is
    written to ``output_csv`` and returned.

    Args:
        output_csv: Path where the result CSV is written. Missing parent
            directories are created.
        model_name: Ollama model used to generate the tasks.
        judge_model_name: Ollama model used for validation (judge).
        batch_size: Batch size passed to the generation step; the judge step
            uses half of it (at least 1).
        temperature: Sampling temperature for generation.
        num_predict: Maximum number of tokens to generate.
        sample_size: If given (and non-zero), only the first N cleaned stories
            are processed; useful for trial runs.
        use_judge: Whether to add the LLM-judge validation step.
        judge_threshold: Approval threshold handed to the judge (0-50 scale).
        input_csv: Path of the CSV holding the user stories in an 'input'
            column. Relative paths resolve against the current working
            directory.

    Returns:
        The object returned by ``pipeline.run`` (used here as a DataFrame).

    Raises:
        ValueError: If ``sample_size`` is negative or the CSV has no 'input'
            column.
        FileNotFoundError: If ``input_csv`` does not exist.
    """
    # Reject negative sizes up front: DataFrame.head(-n) would silently return
    # "all but the last n rows" rather than a sample.
    if sample_size is not None and sample_size < 0:
        raise ValueError("sample_size debe ser un entero no negativo")

    print(f"\n{'='*80}")
    print("🚀 SALONY USER STORIES TO TASKS PIPELINE")
    if use_judge:
        print("🔍 CON VALIDACIÓN LLM JUEZ ACTIVADA")
    print(f"{'='*80}\n")

    df = _load_salony_stories(Path(input_csv), sample_size)

    # Summarize the configuration about to be used.
    print("\n⚙️ Configurando pipeline:")
    print(f"   Modelo generador: {model_name}")
    if use_judge:
        print(f"   Modelo juez: {judge_model_name}")
        print(f"   Umbral de aprobación: {judge_threshold}/50")
    print(f"   Batch size: {batch_size}")
    print(f"   Temperature: {temperature}")
    print(f"   Historias a procesar: {len(df)}")

    pipeline = SimplePipeline(
        name="salony-tasks-pipeline-with-judge",
        description="Pipeline para generar y validar tareas de desarrollo del dataset Salony"
    )

    # Step 1: feed the cleaned stories into the pipeline.
    pipeline.add_step(
        LoadDataFrame(name="load", df=df)
    )

    # Step 2: record which model generated the tasks.
    pipeline.add_step(
        AddColumn(
            name="add_generator_model",
            input_columns=[],  # constant column, no inputs needed
            output_column="generator_model_name",
            func=lambda: model_name
        )
    )

    # Step 3: generate the tasks.
    pipeline.add_step(
        OllamaLLMStep(
            name="generate_tasks",
            model_name=model_name,
            prompt_column="input",
            output_column="tasks",
            prompt_template=create_task_generation_prompt,
            system_prompt="You are an expert software development lead who excels at breaking down user stories into clear, actionable development tasks.",
            batch_size=batch_size,
            generation_kwargs={
                "temperature": temperature,
                "num_predict": num_predict
            },
        )
    )

    # Step 4 (optional): validate the generated tasks with the judge model.
    if use_judge:
        # Record which model acted as judge.
        pipeline.add_step(
            AddColumn(
                name="add_judge_model",
                input_columns=[],  # constant column, no inputs needed
                output_column="judge_model_name",
                func=lambda: judge_model_name
            )
        )

        pipeline.add_step(
            OllamaJudgeStep(
                name="validate_tasks",
                model_name=judge_model_name,
                historia_usuario_column="input",
                tareas_generadas_column="tasks",
                approval_threshold=judge_threshold,
                batch_size=max(1, batch_size // 2),  # smaller batch for the judge
                generation_kwargs={
                    "temperature": 0.2,  # low temperature for a more consistent judge
                    "num_predict": 800
                }
            )
        )

    # Run the pipeline without its cache.
    print("\n🔄 Procesando historias...\n")
    result_df = pipeline.run(use_cache=False)

    # Persist the results, creating the target directory if it is missing.
    print("\n💾 Guardando resultados...")
    output_path = Path(output_csv)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    result_df.to_csv(output_path, index=False)
    print(f"   ✓ CSV guardado: {output_csv}")
    print(f"   ✓ {len(result_df)} historias procesadas")

    return result_df

In [8]:
# Ejemplo 1: Pipeline completo con validaci√≥n LLM juez
result_df = run_salony_pipeline(
    output_csv="salony_tasks_with_validation.csv",
    model_name="llama3.1:8b",
    judge_model_name="llama3.1:8b", 
    batch_size=2,
    temperature=0.3,
    num_predict=1000,
    sample_size=3,
    use_judge=True,
    judge_threshold=35.0
)

2025-11-11 12:55:34 - SimplePipeline.salony-tasks-pipeline-with-judge - INFO - Added step: load
2025-11-11 12:55:34 - SimplePipeline.salony-tasks-pipeline-with-judge - INFO - Added step: add_generator_model
2025-11-11 12:55:34 - SimplePipeline.salony-tasks-pipeline-with-judge - INFO - Added step: generate_tasks
2025-11-11 12:55:34 - SimplePipeline.salony-tasks-pipeline-with-judge - INFO - Added step: add_judge_model
2025-11-11 12:55:34 - SimplePipeline.salony-tasks-pipeline-with-judge - INFO - Added step: validate_tasks
2025-11-11 12:55:34 - SimplePipeline.salony-tasks-pipeline-with-judge - INFO - Starting pipeline: salony-tasks-pipeline-with-judge
2025-11-11 12:55:34 - SimplePipeline.salony-tasks-pipeline-with-judge - INFO - Number of steps: 5
2025-11-11 12:55:34 - SimplePipeline.salony-tasks-pipeline-with-judge - INFO - Executing generator step: load
2025-11-11 12:55:34 - SimplePipeline.salony-tasks-pipeline-with-judge - INFO - Executing step: add_generator_model
2025-11-11 12:55:34 


🚀 SALONY USER STORIES TO TASKS PIPELINE
🔍 CON VALIDACIÓN LLM JUEZ ACTIVADA

📥 Cargando datos desde: ../data/salony_train.csv
   ✓ 1999 historias cargadas
   ℹ️  Procesando solo 3 historias (modo muestra)

⚙️ Configurando pipeline:
   Modelo generador: llama3.1:8b
   Modelo juez: llama3.1:8b
   Umbral de aprobación: 35.0/50
   Batch size: 2
   Temperature: 0.3
   Historias a procesar: 3

🔄 Procesando historias...



Processing generate_tasks: 100%|██████████| 3/3 [00:37<00:00, 12.53s/it]
2025-11-11 12:56:11 - SimplePipeline.salony-tasks-pipeline-with-judge - INFO -   ✓ Complete (3 rows, 3 columns)
2025-11-11 12:56:11 - SimplePipeline.salony-tasks-pipeline-with-judge - INFO - Executing step: add_judge_model
2025-11-11 12:56:11 - SimplePipeline.salony-tasks-pipeline-with-judge - INFO -   ✓ Complete (3 rows, 4 columns)
2025-11-11 12:56:11 - SimplePipeline.salony-tasks-pipeline-with-judge - INFO - Executing step: validate_tasks
Validating validate_tasks:   0%|          | 0/3 [00:00<?, ?it/s]2025-11-11 12:56:32 - OllamaJudgeStep.validate_tasks - INFO - Validación para fila 0: aprobado=True, total=44
Validating validate_tasks:  33%|███▎      | 1/3 [00:20<00:41, 20.59s/it]2025-11-11 12:56:53 - OllamaJudgeStep.validate_tasks - INFO - Validación para fila 1: aprobado=True, total=44
Validating validate_tasks:  67%|██████▋   | 2/3 [00:42<00:21, 21.08s/it]2025-1


💾 Guardando resultados...
   ✓ CSV guardado: salony_tasks_with_validation.csv
   ✓ 3 historias procesadas


In [11]:
# Show validation statistics when the judge step produced its columns.
# The len() guard avoids a ZeroDivisionError when no rows were processed.
# NOTE(review): in the recorded run every row scored total=44 against a
# threshold of 35, yet one row has validacion_aprobado=False — confirm how
# OllamaJudgeStep derives the approval flag.
if 'validacion_aprobado' in result_df.columns and len(result_df) > 0:
    total = len(result_df)
    aprobadas = result_df['validacion_aprobado'].sum()
    rechazadas = total - aprobadas
    print("\n📊 Estadísticas de validación:")
    print(f"   ✅ Aprobadas: {aprobadas}/{total} ({aprobadas/total*100:.1f}%)")
    print(f"   ❌ Rechazadas: {rechazadas}/{total} ({rechazadas/total*100:.1f}%)")

    if 'validacion_total' in result_df.columns:
        avg_score = result_df['validacion_total'].mean()
        print(f"   📈 Puntuación promedio: {avg_score:.1f}/50")


📊 Estadísticas de validación:
   ✅ Aprobadas: 2/3 (66.7%)
   ❌ Rechazadas: 1/3 (33.3%)
   📈 Puntuación promedio: 44.0/50


In [13]:
# Display the pipeline result; as the cell's last expression it is rendered by the notebook.
result_df

Unnamed: 0,input,generator_model_name,tasks,judge_model_name,validacion_coherencia,validacion_completitud,validacion_viabilidad,validacion_formato,validacion_granularidad,validacion_total,validacion_aprobado,validacion_problemas,validacion_recomendaciones
0,"As a user, I want to be able to check transact...",llama3.1:8b,1. summary: Retrieve Transaction History Data\...,llama3.1:8b,9,8,9,10,8,44,True,['Tarea 4: Enhance User Interface for Better N...,['Revisar la tarea 4 para asegurarse de que no...
1,"As a researcher, I want to have the ability to...",llama3.1:8b,1. summary: Research and Document Required Gre...,llama3.1:8b,9,8,9,10,8,44,True,['Posible falta de documentación sobre los sím...,['Incluir un paso para probar y validar la com...
2,"As a DigitalRecords Archivist, I want to have ...",llama3.1:8b,1. summary: Extract Embargo Release Date from ...,llama3.1:8b,9,8,9,10,8,44,False,['Falta un paso crítico de validación y pruebas'],['Añadir un paso para la validación y las prue...
