# Step Tracker Helper
Defines the `StepTracker` class and the query helpers `get_step_history` and `get_current_progress` for tracking step execution.
Run this with: `%run /home/jovyan/work/system/helpers/03_StepTracker.ipynb`

In [None]:
# Load dependencies: run the database helper notebook only when `execute_query`
# is not already present in this session's globals, so re-running this helper
# does not re-run the database notebook.
# NOTE(review): the other names used below (execute_scalar, execute_command,
# SYSTEM_USER, json, pd, Dict) are presumably provided by that notebook too — confirm.
if 'execute_query' not in globals():
    %run /home/jovyan/work/system/helpers/01_Database.ipynb

In [None]:
class StepTracker:
    """Track execution of one step and manage its run records.

    On construction the tracker resolves the step row from ``cycle_name`` /
    ``stage_num`` / ``step_num``, writes the ``idempotent`` flag to that row,
    and inspects the most recent run to decide whether a new run may start.
    Problems are reported with ``print`` instead of being raised, so callers
    should inspect ``can_execute`` and ``run_id`` after construction.

    Attributes:
        cycle_name: Name of the cycle the step belongs to.
        stage_num: Stage number within the cycle.
        step_num: Step number within the stage.
        idempotent: Whether the step may be re-run after a completed run.
        step_id: ``irp_step.id`` of the resolved step, or ``None`` when the
            cycle, stage or step could not be found.
        run_id: ``irp_step_run.id`` of the run started by this tracker, or
            ``None`` when no run has been started.
        can_execute: ``True`` when the last-run check allowed a new run.
    """

    def __init__(self, cycle_name: str, stage_num: int, step_num: int,
                 idempotent: bool = False, auto_start: bool = True):
        """Resolve the step and optionally start a run right away.

        Args:
            cycle_name: Name of an active cycle.
            stage_num: Stage number within the cycle.
            step_num: Step number within the stage.
            idempotent: Allow a new run even if the last run completed.
            auto_start: Call ``start_run()`` immediately when execution is
                allowed.
        """
        self.cycle_name = cycle_name
        self.stage_num = stage_num
        self.step_num = step_num
        self.idempotent = idempotent
        self.step_id = None
        self.run_id = None
        self.can_execute = False

        # Resolve the step and work out whether it may run.
        self._initialize()

        # Auto-start if requested and allowed
        if auto_start and self.can_execute:
            self.start_run()

    def _initialize(self):
        """Resolve cycle -> stage -> step and evaluate executability.

        Stops early (leaving ``can_execute`` False) and prints a message as
        soon as one level of the lookup fails.
        """
        # Only cycles with status 'active' are considered.
        cycle_id = execute_scalar(
            "SELECT id FROM irp_cycle WHERE cycle_name = ? AND status = 'active'",
            (self.cycle_name,)
        )

        if not cycle_id:
            print(f"❌ Active cycle '{self.cycle_name}' not found")
            return

        # Get stage ID
        stage_id = execute_scalar(
            "SELECT id FROM irp_stage WHERE cycle_id = ? AND stage_num = ?",
            (cycle_id, self.stage_num)
        )

        if not stage_id:
            print(f"❌ Stage {self.stage_num} not found in cycle '{self.cycle_name}'")
            return

        # Look up the step; it is never created here.
        self.step_id = execute_scalar(
            "SELECT id FROM irp_step WHERE stage_id = ? AND step_num = ?",
            (stage_id, self.step_num)
        )

        if not self.step_id:
            print(f"❌ Step {self.step_num} not found in stage {self.stage_num}")
            return

        # Persist the caller's idempotent flag on the step row. The UPDATE is
        # issued on every initialization, whether or not the value changed.
        execute_command(
            "UPDATE irp_step SET is_idempotent = ? WHERE id = ?",
            (1 if self.idempotent else 0, self.step_id)
        )

        # Check if step can be executed
        self._check_can_execute()

    def _check_can_execute(self):
        """Set ``can_execute`` from the status of the most recent run.

        Execution is allowed when there is no previous run, or when the last
        run is neither 'running' nor a 'completed' run of a non-idempotent
        step.
        """
        last_run = execute_query(
            """SELECT TOP 1 * FROM irp_step_run 
               WHERE step_id = ? 
               ORDER BY started_ts DESC""",
            (self.step_id,)
        )

        if last_run.empty:
            self.can_execute = True
            return

        latest = last_run.iloc[0]
        last_status = latest['status']

        # Check based on status and idempotency
        if last_status == 'running':
            print("⚠️  Step is currently running")
            self.can_execute = False
        elif last_status == 'completed' and not self.idempotent:
            print("ℹ️  Step already completed (non-idempotent)")
            print(f"    Last run: {latest['completed_ts']}")
            self.can_execute = False
        else:
            self.can_execute = True

    def _next_run_number(self):
        """Return the next run number for this step (1 when it has no runs)."""
        return execute_scalar(
            "SELECT ISNULL(MAX(run_number), 0) + 1 FROM irp_step_run WHERE step_id = ?",
            (self.step_id,)
        )

    def start_run(self) -> bool:
        """Start a new run of this step.

        Inserts a row with status 'running' and stores its id in ``run_id``.

        Returns:
            ``True`` when a run was started, ``False`` when ``can_execute``
            is not set.
        """
        if not self.can_execute:
            print("❌ Cannot execute step")
            return False

        # NOTE(review): the run number is read and then inserted in separate
        # statements, so concurrent starters could pick the same number.
        run_number = self._next_run_number()

        # Insert new run
        query = """INSERT INTO irp_step_run 
                  (step_id, run_number, status, started_by) 
                  VALUES (?, ?, 'running', ?)"""
        execute_command(query, (self.step_id, run_number, SYSTEM_USER))

        # Read back the id of the row just inserted.
        self.run_id = execute_scalar(
            "SELECT id FROM irp_step_run WHERE step_id = ? AND run_number = ?",
            (self.step_id, run_number)
        )

        print(f"▶️  Started Step {self.stage_num}.{self.step_num} - Run #{run_number}")
        return True

    def complete_run(self, output_data: Dict = None):
        """Mark the current run as completed.

        Args:
            output_data: Optional dictionary stored as JSON on the run row.
                A falsy value (``None`` or an empty dict) is passed to the
                database as ``None``.
        """
        if not self.run_id:
            print("❌ No active run to complete")
            return

        output_json = json.dumps(output_data) if output_data else None

        query = """UPDATE irp_step_run 
                  SET status = 'completed', 
                      completed_ts = GETUTCDATE(),
                      output_data = ?
                  WHERE id = ?"""
        execute_command(query, (output_json, self.run_id))

        print(f"✅ Step {self.stage_num}.{self.step_num} completed")

    def fail_run(self, error_message: str):
        """Mark the current run as failed and record ``error_message``.

        Args:
            error_message: Text stored in the run row's ``error_message``
                column and echoed in the printed message.
        """
        if not self.run_id:
            print("❌ No active run to fail")
            return

        query = """UPDATE irp_step_run 
                  SET status = 'failed', 
                      completed_ts = GETUTCDATE(),
                      error_message = ?
                  WHERE id = ?"""
        execute_command(query, (error_message, self.run_id))

        print(f"❌ Step {self.stage_num}.{self.step_num} failed: {error_message}")

    def skip_run(self, reason: str = None):
        """Record a 'skipped' run for this step.

        Inserts a new run row with status 'skipped'; it does not modify a
        run previously started through ``start_run()``.

        Args:
            reason: Optional explanation, stored as ``{"reason": ...}`` JSON
                in ``output_data``. A falsy reason stores ``None``.
        """
        # Without a resolved step there is nothing to attach the run to;
        # bail out instead of inserting a row with no step id.
        if not self.step_id:
            print("❌ No step to skip")
            return

        run_number = self._next_run_number()

        query = """INSERT INTO irp_step_run 
                  (step_id, run_number, status, started_by, completed_ts, output_data) 
                  VALUES (?, ?, 'skipped', ?, GETUTCDATE(), ?)"""

        output_data = json.dumps({'reason': reason}) if reason else None
        execute_command(query, (self.step_id, run_number, SYSTEM_USER, output_data))

        print(f"⏭️  Step {self.stage_num}.{self.step_num} skipped")
        if reason:
            print(f"    Reason: {reason}")

def get_step_history(cycle_name: str, stage_num: int = None, step_num: int = None) -> pd.DataFrame:
    """Return the run history of a cycle's steps, newest run first.

    Each row describes one run (stage, step, run number, status, timestamps,
    who started it) plus ``duration_seconds``, which is NULL while the run
    has no ``completed_ts``. The cycle is matched by name only.

    Args:
        cycle_name: Cycle whose runs are listed.
        stage_num: When given, restrict the result to this stage number.
        step_num: When given, restrict the result to this step number.

    Returns:
        The result of ``execute_query`` for the assembled statement.
    """
    sql_parts = ["""
    SELECT 
        sg.stage_num,
        sg.stage_name,
        st.step_num,
        st.step_name,
        sr.run_number,
        sr.status,
        sr.started_ts,
        sr.completed_ts,
        sr.started_by,
        CASE 
            WHEN sr.completed_ts IS NOT NULL 
            THEN DATEDIFF(second, sr.started_ts, sr.completed_ts)
            ELSE NULL
        END as duration_seconds
    FROM irp_step_run sr
    INNER JOIN irp_step st ON sr.step_id = st.id
    INNER JOIN irp_stage sg ON st.stage_id = sg.id
    INNER JOIN irp_cycle c ON sg.cycle_id = c.id
    WHERE c.cycle_name = ?
    """]
    bind_values = [cycle_name]

    # Optional filters, in the order their placeholders must be bound.
    optional_filters = (
        (" AND sg.stage_num = ?", stage_num),
        (" AND st.step_num = ?", step_num),
    )
    for clause, value in optional_filters:
        if value is None:
            continue
        sql_parts.append(clause)
        bind_values.append(value)

    sql_parts.append(" ORDER BY sr.started_ts DESC")

    return execute_query("".join(sql_parts), tuple(bind_values))

def get_current_progress(cycle_name: str) -> pd.DataFrame:
    """Return the latest run state of every step in a cycle.

    One row is produced per step (ordered by stage number, then step number)
    with the status and timestamps of the run whose ``started_ts`` is the
    step's maximum. Steps without any run are still listed, because the run
    tables are LEFT JOINed, and carry NULL run columns.

    Args:
        cycle_name: Name of the cycle to report on.

    Returns:
        The result of ``execute_query`` for the progress statement.
    """
    bind_values = (cycle_name,)
    progress_sql = """
    WITH LastRun AS (
        SELECT 
            step_id,
            MAX(started_ts) as last_run_ts
        FROM irp_step_run
        GROUP BY step_id
    )
    SELECT 
        sg.stage_num,
        sg.stage_name,
        st.step_num,
        st.step_name,
        st.is_idempotent,
        sr.status as last_status,
        sr.started_ts as last_run,
        sr.completed_ts as completed_at
    FROM irp_step st
    INNER JOIN irp_stage sg ON st.stage_id = sg.id
    INNER JOIN irp_cycle c ON sg.cycle_id = c.id
    LEFT JOIN LastRun lr ON st.id = lr.step_id
    LEFT JOIN irp_step_run sr ON st.id = sr.step_id AND sr.started_ts = lr.last_run_ts
    WHERE c.cycle_name = ?
    ORDER BY sg.stage_num, st.step_num
    """

    return execute_query(progress_sql, bind_values)

# Printed once the definitions above have executed, so a `%run` of this
# notebook shows that the helper finished loading.
print("✅ Step Tracker loaded")