# End-to-End Pipeline Test

Tests the complete flow:
1. Generate test transcripts (emulate scraping)
2. Run preprocessing (summarization)
3. Run processing (categorization)
4. Verify results

**Note:** Each run creates NEW, uniquely named (timestamped) test files.
To clean up test data, run `cleanup_test_data()` (imported from `tests.mock_scrape_flow` in Step 1).


In [None]:
import sys
from pathlib import Path

# Put the parent of the current working directory at the front of the module
# search path, so the `tests`, `pipeline` and `flows` imports in later cells
# are looked up there first (presumably the project root -- confirm).
_parent_dir = Path.cwd().parent
sys.path.insert(0, str(_parent_dir))


## Step 1: Generate Test Transcripts


In [None]:
# Step 1 banner: rule, title, rule -- one per line.
print("=" * 60, "STEP 1: Generate Test Transcripts", "=" * 60, sep="\n")

from tests.mock_scrape_flow import cleanup_test_data, mock_scrape_flow

# Uncomment the call below to remove test data left over from earlier runs:
# cleanup_test_data()

# Ask the mock scrape flow for three test transcripts. The return value is
# kept in `result` for inspection; nothing below reads it.
result = mock_scrape_flow(num_items=3)
print("✓ Generated test transcripts")


## Step 2: Check Pipeline State


In [None]:
# Step 2 banner, preceded by a blank line.
print("", "=" * 60, "STEP 2: Check Pipeline State", "=" * 60, sep="\n")

from pipeline.pipeline_state import PipelineStateManager

# `manager` is reused by the later state-checking cells.
manager = PipelineStateManager()

# Ask the state manager for the tasks whose next stage is "summarize".
summarize_items = manager.get_next_stage_tasks("summarize")
print(f"Items ready for summarization: {len(summarize_items)}")

# Preview at most the first three: shortened id plus the raw file path.
for task in summarize_items[:3]:
    short_id = task.id[:8]
    print(f"  - {short_id}... | {task.raw_file_path}")


## Step 3: Run Preprocessing (Summarization)


In [None]:
# Step 3 banner, preceded by a blank line.
print("", "=" * 60, "STEP 3: Run Preprocessing (Summarization)", "=" * 60, sep="\n")

from flows.preprocessing_flow import preprocessing_flow

# Run the preprocessing flow with its default arguments; the completion
# message is only reached if the call returns without raising.
preprocessing_flow()
print("✓ Preprocessing complete")


## Step 4: Check Items Ready for Categorization


In [None]:
# Step 4 banner, preceded by a blank line.
print("", "=" * 60, "STEP 4: Check Items Ready for Categorization", "=" * 60, sep="\n")

# Uses the `manager` created in the Step 2 cell to list the tasks whose next
# stage is "categorize".
categorize_items = manager.get_next_stage_tasks("categorize")
print(f"Items ready for categorization: {len(categorize_items)}")

# Preview at most the first three: shortened id plus last completed stage.
for task in categorize_items[:3]:
    short_id = task.id[:8]
    print(f"  - {short_id}... | Stage: {task.latest_completed_stage}")


## Step 5: Run Processing (Categorization)


In [None]:
# Step 5 banner, preceded by a blank line.
print("", "=" * 60, "STEP 5: Run Processing (Categorization)", "=" * 60, sep="\n")

from flows.processing_flow import processing_flow

# Run the processing flow with its default arguments; the completion message
# is only reached if the call returns without raising.
processing_flow()
print("✓ Processing complete")


## Step 6: Verify Pipeline Completion


In [None]:
# Step 6 banner, preceded by a blank line.
print("", "=" * 60, "STEP 6: Verify Pipeline Completion", "=" * 60, sep="\n")

# Read every stored state through the `manager` from the Step 2 cell and keep
# those with no `next_stage` left (key missing or set to None).
# NOTE(review): this looks at all stored states, so the count presumably
# includes items from earlier runs unless test data was cleaned up -- confirm.
all_states = manager._read_all_states()
completed = [
    entry
    for entry in all_states
    if entry.get('next_stage') is None
]
print(f"✅ Completed items: {len(completed)}")

# Preview at most the first three finished items.
for state in completed[:3]:
    short_id = state['id'][:8]
    print(f"  - {short_id}... | {state['latest_completed_stage']} → DONE")


## Summary


In [None]:
# Closing banner, preceded by a blank line.
print("", "=" * 60, "END-TO-END PIPELINE TEST COMPLETE", "=" * 60, sep="\n")

# `completed` is the list built in the Step 6 cell.
print(f"Total items processed: {len(completed)}")
print("Pipeline stages: RAW → SUMMARIZE → CATEGORIZE → COMPLETE")
