# RubriCheck Complete Pipeline
============================
Integrates all three modules:
1. Essay Preprocessor (essay_preprocessor.ipynb)
2. Rubric Parser (rubric_parser_prompt.ipynb) 
3. Grading Engine (grading_engine.ipynb)

## Usage:
- **Command Line**: `python rubriCheck_pipeline.py --rubric path/to/rubric.pdf --essay path/to/essay.txt`
- **Programmatic**: Use the `RubriCheckPipeline` class in your code


In [17]:
import os
import sys
import json
import argparse
from pathlib import Path
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import asdict


# Make modules located in the current working directory importable (works in
# Jupyter, where __file__ is unavailable): the directory is appended to
# sys.path only when it is not already listed.
import os
current_dir = os.getcwd()
if sys.path.count(current_dir) == 0:
    sys.path.append(current_dir)


In [18]:
# Pull in the three RubriCheck modules. If any of them cannot be imported,
# report the error, list the .py files in the working directory as a hint,
# and stop with exit status 1.
try:
    # essay_preprocessor.py
    from essay_preprocessor import EssayPreprocessor, PreprocessOptions, ProcessedEssay

    # rubric_parser_prompt.py
    from rubric_parser_prompt import parse_rubric_file, demo_parse_rubric

    # grading_engine.py
    from grading_engine import grade_essay, GradeSummary

except ImportError as import_err:
    print(f"❌ Import error: {import_err}")
    print("Make sure all three Python modules are in the same directory")
    print("Available files:")
    import os
    python_files = (entry for entry in os.listdir('.') if entry.endswith('.py'))
    for entry in python_files:
        print(f"  - {entry}")
    sys.exit(1)

else:
    # Reached only when every import above succeeded.
    print("✅ All modules imported successfully!")


✅ All modules imported successfully!


## RubriCheckPipeline Class
The main pipeline class that integrates all three modules.


In [None]:
class RubriCheckPipeline:
    """
    Complete pipeline that integrates all three RubriCheck modules.

    The stages are exposed individually (``process_essay``, ``parse_rubric``,
    ``grade_essay``) and chained by ``run_complete_pipeline``. Constructing an
    instance loads the API key from ``api_key_file`` into the
    ``OPENAI_API_KEY`` environment variable.

    Annotations that name types from the sibling modules are written as
    strings so they are not evaluated when the class body is executed.
    """

    # Levels used when the parsed rubric does not supply any of its own.
    _DEFAULT_LEVELS = ('Excellent', 'Good', 'Fair', 'Poor')

    def __init__(self, api_key_file: str = r"C:\Users\Leo\AI projects\_api.txt"):
        """Initialize the pipeline with API key configuration.

        Args:
            api_key_file: Path of a text file containing a line of the form
                ``rubriCheck: <key>``.

        Note:
            The key is loaded immediately; if the file is missing or holds no
            usable key, the process exits with status 1 (``SystemExit``).
        """
        self.api_key_file = api_key_file
        self._setup_api_key()

    def _setup_api_key(self):
        """Load the ``rubriCheck:`` key from ``self.api_key_file``.

        The first line that starts with ``rubriCheck:`` and carries a
        non-empty value is exported as ``OPENAI_API_KEY``. Any failure is
        printed and terminates the process with ``sys.exit(1)``.
        """
        try:
            # 'utf-8-sig' also reads plain UTF-8 but strips a leading BOM,
            # which would otherwise hide a key placed on the first line.
            with open(self.api_key_file, 'r', encoding='utf-8-sig') as f:
                for line in f:
                    entry = line.strip()
                    if not entry.startswith('rubriCheck:'):
                        continue
                    api_key = entry.split(':', 1)[1].strip()
                    if not api_key:
                        # A label without a value is not a usable key.
                        continue
                    os.environ["OPENAI_API_KEY"] = api_key
                    print("✅ API key loaded successfully")
                    return
            raise ValueError("rubriCheck API key not found in file")
        except FileNotFoundError:
            print(f"❌ API file not found at {self.api_key_file}")
            sys.exit(1)
        except Exception as e:
            print(f"❌ Error loading API key: {e}")
            sys.exit(1)

    def process_essay(self, essay_path: str,
                      options: Optional["PreprocessOptions"] = None) -> "ProcessedEssay":
        """Step 1: Process essay using essay_preprocessor module.

        Args:
            essay_path: Path of the essay text file.
            options: Preprocessing options; a default ``PreprocessOptions()``
                is used when omitted.

        Returns:
            The object returned by ``EssayPreprocessor.run``.

        Raises:
            FileNotFoundError: If ``essay_path`` is not an existing file.
        """
        print(f"📝 Processing essay: {essay_path}")

        # isfile (not exists) so that a directory is rejected here with a
        # clear message instead of failing inside open().
        if not os.path.isfile(essay_path):
            raise FileNotFoundError(f"Essay file not found: {essay_path}")

        # Undecodable bytes are dropped rather than aborting the run.
        with open(essay_path, 'r', encoding='utf-8', errors='ignore') as f:
            essay_text = f.read()

        preprocessor = EssayPreprocessor()
        options = options or PreprocessOptions()
        processed_essay = preprocessor.run(essay_text, options)

        print(f"✅ Essay processed: {len(processed_essay.paragraphs)} paragraphs, {processed_essay.metadata.word_count} words")
        return processed_essay

    def parse_rubric(self, rubric_path: str) -> Dict[str, Any]:
        """Step 2: Parse rubric using rubric_parser_prompt module.

        Args:
            rubric_path: Path of the rubric document.

        Returns:
            The rubric dictionary returned by ``demo_parse_rubric``.

        Raises:
            FileNotFoundError: If ``rubric_path`` is not an existing file.
            ValueError: If the parser returns an empty/falsy result.
        """
        print(f"📋 Parsing rubric: {rubric_path}")

        if not os.path.isfile(rubric_path):
            raise FileNotFoundError(f"Rubric file not found: {rubric_path}")

        rubric = demo_parse_rubric(rubric_path)

        if not rubric:
            raise ValueError("Failed to parse rubric")

        print(f"✅ Rubric parsed: {len(rubric.get('criteria', []))} criteria")
        return rubric

    def grade_essay(self, rubric: Dict[str, Any], processed_essay: "ProcessedEssay",
                    max_span_chars: int = 240) -> "GradeSummary":
        """Step 3: Grade essay using grading_engine module.

        Args:
            rubric: Rubric in parser format (converted internally).
            processed_essay: Result of ``process_essay``.
            max_span_chars: Forwarded unchanged to the grading engine.

        Returns:
            The summary object returned by the grading engine.
        """
        print("🤖 Grading essay with AI...")

        # Paragraphs that are empty or whitespace-only are not sent to the grader.
        essay_paragraphs = [p.text for p in processed_essay.paragraphs if p.text.strip()]
        converted_rubric = self._convert_rubric_format(rubric)
        # Inside a method body the bare name resolves to the module-level
        # grade_essay function imported from grading_engine, not this method.
        summary = grade_essay(converted_rubric, essay_paragraphs, max_span_chars)

        print(f"✅ Grading complete: {summary.numeric_score} ({summary.letter})")
        return summary

    def _convert_rubric_format(self, rubric: Dict[str, Any]) -> Dict[str, Any]:
        """Convert rubric from parser format to grader format.

        Each criterion receives a positional id (``criterion_<index>``), its
        descriptors, the rubric's levels and a weight normalised so that all
        weights sum to 1. A missing or ``None`` weight counts as 1.0; when
        the total weight is not positive, every criterion is weighted equally.
        If the rubric provides no levels, the default four-level scale is
        used for both ``valid_levels`` and ``level_scale_note``.
        """
        converted = {
            "criteria": [],
            "grading": {
                "numeric": True,
                "letter_bands": [
                    {"min": 90, "max": 100, "letter": "A+"},
                    {"min": 85, "max": 89.99, "letter": "A"},
                    {"min": 80, "max": 84.99, "letter": "A-"},
                    {"min": 0, "max": 79.99, "letter": "B or below"}
                ],
                "categorical_points_map": {"Excellent": 4, "Good": 3, "Fair": 2, "Poor": 1}
            }
        }

        criteria = rubric.get('criteria') or []
        scale = rubric.get('scale') or {}
        levels = list(scale.get('levels') or self._DEFAULT_LEVELS)
        scale_note = " → ".join(levels)

        # Resolve every weight once, then normalise against a single total
        # instead of re-summing the whole list for each criterion.
        raw_weights = []
        for criterion in criteria:
            weight = criterion.get('weight', 1.0)
            raw_weights.append(1.0 if weight is None else float(weight))
        total_weight = sum(raw_weights)
        if total_weight <= 0 and raw_weights:
            # Degenerate rubric (e.g. all weights 0): weight criteria equally.
            raw_weights = [1.0] * len(raw_weights)
            total_weight = float(len(raw_weights))

        for i, (criterion, weight) in enumerate(zip(criteria, raw_weights)):
            converted_criterion = {
                "id": f"criterion_{i}",
                "name": criterion.get('name', f'Criterion {i+1}'),
                "descriptors": criterion.get('descriptor_by_level') or {},
                # Fresh list per criterion so entries never alias each other.
                "valid_levels": list(levels),
                "weight": weight / total_weight,
                "level_scale_note": scale_note
            }
            converted["criteria"].append(converted_criterion)

        return converted

    def run_complete_pipeline(self, rubric_path: str, essay_path: str,
                              output_path: Optional[str] = None,
                              essay_options: Optional["PreprocessOptions"] = None) -> Dict[str, Any]:
        """Run the complete pipeline: essay preprocessing → rubric parsing → AI grading.

        Args:
            rubric_path: Path of the rubric document.
            essay_path: Path of the essay text file.
            output_path: When given, the results are also written there as JSON.
            essay_options: Optional preprocessing options for step 1.

        Returns:
            A dictionary with the keys ``pipeline_info``, ``essay_metadata``,
            ``rubric_info``, ``grading_results`` and ``warnings``.

        Raises:
            Exception: Any error from a stage is printed and re-raised.
        """
        # Local import keeps this block self-contained.
        from datetime import datetime, timezone

        print("🚀 Starting RubriCheck Complete Pipeline")
        print("=" * 50)

        try:
            # Step 1: Process essay
            processed_essay = self.process_essay(essay_path, essay_options)

            # Step 2: Parse rubric
            rubric = self.parse_rubric(rubric_path)

            # Step 3: Grade essay
            grade_summary = self.grade_essay(rubric, processed_essay)

            source_parse = rubric.get('source_parse') or {}

            # Compile results
            results = {
                "pipeline_info": {
                    "rubric_file": rubric_path,
                    "essay_file": essay_path,
                    # Actual run time (UTC, ISO 8601); this field previously
                    # held the working directory, now reported separately.
                    "timestamp": datetime.now(timezone.utc).isoformat(),
                    "working_directory": str(Path.cwd()),
                    "version": "1.0"
                },
                "essay_metadata": asdict(processed_essay.metadata),
                "rubric_info": {
                    "title": rubric.get('title'),
                    "scale_type": (rubric.get('scale') or {}).get('type'),
                    "criteria_count": len(rubric.get('criteria', [])),
                    "source_parse": source_parse
                },
                "grading_results": {
                    "per_criterion": [asdict(r) for r in grade_summary.per_criterion],
                    "numeric_score": grade_summary.numeric_score,
                    "letter_grade": grade_summary.letter,
                    "categorical_points": grade_summary.categorical_points,
                    "reliability_flags": grade_summary.notes
                },
                # list(...) tolerates None or non-list sequences on either side.
                "warnings": list(processed_essay.warnings or [])
                            + list(source_parse.get('warnings') or [])
            }

            # Save results if output path provided
            if output_path:
                with open(output_path, 'w', encoding='utf-8') as f:
                    json.dump(results, f, indent=2, ensure_ascii=False)
                print(f"💾 Results saved to: {output_path}")

            print("\n🎉 Pipeline completed successfully!")
            return results

        except Exception as e:
            print(f"❌ Pipeline failed: {e}")
            raise


### Step 1: Essay Processing
Process essays using the essay_preprocessor module.


In [None]:
# This method is now part of the RubriCheckPipeline class above


### Step 2: Rubric Parsing
Parse rubrics using the rubric_parser_prompt module.


In [None]:
# This method is now part of the RubriCheckPipeline class above


### Step 3: AI Grading
Grade essays using the grading_engine module.


In [None]:
# This method is now part of the RubriCheckPipeline class above


### Helper Methods
Utility methods for the pipeline.


In [None]:
# This method is now part of the RubriCheckPipeline class above


### Complete Pipeline
Run the complete pipeline: essay preprocessing → rubric parsing → AI grading.


In [None]:
# This method is now part of the RubriCheckPipeline class above


## Usage Examples

### Example 1: Basic Pipeline Usage


In [33]:
# Example 1: Basic Pipeline Usage
def example_basic_usage():
    """Run the whole pipeline once on sample files and print the score.

    A small sample essay is written to ``test_essay.txt`` when that file does
    not exist yet. Returns the results dictionary produced by the pipeline,
    or ``None`` when the run raised an exception (the error is printed).
    """
    print("🔬 Example: Basic Pipeline Usage")
    print("=" * 40)

    # Building the pipeline loads the API key.
    runner = RubriCheckPipeline()

    # Sample locations; point these at your own files as needed.
    rubric_path = "test_file/test_rubric.docx"
    essay_path = "test_essay.txt"

    # Seed a sample essay the first time the example is run.
    essay_missing = not os.path.exists(essay_path)
    if essay_missing:
        sample_essay = """
        This essay argues that renewable energy is essential to national security by reducing dependence on volatile fuel markets.
        
        Several reports show countries with higher renewable portfolios experience less price shock; however, grid stability challenges remain.
        
        Opponents claim costs are prohibitive; this essay demonstrates recent cost curves and policy mechanisms that offset initial investment.
        """
        with open(essay_path, 'w', encoding='utf-8') as essay_file:
            essay_file.write(sample_essay)
        print(f"📝 Created sample essay: {essay_path}")

    try:
        # All three stages, with the results also saved as JSON.
        outcome = runner.run_complete_pipeline(
            rubric_path=rubric_path,
            essay_path=essay_path,
            output_path="example_results.json"
        )

        # Report the headline score.
        print("\n📊 Results:")
        grading = outcome["grading_results"]
        numeric, letter = grading['numeric_score'], grading['letter_grade']
        print(f"Score: {numeric} ({letter})")

        return outcome

    except Exception as err:
        print(f"❌ Error: {err}")
        return None

# Run the basic example
example_basic_usage()


🔬 Example: Basic Pipeline Usage
✅ API key loaded successfully
📝 Created sample essay: test_essay.txt
❌ Error: 'RubriCheckPipeline' object has no attribute 'run_complete_pipeline'


### Example 2: Step-by-Step Execution


In [None]:
# Example 2: Step-by-Step Execution
def example_step_by_step():
    """Run the three pipeline stages one at a time, printing progress.

    Uses ``test_essay.txt`` and ``test_file/test_rubric.docx``. Any exception
    raised by a stage is caught and printed; nothing is returned.
    """
    print("🔍 Example: Step-by-Step Execution")
    print("=" * 35)

    runner = RubriCheckPipeline()

    try:
        # Stage 1: essay preprocessing
        print("Step 1: Processing essay...")
        essay = runner.process_essay("test_essay.txt")
        print(f"   - {len(essay.paragraphs)} paragraphs")
        print(f"   - {essay.metadata.word_count} words")
        print(f"   - Language: {essay.metadata.language_detected}")

        # Stage 2: rubric parsing
        print("\nStep 2: Parsing rubric...")
        parsed_rubric = runner.parse_rubric("test_file/test_rubric.docx")
        print(f"   - {len(parsed_rubric.get('criteria', []))} criteria")
        scale_type = parsed_rubric.get('scale', {}).get('type')
        print(f"   - Scale type: {scale_type}")

        # Stage 3: grading
        print("\nStep 3: Grading essay...")
        summary = runner.grade_essay(parsed_rubric, essay)
        print(f"   - Score: {summary.numeric_score}")
        print(f"   - Letter: {summary.letter}")
        print(f"   - Criteria evaluated: {len(summary.per_criterion)}")

        # Per-criterion breakdown, numbered from 1.
        print("\n📋 Detailed Results:")
        for position, result in enumerate(summary.per_criterion, start=1):
            print(f"   {position}. {result.criterion_id}: {result.level}")
            if not result.justification:
                continue
            # Only the first 80 characters of the justification are shown.
            print(f"      Justification: {result.justification[:80]}...")

    except Exception as err:
        print(f"❌ Error: {err}")

# Run the step-by-step example
# example_step_by_step()


## Summary

This notebook provides the complete RubriCheck pipeline that integrates all three modules:

1. **Essay Preprocessor** (`essay_preprocessor.ipynb`) - Processes and structures student essays
2. **Rubric Parser** (`rubric_parser_prompt.ipynb`) - Extracts and parses rubric documents  
3. **Grading Engine** (`grading_engine.ipynb`) - Grades essays using AI against rubrics

### Key Features:
- **Complete Integration**: All three modules work together seamlessly
- **Flexible Usage**: Can run complete pipeline or individual steps
- **Multiple Formats**: Supports various file formats (PDF, DOCX, TXT, images)
- **Comprehensive Results**: Detailed grading with justifications and suggestions
- **Error Handling**: Robust error handling and informative messages

### Usage:
- **Programmatic**: Use the `RubriCheckPipeline` class in your code
- **Interactive**: Run individual cells to test specific functionality
- **Examples**: Uncomment the example function calls (e.g. `example_step_by_step()`) to see the pipeline in action
