# AI Vocabulary Components Generator

This notebook:
1. Reads .md files from the vocab content directory
2. Extracts title and summary from each file
3. Uses Anthropic's API to generate React components in two shots (an educational plan first, then the TSX implementation)
4. Saves the components as .tsx files

For testing purposes, it processes only 3 random files.

In [None]:
import json
import logging
import os
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Optional

import anthropic
import yaml

# Configure logging
# Records at INFO and above are emitted, each prefixed with a timestamp and level name.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
# Module-level logger used by the classes and cells that follow.
logger = logging.getLogger(__name__)

@dataclass
class AccessibilityRequirements:
    """Simplified accessibility requirements tracking.

    NOTE(review): nothing in the visible notebook instantiates this class.
    Its fields appear to parallel the "accessibility" object requested in the
    first-shot JSON schema -- confirm before relying on that correspondence.
    """
    # Presumably element name -> text intended for screen readers; verify against callers.
    screen_reader_content: Dict[str, str]
    # Presumably element name -> ARIA label text; verify against callers.
    aria_labels: Dict[str, str]
    # Pairs of colours whose contrast is to be tracked.
    color_contrast_pairs: List[tuple[str, str]]

class Config:
    """Class-level settings shared by every stage of the generator.

    All values are plain class attributes so they can be read as
    ``Config.NAME`` without instantiating anything. Call
    :meth:`initialize` once before using the paths or the API key.
    """

    # API Configuration
    ANTHROPIC_API_KEY = "your-api-here"  # Replace with your actual API key
    MODEL = "claude-3-5-sonnet-20241022"
    MAX_TOKENS = 6000
    TEMPERATURE = 0.7

    # Value shipped in the notebook; used only to detect an unconfigured key.
    _PLACEHOLDER_API_KEY = "your-api-here"

    # Base project directory
    BASE_DIR = Path("/Users/kemi/Documents/GitHub/vocab")

    # File Paths - Using absolute paths
    INPUT_DIR = BASE_DIR / "src/content"
    OUTPUT_DIR = BASE_DIR / "src/components/articles"
    METADATA_DIR = BASE_DIR / "src/scripts/components-generator-metadata"

    # Component Requirements
    MAX_LINES = 200
    REQUIRED_HOOKS = ["useState", "useEffect"]

    # Accessibility Requirements
    MIN_CONTRAST_RATIO = 4.5

    @classmethod
    def initialize(cls):
        """Create output directories, check the input directory and set the API key.

        The ``ANTHROPIC_API_KEY`` environment variable wins when it is already
        set; otherwise the class attribute is exported into the environment.

        Returns:
            The ``Config`` class itself, so callers can write
            ``config = Config.initialize()``.

        Raises:
            FileNotFoundError: if ``INPUT_DIR`` is missing or is not a directory.
        """
        # Create directories if they don't exist
        for directory in (cls.OUTPUT_DIR, cls.METADATA_DIR):
            directory.mkdir(parents=True, exist_ok=True)

        # Verify input directory exists (a regular file at that path is not usable either)
        if not cls.INPUT_DIR.is_dir():
            raise FileNotFoundError(f"Input directory not found: {cls.INPUT_DIR}")

        # Set up API key
        if not os.getenv('ANTHROPIC_API_KEY'):
            if cls.ANTHROPIC_API_KEY == cls._PLACEHOLDER_API_KEY:
                # Surface the misconfiguration now instead of as an opaque
                # authentication error on the first API call.
                logging.getLogger(__name__).warning(
                    "ANTHROPIC_API_KEY is not set and Config.ANTHROPIC_API_KEY is still "
                    "the placeholder value; API requests will fail to authenticate"
                )
            os.environ['ANTHROPIC_API_KEY'] = cls.ANTHROPIC_API_KEY

        return cls

# Initialize configuration
# Creates the output/metadata directories, checks the input directory and makes
# sure ANTHROPIC_API_KEY is present in the environment.
config = Config.initialize()

# Initialize Anthropic client
# The key is read back from the environment that Config.initialize() populated.
client = anthropic.Client(api_key=os.getenv('ANTHROPIC_API_KEY'))

logger.info("Configuration and imports initialized successfully")

: 

In [None]:
class FirstShotHandler:
    """First shot: ask the model for an educational plan expressed as JSON."""

    # Top-level keys the returned plan must contain.
    REQUIRED_FIELDS = (
        "concept_title",
        "concept_summary",
        "learning_objectives",
        "metaphors",
        "accessibility",
    )

    def __init__(self, client: anthropic.Client):
        self.client = client

    def create_system_prompt(self) -> str:
        """Generate the system prompt for the first shot - focusing on educational design"""
        return """You are an expert in creating educational React components for teaching AI concepts to 15-18 year olds.
Your task is to plan the educational approach for explaining an AI concept through metaphors and examples.

Return your response as a JSON object with this exact structure:
{
    "concept_title": "string",
    "concept_summary": "string",
    "learning_objectives": {
        "primary": "string",
        "secondary": ["string"]
    },
    "metaphors": [{
        "title": "string",
        "description": "string",
        "visual_elements": ["string"]
    }],
    "accessibility": {
        "screen_reader_text": {"element": "description"},
        "aria_labels": {"element": "label"},
        "color_pairs": [["color1", "color2"]]
    }
}"""

    def create_user_prompt(self, title: str, summary: str) -> str:
        """Generate the user prompt for the first shot"""
        return f"""Create an educational plan for teaching the concept of {title}.

CONCEPT:
Title: {title}
Summary: {summary}

REQUIREMENTS:
1. Break down the concept into clear learning objectives
2. Create 2-3 clear metaphors that map to the concept
3. Plan visual elements and interactions
4. Consider accessibility needs
5. Keep everything age-appropriate for 15-18 year olds

Format your response according to the JSON structure specified in the system prompt."""

    @staticmethod
    def _parse_plan(text: str) -> dict:
        """Decode the model reply into a dict, tolerating text around the JSON.

        The reply is first parsed as-is. If that fails, the span from the first
        ``{`` to the last ``}`` is parsed instead, which covers replies wrapped
        in a Markdown code fence or preceded by a sentence of preamble.

        Raises:
            json.JSONDecodeError: if no parseable JSON object is found.
            ValueError: if the decoded value is not a JSON object.
        """
        try:
            plan = json.loads(text)
        except json.JSONDecodeError:
            start, end = text.find("{"), text.rfind("}")
            if start == -1 or end <= start:
                raise
            plan = json.loads(text[start:end + 1])

        if not isinstance(plan, dict):
            raise ValueError("Educational plan must be a JSON object")
        return plan

    async def generate_educational_plan(self, title: str, summary: str) -> dict:
        """Generate the educational plan using Claude.

        Args:
            title: Concept title taken from the markdown frontmatter.
            summary: Concept summary taken from the markdown frontmatter.

        Returns:
            The decoded plan, guaranteed to contain every key in
            ``REQUIRED_FIELDS``.

        Raises:
            ValueError: if the reply is empty, is not a JSON object, or lacks
                a required field (``json.JSONDecodeError`` is a subclass).
            Exception: any error raised by the API client is logged and re-raised.
        """
        try:
            # NOTE: this is the synchronous client, so the call blocks the
            # event loop for its duration even though the method is async.
            response = self.client.messages.create(
                model=Config.MODEL,
                max_tokens=Config.MAX_TOKENS,
                temperature=Config.TEMPERATURE,
                system=self.create_system_prompt(),
                messages=[{
                    "role": "user",
                    "content": self.create_user_prompt(title, summary)
                }]
            )

            if not response.content:
                raise ValueError("Model returned an empty response")

            # Parse and validate response
            plan = self._parse_plan(response.content[0].text)

            # Basic validation
            if not all(field in plan for field in self.REQUIRED_FIELDS):
                raise ValueError("Missing required fields in educational plan")

            return plan

        except Exception as e:
            logger.error(f"Error generating educational plan: {str(e)}")
            raise

# Initialize handler
# Reuses the module-level Anthropic client created in the configuration cell.
first_shot_handler = FirstShotHandler(client)
logger.info("First shot handler initialized successfully")

In [None]:
class BridgeHandler:
    """Simplified bridge between educational plan and technical implementation"""

    # Keyword -> Lucide icon name. Lookup is a case-insensitive substring test
    # and the first keyword (in this order) contained in the label wins.
    _ICON_KEYWORDS = {
        "brain": "Brain",
        "network": "Network",
        "data": "Database",
        "model": "Box",
        "input": "ArrowRight",
        "output": "ArrowLeft",
        "process": "Cpu"
    }
    # Icon used when no keyword is contained in the label.
    _DEFAULT_ICON = "Circle"

    def create_bridge(self, educational_plan: dict) -> dict:
        """Create a bridge between first and second shots.

        Args:
            educational_plan: Plan dict containing at least the ``"metaphors"``
                and ``"accessibility"`` keys.

        Returns:
            ``{"educational_plan": <input>, "technical_requirements": {...}}``
            where the technical requirements hold ``component_type``,
            ``visual_elements`` and the plan's ``accessibility`` object.

        Raises:
            Exception: any failure (e.g. ``KeyError`` for a missing top-level
                key) is logged and re-raised.
        """
        try:
            # Extract key information from educational plan
            technical_requirements = {
                "component_type": self._determine_component_type(educational_plan),
                "visual_elements": self._process_visual_elements(educational_plan["metaphors"]),
                "accessibility": educational_plan["accessibility"]
            }

            return {
                "educational_plan": educational_plan,
                "technical_requirements": technical_requirements
            }

        except Exception as e:
            logger.error(f"Error creating bridge: {str(e)}")
            raise

    @staticmethod
    def _visual_elements_of(metaphor: dict) -> list:
        """Return a metaphor's visual elements, or ``[]`` when absent or null."""
        return metaphor.get("visual_elements") or []

    def _determine_component_type(self, plan: dict) -> str:
        """Determine if component should be interactive, animated, or static.

        The decision is based only on the total number of visual elements
        across all metaphors: more than 5 -> interactive, more than 2 ->
        animated, otherwise static.
        """
        total_elements = sum(len(self._visual_elements_of(m)) for m in plan["metaphors"])

        if total_elements > 5:
            return "interactive"
        if total_elements > 2:
            return "animated"
        return "static"

    def _process_visual_elements(self, metaphors: List[dict]) -> List[dict]:
        """Flatten every metaphor's visual elements into id/label/icon records.

        The id is the label lower-cased with spaces replaced by underscores.
        Metaphors without visual elements contribute nothing.
        """
        return [
            {
                "id": element.lower().replace(" ", "_"),
                "label": element,
                "icon": self._map_to_icon(element)
            }
            for metaphor in metaphors
            for element in self._visual_elements_of(metaphor)
        ]

    def _map_to_icon(self, element: str) -> str:
        """Map element to appropriate Lucide icon name.

        Returns the icon of the first keyword contained in the lower-cased
        label, or the default icon when none matches.
        """
        lowered = element.lower()
        for keyword, icon in self._ICON_KEYWORDS.items():
            if keyword in lowered:
                return icon

        # Default icon if no match found
        return self._DEFAULT_ICON

# Initialize bridge handler
# The bridge takes no constructor arguments and makes no API calls.
bridge_handler = BridgeHandler()
logger.info("Bridge handler initialized successfully")

In [None]:
class SecondShotHandler:
    """Second shot: turn the bridge object into TSX component source."""

    def __init__(self, client: anthropic.Client):
        self.client = client

    def create_system_prompt(self) -> str:
        """Generate the system prompt for the second shot - focusing on technical implementation"""
        return """
    You are an expert React developer creating educational components. 
    Follow these technical requirements exactly:

    1. Required Component Structure:
    MUST START WITH:
    "use client";
    
    import { useState, useEffect } from "react";
    import { YourIcon } from "lucide-react";

    interface Props {}
    
    const YourComponent: React.FC<Props> = () => {
        // Component code here
    };
    
    export default YourComponent;

    2. Technical Requirements:
    - Only use useState and useEffect hooks
    - Only use Lucide icons
    - Only use Tailwind CSS for styling
    - Must use TypeScript FC type
    - Must include Props interface even if empty

    3. Styling Requirements:
    - Only use core Tailwind classes
    - No arbitrary values (e.g., h-[500px])
    - Use consistent color scheme
    - Proper contrast ratios for accessibility

    4. Accessibility:
    - Proper ARIA labels on all interactive elements
    - Keyboard navigation with onKeyDown handlers
    - Screen reader friendly content structure
    - Minimum contrast ratio 4.5:1
    - All images must have alt text

    Return only the complete TSX component code."""

    def create_user_prompt(self, bridge: dict) -> str:
        """Generate the user prompt for the second shot.

        Args:
            bridge: Dict with ``"educational_plan"`` and
                ``"technical_requirements"`` entries.
        """
        plan = bridge["educational_plan"]
        tech = bridge["technical_requirements"]

        return f"""Create a React component that teaches {plan['concept_title']}.

CONCEPT:
{plan['concept_summary']}

LEARNING OBJECTIVES:
Primary: {plan['learning_objectives']['primary']}
Secondary:
{self._format_list(plan['learning_objectives']['secondary'])}

IMPLEMENTATION:
Component Type: {tech['component_type']}

Visual Elements:
{self._format_visual_elements(tech['visual_elements'])}

Accessibility Requirements:
{self._format_accessibility(tech['accessibility'])}

Create a complete React component that implements these requirements."""

    def _format_list(self, items: List[str]) -> str:
        """Render items as a dash-prefixed bullet list, one per line."""
        return "\n".join(f"- {item}" for item in items)

    def _format_visual_elements(self, elements: List[dict]) -> str:
        """Render each visual element as ``- <label>: <icon>``."""
        return "\n".join(f"- {elem['label']}: {elem['icon']}" for elem in elements)

    def _format_accessibility(self, accessibility: dict) -> str:
        """Render the accessibility object as headed bullet sections.

        Sections whose key is absent or empty are omitted.
        """
        sections = []

        if accessibility.get("screen_reader_text"):
            sections.append("Screen Reader Content:")
            sections.extend(f"- {k}: {v}" for k, v in accessibility["screen_reader_text"].items())

        if accessibility.get("aria_labels"):
            sections.append("\nARIA Labels:")
            sections.extend(f"- {k}: {v}" for k, v in accessibility["aria_labels"].items())

        if accessibility.get("color_pairs"):
            sections.append("\nColor Contrast Pairs:")
            # Join instead of unpacking so a pair that does not have exactly
            # two entries is still rendered rather than raising ValueError.
            sections.extend(
                "- " + " → ".join(str(color) for color in pair)
                for pair in accessibility["color_pairs"]
            )

        return "\n".join(sections)

    @staticmethod
    def _strip_code_fences(text: str) -> str:
        """Return the contents of the first Markdown code fence in *text*.

        Text without any fence line is returned unchanged. When only an
        opening fence is present (for example a truncated reply), everything
        after it is returned.
        """
        lines = text.splitlines()
        fence_rows = [i for i, line in enumerate(lines) if line.lstrip().startswith("```")]
        if not fence_rows:
            return text

        start = fence_rows[0] + 1
        stop = fence_rows[1] if len(fence_rows) > 1 else len(lines)
        return "\n".join(lines[start:stop]).strip("\n")

    async def generate_component(self, bridge: dict) -> str:
        """Generate the React component using Claude.

        Returns:
            The component source with any surrounding Markdown code fence
            removed, so it can be written directly to a ``.tsx`` file.

        Raises:
            ValueError: if the model reply has no content.
            Exception: any error raised by the API client is logged and re-raised.
        """
        try:
            # NOTE: synchronous client call; it blocks the event loop while running.
            response = self.client.messages.create(
                model=Config.MODEL,
                max_tokens=Config.MAX_TOKENS,
                temperature=Config.TEMPERATURE,
                system=self.create_system_prompt(),
                messages=[{
                    "role": "user",
                    "content": self.create_user_prompt(bridge)
                }]
            )

            if not response.content:
                raise ValueError("Model returned an empty response")

            return self._strip_code_fences(response.content[0].text)

        except Exception as e:
            logger.error(f"Error generating component: {str(e)}")
            raise

# Initialize handler
# Shares the same Anthropic client instance as the first-shot handler.
second_shot_handler = SecondShotHandler(client)
logger.info("Second shot handler initialized successfully")

In [None]:
import re
from dataclasses import dataclass
from typing import List, Dict

@dataclass
class ValidationMessage:
    """One finding produced while validating generated component code."""
    level: str  # 'error' or 'warning'
    message: str
    suggestion: str

class ComponentValidator:
    """Regex-based checks applied to generated TSX source.

    The checks are textual heuristics, not a parser: they look for patterns
    that must be present, patterns that must be absent, and a few
    accessibility conventions.
    """

    def __init__(self):
        # Required patterns that should be present
        self.required_patterns = {
            'use_client': (r'\"use client\";', 'Missing "use client" directive'),
            # Both hooks must appear inside one `import { ... } from "react"`
            # clause; the lookaheads make the check independent of their order.
            'react_imports': (
                r'import\b[^;{]*\{(?=[^}]*\buseState\b)(?=[^}]*\buseEffect\b)[^}]*\}'
                r'\s*from\s*["\']react["\']',
                'Missing React hooks imports'
            ),
            'typescript': (r'React\.FC<.*>', 'Missing TypeScript FC type'),
            'export': (r'export default', 'Missing default export'),
            'accessibility': (r'aria-label|role=', 'Missing accessibility attributes')
        }

        # Patterns that shouldn't be present
        self.forbidden_patterns = {
            # The bracketed value may sit anywhere in the class list, not only last.
            'arbitrary_tailwind': (r'className="[^"]*\[[^\]]*\][^"]*"', 'Using arbitrary Tailwind values'),
            'inline_styles': (r'style=\{\{', 'Using inline styles'),
            'settimeout': (r'setTimeout', 'Using setTimeout directly'),
            # \b also catches `: any)`, `: any;`, `: any[]` and `: any =`.
            'any_type': (r':\s*any\b', 'Using any type')
        }

    def validate_component(self, code: str) -> List[ValidationMessage]:
        """Validate generated component code.

        Returns:
            Error messages for every missing required pattern and every
            forbidden pattern found, then accessibility warnings, then a
            warning when the code is longer than ``Config.MAX_LINES`` lines.
        """
        messages = []

        # Check required patterns
        for name, (pattern, message) in self.required_patterns.items():
            if not re.search(pattern, code, re.MULTILINE):
                messages.append(ValidationMessage(
                    level='error',
                    message=message,
                    suggestion=self._get_suggestion(name)
                ))

        # Check forbidden patterns
        for name, (pattern, message) in self.forbidden_patterns.items():
            if re.search(pattern, code, re.MULTILINE):
                messages.append(ValidationMessage(
                    level='error',
                    message=message,
                    suggestion=self._get_suggestion(name)
                ))

        # Validate accessibility
        messages.extend(self._validate_accessibility(code))

        # Validate code length
        if len(code.splitlines()) > Config.MAX_LINES:
            messages.append(ValidationMessage(
                level='warning',
                message=f'Component exceeds {Config.MAX_LINES} lines',
                suggestion='Consider breaking into smaller components'
            ))

        return messages

    @staticmethod
    def _iter_opening_tags(code: str, tag: str):
        """Yield the text of every ``<tag ...>`` opening tag found in *code*.

        A plain ``[^>]*`` scan ends at the ``>`` of an arrow function such as
        ``onClick={() => go()}`` and so misses attributes written after it.
        This scanner ignores ``>`` while inside ``{...}`` or a quoted string.
        A tag that is never closed yields nothing.
        """
        for opening in re.finditer(rf'<{re.escape(tag)}\b', code):
            depth = 0
            quote = None
            pos = opening.end()
            while pos < len(code):
                ch = code[pos]
                if quote is not None:
                    if ch == '\\':
                        pos += 1  # skip the escaped character
                    elif ch == quote:
                        quote = None
                elif ch in '"\'`':
                    quote = ch
                elif ch == '{':
                    depth += 1
                elif ch == '}':
                    depth -= 1
                elif ch == '>' and depth <= 0:
                    yield code[opening.start():pos + 1]
                    break
                pos += 1

    def _validate_accessibility(self, code: str) -> List[ValidationMessage]:
        """Validate accessibility requirements.

        Emits one warning per ``<button>`` without ``aria-label``, one per
        ``<div>`` that has ``onClick`` but no ``role=``, and one when the code
        contains neither ``onKeyDown`` nor ``onKeyPress``.
        """
        messages = []

        # Check for ARIA labels on interactive elements
        for button_tag in self._iter_opening_tags(code, 'button'):
            if 'aria-label' not in button_tag:
                messages.append(ValidationMessage(
                    level='warning',
                    message='Button missing aria-label',
                    suggestion='Add aria-label to button element'
                ))

        # Check for role attributes
        for div_tag in self._iter_opening_tags(code, 'div'):
            if 'onClick' in div_tag and 'role=' not in div_tag:
                messages.append(ValidationMessage(
                    level='warning',
                    message='Clickable div missing role attribute',
                    suggestion='Add appropriate role attribute to clickable div'
                ))

        # Check for keyboard event handlers
        if 'onKeyDown' not in code and 'onKeyPress' not in code:
            messages.append(ValidationMessage(
                level='warning',
                message='No keyboard event handlers found',
                suggestion='Add keyboard event handlers for accessibility'
            ))

        return messages

    def _get_suggestion(self, pattern_name: str) -> str:
        """Get suggestion for fixing validation issue.

        Unknown pattern names fall back to a generic suggestion.
        """
        suggestions = {
            'use_client': 'Add "use client"; as the first line',
            'react_imports': 'Add: import { useState, useEffect } from "react";',
            'typescript': 'Add: const Component: React.FC<Props> = () => {...}',
            'export': 'Add: export default Component;',
            'accessibility': 'Add aria-label or role attributes to interactive elements',
            'arbitrary_tailwind': 'Use standard Tailwind classes instead of arbitrary values',
            'inline_styles': 'Use Tailwind classes instead of inline styles',
            'settimeout': 'Use useEffect for timing-related operations',
            'any_type': 'Define proper TypeScript types instead of using any'
        }
        return suggestions.get(pattern_name, 'Review documentation for proper implementation')

# Initialize validator
# A single validator instance is reused for every generated component.
component_validator = ComponentValidator()
logger.info("Component validator initialized successfully")

In [None]:
import asyncio
import yaml
from pathlib import Path
from datetime import datetime
import time
import json
import random

class ComponentGenerator:
    """Runs the full pipeline for one markdown file: plan -> bridge -> component -> validation."""

    def __init__(
        self,
        first_shot_handler: FirstShotHandler,
        bridge_handler: BridgeHandler,
        second_shot_handler: SecondShotHandler,
        validator: ComponentValidator
    ):
        self.first_shot = first_shot_handler
        self.bridge = bridge_handler
        self.second_shot = second_shot_handler
        self.validator = validator

    async def generate_component(self, md_file: Path) -> tuple[bool, str, List[ValidationMessage]]:
        """Generate a complete educational React component.

        Args:
            md_file: Markdown file whose frontmatter supplies ``title`` and
                ``summary``.

        Returns:
            ``(success, component_code, validation_messages)``. ``success`` is
            always ``True`` when this method returns; validation findings are
            reported through the message list rather than as a failure.

        Raises:
            ValueError: if the frontmatter cannot be extracted.
            Exception: any error from the handlers is logged and re-raised.
        """
        try:
            # Extract metadata from markdown
            metadata = self._extract_frontmatter(md_file)
            if not metadata:
                raise ValueError(f"Could not extract metadata from {md_file}")

            # First shot: Generate educational plan
            logger.info(f"Generating educational plan for: {metadata['title']}")
            educational_plan = await self.first_shot.generate_educational_plan(
                metadata['title'],
                metadata['summary']
            )

            # Create bridge
            logger.info("Creating context bridge")
            bridge = self.bridge.create_bridge(educational_plan)

            # Second shot: Generate component
            logger.info("Generating component implementation")
            component_code = await self.second_shot.generate_component(bridge)

            # Add "use client" directive if missing
            if '"use client";' not in component_code:
                component_code = '"use client";\n\n' + component_code

            # Validate component
            logger.info("Validating component")
            validation_messages = self.validator.validate_component(component_code)

            # Always consider it a success (we'll save with warnings)
            success = True

            return success, component_code, validation_messages

        except Exception as e:
            logger.error(f"Error in component generation: {str(e)}")
            raise

    def _extract_frontmatter(self, md_file: Path) -> Optional[Dict[str, str]]:
        """Extract title and summary from markdown frontmatter.

        Returns:
            The parsed frontmatter mapping when it contains both ``title`` and
            ``summary``; ``None`` when the file has no ``---`` delimited
            section, the section is not a YAML mapping, a required key is
            missing, or reading/parsing fails (the failure is logged).
        """
        try:
            with open(md_file, 'r', encoding='utf-8') as f:
                content = f.read()

            # Split on first and second '---'
            parts = content.split('---', 2)
            if len(parts) < 3:
                return None

            # Parse YAML frontmatter
            frontmatter = yaml.safe_load(parts[1])

            # safe_load returns None for an empty section and a str/list for
            # non-mapping YAML. A str would pass the `in` checks below by
            # substring match and then break on metadata['title'].
            if not isinstance(frontmatter, dict):
                return None

            # Ensure required fields exist
            if 'title' not in frontmatter or 'summary' not in frontmatter:
                return None

            return frontmatter

        except Exception as e:
            logger.error(f"Error extracting frontmatter: {str(e)}")
            return None

class MainExecutor:
    """Processes a random sample of markdown files and writes a JSON run report."""

    def __init__(self, component_generator: ComponentGenerator):
        self.generator = component_generator
        self.results = []

    async def process_file(self, md_file: Path) -> None:
        """Generate, save and log the component for one markdown file.

        Failures are caught and recorded in the per-file result rather than
        propagated; a result entry is appended to ``self.results`` either way.
        """
        start_time = time.time()
        result = dict(
            file=md_file.name,
            timestamp=datetime.now().isoformat(),
            success=False,
            validation_messages=[],
            processing_time=0,
        )

        try:
            _success, component_code, findings = await self.generator.generate_component(md_file)

            # The component is written out even when validation produced findings.
            output_filename = md_file.stem + '.tsx'
            with open(Config.OUTPUT_DIR / output_filename, 'w', encoding='utf-8') as out:
                out.write(component_code)

            result["success"] = True
            serialised = []
            for finding in findings:
                serialised.append({
                    "level": finding.level,
                    "message": finding.message,
                    "suggestion": finding.suggestion,
                })
            result["validation_messages"] = serialised

            if not findings:
                logger.info(f"✅ Successfully generated: {output_filename}")
            else:
                logger.warning(f"⚠️ Component saved with validation notes: {output_filename}")
                for finding in findings:
                    # Warnings go to logger.warning; every other level goes to logger.error.
                    if finding.level == 'warning':
                        emit = logger.warning
                    else:
                        emit = logger.error
                    emit(f"{finding.level.upper()}: {finding.message}")
                    emit(f"Suggestion: {finding.suggestion}")

        except Exception as e:
            error_msg = str(e)
            logger.error(f"❌ Error processing {md_file.name}: {error_msg}")
            result["error"] = error_msg

        finally:
            elapsed = time.time() - start_time
            result["processing_time"] = elapsed
            logger.info(f"⌛ Processing time: {elapsed:.2f} seconds")
            self.results.append(result)

    async def run(self, max_files: int = 3) -> None:
        """Process up to ``max_files`` randomly chosen ``*.md`` files, then save the report.

        Returns early, without writing a report, when the input directory
        holds no markdown files. Unexpected errors are logged and re-raised.
        """
        try:
            md_files = [path for path in Config.INPUT_DIR.glob('*.md')]
            total_files = len(md_files)

            if not md_files:
                logger.info("No markdown files found to process")
                return

            # Never ask random.sample for more items than exist.
            sample_size = min(max_files, total_files)
            files_to_process = random.sample(md_files, sample_size)
            logger.info(f"Processing {len(files_to_process)} of {total_files} files")

            # Files are handled strictly one after another.
            for md_file in files_to_process:
                logger.info(f"\nProcessing: {md_file.name}")
                await self.process_file(md_file)

            self._save_report()

        except Exception as e:
            logger.error(f"Error in main execution: {str(e)}")
            raise

    def _save_report(self):
        """Write a timestamped JSON report of all results to the metadata directory."""
        report_timestamp = datetime.now().isoformat()
        succeeded = [entry for entry in self.results if entry["success"]]
        failed = [entry for entry in self.results if not entry["success"]]
        total_time = sum(entry["processing_time"] for entry in self.results)

        report = {
            "timestamp": report_timestamp,
            "results": self.results,
            "summary": {
                "total_processed": len(self.results),
                "successful": len(succeeded),
                "failed": len(failed),
                "total_time": total_time
            }
        }

        report_path = Config.METADATA_DIR / f"execution_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        with open(report_path, 'w', encoding='utf-8') as handle:
            json.dump(report, handle, indent=2)

        logger.info(f"📊 Execution report saved to {report_path}")

# Initialize components
# Wire the handlers and validator created in the earlier cells into one pipeline.
component_generator = ComponentGenerator(
    first_shot_handler,
    bridge_handler,
    second_shot_handler,
    component_validator
)

# Create executor
executor = MainExecutor(component_generator)

# Run the process
# NOTE: top-level `await` only works where an event loop is already running
# (e.g. a Jupyter/IPython cell); a plain script would need asyncio.run(...).
await executor.run(max_files=3)