In [102]:
# Cell 1: Imports and Setup
# All necessary imports and API key setup
import os
import yaml
import anthropic
import time
import random
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Tuple, Optional
from lucideIconList import icons
from config import ANTHROPIC_API_KEY

# Log the installed SDK version so a notebook run can be reproduced later.
print(f"Anthropic version: {anthropic.__version__}")

# API Key Setup
# Export the key imported from the local `config` module; main() reads it back
# through os.getenv('ANTHROPIC_API_KEY') when it builds the client.
os.environ['ANTHROPIC_API_KEY'] = ANTHROPIC_API_KEY

# Directory paths
# INPUT_DIR holds the markdown articles; OUTPUT_DIR receives generated .tsx files.
INPUT_DIR = "/Users/kemi/Documents/GitHub/vocab/src/content/articles"
OUTPUT_DIR = "/Users/kemi/Documents/GitHub/vocab/src/components/articles"
# The CHECK*_DIR folders are scanned (together with OUTPUT_DIR) by main() so an
# article that already has a component in any of them is not regenerated.
# NOTE(review): the folder names suggest manual review buckets — confirm.
CHECK0_DIR = "/Users/kemi/Documents/GitHub/vocab/src/components/articles/0"
CHECK1_DIR = "/Users/kemi/Documents/GitHub/vocab/src/components/articles/1-ok"
CHECK2_DIR = "/Users/kemi/Documents/GitHub/vocab/src/components/articles/2-adjust"
CHECK3_DIR = "/Users/kemi/Documents/GitHub/vocab/src/components/articles/3-fixError"

# Constants for Claude API
# Previously used model, kept for reference:
#MODEL_NAME = "claude-3-5-sonnet-20240620"
MODEL_NAME = "claude-3-5-sonnet-20241022"
# MAX_TOKENS1 caps the stage-1 (first shot) response, MAX_TOKENS2 the stage-2
# (implementation) response.
MAX_TOKENS1 = 2000
MAX_TOKENS2 = 6000
# Sampling temperature shared by both stages.
TEMPERATURE = 0.7

# Icon list imported from lucideIconList; within this file it is only
# referenced by a disabled prompt line in generate_component_implementation.
LUCIDEICONS = icons

# Print setup confirmation
print("🔧 Environment setup complete")
print(f"📁 Input directory: {INPUT_DIR}")
print(f"📁 Output directory: {OUTPUT_DIR}")

Anthropic version: 0.37.1
🔧 Environment setup complete
📁 Input directory: /Users/kemi/Documents/GitHub/vocab/src/content/articles
📁 Output directory: /Users/kemi/Documents/GitHub/vocab/src/components/articles


In [103]:
# Cell 2 - User Prompts

def create_concept_prompt(title: str, summary: str) -> str:
    """
    Build the first-shot user prompt asking for a didactic React component.

    The prompt is a fixed instruction paragraph followed by a
    "CONCEPT BREAKDOWN" section that carries the concept name and summary.

    Args:
        title (str): The concept title
        summary (str): Brief summary of the concept

    Returns:
        str: The prompt text, starting with a blank line and ending with one
    """
    # Fixed instruction paragraph; the trailing space is part of the prompt.
    instructions = (
        "help me explain the following AI concept in a didactic react component which helps someone learn about it, either with interaction or animation. "
        "keep it simple. use lucide icons where possible. don't use shadcn only tailwind. make it intuitive. user shouldn't have to type anything. "
        "an animation can suffice if it's illustrative of the concept in the context of AI. "
        "The interactions might include, but not limited to, moving objects, selecting objects given a criteria, sliding objects, Scroll or Pinch-to-zoom, Swipe navigation, Drag and Drop Operations, Scroll-Based Interactions, Drawing, Music notes. "
        "Use humor and relatable situations when appropriate. aim for beautiful and artistic animations. "
    )
    return (
        f"\n{instructions}\n"
        "\n"
        "CONCEPT BREAKDOWN:\n"
        f"1. Name of the concept: {title}\n"
        f"2. Core Principle: {summary}\n"
        "\n"
    )

def create_implementation_prompt(title: str, summary: str, concept_response: str) -> str:
    """
    Build the second-shot user prompt that asks for an improved component.

    The prompt criticises the first attempt, lists four improvement points and
    then repeats the concept breakdown with the first-shot response embedded.

    Args:
        title (str): The concept title
        summary (str): Brief summary of the concept
        concept_response (str): Claude's response from the first shot

    Returns:
        str: The prompt text, starting with a blank line and ending with one
    """
    # One entry per prompt line; trailing spaces inside the literals are
    # part of the prompt text and must be kept.
    prompt_lines = [
        "",
        "The execution of the react component is underwhelming and confusing. ",
        "I know you can do better. ",
        "1. More beautiful and artistic. ",
        "2. Put short helper texts or tooltips to help convey the information better.",
        "3. Responsive Design: Implement font and element resizing from tailwind so the component is more legible in narrower smaller screens.",
        "4. Add system-based theme switching using the prefers-color-scheme media query and Tailwind's dark mode feature.",
        "",
        "CONCEPT BREAKDOWN:",
        f"1. Name of the concept: {title}",
        f"2. Core Principle: {summary}",
        "3. Claude 3.5 Sonnet's 1st try: ",
        f"{concept_response}",
        "",
        "",
    ]
    return "\n".join(prompt_lines)

# Print confirmation
print("✅ User Prompts loaded")

✅ User Prompts loaded


In [104]:
# Cell 3 - Base Functions

def extract_frontmatter(content: str) -> Optional[Dict]:
    """
    Extract YAML frontmatter from markdown content.

    The frontmatter is the text between the leading '---' and the next line
    that consists only of '---'. Looking for the closing delimiter on its own
    line (rather than the first '---' substring) keeps values that contain
    '---' intact. If no such line exists, the first '---' substring is used
    as the terminator, which matches the previous behaviour.

    Args:
        content (str): Full markdown file content

    Returns:
        dict or None: Extracted frontmatter as dictionary. None if the content
        does not start with '---', if the YAML cannot be parsed, or if the
        parsed YAML is not a mapping (e.g. empty frontmatter or a bare scalar).
    """
    import re  # local import so this block does not depend on file-level imports

    if not content.startswith('---'):
        return None

    remainder = content[3:]
    # Closing delimiter: a line holding only '---' (trailing blanks / CR allowed).
    closing = re.search(r'^---[ \t\r]*$', remainder, flags=re.MULTILINE)
    if closing is not None:
        raw_frontmatter = remainder[:closing.start()]
    else:
        # Fallback: cut at the first '---' substring, or take everything.
        raw_frontmatter = remainder.split('---', 1)[0]

    try:
        parsed = yaml.safe_load(raw_frontmatter)
    except yaml.YAMLError as e:
        print(f"  ⚠️ Error parsing YAML frontmatter: {str(e)}")
        return None

    # Callers index the result by key, so anything but a mapping is unusable.
    return parsed if isinstance(parsed, dict) else None

def format_time(seconds: float) -> str:
    """
    Render a duration as whole minutes and whole seconds.

    Args:
        seconds (float): Number of seconds

    Returns:
        str: Formatted string like "1m 30s" (fractions are truncated)
    """
    whole_minutes, leftover = divmod(seconds, 60)
    return f"{int(whole_minutes)}m {int(leftover)}s"

def get_existing_component_names_in_dir(directory: str) -> set:
    """
    Collect the component names present in one directory.

    Args:
        directory (str): Path to components directory

    Returns:
        set: Names of the '.tsx' entries with the extension removed; an empty
        set when the path does not exist
    """
    if not os.path.exists(directory):
        return set()

    suffix = '.tsx'
    return {
        entry[:-len(suffix)]
        for entry in os.listdir(directory)
        if entry.endswith(suffix)
    }

def get_all_existing_component_names(directories: list[str]) -> set:
    """
    Merge the component names found in several directories.

    Args:
        directories (list[str]): List of directory paths to check

    Returns:
        set: Union of the component names found in each directory
    """
    combined = set()
    combined.update(
        *(get_existing_component_names_in_dir(path) for path in directories)
    )
    return combined

# Print confirmation
print("✅ Base functions loaded")

✅ Base functions loaded


In [105]:
# Cell 4 - Component Generation Core

def generate_concept_understanding(client, title: str, prompt: str) -> Optional[str]:
    """
    Generate conceptual understanding using Claude API (First shot).

    Sends `prompt` as the single user message together with a fixed system
    prompt, prints the response text and returns it. A warning is printed
    when the API reports that the response stopped at the token limit,
    because a truncated first shot is then fed into the second stage.

    Args:
        client: Anthropic client instance
        title: The concept title (used for log output only)
        prompt: Generated first-shot prompt

    Returns:
        str or None: Text of the first content block of the response, or None
        if the request or response handling raised an exception
    """
    print(f"\n  ⌛ Stage 1: Generating conceptual understanding for {title}...")
    
    system_prompt_concept = '''
    You are a creative artistic expert React developer and AI professor specializing in educational components for 15 to 18-year-old humans. 
Your components must strictly follow these technical requirements:

1. Architecture:
- "use client" directive at start (first line)
- import { useState, useEffect } from "react"; as the second line
- Only useState and useEffect hooks
- Only Lucide icons for visuals.
- Only Tailwind CSS for styling
- No external libraries/components
- File extension: .tsx

2. TypeScript Implementation:
interface ComponentProps {
    // Define if needed, empty interface required
}
// All state must use explicit types
const [state, setState] = useState<StateType>(initialValue);
// Event handlers must be typed
const handleEvent = (e: React.MouseEvent<HTMLButtonElement>) => {...};
// Constants outside component
const SCENARIOS: ScenarioType[] = [...];

3. Effects & Cleanup:
useEffect(() => {
    // Effect logic
    return () => {
    // Cleanup required
    };
}, [dependencies]);

4. Styling Standards:
- Only core Tailwind classes
- No arbitrary values (e.g., h-[500px])
- Transitions: duration-300 to duration-500
- Color scheme:
    • Blue (#3B82F6) - active/focus
    • Gray (#6B7280) - background
    • Green (#22C55E) - success
- Avoid minimalistic and underwhelming design. I know you can do better. Be more beautiful and artistic.
- Account for light and dark mode.

5. Code Organization:
- Max 200 lines per component
- Early returns with type guards
- JSDoc component documentation
- Proper hooks cleanup
- No inline styles
- No setTimeout/setInterval (use useEffect)
- Write the complete code, no comments or commented sections

Return only raw TSX code without explanations or markdown.
    '''
    
    try:
        response = client.messages.create(
            model=MODEL_NAME,
            max_tokens=MAX_TOKENS1,
            temperature=TEMPERATURE,
            system=system_prompt_concept,
            messages=[{"role": "user", "content": prompt}]
        )
        
        concept_response = response.content[0].text
        print(f"\n  💬 Stage 1 response: {concept_response}")

        # Surface truncation instead of silently passing a cut-off draft on.
        if getattr(response, "stop_reason", None) == "max_tokens":
            print(f"  ⚠️ Stage 1 response was truncated at {MAX_TOKENS1} tokens")
        
        return concept_response
        
    except Exception as e:
        # Best-effort boundary: any failure is reported and signalled as None.
        print(f"  ❌ Error generating concept understanding: {str(e)}")
        return None

def generate_component_implementation(
    client,
    title: str,
    prompt: str,
    concept_response: str
) -> Optional[str]:
    """
    Generate React component implementation using Claude API (Second shot).

    Sends the second-shot prompt with a fixed system prompt and returns the
    response text with a surrounding markdown code fence removed.

    The first-shot response is appended to the user message only when the
    prompt does not already contain it; create_implementation_prompt embeds
    it, so appending unconditionally sent the same text twice.

    Args:
        client: Anthropic client instance
        title: The concept title (used for log output only)
        prompt: Generated second-shot prompt
        concept_response: Response from first shot

    Returns:
        str or None: Component code, or None if the request or response
        handling raised an exception
    """
    print(f"\n  ⌛ Stage 2: Generating component implementation for {title}...")
    
    system_prompt_implementation = '''
You are a creative artistic expert React developer and AI professor specializing in educational components for 15 to 18-year-old humans. 
Your components must strictly follow these technical requirements:

1. Architecture:
- "use client" directive at start (first line)
- import { useState, useEffect } from "react"; as the second line
- Only useState and useEffect hooks
- Only Lucide icons for visuals.
- Only Tailwind CSS for styling
- No external libraries/components
- File extension: .tsx

2. TypeScript Implementation:
interface ComponentProps {
    // Define if needed, empty interface required
}
// All state must use explicit types
const [state, setState] = useState<StateType>(initialValue);
// Event handlers must be typed
const handleEvent = (e: React.MouseEvent<HTMLButtonElement>) => {...};
// Constants outside component
const SCENARIOS: ScenarioType[] = [...];

3. Effects & Cleanup:
useEffect(() => {
    // Effect logic
    return () => {
    // Cleanup required
    };
}, [dependencies]);

4. Styling Standards:
- Only core Tailwind classes
- No arbitrary values (e.g., h-[500px])
- Transitions: duration-300 to duration-500
- Color scheme:
    • Blue (#3B82F6) - active/focus
    • Gray (#6B7280) - background
    • Green (#22C55E) - success
- Avoid minimalistic and underwhelming design. I know you can do better. Be more beautiful and artistic.

5. Code Organization:
- Max 200 lines per component
- Early returns with type guards
- JSDoc component documentation
- Proper hooks cleanup
- No inline styles
- No setTimeout/setInterval (use useEffect)
- Write the complete code, no comments or commented sections

Return only raw TSX code without explanations or markdown.
'''

    # NOTE: an earlier variant of the "Only Lucide icons" rule additionally
    # restricted the model to the names in LUCIDEICONS; it is disabled here.

    try:
        # Avoid sending the first-shot text twice when the prompt embeds it.
        user_message = prompt
        if concept_response and concept_response not in prompt:
            user_message = f"{prompt}\n\nFirst-shot understanding:\n{concept_response}"

        response = client.messages.create(
            model=MODEL_NAME,
            max_tokens=MAX_TOKENS2,
            temperature=TEMPERATURE,
            system=system_prompt_implementation,
            messages=[{
                "role": "user", 
                "content": user_message
            }]
        )
        
        component_code = response.content[0].text

        # A response cut off at the token limit is almost certainly not a
        # complete component; report it so the file can be reviewed.
        if getattr(response, "stop_reason", None) == "max_tokens":
            print(f"  ⚠️ Stage 2 response was truncated at {MAX_TOKENS2} tokens")
        
        # Clean up the code if it's wrapped in markdown. Leading whitespace
        # before the opening fence is tolerated.
        if component_code.lstrip().startswith('```'):
            component_code = component_code.lstrip()
            first_newline = component_code.find('\n')
            if first_newline != -1:
                # Drop the opening fence line (e.g. "```tsx").
                component_code = component_code[first_newline + 1:]
            if component_code.strip().endswith('```'):
                component_code = component_code.strip()[:-3]
        
        return component_code
        
    except Exception as e:
        # Best-effort boundary: any failure is reported and signalled as None.
        print(f"  ❌ Error generating component implementation: {str(e)}")
        return None

def generate_component_with_refinement(
    client,
    title: str,
    summary: str
) -> Tuple[Optional[str], List[str]]:
    """
    Two-stage component generation with conceptual understanding and implementation.

    Runs the first shot, feeds its response into the second shot and then
    passes the resulting code to validate_component.

    NOTE(review): validate_component is not defined in the visible part of
    this file — confirm it exists before calling this function.

    Args:
        client: Anthropic client instance
        title: Component title for logging
        summary: Brief summary of the concept
    
    Returns:
        tuple: (final_component_code, validation_issues). The code is None
        when either stage fails; the list then holds a single failure message.
    """
    # Stage 1: Conceptual Understanding
    concept_prompt = create_concept_prompt(title, summary)
    concept_response = generate_concept_understanding(
        client,
        title,
        concept_prompt
    )
    
    if not concept_response:
        return None, ["Concept generation failed"]
    
    # Stage 2: Implementation
    implementation_prompt = create_implementation_prompt(
        title,
        summary,
        concept_response
    )
    
    component_code = generate_component_implementation(
        client,
        title,
        implementation_prompt,
        concept_response
    )
    
    if not component_code:
        return None, ["Implementation generation failed"]
    
    # Validate the generated code
    issues = validate_component(component_code)
    
    if issues:
        print("\n  ⚠️ Component validation issues found:")
        for issue in issues:
            print(f"    {issue}")
    else:
        print("  ✅ Component validation passed")
    
    return component_code, issues

# Print confirmation
print("✅ Component generation functions loaded")

✅ Component generation functions loaded


In [106]:
# Cell 5 - Component Validation and Fixes

def save_tsx_file(
    content: str,
    md_filename: str,
    output_dir: str
) -> None:
    """
    Save the API response as a .tsx file with minimal validation.

    The content is stripped of a surrounding markdown code fence and of
    leading/trailing whitespace, and a "use client" directive is prepended
    when the code does not already start with one (either quote style).

    Args:
        content (str): The component code to save
        md_filename (str): Original markdown filename with .md extension
        output_dir (str): Directory to save the TSX file (created if missing)
    """
    os.makedirs(output_dir, exist_ok=True)
    
    # Swap only the trailing .md extension; a '.md' elsewhere in the name
    # must stay untouched so the component name still matches the article.
    if md_filename.endswith('.md'):
        tsx_filename = md_filename[:-len('.md')] + '.tsx'
    else:
        tsx_filename = md_filename
    filepath = os.path.join(output_dir, tsx_filename)
    
    # Clean the content: strip first so a fence preceded by blank lines or
    # spaces is still recognised.
    cleaned_content = content.strip()
    if cleaned_content.startswith('```'):
        first_newline = cleaned_content.find('\n')
        if first_newline != -1:
            # Drop the opening fence line (e.g. "```tsx").
            cleaned_content = cleaned_content[first_newline + 1:]
        if cleaned_content.endswith('```'):
            cleaned_content = cleaned_content[:-3]
    
    # Check and ensure "use client" directive; accept both quote styles so a
    # single-quoted directive is not duplicated.
    cleaned_content = cleaned_content.strip()
    if not cleaned_content.startswith(('"use client"', "'use client'")):
        cleaned_content = '"use client"\n\n' + cleaned_content
        print("  🔧 Added missing 'use client' directive")
    
    # Save the file
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(cleaned_content)
    print(f"  ✓ Saved: {tsx_filename}")

# Print confirmation
print("✅ Component save function loaded")

✅ Component save function loaded


In [107]:
# Cell 6 - Main Execution

def get_generality_score(md_file):
    """
    Return the largest value of the 'generality' frontmatter entry of a file.

    Args:
        md_file (Path): Path object for the markdown file

    Returns:
        The maximum of the 'generality' entry, or -1 when the frontmatter or
        the entry is missing or when reading/parsing raises an exception
    """
    try:
        with open(md_file, 'r', encoding='utf-8') as handle:
            frontmatter = extract_frontmatter(handle.read())

        # No usable frontmatter or no scores recorded: use the sentinel.
        if not frontmatter or 'generality' not in frontmatter:
            return -1

        return max(frontmatter['generality'])
    except Exception as e:
        print(f"  ⚠️ Error reading generality from {md_file.name}: {str(e)}")
        return -1

def process_file(client, md_file, metadata):
    """
    Generate and save the component for one markdown article.

    Runs the first shot, then the second shot, and writes the resulting code
    to OUTPUT_DIR under the article's filename.

    Args:
        client: Anthropic client instance
        md_file: Path object for the markdown file
        metadata: Dictionary containing file metadata ('title' and 'summary'
            are read)

    Returns:
        bool: True when a component was saved, False on any failure
    """
    try:
        title = metadata['title']
        print(f"  ⌛ Creating prompts for: {title}")
        summary = metadata['summary']

        # First shot - Conceptual Understanding
        first_prompt = create_concept_prompt(title, summary)
        first_draft = generate_concept_understanding(client, title, first_prompt)
        if not first_draft:
            print(f"  ❌ Failed to generate concept understanding for: {md_file.name}")
            return False

        # Second shot - Implementation
        second_prompt = create_implementation_prompt(title, summary, first_draft)
        final_code = generate_component_implementation(
            client,
            title,
            second_prompt,
            first_draft
        )
        if not final_code:
            print(f"  ❌ Failed to generate component for: {md_file.name}")
            return False

        save_tsx_file(final_code, md_file.name, OUTPUT_DIR)
        return True

    except Exception as e:
        print(f"  ❌ Error processing file: {str(e)}")
        return False

def main(max_files: int = 50):
    """
    Main execution function for the component generator.

    Finds markdown articles in INPUT_DIR that have no component yet in
    OUTPUT_DIR or any CHECK*_DIR, orders them by generality score (highest
    first), processes up to `max_files` of them and prints a summary.

    Args:
        max_files (int): Upper bound on the number of articles processed in
            this run. Defaults to 50, the previously hard-coded batch size.

    Raises:
        FileNotFoundError: If INPUT_DIR does not exist.
        Exception: Any other error raised outside the per-file loop is
            printed and re-raised.
    """
    print("\n🚀 Starting AI Component Generator...\n")
    start_time_total = time.time()
    
    try:
        # Initialize Anthropic client
        client = anthropic.Client(api_key=os.getenv('ANTHROPIC_API_KEY'))
        
        # Check directories
        print("📂 Checking directories...")
        if not os.path.exists(INPUT_DIR):
            raise FileNotFoundError(f"Input directory not found: {INPUT_DIR}")
        if not os.path.exists(OUTPUT_DIR):
            os.makedirs(OUTPUT_DIR)
            print(f"  ✓ Created output directory: {OUTPUT_DIR}")
        
        # Get existing component names from all directories
        print("\n📂 Checking existing components...")
        dirs_to_check = [OUTPUT_DIR, CHECK0_DIR, CHECK1_DIR, CHECK2_DIR, CHECK3_DIR]
        existing_components = get_all_existing_component_names(dirs_to_check)
        
        if existing_components:
            print(f"  ✓ Found {len(existing_components)} existing components across all directories")
            
        # Get list of all .md files that don't have corresponding .tsx files
        all_md_files = [
            md_file
            for md_file in Path(INPUT_DIR).glob('*.md')
            if md_file.stem not in existing_components
        ]
        
        total_available = len(all_md_files)
        print(f"\n📁 Found {total_available} unprocessed files")
        
        if total_available == 0:
            print("❌ No new files to process")
            return
        
        # Sort files by generality score (highest first)
        print("\n📊 Sorting files by generality score...")
        md_files_with_scores = [(f, get_generality_score(f)) for f in all_md_files]
        sorted_md_files = [f for f, score in sorted(md_files_with_scores, key=lambda x: x[1], reverse=True)]
        
        # Cap the batch: every selected file costs two API calls, so the
        # limit directly bounds the cost of one run.
        num_files = max(0, min(max_files, total_available))
        md_files = sorted_md_files[:num_files]
        
        print(f"\n📈 Selected top {num_files} files by generality score")
        
        # Track statistics
        successful = 0
        failed = 0
        
        # Process each file
        for index, md_file in enumerate(md_files, 1):
            print(f"\n📝 Processing file {index}/{num_files}: {md_file.name}")
            start_time_file = time.time()
            
            try:
                print("  ⌛ Reading file...")
                with open(md_file, 'r', encoding='utf-8') as f:
                    content = f.read()
                
                print("  ⌛ Extracting metadata...")
                metadata = extract_frontmatter(content)
                if not metadata:
                    print("  ❌ Could not extract metadata")
                    failed += 1
                    continue
                
                # Process the file
                success = process_file(client, md_file, metadata)
                if success:
                    successful += 1
                else:
                    failed += 1
                
                elapsed_time = time.time() - start_time_file
                print(f"  ⏱️ Time taken: {format_time(elapsed_time)}")
                
            except Exception as e:
                # One bad file must not abort the rest of the batch.
                print(f"  ❌ Error: {str(e)}")
                failed += 1
        
        # Print summary
        total_time = time.time() - start_time_total
        print("\n====== Summary ======")
        print(f"✅ Successfully processed: {successful}")
        print(f"❌ Failed: {failed}")
        print(f"⏱️ Total time: {format_time(total_time)}")
        
    except Exception as e:
        print(f"\n❌ Fatal error: {str(e)}")
        raise
    
    print("\n✨ Process completed!")

In [108]:
# Cell 7 - Run Main
# Start the generator only when this code runs as the top-level script or
# notebook cell, not when it is imported as a module.
if __name__ == "__main__":
    main()


🚀 Starting AI Component Generator...

📂 Checking directories...

📂 Checking existing components...
  ✓ Found 177 existing components across all directories

📁 Found 676 unprocessed files

📊 Sorting files by generality score...

📈 Selected top 50 files by generality score

📝 Processing file 1/50: algorithm.md
  ⌛ Reading file...
  ⌛ Extracting metadata...
  ⌛ Creating prompts for: Algorithm

  ⌛ Stage 1: Generating conceptual understanding for Algorithm...

  💬 Stage 1 response: ```tsx
"use client"
import { useState, useEffect } from "react"
import { ChefHat, ArrowRight, Coffee, Cake, Bowl, Egg } from "lucide-react"

interface Step {
  id: number
  icon: JSX.Element
  text: string
  complete: boolean
}

/**
 * AlgorithmVisualizer demonstrates the concept of algorithms through a baking analogy
 * with animated steps showing how algorithms follow a precise sequence of instructions
 */
export default function AlgorithmVisualizer() {
  const [currentStep, setCurrentStep] = useState<number>