In [1]:
# Setup and imports
import sys
import time
import os
from typing import Optional
from dataclasses import dataclass
from IPython.display import HTML, display, Markdown
import re

sys.path.insert(0, '..')
from dotenv import load_dotenv
load_dotenv('../.env')

# Import clients
from openai import OpenAI
import anthropic

from src.minimax_client import MiniMaxClient

# Check for pandas
try:
    import pandas as pd
    HAS_PANDAS = True
except ImportError:
    HAS_PANDAS = False
    print("⚠️ pandas not installed - tables will use text format")


In [2]:
@dataclass
class CompletionResult:
    """Outcome of one model completion, whether it succeeded or failed.

    A run counts as failed whenever ``error`` is not ``None`` (including an
    empty string); failed runs carry zeroed timing and token counts.

    Fields:
        model_name: Model identifier that was requested.
        provider: Human-readable provider label (e.g. "OpenAI").
        content: Generated text; empty for failed runs.
        completion_time: Wall-clock seconds spent in the API call.
        prompt_tokens / completion_tokens / total_tokens: Usage as reported
            by the provider.
        error: Failure description, or ``None`` on success.
    """
    model_name: str
    provider: str
    content: str
    completion_time: float
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    error: Optional[str] = None

    @property
    def tokens_per_second(self) -> float:
        """Completion tokens per second; ``0.0`` when no time was recorded."""
        if self.completion_time <= 0:
            return 0.0
        return self.completion_tokens / self.completion_time

    @property
    def success(self) -> bool:
        """True when no error was recorded."""
        return self.error is None

    def summary(self) -> str:
        """Return a boxed, printable report for this run.

        Failed runs show the first 50 characters of the error; the ellipsis
        is appended only when the message was actually cut.
        """
        # Branch on `success` (not on the truthiness of `error`) so an empty
        # error string is still reported as a failure, matching `success`.
        if not self.success:
            preview_chars = 50
            shown = self.error[:preview_chars]
            if len(self.error) > preview_chars:
                shown += "..."
            return f"""
╔══════════════════════════════════════════════════════════════╗
║  {self.provider}: {self.model_name}
╠══════════════════════════════════════════════════════════════╣
║  ❌ Error: {shown}
╚══════════════════════════════════════════════════════════════╝
"""
        return f"""
╔══════════════════════════════════════════════════════════════╗
║  {self.provider}: {self.model_name}
╠══════════════════════════════════════════════════════════════╣
║  ⏱️  Completion Time:     {self.completion_time:.2f}s
║  📝 Prompt Tokens:        {self.prompt_tokens:,}
║  ✍️  Completion Tokens:   {self.completion_tokens:,}
║  📊 Total Tokens:         {self.total_tokens:,}
║  ⚡ Speed:                {self.tokens_per_second:.1f} tokens/sec
╚══════════════════════════════════════════════════════════════╝
"""


In [3]:
class MultiModelClient:
    """Client for comparing completions across multiple LLM providers.

    Wraps a MiniMax, an OpenAI and an Anthropic client behind three
    ``complete_*`` methods with one shared contract: they do not raise, and
    always return a ``CompletionResult`` (with ``error`` set on failure and
    zeroed timing/usage).
    """

    # One sampling temperature for every provider so runs are comparable.
    TEMPERATURE = 0.7

    # OpenAI model-name prefixes that get the reasoning-model request shape:
    # system prompt folded into the user turn and no temperature parameter.
    _OPENAI_REASONING_PREFIXES = ("o1", "o3", "o4")

    def __init__(self):
        """Create whichever provider clients can be configured and report status."""
        # MiniMax
        try:
            self.minimax = MiniMaxClient()
            self.minimax_ready = True
        except Exception as e:
            self.minimax = None
            self.minimax_ready = False
            print(f"⚠️ MiniMax init failed: {e}")

        # OpenAI / Anthropic: readiness follows the client object itself, so an
        # empty-string key (e.g. `OPENAI_API_KEY=` in .env) counts as not ready
        # instead of leaving a None client flagged as usable.
        openai_key = os.getenv("OPENAI_API_KEY")
        self.openai = OpenAI(api_key=openai_key) if openai_key else None
        self.openai_ready = self.openai is not None

        anthropic_key = os.getenv("ANTHROPIC_API_KEY")
        self.anthropic = anthropic.Anthropic(api_key=anthropic_key) if anthropic_key else None
        self.anthropic_ready = self.anthropic is not None

        print("🔌 Initialized clients:")
        status_rows = (
            # MiniMax can fail for reasons other than a missing key.
            ("MiniMax", self.minimax_ready, "Init failed"),
            ("OpenAI", self.openai_ready, "No API key"),
            ("Anthropic", self.anthropic_ready, "No API key"),
        )
        for label, ready, not_ready_text in status_rows:
            mark = '✓' if ready else '✗'
            print(f"   {mark} {label}: {'Ready' if ready else not_ready_text}")

    @staticmethod
    def _failure(provider: str, model: str, error) -> CompletionResult:
        """Build the zeroed result used for every failure path.

        ``error`` may be a message or an exception; exceptions whose string
        form is empty fall back to their class name so the error is never blank.
        """
        message = str(error) or type(error).__name__
        return CompletionResult(model_name=model, provider=provider, content="", completion_time=0,
                                prompt_tokens=0, completion_tokens=0, total_tokens=0, error=message)

    def complete_minimax(self, prompt: str, system: str, model: str = "MiniMax-M2.1", max_tokens: int = 16000) -> CompletionResult:
        """Run one timed MiniMax chat completion.

        Args:
            prompt: User message.
            system: System message.
            model: MiniMax model name.
            max_tokens: Output token budget passed to the client.

        Returns:
            A ``CompletionResult``; ``error`` is set if the client is not
            initialized or the call raised.
        """
        if not self.minimax_ready:
            return self._failure("MiniMax", model, "Client not initialized")

        messages = [{"role": "system", "content": system}, {"role": "user", "content": prompt}]

        try:
            start = time.perf_counter()
            response = self.minimax.chat(messages, model=model, max_tokens=max_tokens, temperature=self.TEMPERATURE)
            elapsed = time.perf_counter() - start

            return CompletionResult(
                model_name=model, provider="MiniMax",
                # Normalise a missing body to "" so callers can always len() it.
                content=response.choices[0].message.content or "",
                completion_time=elapsed,
                prompt_tokens=response.usage.prompt_tokens,
                completion_tokens=response.usage.completion_tokens,
                total_tokens=response.usage.total_tokens
            )
        except Exception as e:
            return self._failure("MiniMax", model, e)

    def complete_openai(self, prompt: str, system: str, model: str = "gpt-4o", max_tokens: int = 16000) -> CompletionResult:
        """Run one timed OpenAI chat completion.

        Models whose name starts with a reasoning prefix get the system prompt
        merged into the user message and no ``temperature`` parameter.

        Args:
            prompt: User message.
            system: System message.
            model: OpenAI model name.
            max_tokens: Sent as ``max_completion_tokens``.

        Returns:
            A ``CompletionResult``; ``error`` is set if no API key was
            configured or the call raised.
        """
        if not self.openai_ready:
            return self._failure("OpenAI", model, "Set OPENAI_API_KEY")

        is_reasoning = model.startswith(self._OPENAI_REASONING_PREFIXES)
        if is_reasoning:
            messages = [{"role": "user", "content": f"{system}\n\n{prompt}"}]
        else:
            messages = [{"role": "system", "content": system}, {"role": "user", "content": prompt}]

        try:
            start = time.perf_counter()
            kwargs = {"model": model, "messages": messages, "max_completion_tokens": max_tokens}
            if not is_reasoning:
                kwargs["temperature"] = self.TEMPERATURE
            response = self.openai.chat.completions.create(**kwargs)
            elapsed = time.perf_counter() - start

            return CompletionResult(
                model_name=model, provider="OpenAI",
                # `content` can be None (e.g. a refusal); keep the field a str.
                content=response.choices[0].message.content or "",
                completion_time=elapsed,
                prompt_tokens=response.usage.prompt_tokens,
                completion_tokens=response.usage.completion_tokens,
                total_tokens=response.usage.total_tokens
            )
        except Exception as e:
            return self._failure("OpenAI", model, e)

    def complete_anthropic(self, prompt: str, system: str, model: str = "claude-sonnet-4-20250514", max_tokens: int = 16000) -> CompletionResult:
        """Run one timed Anthropic messages call.

        Args:
            prompt: User message.
            system: System prompt.
            model: Anthropic model name.
            max_tokens: Output token budget.
                NOTE(review): some models may cap output below the 16000
                default — confirm per model before enabling it.

        Returns:
            A ``CompletionResult``; ``total_tokens`` is input + output tokens.
            ``error`` is set if no API key was configured or the call raised.
        """
        if not self.anthropic_ready:
            return self._failure("Anthropic", model, "Set ANTHROPIC_API_KEY")

        try:
            start = time.perf_counter()
            response = self.anthropic.messages.create(
                model=model, max_tokens=max_tokens, system=system,
                # Same temperature as the other providers, for a like-for-like run.
                temperature=self.TEMPERATURE,
                messages=[{"role": "user", "content": prompt}]
            )
            elapsed = time.perf_counter() - start

            # Concatenate every text block instead of assuming the first block
            # exists and is text.
            text = "".join(
                block.text for block in response.content
                if getattr(block, "type", None) == "text"
            )
            usage = response.usage
            return CompletionResult(
                model_name=model, provider="Anthropic",
                content=text,
                completion_time=elapsed,
                prompt_tokens=usage.input_tokens,
                completion_tokens=usage.output_tokens,
                total_tokens=usage.input_tokens + usage.output_tokens
            )
        except Exception as e:
            return self._failure("Anthropic", model, e)

# Initialize client
# Single shared instance used by the comparison cell; constructing it prints
# one readiness line per provider.
client = MultiModelClient()


🔌 Initialized clients:
   ✓ MiniMax: Ready
   ✓ OpenAI: Ready
   ✓ Anthropic: Ready


SYSTEM_PROMPT = """You are an elite full-stack developer specializing in Next.js 14+, React 18+, TypeScript, and complex interactive applications.

Generate production-ready code following these standards:
- Use Next.js App Router (app/ directory structure)
- TypeScript with strict types - absolutely no 'any' types
- React Server Components where appropriate
- Client Components with 'use client' directive when needed
- Tailwind CSS for all styling
- Proper error handling and loading states
- Accessible HTML (ARIA labels, semantic elements)
- Performance optimized (useMemo, useCallback, React.memo where needed)
- Custom hooks for reusable logic

Output format: Provide complete file contents with clear file path comments.
Use this format for each file:
// === FILE: path/to/file.tsx ===
<file contents>
// === END FILE ===

Generate ALL required files for a fully working application. Include package.json with all dependencies."""

USER_PROMPT = """Create "SYNTH CANVAS" - A complete real-time collaborative infinite whiteboard with physics simulation and creative tools.

## CORE FEATURES:

### 1. Infinite Canvas System
- Infinite pan/zoom canvas using HTML5 Canvas + React
- Minimap showing viewport position on the canvas
- Smooth zoom (0.1x to 10x) with mouse wheel + pinch gesture
- Pan with middle mouse button or two-finger drag
- Grid background that scales with zoom level
- Viewport coordinates display

### 2. Drawing Tools (implement ALL):
a) **Pen Tool**: Pressure-sensitive strokes (simulate with speed), smoothing algorithm
b) **Shape Tool**: Rectangle, Ellipse, Triangle, Line, Arrow - with shift for perfect shapes
c) **Text Tool**: Click to add text, editable inline, font size/color options
d) **Eraser**: Removes strokes it touches
e) **Selection Tool**: Click to select, drag to move, resize handles on corners
f) **Sticky Notes**: Colored notes that can be moved and edited
g) **Connector Lines**: Lines that connect between objects and stay connected when moved

### 3. Physics Simulation Mode (toggleable):
- Objects can be given physics properties
- Gravity affects objects when enabled
- Objects collide with each other
- Friction and bounce properties
- "Throw" objects by dragging and releasing
- Constraints/springs between objects
- Use requestAnimationFrame for smooth 60fps physics

### 4. Layer System:
- Multiple layers support
- Layer visibility toggle
- Layer opacity control
- Layer reordering (drag to reorder)
- Lock layers
- Layer groups

### 5. Real-time Collaboration (simulated with local state):
- Multiple cursor display (show 3 fake collaborators moving around)
- User presence indicators
- Object locking when someone is editing
- Change highlighting (flash objects when modified)
- User color coding

### 6. Advanced Features:
a) **Undo/Redo**: Full history with Ctrl+Z / Ctrl+Shift+Z (implement with command pattern)
b) **Copy/Paste**: Ctrl+C/V with offset pasting
c) **Keyboard Shortcuts**: 
   - V: Select, P: Pen, R: Rectangle, E: Ellipse, T: Text, N: Sticky Note
   - Delete/Backspace: Remove selected
   - Ctrl+A: Select all
   - Ctrl+D: Duplicate
d) **Export**: Export visible area as PNG
e) **Auto-save**: Save canvas state to localStorage every 5 seconds

### 7. UI Components Required:

```
app/
├── layout.tsx (main layout with dark theme)
├── page.tsx (canvas page)
├── globals.css
components/
├── canvas/
│   ├── InfiniteCanvas.tsx (main canvas with pan/zoom)
│   ├── CanvasRenderer.tsx (renders all objects)
│   ├── Grid.tsx (background grid)
│   ├── Minimap.tsx (minimap component)
│   └── SelectionBox.tsx (selection rectangle)
├── tools/
│   ├── Toolbar.tsx (floating toolbar)
│   ├── ToolButton.tsx
│   ├── ColorPicker.tsx
│   ├── StrokeWidthPicker.tsx
│   └── LayerPanel.tsx
├── objects/
│   ├── StrokeObject.tsx
│   ├── ShapeObject.tsx
│   ├── TextObject.tsx
│   ├── StickyNote.tsx
│   └── ConnectorLine.tsx
├── physics/
│   ├── PhysicsEngine.tsx
│   └── PhysicsControls.tsx
├── collaboration/
│   ├── CursorOverlay.tsx
│   └── PresenceIndicator.tsx
└── ui/
    ├── Button.tsx
    ├── Slider.tsx
    ├── Popover.tsx
    └── Tooltip.tsx
context/
├── CanvasContext.tsx (canvas state: objects, viewport, selection)
├── ToolContext.tsx (current tool, color, stroke width)
├── HistoryContext.tsx (undo/redo stack)
└── PhysicsContext.tsx (physics simulation state)
hooks/
├── useCanvas.ts
├── usePanZoom.ts
├── useDrawing.ts
├── useSelection.ts
├── usePhysics.ts
├── useKeyboardShortcuts.ts
└── useHistory.ts
lib/
├── canvas-utils.ts (coordinate transforms, hit testing)
├── physics-utils.ts (collision detection, physics math)
├── geometry.ts (shape calculations)
└── export.ts (PNG export)
types/
└── index.ts
```

### 8. Type Definitions (types/index.ts):
```typescript
type Tool = 'select' | 'pen' | 'rectangle' | 'ellipse' | 'triangle' | 'line' | 'arrow' | 'text' | 'sticky' | 'connector' | 'eraser';

interface Point { x: number; y: number; }
interface Bounds { x: number; y: number; width: number; height: number; }

interface CanvasObject {
  id: string;
  type: 'stroke' | 'shape' | 'text' | 'sticky' | 'connector';
  layerId: string;
  locked: boolean;
  physics?: PhysicsBody;
  createdAt: number;
  updatedAt: number;
}

interface StrokeObject extends CanvasObject {
  type: 'stroke';
  points: Point[];
  color: string;
  width: number;
  opacity: number;
}

interface ShapeObject extends CanvasObject {
  type: 'shape';
  shapeType: 'rectangle' | 'ellipse' | 'triangle' | 'line' | 'arrow';
  bounds: Bounds;
  color: string;
  fill: string | null;
  strokeWidth: number;
  rotation: number;
}

interface TextObject extends CanvasObject {
  type: 'text';
  position: Point;
  content: string;
  fontSize: number;
  fontFamily: string;
  color: string;
}

interface StickyNote extends CanvasObject {
  type: 'sticky';
  bounds: Bounds;
  content: string;
  backgroundColor: string;
}

interface PhysicsBody {
  velocity: Point;
  acceleration: Point;
  mass: number;
  friction: number;
  restitution: number;
  isStatic: boolean;
}

interface Layer {
  id: string;
  name: string;
  visible: boolean;
  locked: boolean;
  opacity: number;
  order: number;
}

interface Viewport {
  x: number;
  y: number;
  zoom: number;
}

interface HistoryEntry {
  type: 'add' | 'remove' | 'modify' | 'move';
  objects: CanvasObject[];
  previousState?: CanvasObject[];
}
```

### 9. Styling Requirements:
- Dark theme: bg-slate-950, panels bg-slate-900/80 with backdrop-blur
- Accent colors: violet-500 for primary, cyan-400 for secondary
- Floating toolbar with glassmorphism effect
- Smooth transitions (150ms) on all interactions
- Tool icons using SVG (create simple geometric icons)
- Cursor changes based on current tool
- Selection handles with corner resize cursors
- Neon glow effect on selected objects

### 10. Performance Requirements:
- Canvas should handle 1000+ objects smoothly
- Use object spatial indexing for hit testing
- Throttle canvas redraws
- Only render objects in viewport
- Use OffscreenCanvas for complex operations if supported

Generate a complete, fully functional application. This should be an impressive demo of complex canvas manipulation, state management, and physics simulation."""

# Report the size of the prompt pair defined in this cell.
# NOTE(review): a later cell assigns SYSTEM_PROMPT and USER_PROMPT again, so on
# a top-to-bottom run the comparison uses those later values, not these.
print(
    "📝 Prompt ready!",
    f"   System prompt: {len(SYSTEM_PROMPT):,} chars",
    f"   User prompt: {len(USER_PROMPT):,} chars",
    f"   Total: {len(SYSTEM_PROMPT) + len(USER_PROMPT):,} chars",
    sep="\n",
)

In [4]:
# ═══════════════════════════════════════════════════════════════════════════════
# 🎛️ CONFIGURATION: Select which models to compare
# ═══════════════════════════════════════════════════════════════════════════════

# Provider key -> model names to benchmark. Comment a model in or out to
# change the run; a provider with an empty list is skipped entirely.
# NOTE(review): the client methods request max_tokens=16000 by default —
# confirm every enabled model accepts that output budget.
MODELS_TO_COMPARE = {
    # MiniMax
    "minimax": [
        "MiniMax-M2.1",
    ],

    # OpenAI - uncomment models you want to test
    "openai": [
        "gpt-4o",
        # "gpt-4o-mini",        # Faster, cheaper
        # "gpt-4-turbo",        # Previous flagship
        # "o1",                 # Reasoning model (very slow for code gen)
    ],

    # Anthropic - no free tier, uncomment if you have API access
    "anthropic": [
        # "claude-sonnet-4-20250514",   # Latest Sonnet
        "claude-3-5-sonnet-20241022", # Previous Sonnet
        # "claude-3-opus-20240229",     # Most capable (expensive)
        # "claude-3-5-haiku-20241022",  # Fast and cheap
    ],
}

print("📌 Models selected for comparison:")
# Only providers with at least one enabled model get a section.
selected = {key: names for key, names in MODELS_TO_COMPARE.items() if names}
for provider, models in selected.items():
    print(f"\n  {provider.upper()}:")
    print("\n".join(f"    • {entry}" for entry in models))
total = sum(len(names) for names in selected.values())
print(f"\n  Total: {total} models")


📌 Models selected for comparison:

  MINIMAX:
    • MiniMax-M2.1

  OPENAI:
    • gpt-4o

  Total: 2 models


## 📝 Next.js Application Prompt

A complex Next.js 14 application with:
- TypeScript throughout
- App Router architecture
- Multiple reusable components
- State management with Context
- Tailwind CSS styling
- Kanban board with drag & drop


In [5]:
SYSTEM_PROMPT = """You are a senior full-stack developer specializing in Next.js 14+, React 18+, and TypeScript.

Generate production-ready code following these standards:
- Use Next.js App Router (app/ directory structure)
- TypeScript with strict types - no 'any' types
- React Server Components where appropriate
- Client Components with 'use client' directive when needed
- Tailwind CSS for all styling
- Proper error handling and loading states
- Accessible HTML (ARIA labels, semantic elements)
- Performance optimized (lazy loading, memoization where needed)

Output format: Provide complete file contents with clear file path comments.
Use this format for each file:
// === FILE: path/to/file.tsx ===
<file contents>
// === END FILE ===

Generate ALL required files for a working application."""

USER_PROMPT = """Create a complete Next.js 14 application: "TaskFlow" - A modern task management dashboard.

## Required Features:

### 1. Layout & Navigation (app/layout.tsx)
- Dark theme with gradient accents (slate-900 to slate-800)
- Responsive sidebar navigation with icons
- Collapsible sidebar on mobile
- Header with search bar and user avatar dropdown
- Breadcrumb navigation

### 2. Dashboard Page (app/page.tsx)
- Overview cards: Total Tasks, Completed, In Progress, Overdue (with animated counters)
- Task completion chart (simple CSS-based bar chart)
- Recent activity feed with timestamps
- Quick-add task floating button

### 3. Task Board Page (app/tasks/page.tsx)
- Kanban-style board with 4 columns: Backlog, To Do, In Progress, Done
- Drag and drop tasks between columns (implement with React state, no external libs)
- Task cards showing: title, priority badge, due date, assignee avatar
- Filter by priority and search
- Add new task modal

### 4. Components Required:
- TaskCard.tsx - Individual task with hover effects, priority colors
- KanbanColumn.tsx - Column with task count and drop zone styling
- Modal.tsx - Reusable modal with animations
- Button.tsx - Styled button with variants (primary, secondary, danger)
- Badge.tsx - Priority/status badges
- Avatar.tsx - User avatar with fallback initials
- SearchInput.tsx - Animated search with icon
- Sidebar.tsx - Navigation with active states

### 5. State Management:
- Use React Context for task state (TaskContext.tsx)
- Custom hooks: useTask, useDragDrop
- Persist to localStorage

### 6. Types (types/index.ts):
```typescript
interface Task {
  id: string;
  title: string;
  description?: string;
  status: 'backlog' | 'todo' | 'in-progress' | 'done';
  priority: 'low' | 'medium' | 'high' | 'urgent';
  dueDate?: string;
  assignee?: User;
  createdAt: string;
}

interface User {
  id: string;
  name: string;
  avatar?: string;
}
```

### 7. Styling Requirements:
- Glassmorphism cards with backdrop-blur
- Smooth transitions on all interactive elements (300ms)
- Gradient borders on focused inputs
- Hover lift effect on cards (transform + shadow)
- Loading skeletons for async content
- Toast notifications for actions

### 8. Sample Data:
Include realistic sample tasks (8-10 tasks across all columns) with varied priorities and due dates.

Generate a complete, working Next.js application with all files needed."""

# Report the size of the TaskFlow prompt pair assigned just above; these are
# the values the comparison cell reads on a top-to-bottom run.
print(
    "📝 Prompt ready!",
    f"   System prompt: {len(SYSTEM_PROMPT):,} chars",
    f"   User prompt: {len(USER_PROMPT):,} chars",
    f"   Total: {len(SYSTEM_PROMPT) + len(USER_PROMPT):,} chars",
    sep="\n",
)


📝 Prompt ready!
   System prompt: 781 chars
   User prompt: 2,385 chars
   Total: 3,166 chars


In [6]:
# Run every selected model once and collect the results.
# Keys are "<provider key>_<model name>", in MiniMax -> OpenAI -> Anthropic order.
results: dict[str, CompletionResult] = {}

print("🚀 Starting Next.js application generation comparison...")
print("=" * 70)
print("⚠️  This is a complex prompt - expect 30-120 seconds per model!\n")

# (key in MODELS_TO_COMPARE, label shown to the user, bound client method)
provider_runs = (
    ("minimax", "MiniMax", client.complete_minimax),
    ("openai", "OpenAI", client.complete_openai),
    ("anthropic", "Anthropic", client.complete_anthropic),
)

for provider_key, provider_label, run_completion in provider_runs:
    for model in MODELS_TO_COMPARE.get(provider_key, []):
        print(f"\n⏳ Running {provider_label}: {model}...")
        result = run_completion(USER_PROMPT, SYSTEM_PROMPT, model=model)
        results[f"{provider_key}_{model}"] = result
        print(result.summary())

print("\n" + "=" * 70)
print(f"✅ Completed {len(results)} model comparisons!")


🚀 Starting Next.js application generation comparison...
⚠️  This is a complex prompt - expect 30-120 seconds per model!


⏳ Running MiniMax: MiniMax-M2.1...



╔══════════════════════════════════════════════════════════════╗
║  MiniMax: MiniMax-M2.1
╠══════════════════════════════════════════════════════════════╣
║  ⏱️  Completion Time:     143.34s
║  📝 Prompt Tokens:        744
║  ✍️  Completion Tokens:   16,000
║  📊 Total Tokens:         16,744
║  ⚡ Speed:                111.6 tokens/sec
╚══════════════════════════════════════════════════════════════╝


⏳ Running OpenAI: gpt-4o...



╔══════════════════════════════════════════════════════════════╗
║  OpenAI: gpt-4o
╠══════════════════════════════════════════════════════════════╣
║  ⏱️  Completion Time:     56.24s
║  📝 Prompt Tokens:        742
║  ✍️  Completion Tokens:   2,620
║  📊 Total Tokens:         3,362
║  ⚡ Speed:                46.6 tokens/sec
╚══════════════════════════════════════════════════════════════╝


✅ Completed 2 model comparisons!


In [7]:
# Summarise the successful runs: one table plus the three headline winners.
successful = {key: run for key, run in results.items() if run.success}

if not successful:
    print("❌ No successful completions.")
else:
    display(Markdown("## 📊 Comparison Summary"))
    runs = list(successful.values())

    if HAS_PANDAS:
        # Values are pre-formatted strings so the table renders as intended.
        data = []
        for r in runs:
            data.append({
                'Provider': r.provider,
                'Model': r.model_name,
                'Time (s)': f"{r.completion_time:.2f}",
                'Tokens': f"{r.completion_tokens:,}",
                'Speed (tok/s)': f"{r.tokens_per_second:.1f}",
                'Output': f"{len(r.content):,} chars"
            })
        display(pd.DataFrame(data))
    else:
        # Plain-text fallback when pandas is unavailable.
        print(f"{'Provider':<12} {'Model':<28} {'Time':<10} {'Tokens':<10} {'Speed':<12}")
        print("-" * 75)
        for r in runs:
            print(f"{r.provider:<12} {r.model_name:<28} {r.completion_time:.2f}s     {r.completion_tokens:<10,} {r.tokens_per_second:.1f} tok/s")

    # Winners
    fastest = min(runs, key=lambda run: run.completion_time)
    most_output = max(runs, key=lambda run: run.completion_tokens)
    highest_speed = max(runs, key=lambda run: run.tokens_per_second)

    print("\n🏆 Results:")
    print(f"   ⚡ Fastest: {fastest.provider} {fastest.model_name} ({fastest.completion_time:.2f}s)")
    print(f"   🚀 Highest speed: {highest_speed.provider} {highest_speed.model_name} ({highest_speed.tokens_per_second:.1f} tok/s)")
    print(f"   📝 Most code: {most_output.provider} {most_output.model_name} ({most_output.completion_tokens:,} tokens)")


## 📊 Comparison Summary

Unnamed: 0,Provider,Model,Time (s),Tokens,Speed (tok/s),Output
0,MiniMax,MiniMax-M2.1,143.34,16000,111.6,"54,049 chars"
1,OpenAI,gpt-4o,56.24,2620,46.6,"10,437 chars"



🏆 Results:
   ⚡ Fastest: OpenAI gpt-4o (56.24s)
   🚀 Highest speed: MiniMax MiniMax-M2.1 (111.6 tok/s)
   📝 Most code: MiniMax MiniMax-M2.1 (16,000 tokens)


In [8]:
# Code quality analysis
def analyze_code(content: str) -> dict:
    """Score a model's Next.js output with simple text heuristics.

    ``<think>...</think>`` sections are removed first so only the delivered
    answer is measured.

    Args:
        content: Raw model output.

    Returns:
        A dict of display-ready metrics: ``Lines`` and ``Characters`` (ints),
        ``Files Found`` (count of distinct file paths seen in file-marker
        comments, or ``'?'`` when none were recognised), and one ``'✓'``/``'✗'``
        flag per feature check.
    """
    content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
    lower = content.lower()

    # The trailing (?!\w) makes the extension match whole: without it
    # "package.json" was captured as "package.js" and collided with a real
    # "package.js". jsx/mjs are listed so such paths still count.
    files = re.findall(
        r'(?:// ===|/\*\*|###|File:)\s*(?:FILE:?)?\s*'
        r'([\w./\-]+\.(?:tsx?|jsx?|mjs|json|css))(?!\w)',
        content, re.I)

    # The directive may be written with either quote style.
    has_use_client = re.search(r"""['"]use client['"]""", content) is not None

    # A custom hook is a declaration named use<Capital>...; checked
    # case-sensitively so `function UserCard` / `const user` do not qualify.
    has_custom_hook = re.search(r'\b(?:function|const)\s+use[A-Z]\w*', content) is not None

    def flag(present: bool) -> str:
        return '✓' if present else '✗'

    return {
        'Lines': len(content.split('\n')),
        'Characters': len(content),
        'Files Found': len(set(files)) if files else '?',
        'TypeScript': flag('.tsx' in content or 'interface ' in content),
        'App Router': flag('app/' in content),
        'Client Components': flag(has_use_client),
        'React Hooks': flag(any(h in lower for h in ['usestate', 'useeffect', 'usecontext'])),
        'Custom Hooks': flag(has_custom_hook),
        'Tailwind': flag(any(c in content for c in ['className=', 'bg-', 'flex ', 'grid '])),
        'Context API': flag('createcontext' in lower),
        'Type Definitions': flag('interface ' in content or 'type ' in content),
        'Animations': flag('transition' in lower or 'animate' in lower),
        'Accessibility': flag('aria-' in lower or 'role=' in lower),
    }

display(Markdown("## 🔍 Code Quality Analysis"))

# One metric listing per successful run; failed runs have nothing to analyse.
for name, result in results.items():
    if not result.success:
        continue
    print(f"\n{result.provider} - {result.model_name}")
    print("-" * 55)
    analysis = analyze_code(result.content)
    print("\n".join(f"  {metric:<20}: {value}" for metric, value in analysis.items()))


## 🔍 Code Quality Analysis


MiniMax - MiniMax-M2.1
-------------------------------------------------------
  Lines               : 1680
  Characters          : 52630
  Files Found         : 18
  TypeScript          : ✓
  App Router          : ✓
  Client Components   : ✓
  React Hooks         : ✓
  Custom Hooks        : ✓
  Tailwind            : ✓
  Context API         : ✓
  Type Definitions    : ✓
  Animations          : ✓
  Accessibility       : ✗

OpenAI - gpt-4o
-------------------------------------------------------
  Lines               : 394
  Characters          : 10437
  Files Found         : 19
  TypeScript          : ✓
  App Router          : ✓
  Client Components   : ✓
  React Hooks         : ✓
  Custom Hooks        : ✓
  Tailwind            : ✓
  Context API         : ✓
  Type Definitions    : ✓
  Animations          : ✓
  Accessibility       : ✓


In [9]:
# Visual comparison charts
def bar_chart(data: dict, title: str, unit: str = "", width: int = 40):
    """Print a horizontal text bar chart, largest value first.

    Args:
        data: Mapping of label -> numeric value. Nothing is printed if empty.
        title: Heading printed above the chart (after a blank line).
        unit: Suffix appended to each printed value.
        width: Bar length, in characters, of the largest value.
    """
    if not data:
        return

    peak = max(data.values())
    label_width = max(map(len, data))

    print(f"\n{title}")
    print("=" * (label_width + width + 15))

    # Negated key keeps ties in insertion order while sorting descending.
    ranked = sorted(data.items(), key=lambda item: -item[1])
    for label, val in ranked:
        if peak > 0:
            bar = "█" * int((val / peak) * width)
        else:
            # Avoid dividing by zero when every value is zero or negative.
            bar = ""
        print(f"{label:<{label_width}} │ {bar} {val:.2f}{unit}")

if successful:
    display(Markdown("## 📈 Visual Comparison"))

    # (chart title, value suffix, how to read the value off a result)
    chart_specs = [
        ("⏱️  Completion Time (lower = better)", "s", lambda r: r.completion_time),
        ("\n⚡ Generation Speed (higher = better)", " tok/s", lambda r: r.tokens_per_second),
        ("\n📝 Output Size", " KB", lambda r: len(r.content)/1000),
        ("\n🔢 Tokens Generated", "", lambda r: r.completion_tokens),
    ]

    for chart_title, chart_unit, read_value in chart_specs:
        # Model names are clipped to 18 characters to keep the label column narrow.
        series = {
            f"{r.provider} {r.model_name[:18]}": read_value(r)
            for r in successful.values()
        }
        bar_chart(series, chart_title, chart_unit)


## 📈 Visual Comparison


⏱️  Completion Time (lower = better)
MiniMax MiniMax-M2.1 │ ████████████████████████████████████████ 143.34s
OpenAI gpt-4o        │ ███████████████ 56.24s


⚡ Generation Speed (higher = better)
MiniMax MiniMax-M2.1 │ ████████████████████████████████████████ 111.62 tok/s
OpenAI gpt-4o        │ ████████████████ 46.58 tok/s


📝 Output Size
MiniMax MiniMax-M2.1 │ ████████████████████████████████████████ 54.05 KB
OpenAI gpt-4o        │ ███████ 10.44 KB


🔢 Tokens Generated
MiniMax MiniMax-M2.1 │ ████████████████████████████████████████ 16000.00
OpenAI gpt-4o        │ ██████ 2620.00


In [10]:
# Save each successful model's answer as a Markdown file for side-by-side review.
output_dir = "generated_nextjs"
os.makedirs(output_dir, exist_ok=True)

display(Markdown("## 💾 Save Generated Code"))

saved = []
for name, result in results.items():
    if not result.success:
        continue

    # Replace every character that is not a letter, digit or underscore, so
    # model names containing '/', ':' and similar still yield a valid filename.
    safe = re.sub(r'[^A-Za-z0-9_]', '_', result.model_name)
    filename = f"{output_dir}/{result.provider.lower()}_{safe}.md"

    # Drop <think>...</think> sections so only the delivered answer is saved.
    content = re.sub(r'<think>.*?</think>', '', result.content, flags=re.DOTALL).strip()

    # Explicit UTF-8: the generated code may contain non-ASCII characters,
    # which a platform-default encoding can fail to write.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(f"# TaskFlow - Generated by {result.provider} {result.model_name}\n\n")
        f.write(f"**Time:** {result.completion_time:.2f}s | **Tokens:** {result.completion_tokens:,}\n\n---\n\n")
        f.write(content)

    saved.append(filename)
    print(f"✅ {filename}")

print(f"\n📁 Saved {len(saved)} files to '{output_dir}/'")
print("   Open these files to compare the generated code!")


## 💾 Save Generated Code

✅ generated_nextjs/minimax_MiniMax_M2_1.md
✅ generated_nextjs/openai_gpt_4o.md

📁 Saved 2 files to 'generated_nextjs/'
   Open these files to compare the generated code!


In [11]:
# Save benchmark results
import json
import os
from datetime import datetime

os.makedirs("benchmark_results", exist_ok=True)

# Build results from the comparison (successful runs only).
successful = {k: v for k, v in results.items() if v.success}

model_results = []
for name, result in successful.items():
    # Get code analysis metrics
    analysis = analyze_code(result.content)

    # analyze_code reports '?' when it recognised no file markers. Store 0
    # instead so the JSON field is always an int and the "most_files"
    # comparison below never mixes str with int.
    files_found = analysis.get("Files Found", 0)
    if not isinstance(files_found, int):
        files_found = 0

    model_results.append({
        "key": name,
        "provider": result.provider,
        "model": result.model_name,
        "completion_time": round(result.completion_time, 2),
        "prompt_tokens": result.prompt_tokens,
        "completion_tokens": result.completion_tokens,
        "total_tokens": result.total_tokens,
        "tokens_per_second": round(result.tokens_per_second, 1),
        "output_chars": len(result.content),
        "code_analysis": {
            "lines": analysis.get("Lines", 0),
            "files_found": files_found,
            "has_typescript": analysis.get("TypeScript") == "✓",
            "has_app_router": analysis.get("App Router") == "✓",
            "has_client_components": analysis.get("Client Components") == "✓",
            "has_react_hooks": analysis.get("React Hooks") == "✓",
            "has_custom_hooks": analysis.get("Custom Hooks") == "✓",
            "has_tailwind": analysis.get("Tailwind") == "✓",
            "has_context_api": analysis.get("Context API") == "✓",
            "has_type_definitions": analysis.get("Type Definitions") == "✓",
            "has_animations": analysis.get("Animations") == "✓",
            "has_accessibility": analysis.get("Accessibility") == "✓"
        }
    })

# Calculate winners
if successful:
    fastest = min(successful.values(), key=lambda x: x.completion_time)
    most_output = max(successful.values(), key=lambda x: x.completion_tokens)
    highest_speed = max(successful.values(), key=lambda x: x.tokens_per_second)
    most_files = max(model_results, key=lambda x: x["code_analysis"]["files_found"])

    winners = {
        "fastest": f"{fastest.provider} {fastest.model_name}",
        "most_output": f"{most_output.provider} {most_output.model_name}",
        "highest_throughput": f"{highest_speed.provider} {highest_speed.model_name}",
        "most_files": f"{most_files['provider']} {most_files['model']}"
    }
else:
    winners = {}

# Build final results
benchmark_results = {
    "notebook": "05_nextjs_comparison",
    "timestamp": datetime.now().isoformat(),
    "task": "nextjs_application_generation",
    "summary": {
        "models_compared": len(model_results),
        # dict.fromkeys de-duplicates while keeping run order, so the saved
        # file and the printed summary are stable between runs.
        "providers": list(dict.fromkeys(r["provider"] for r in model_results)),
        "winners": winners
    },
    "models": model_results,
    # First MiniMax entry, or None when MiniMax produced no successful run.
    "minimax_performance": next((r for r in model_results if r["provider"] == "MiniMax"), None)
}

# Save to JSON
output_path = "benchmark_results/05_nextjs_comparison.json"
with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(benchmark_results, f, indent=2)

print(f"✅ Results saved to {output_path}")
print(f"\n📊 Summary:")
print(f"   Models compared: {benchmark_results['summary']['models_compared']}")
print(f"   Providers: {', '.join(benchmark_results['summary']['providers'])}")
if winners:
    print(f"\n🏆 Winners:")
    for category, winner in winners.items():
        print(f"   {category}: {winner}")


✅ Results saved to benchmark_results/05_nextjs_comparison.json

📊 Summary:
   Models compared: 2
   Providers: OpenAI, MiniMax

🏆 Winners:
   fastest: OpenAI gpt-4o
   most_output: MiniMax MiniMax-M2.1
   highest_throughput: MiniMax MiniMax-M2.1
   most_files: OpenAI gpt-4o
