# Lab 3.4.3: DeepSeek-R1 Exploration - SOLUTIONS

This notebook contains complete solutions to all exercises from Lab 3.4.3.

In [None]:
import ollama
import json
import re
import time
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass

# Model configuration
# Ask the local Ollama server which models are installed.
models = ollama.list()

# The key that holds a model's tag differs between ollama-python releases
# ('name' in older ones, 'model' in newer ones). Accept either so the lookup
# does not raise KeyError, and skip entries that carry neither.
model_names = [
    tag
    for tag in (m.get('model') or m.get('name') for m in models.get('models', []))
    if tag
]

# Find R1 model or use fallback: prefer the first installed tag containing
# "r1", then any installed model, then a default tag if nothing is installed.
MODEL = next((name for name in model_names if 'r1' in name.lower()), None)

if not MODEL:
    MODEL = model_names[0] if model_names else "llama3.1:8b"

print(f"Using model: {MODEL}")

## Solution: ThinkingResult Dataclass and Parser

In [None]:
@dataclass
class ThinkingResult:
    """Parsed result from R1-style response.

    Token counts are estimates (character count // 4), not tokenizer output.
    """
    thinking: str          # reasoning text with the <think> tags removed
    answer: str            # response text left after removing the reasoning
    thinking_tokens: int   # estimated token count of the reasoning
    answer_tokens: int     # estimated token count of the answer
    raw_response: str      # the unmodified text that was parsed

    @property
    def thinking_ratio(self) -> float:
        """Ratio of thinking tokens to total tokens (0.0 when both are zero)."""
        total = self.thinking_tokens + self.answer_tokens
        return self.thinking_tokens / total if total > 0 else 0.0


_THINK_OPEN = '<think>'
_THINK_CLOSE = '</think>'
_THINK_BLOCK_RE = re.compile(r'<think>(.*?)</think>', re.DOTALL)


def parse_r1_response(response: str) -> ThinkingResult:
    """
    Parse DeepSeek-R1 style thinking tokens from a response.

    Every complete ``<think>...</think>`` block is extracted as reasoning and
    removed from the answer. Two malformed shapes are also handled so that
    reasoning never leaks into ``answer``:

    * an opening ``<think>`` with no closing tag (e.g. generation stopped at
      the token limit mid-thought): everything after it is reasoning;
    * a closing ``</think>`` with no opening tag (the opening tag may have
      been supplied by the prompt template rather than the model): everything
      before it is reasoning.

    Args:
        response: Raw model output.

    Returns:
        ThinkingResult with the reasoning segments joined by newlines, the
        stripped answer, and ~4-characters-per-token estimates for both.
    """
    thinking_parts = _THINK_BLOCK_RE.findall(response)
    remainder = _THINK_BLOCK_RE.sub('', response)

    # After removing complete blocks, any leftover '<think>' cannot be
    # followed by a '</think>' (that would have been a complete block), so
    # the text after it is unterminated reasoning.
    remainder, open_tag, dangling = remainder.partition(_THINK_OPEN)
    if open_tag:
        thinking_parts.append(dangling)

    # A leftover '</think>' has no opener; treat the text before the last
    # one as reasoning and only the text after it as the answer.
    leading, close_tag, tail = remainder.rpartition(_THINK_CLOSE)
    if close_tag:
        thinking_parts.insert(0, leading)
        remainder = tail

    thinking_content = "\n".join(thinking_parts)
    answer_content = remainder.strip()

    # Rough heuristic: about four characters per token.
    thinking_tokens = len(thinking_content) // 4
    answer_tokens = len(answer_content) // 4

    return ThinkingResult(
        thinking=thinking_content.strip(),
        answer=answer_content,
        thinking_tokens=thinking_tokens,
        answer_tokens=answer_tokens,
        raw_response=response,
    )

## Solution: Enhanced Thinking Visualizer

Complete solution for the "Build a Thinking Visualizer" challenge:

In [None]:
class ThinkingVisualizer:
    """
    Solution: Advanced visualization of R1's thinking process.

    Features:
    - Step detection and numbering
    - Pattern recognition (restating, calculating, verifying)
    - Plain-text report with section headers and bar indicators
    - Summary statistics

    Every call to ``visualize`` stores its analysis in ``self.analyses`` so
    that ``get_aggregate_stats`` can summarise across responses.
    """

    # Patterns that indicate different types of thinking
    PATTERNS = {
        'restating': ['let me understand', 'the problem asks', 'so we have', 'given that'],
        'planning': ['first', 'then', 'next', 'finally', 'my approach'],
        'calculating': ['calculate', 'compute', '=', 'equals', 'multiply', 'divide', 'add', 'subtract'],
        'verifying': ['verify', 'check', 'confirm', 'let me make sure', 'double check'],
        'correcting': ['wait', 'no', 'actually', 'mistake', 'wrong', 'let me reconsider'],
    }

    # Purely alphabetic keywords of at most this length must match as whole
    # words: as plain substrings they fire on unrelated text ('no' in "know",
    # 'add' in "address"). Longer keywords, phrases and symbols stay substring
    # matches so inflected forms such as "checking" are still detected.
    WHOLE_WORD_MAX_LEN = 4

    # A line counts as an explicit step if (after stripping) it starts with a
    # number followed by '.', ')' or whitespace, with "Step <n>", or with a
    # '-' / '*' bullet.
    _STEP_RE = re.compile(r'(?:\d+[.)\s]|Step\s+\d+|[-*]\s)', re.IGNORECASE)

    def __init__(self):
        # One analysis dict per visualize() call, in call order.
        self.analyses = []

    @classmethod
    def _keyword_present(cls, keyword: str, text: str) -> bool:
        """Return True if *keyword* occurs in the already-lowercased *text*."""
        if keyword.isalpha() and len(keyword) <= cls.WHOLE_WORD_MAX_LEN:
            return re.search(rf'\b{re.escape(keyword)}\b', text) is not None
        return keyword in text

    def analyze_thinking(self, thinking: str) -> Dict:
        """Analyze the thinking content for patterns.

        Args:
            thinking: Reasoning text (without the think tags).

        Returns:
            Dict with:
            - 'pattern_counts': per category, how many of its distinct
              keywords appear at least once (not total occurrences);
            - 'explicit_steps': number of lines that look like a step/bullet;
            - 'line_count': number of newline-separated lines (0 if empty);
            - 'word_count': number of whitespace-separated words.
        """
        thinking_lower = thinking.lower()

        pattern_counts = {
            pattern_type: sum(
                self._keyword_present(kw, thinking_lower) for kw in keywords
            )
            for pattern_type, keywords in self.PATTERNS.items()
        }

        # An empty string has no lines; ''.split('\n') would report one.
        lines = thinking.split('\n') if thinking else []

        # Count explicit steps
        step_count = sum(1 for line in lines if self._STEP_RE.match(line.strip()))

        return {
            'pattern_counts': pattern_counts,
            'explicit_steps': step_count,
            'line_count': len(lines),
            'word_count': len(thinking.split()),
        }

    def visualize(self, result: "ThinkingResult", max_lines: int = 30) -> str:
        """
        Create a comprehensive visualization of the thinking process.

        Args:
            result: Parsed response; its thinking, answer, token counts and
                thinking_ratio are read.
            max_lines: Maximum number of thinking lines to consider for
                display (blank lines count toward the limit but are skipped).

        Returns:
            The report as a single newline-joined string.

        Side effect: the analysis of ``result.thinking`` is appended to
        ``self.analyses``.
        """
        rule = "-" * 40
        banner = "#" * 70

        # Header
        output = ["\n" + banner, "#  R1 THINKING PROCESS ANALYSIS", banner]

        # Analyze thinking
        analysis = self.analyze_thinking(result.thinking)
        self.analyses.append(analysis)

        # Token statistics
        # max(..., 1) avoids dividing by zero when there is no answer text.
        overhead = result.thinking_tokens / max(result.answer_tokens, 1)
        output.extend([
            "\n[TOKEN STATISTICS]",
            rule,
            f"  Thinking tokens: ~{result.thinking_tokens}",
            f"  Answer tokens:   ~{result.answer_tokens}",
            f"  Thinking ratio:  {result.thinking_ratio:.1%}",
            f"  Overhead:        {overhead:.1f}x",
        ])

        # Pattern analysis (only categories that were detected; bar capped at 10)
        output.extend(["\n[THINKING PATTERNS DETECTED]", rule])
        for pattern_type, count in analysis['pattern_counts'].items():
            if count > 0:
                bar = "*" * min(count, 10)
                output.append(f"  {pattern_type:<15} {bar} ({count})")

        # Structure analysis
        output.extend([
            "\n[STRUCTURE]",
            rule,
            f"  Explicit steps: {analysis['explicit_steps']}",
            f"  Total lines:    {analysis['line_count']}",
            f"  Word count:     {analysis['word_count']}",
        ])

        # Thinking content
        output.extend(["\n[THINKING CONTENT]", rule])

        if result.thinking:
            lines = result.thinking.split('\n')
            for line in lines[:max_lines]:
                stripped = line.strip()
                if not stripped:
                    continue
                # Truncate long lines
                display_line = stripped[:75]
                if len(stripped) > 75:
                    display_line += "..."
                output.append(f"  {display_line}")

            if len(lines) > max_lines:
                output.append(f"  ... ({len(lines) - max_lines} more lines)")
        else:
            output.append("  (No thinking tokens detected)")

        # Final answer (first 500 characters)
        output.extend(["\n[FINAL ANSWER]", rule])
        answer_preview = result.answer[:500]
        if len(result.answer) > 500:
            answer_preview += "..."
        output.append(answer_preview)

        output.append("\n" + banner)

        return "\n".join(output)

    def get_aggregate_stats(self) -> Dict:
        """Get aggregate statistics from all analyses.

        Returns:
            An empty dict if ``visualize`` has not been called yet; otherwise
            the number of analyses, the mean explicit-step and line counts,
            and per-category keyword counts summed over all analyses.
        """
        if not self.analyses:
            return {}

        n = len(self.analyses)
        avg_steps = sum(a['explicit_steps'] for a in self.analyses) / n
        avg_lines = sum(a['line_count'] for a in self.analyses) / n

        # Most common patterns
        pattern_totals = {}
        for a in self.analyses:
            for pattern, count in a['pattern_counts'].items():
                pattern_totals[pattern] = pattern_totals.get(pattern, 0) + count

        return {
            'total_analyses': n,
            'avg_explicit_steps': avg_steps,
            'avg_line_count': avg_lines,
            'pattern_totals': pattern_totals,
        }


# Test the visualizer on a canned response, so this cell can run without
# querying a model.
visualizer = ThinkingVisualizer()

# Sample R1-style response: a <think>...</think> reasoning block followed by
# the user-facing answer.
sample_response = '''<think>
Let me understand the problem. We need to find when two trains meet.

Given:
- Train A leaves at 9:00 AM at 60 mph
- Train B leaves at 10:00 AM at 80 mph
- Distance between stations: 280 miles

Step 1: Calculate Train A's head start
By 10:00 AM, Train A has traveled for 1 hour at 60 mph = 60 miles
Remaining distance: 280 - 60 = 220 miles

Step 2: Calculate closing speed
After 10:00 AM, both trains are moving toward each other.
Combined speed = 60 + 80 = 140 mph

Step 3: Calculate time to meet
Time = Distance / Speed = 220 / 140 = 1.57 hours = 1 hour 34 minutes

Step 4: Calculate meeting time
10:00 AM + 1 hour 34 minutes = 11:34 AM

Let me verify: 
- Train A traveled: 2h 34m at 60 mph = 154 miles
- Train B traveled: 1h 34m at 80 mph = 126 miles
- Total: 154 + 126 = 280 miles. Correct!
</think>

The trains will meet at **11:34 AM**.

Here's how I solved it:
1. By 10 AM, Train A has already traveled 60 miles (1 hour × 60 mph)
2. Remaining distance: 280 - 60 = 220 miles
3. Combined speed: 60 + 80 = 140 mph
4. Time to meet: 220 ÷ 140 ≈ 1.57 hours ≈ 1 hour 34 minutes after 10 AM
'''

# Split the sample into reasoning and answer, then print the full report.
parsed = parse_r1_response(sample_response)
print(visualizer.visualize(parsed))

## Solution: Thinking Overhead Analyzer

In [None]:
def analyze_thinking_overhead(
    results: List["ThinkingResult"],
    verbose: bool = True
) -> Dict:
    """
    Solution: Comprehensive analysis of thinking token overhead.

    Args:
        results: Parsed responses; only ``thinking_tokens`` and
            ``answer_tokens`` are read from each.
        verbose: When True, also print a formatted summary.

    Returns:
        An empty dict if ``results`` is empty. Otherwise a dict with totals,
        the thinking share of all tokens (percent), the aggregate
        thinking/answer ratio ('overhead_ratio', 0 if there are no answer
        tokens at all), per-response averages, min/max thinking tokens, and
        the min/max per-response overhead.

        Per-response overhead is undefined when a response has zero answer
        tokens, so such responses are left out of 'min_overhead' and
        'max_overhead' instead of being counted as 0. Both are 0 when no
        response has any answer tokens.
    """
    if not results:
        return {}

    thinking_tokens = [r.thinking_tokens for r in results]
    answer_tokens = [r.answer_tokens for r in results]
    n_responses = len(results)

    total_thinking = sum(thinking_tokens)
    total_answer = sum(answer_tokens)
    total = total_thinking + total_answer

    # Computed once and reused for both ends of the range.
    overheads = [t / a for t, a in zip(thinking_tokens, answer_tokens) if a > 0]

    stats = {
        'n_responses': n_responses,
        'total_thinking_tokens': total_thinking,
        'total_answer_tokens': total_answer,
        'total_tokens': total,
        'thinking_percentage': total_thinking / total * 100 if total > 0 else 0,
        'overhead_ratio': total_thinking / total_answer if total_answer > 0 else 0,
        'avg_thinking_per_response': total_thinking / n_responses,
        'avg_answer_per_response': total_answer / n_responses,
        'min_thinking': min(thinking_tokens),
        'max_thinking': max(thinking_tokens),
        'min_overhead': min(overheads, default=0),
        'max_overhead': max(overheads, default=0),
    }

    if verbose:
        print("\n" + "=" * 60)
        print("THINKING OVERHEAD ANALYSIS")
        print("=" * 60)
        print(f"\nResponses analyzed: {stats['n_responses']}")
        print("\nToken Distribution:")
        print(f"  Total thinking tokens: {stats['total_thinking_tokens']:,}")
        print(f"  Total answer tokens:   {stats['total_answer_tokens']:,}")
        print(f"  Thinking percentage:   {stats['thinking_percentage']:.1f}%")
        print("\nOverhead:")
        print(f"  Average ratio:  {stats['overhead_ratio']:.1f}x")
        print(f"  Range:          {stats['min_overhead']:.1f}x - {stats['max_overhead']:.1f}x")
        print("\nPer Response:")
        print(f"  Avg thinking:   {stats['avg_thinking_per_response']:.0f} tokens")
        print(f"  Avg answer:     {stats['avg_answer_per_response']:.0f} tokens")

    return stats

## Solution: Query R1 with Full Analysis

In [None]:
def query_r1_with_analysis(
    question: str,
    model: str = MODEL,
    max_tokens: int = 2048,
    temperature: float = 0.0,
) -> Tuple[ThinkingResult, Dict]:
    """
    Query R1 and return parsed result with analysis.

    Args:
        question: User prompt, sent as a single user message.
        model: Ollama model tag to query.
        max_tokens: Generation limit, passed to Ollama as ``num_predict``.
        temperature: Sampling temperature, passed through to Ollama.

    Returns:
        (result, analysis): the parsed response, and the dict produced by
        ``ThinkingVisualizer.analyze_thinking`` extended with 'latency'
        (seconds spent in the chat call) and 'tokens_per_second' (estimated
        thinking + answer tokens divided by that latency; 0 if no time
        elapsed).
    """
    print(f"Querying {model}...")
    # perf_counter is monotonic, so the measured latency cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()

    response = ollama.chat(
        model=model,
        messages=[{"role": "user", "content": question}],
        options={"temperature": temperature, "num_predict": max_tokens}
    )

    elapsed = time.perf_counter() - start_time
    message = response['message']
    response_text = message['content']

    # NOTE(review): Ollama can return the reasoning in a separate
    # 'thinking' field instead of inline <think> tags — confirm against the
    # installed server/client version. When that happens, rebuild the tagged
    # form so the parser (and raw_response) still see the reasoning.
    separate_thinking = message.get('thinking')
    if (
        separate_thinking
        and '<think>' not in response_text
        and '</think>' not in response_text
    ):
        response_text = f"<think>{separate_thinking}</think>{response_text}"

    result = parse_r1_response(response_text)

    # Only the pattern/structure analysis is needed here, not the report.
    analysis = ThinkingVisualizer().analyze_thinking(result.thinking)

    analysis['latency'] = elapsed
    total_tokens = result.thinking_tokens + result.answer_tokens
    analysis['tokens_per_second'] = total_tokens / elapsed if elapsed > 0 else 0

    return result, analysis


# Test on multiple problems
test_problems = [
    "What is 17 * 23?",
    "If a = 3 and b = 4, what is a^2 + b^2?",
    "A store sells apples for $2 each. If I have $15, how many apples can I buy and how much change will I get?",
]

results = []
print("Running test problems...\n")

for prob in test_problems:
    print(f"Q: {prob}")
    result, analysis = query_r1_with_analysis(prob)
    results.append(result)
    print(f"  Latency: {analysis['latency']:.1f}s")
    print(f"  Thinking: {result.thinking_tokens} tokens")
    # Show at most 80 characters of the answer; add the ellipsis only when
    # something was actually cut off, so short answers are not shown as
    # truncated.
    answer_preview = result.answer[:80]
    if len(result.answer) > 80:
        answer_preview += "..."
    print(f"  Answer: {answer_preview}\n")

# Aggregate analysis
if results:
    analyze_thinking_overhead(results)

## Key Takeaways

1. **R1 wraps its reasoning in `<think>...</think>` tags**, emitted before the final answer
2. **Thinking overhead** is typically 2-5x the answer length
3. **Parse thinking tokens** to extract clean answers
4. **Analyze patterns** to understand reasoning quality
5. **Allow sufficient max_tokens** for R1 to think (at least 2048); otherwise generation can stop mid-reasoning, before any answer is produced