# Lab 3.4.1: Chain-of-Thought Workshop - SOLUTIONS

This notebook contains complete solutions to all exercises from Lab 3.4.1.

In [None]:
import ollama
import json
import re
from typing import List, Dict, Optional

MODEL = "qwen3:8b"  # Adjust as needed

## Solution: Create Your Own Few-Shot Examples

Here's a complete set of high-quality few-shot examples for percentage calculations:

In [None]:
# Solution: Custom few-shot examples for percentage calculations
# Worked demonstrations used as few-shot context. Every entry pairs a
# "question" with a "reasoning" string that opens with the same trigger
# phrase, lists numbered steps, and closes with "The answer is ...".
PERCENTAGE_EXAMPLES = [
    # Percentage of a number
    dict(
        question="What is 20% of 150?",
        reasoning="""Let me solve this step by step:
1. To find a percentage of a number, I convert the percentage to a decimal
2. 20% = 20/100 = 0.20
3. Now multiply: 150 * 0.20 = 30

The answer is 30.""",
    ),
    # Percentage discount applied to a price
    dict(
        question="A shirt costs $80 and is on sale for 25% off. What is the sale price?",
        reasoning="""Let me solve this step by step:
1. First, find the discount amount: 25% of $80
2. 25% = 0.25
3. Discount = $80 * 0.25 = $20
4. Sale price = Original price - Discount = $80 - $20 = $60

The answer is $60.""",
    ),
    # Converting a part/whole ratio into a percentage
    dict(
        question="If you scored 45 out of 60 on a test, what percentage did you get?",
        reasoning="""Let me solve this step by step:
1. To find percentage, divide the part by the whole and multiply by 100
2. Percentage = (45 / 60) * 100
3. First: 45 / 60 = 0.75
4. Then: 0.75 * 100 = 75%

The answer is 75%.""",
    ),
]


def few_shot_percentage(question: str) -> str:
    """
    Answer a percentage question using few-shot chain-of-thought prompting.

    The prompt lists every entry of PERCENTAGE_EXAMPLES as a "Q:/A:" pair
    (separated by blank lines), then appends the new question with the
    answer already started by the step-by-step trigger phrase.

    Args:
        question: The percentage problem to solve.

    Returns:
        The text content of the model's chat reply.
    """
    # Each demonstration ends with a newline so that joining the blocks
    # with "\n" leaves exactly one blank line between consecutive pairs.
    demonstrations = [
        f"Q: {shot['question']}\nA: {shot['reasoning']}\n"
        for shot in PERCENTAGE_EXAMPLES
    ]
    target = f"Q: {question}\nA: Let me solve this step by step:"
    prompt = "\n".join([*demonstrations, target])

    # temperature 0.0 is passed so repeated runs use the same sampling setting
    reply = ollama.chat(
        model=MODEL,
        messages=[{"role": "user", "content": prompt}],
        options={"temperature": 0.0, "num_predict": 512},
    )
    return reply['message']['content']


# Test the solution
# Demo cell: sends one tip-calculation question through few_shot_percentage()
# (which performs a live ollama.chat call) and prints the question followed
# by the raw reply text.
test_question = "A restaurant bill is $85. If you want to leave a 18% tip, how much is the tip?"
print(f"Question: {test_question}\n")
print("Response:")
print(few_shot_percentage(test_question))

## Solution: Complete CoT Prompt Library

In [None]:
class CoTPromptLibrary:
    """
    Complete solution for the CoT Prompt Template Library challenge.

    Holds chain-of-thought prompt templates keyed by domain name. Each
    template is a dict with three keys:

    - "system":   system message describing the assistant's role
    - "trigger":  phrase that starts the answer and invites step-by-step work
    - "examples": list of {"q": ..., "a": ...} worked few-shot examples

    Every instance works on its own deep copy of ``TEMPLATES``, so adding
    domains or editing examples on one instance never affects the class
    defaults or other instances.
    """

    TEMPLATES = {
        "math": {
            "system": "You are a math tutor. Always show your work step by step.",
            "trigger": "Let me solve this step by step:",
            "examples": [
                {
                    "q": "What is 25% of 80?",
                    "a": """Let me solve this step by step:
1. 25% means 25 per 100, or 25/100 = 0.25
2. To find 25% of 80, multiply: 80 * 0.25
3. 80 * 0.25 = 20

The answer is 20."""
                }
            ]
        },
        "code_debug": {
            "system": "You are a debugging expert. Analyze code systematically.",
            "trigger": "Let me analyze this code step by step:",
            "examples": [
                {
                    "q": "Why does this code fail: print(len(None))?",
                    "a": """Let me analyze this code step by step:
1. The code calls len() on None
2. len() expects an object with a __len__ method (like string, list, dict)
3. None is a NoneType object and doesn't have __len__
4. This will raise TypeError: object of type 'NoneType' has no len()

Fix: Check for None before calling len(), e.g., len(x) if x else 0"""
                }
            ]
        },
        "logic": {
            "system": "You are a logic expert. Use formal reasoning.",
            "trigger": "Let me reason through this logically:",
            "examples": [
                {
                    "q": "All dogs are mammals. All mammals breathe air. Do dogs breathe air?",
                    "a": """Let me reason through this logically:
1. Premise 1: All dogs are mammals
2. Premise 2: All mammals breathe air
3. By syllogism: If dogs are mammals, and mammals breathe air...
4. Conclusion: Dogs breathe air (by transitive property)

The answer is yes."""
                }
            ]
        },
        "word_problem": {
            "system": "You solve word problems by identifying variables and relationships.",
            "trigger": "Let me break down this problem:",
            "examples": [
                {
                    "q": "Tom is twice as old as Mary. In 5 years, the sum of their ages will be 40. How old is Mary now?",
                    "a": """Let me break down this problem:
1. Let Mary's current age = M
2. Tom's current age = 2M (twice Mary's age)
3. In 5 years: Mary = M+5, Tom = 2M+5
4. Sum in 5 years: (M+5) + (2M+5) = 40
5. Simplify: 3M + 10 = 40
6. Solve: 3M = 30, M = 10

The answer is Mary is 10 years old."""
                }
            ]
        },
    }

    def __init__(self):
        """Create a library pre-populated with a private copy of TEMPLATES."""
        import copy

        # A shallow .copy() would share the nested template dicts and example
        # lists with the class attribute, so mutating them through one
        # instance would leak into every other instance.
        self.templates = copy.deepcopy(self.TEMPLATES)

    def get_prompt(self, domain: str, question: str, use_examples: bool = True) -> str:
        """
        Build the CoT prompt text for a domain and question.

        Args:
            domain: Key of a registered template.
            question: The question to append after the examples.
            use_examples: When True, prepend the domain's few-shot examples.

        Returns:
            "Q:/A:" example pairs separated by blank lines (if requested),
            followed by the question and an answer line that starts with the
            domain's trigger phrase.

        Raises:
            ValueError: If ``domain`` is not registered.
        """
        if domain not in self.templates:
            raise ValueError(f"Unknown domain: {domain}. Available: {list(self.templates.keys())}")

        template = self.templates[domain]

        parts = []

        if use_examples and template.get('examples'):
            for ex in template['examples']:
                parts.append(f"Q: {ex['q']}")
                parts.append(f"A: {ex['a']}")
                parts.append("")

        parts.append(f"Q: {question}")
        parts.append(f"A: {template['trigger']}")

        return "\n".join(parts)

    def add_domain(self, domain: str, system: str, trigger: str, examples: Optional[List[Dict]] = None):
        """
        Register a new domain, or replace an existing one.

        Args:
            domain: Name used to look the template up later.
            system: System message for the domain.
            trigger: Phrase that starts the model's answer.
            examples: Optional list of {"q": ..., "a": ...} dicts.
        """
        self.templates[domain] = {
            "system": system,
            "trigger": trigger,
            # Copy the list so later changes to the caller's list do not
            # silently alter the stored template.
            "examples": list(examples) if examples else [],
        }

    def _build_messages(self, domain: str, question: str, use_examples: bool = True) -> List[Dict[str, str]]:
        """Return the chat messages for a query: optional system message, then the user prompt."""
        prompt = self.get_prompt(domain, question, use_examples)

        messages = []
        system = self.templates[domain].get("system")
        if system:
            # The per-domain system prompt is part of the template and has to
            # be sent explicitly; otherwise it has no effect on the model.
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})
        return messages

    def query(
        self,
        domain: str,
        question: str,
        model: Optional[str] = None,
        use_examples: bool = True,
    ) -> str:
        """
        Query the model with domain-specific CoT prompting.

        Args:
            domain: Key of a registered template.
            question: The question to ask.
            model: Model name to use. None means the module-level MODEL,
                looked up at call time so that re-assigning MODEL takes effect.
            use_examples: When True, include the domain's few-shot examples.

        Returns:
            The text content of the model's chat reply.

        Raises:
            ValueError: If ``domain`` is not registered.
        """
        response = ollama.chat(
            model=model or MODEL,
            messages=self._build_messages(domain, question, use_examples),
            options={"temperature": 0.0, "num_predict": 1024}
        )

        return response['message']['content']


# Test the complete library
# Demo cell: builds a CoTPromptLibrary and prints the reply for one math and
# one logic question. Each library.query() call performs a live ollama.chat
# request, so the output depends on the configured model.
library = CoTPromptLibrary()

print("Testing CoT Library across domains:\n")

# Math
print("=" * 50)
print("MATH DOMAIN")
print("=" * 50)
print(library.query("math", "What is 15% of 240?"))

# Logic
# The premises only say that *some* birds fly, so this question probes
# whether the model avoids over-generalizing.
print("\n" + "=" * 50)
print("LOGIC DOMAIN")
print("=" * 50)
print(library.query("logic", "Some birds can fly. Penguins are birds. Can penguins fly?"))

## Solution: Comprehensive Accuracy Evaluation

In [None]:
def _extract_numeric_answer(response: str) -> Optional[float]:
    """
    Pull the final numerical answer out of a model response.

    Patterns are tried from most to least specific, and within a pattern
    the last match wins:

    1. "The answer is X" / "The final answer is X"
    2. "= X" at the end of a line, sentence or the whole text
    3. the last number anywhere in the text

    Returns:
        The parsed number, or None when the text contains no number.
    """
    # A number must start with a digit. Allowing a bare "," to count as a
    # number would make trailing punctuation hide the real answer.
    number = r"-?\d[\d,]*(?:\.\d+)?"
    patterns = [
        rf"[Tt]he (?:final )?answer is[:\s]+\$?({number})",
        rf"=\s*\$?({number})\s*(?:$|\.|\n)",
        rf"({number})",
    ]
    for pattern in patterns:
        matches = re.findall(pattern, response)
        if matches:
            # Thousands separators (and a trailing comma) are dropped first
            return float(matches[-1].replace(',', ''))
    return None


def _answers_match(predicted: Optional[float], expected, rel_tol: float = 0.01) -> bool:
    """
    Return True when ``predicted`` is within ``rel_tol`` of ``expected``.

    A missing prediction or an expected value that cannot be converted to
    float counts as a mismatch. The tiny absolute floor lets an expected
    answer of exactly 0 be matched, which a purely relative bound cannot do.
    """
    if predicted is None:
        return False
    try:
        target = float(expected)
    except (TypeError, ValueError):
        return False
    return abs(predicted - target) <= max(rel_tol * abs(target), 1e-9)


def comprehensive_evaluation(
    problems: List[Dict],
    n_problems: int = 10,
    model: Optional[str] = None,
) -> Dict:
    """
    Complete solution for comprehensive evaluation comparing
    direct answering, zero-shot CoT, and few-shot CoT.

    Each of the first ``n_problems`` problems is sent to the model once per
    prompting method. Progress and a summary table are printed.

    Args:
        problems: Dicts with a 'question' key and the expected value under
            'numerical_answer' (preferred) or 'answer'.
        n_problems: Maximum number of problems to evaluate.
        model: Model name to use; None means the module-level MODEL.

    Returns:
        Dict mapping method name to {"correct", "total", "latency"}, where
        "latency" is the summed request time in seconds.
    """
    import time

    methods = {
        "direct": lambda q: f"{q}\n\nGive only the numerical answer:",
        "zero_shot_cot": lambda q: f"{q}\n\nLet's think step by step:",
        "few_shot_cot": lambda q: f"""Q: If there are 3 cars and 2 more arrive, how many?
A: Let's think step by step. 3 + 2 = 5 cars. The answer is 5.

Q: {q}
A: Let's think step by step:"""
    }

    results = {method: {"correct": 0, "total": 0, "latency": 0} for method in methods}

    for i, prob in enumerate(problems[:n_problems]):
        question = prob['question']
        expected = prob.get('numerical_answer', prob.get('answer'))

        print(f"\nProblem {i+1}: {question[:50]}...")

        for method_name, prompt_fn in methods.items():
            prompt = prompt_fn(question)

            # perf_counter is monotonic, so the measured interval cannot be
            # distorted by system clock adjustments
            start = time.perf_counter()
            response = ollama.chat(
                model=model or MODEL,
                messages=[{"role": "user", "content": prompt}],
                options={"temperature": 0.0, "num_predict": 512}
            )
            elapsed = time.perf_counter() - start

            predicted = _extract_numeric_answer(response['message']['content'])
            correct = _answers_match(predicted, expected)

            results[method_name]["total"] += 1
            results[method_name]["correct"] += int(correct)
            results[method_name]["latency"] += elapsed

            status = "CORRECT" if correct else "WRONG"
            print(f"  {method_name}: {status} (pred={predicted}, exp={expected})")

    # Calculate summary
    print("\n" + "=" * 60)
    print("COMPREHENSIVE EVALUATION RESULTS")
    print("=" * 60)

    for method, data in results.items():
        accuracy = data["correct"] / data["total"] if data["total"] > 0 else 0
        avg_latency = data["latency"] / data["total"] if data["total"] > 0 else 0
        print(f"{method:20} Accuracy: {accuracy:.1%}  Avg Latency: {avg_latency:.2f}s")

    return results


# Run if problems are available
# comprehensive_evaluation(problems, n_problems=5)

## Key Takeaways

1. **Zero-shot CoT** ("Let's think step by step") is simple and effective
2. **Few-shot CoT** with good examples can further improve accuracy
3. **Domain-specific templates** help maintain consistency
4. **Always extract the final answer** programmatically for evaluation
5. **CoT helps most on multi-step reasoning problems**