# MathLang Pro - Causal Analysis Introduction

This notebook demonstrates advanced MathLang features for causal analysis and research.

## Topics Covered
- Counterfactual reasoning with CounterfactualNode
- Advanced CoreRuntime configuration
- Learning analytics and log analysis
- Custom engine integration

In [None]:
# Setup
import sys

import pandas as pd

# Put the directory two levels up on the import path before importing `core`
sys.path.insert(0, '../..')

from core.parser import Parser
from core.evaluator import Evaluator, SymbolicEvaluationEngine
from core.symbolic_engine import SymbolicEngine
from core.learning_logger import LearningLogger
from core.computation_engine import ComputationEngine
from core.validation_engine import ValidationEngine
from core.hint_engine import HintEngine
from core.core_runtime import CoreRuntime
from core.exercise_spec import ExerciseSpec

## 1. Counterfactual Reasoning

The `Counterfactual` block allows you to explore "what if" scenarios in mathematical problem-solving.

In [None]:
# Example: Exploring alternative solution paths
# MathLang program: the actual factorization followed by a "what if" variant
source = """
Problem: x**2 + 5*x + 6

# Actual factorization
Step: (x + 2)*(x + 3)

# Counterfactual: What if we had x^2 + 5x + 4 instead?
Counterfactual:
  assume a = x**2 + 5*x + 4
  expect (x + 1)*(x + 4)

End: done
"""

# Parse the program text
parser = Parser()
program = parser.parse(source)

# Wire the evaluator to a symbolic engine and a fresh logger, then run it
symbolic_engine = SymbolicEngine()
engine = SymbolicEvaluationEngine(symbolic_engine)
logger = LearningLogger()
evaluator = Evaluator(program, engine, logger)
success = evaluator.run()

# Analyze counterfactual reasoning: keep only records from the 'counterfactual' phase
cf_records = list(filter(lambda rec: rec['phase'] == 'counterfactual', logger.records))
print("Counterfactual Analysis:")
for record in cf_records:
    print(f"  {record['rendered']}")
    # A result line is printed only when the record carries meta with a 'result' entry
    if 'meta' not in record:
        continue
    meta = record['meta']
    if 'result' in meta:
        print(f"  Result: {meta['result']}")

## 2. CoreRuntime Configuration

For research purposes, you can configure CoreRuntime with custom engines and exercise specifications.

In [None]:
# Create a research exercise with detailed tracking
research_spec = ExerciseSpec(
    id="research_001",
    target_expression="(x - 1)*(x + 1)",
    validation_mode="symbolic_equiv",
    intermediate_steps=[
        "x**2 - 1",
        "(x - 1)*(x + 1)"
    ],
    # Hint text keyed by a specific wrong answer
    # (presumably looked up by the hint engine -- confirm against HintEngine)
    hint_rules={
        "x**2 + 1": "Sign error detected",
        "x**2 - 2": "Constant error"
    },
    # Free-form study metadata; 'study_id' is echoed in the summary below
    metadata={
        "difficulty": "medium",
        "concept": "difference_of_squares",
        "study_id": "pilot_2024"
    }
)

# Initialize runtime: the computation engine wraps the symbolic engine, and the
# validation and hint engines are both built on that computation engine
symbolic = SymbolicEngine()
computation = ComputationEngine(symbolic)
validation = ValidationEngine(computation)
hint = HintEngine(computation)
logger = LearningLogger()

runtime = CoreRuntime(computation, validation, hint, research_spec, logger)

# Summary header: plain string (no placeholders) and spaced like the other section headers
print("Runtime Configuration:")
print(f"  Exercise ID: {research_spec.id}")
print(f"  Validation Mode: {research_spec.validation_mode}")
print(f"  Study: {research_spec.metadata['study_id']}")

## 3. Simulating Student Responses

For research, you can simulate various student response patterns. The cell below reuses the `runtime` configured in Section 2, so run that cell first.

In [None]:
# Simulate different error patterns
# Each attempt pairs a submitted answer with a label for the pattern it represents
student_attempts = [
    {"answer": "x**2 + 1", "pattern": "sign_error"},
    {"answer": "x**2 - 2", "pattern": "constant_error"},
    {"answer": "x**2 - 1", "pattern": "expanded_form"},
    {"answer": "(x - 1)*(x + 1)", "pattern": "correct"}
]

results = []
for attempt in student_attempts:
    # The problem expression is set anew before each attempt is finalized
    runtime.set("(x - 1)*(x + 1)")
    result = runtime.finalize(attempt["answer"])

    # A hint is looked up only for rejected answers. "hint" may be missing or
    # None, so fall back to an empty dict before reading its message.
    hint_message = None
    if not result["valid"]:
        hint_message = (result["details"].get("hint") or {}).get("message")

    results.append({
        "answer": attempt["answer"],
        "pattern": attempt["pattern"],
        "valid": result["valid"],
        "hint": hint_message
    })

# Analyze patterns (pandas is imported as `pd` in the setup cell)
df = pd.DataFrame(results)
print("\nStudent Response Analysis:")
print(df)

## 4. Learning Analytics

Extract insights from learning logs: load the logger's records into a pandas DataFrame and summarize them by phase and status.

In [None]:
# Run a full problem-solving session
source = """
Problem: 2*x + 3*x
Step: 5*x
End: done
"""

# Parse the program, then evaluate it with a dedicated logger for this session
parser = Parser()
program = parser.parse(source)
session_logger = LearningLogger()
engine = SymbolicEvaluationEngine(SymbolicEngine())

evaluator = Evaluator(program, engine, session_logger)
evaluator.run()

# Extract analytics: one DataFrame row per logged record
log_df = pd.DataFrame(session_logger.records)
phase_counts = log_df['phase'].value_counts().to_dict()
status_counts = log_df['status'].value_counts().to_dict()

print("Learning Log Summary:")
print(f"  Total steps: {len(log_df)}")
print(f"  Phases: {phase_counts}")
print(f"  Status breakdown: {status_counts}")

# Show detailed logs, restricted to the columns of interest
detail_columns = ['phase', 'expression', 'status', 'rule_id']
print("\nDetailed Log:")
print(log_df[detail_columns])

## 5. Custom Analysis Functions

Create custom analysis functions for your research.

In [None]:
def analyze_error_patterns(logs):
    """Count how often each mistake reason occurs in a sequence of log records.

    Args:
        logs: Iterable of dict records. Only records whose ``'status'`` is
            ``'mistake'`` are counted; all others are ignored.

    Returns:
        dict mapping each reason (read from ``record['meta']['reason']``) to
        its number of occurrences, in first-seen order. Mistakes with no
        ``'meta'``, a ``None``/empty ``'meta'``, or no ``'reason'`` key are
        tallied under ``'unknown'``.
    """
    patterns = {}
    for log in logs:
        if log['status'] != 'mistake':
            continue
        # `.get('meta', {})` only covers a missing key; a record may carry
        # 'meta' set to None, so normalise that to an empty dict as well.
        meta = log.get('meta') or {}
        reason = meta.get('reason', 'unknown')
        patterns[reason] = patterns.get(reason, 0) + 1

    return patterns

def calculate_success_rate(logs):
    """Return the fraction of graded records that succeeded.

    Only records whose ``'phase'`` is ``'step'`` or ``'end'`` are graded; a
    graded record succeeds when its ``'status'`` is ``'ok'``. Returns ``0.0``
    when there are no graded records.
    """
    graded_phases = ('step', 'end')
    graded = 0
    passed = 0
    for entry in logs:
        if entry['phase'] not in graded_phases:
            continue
        graded += 1
        if entry['status'] == 'ok':
            passed += 1

    # Guard against division by zero when nothing was graded
    return passed / graded if graded else 0.0

# Example usage: run both helpers over the records captured by the analytics session
session_records = session_logger.records
success_rate = calculate_success_rate(session_records)
patterns = analyze_error_patterns(session_records)

print(f"\nError Patterns: {patterns}")
print(f"Success Rate: {success_rate:.2%}")

## Next Steps

- Explore `pro_advanced_analysis.ipynb` for log mining techniques
- Check documentation for custom engine development
- See examples in `pro/examples/` for real research applications