In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [30]:
import google.generativeai as genai
from typing import List, Optional
from dataclasses import dataclass

# Module-level flag read by the functions below before they attempt any
# Gemini call. It stays False unless every statement in the `try` block
# succeeds.
API_CONFIGURED = False

try:
    # This is the standard way to access secrets in a Kaggle Notebook environment.
    # The import sits inside the `try` so that a missing `kaggle_secrets`
    # module is reported by the handler below instead of aborting the cell.
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    api_key = user_secrets.get_secret("GEMINI_API_KEY")
    genai.configure(api_key=api_key)
    print("✅ Successfully configured Gemini API from Kaggle Secrets.")
    API_CONFIGURED = True
except Exception as e:
    # Any failure above (import, secret lookup, configure) lands here. The
    # error is printed, not re-raised, and API_CONFIGURED remains False.
    # NOTE(review): whether `genai.configure` rejects an invalid key at this
    # point is not visible here — confirm before relying on the flag for that.
    print(f"⚠️ Could not configure Gemini API. Please ensure you have added your key to Kaggle Secrets with the label 'GEMINI_API_KEY'. Error: {e}")
    print("➡️ The script will not be able to connect to Gemini.")


@dataclass
class MathProblem:
    """A simple data structure to hold a math problem and its details."""
    # Natural-language problem statement; inserted verbatim into model prompts.
    problem: str
    # Short category label such as "derivative" or "real_analysis".
    problem_type: str
    # Difficulty label such as "very_hard"; shown in the progress banner.
    difficulty: str
    # Reference answer that is handed to the judge model as ground truth.
    expected_answer: str

@dataclass
class EvaluationResult:
    """A data structure to hold the complete results for a single problem."""
    # The problem that was posed.
    problem: MathProblem
    # Text returned by the learner call (a solution, or an error/"not
    # configured" message, since the learner function returns those as text).
    learner_solution: str
    # Full text returned by the judge call; expected to start with the verdict.
    judger_verdict: str


def create_hard_problems() -> List[MathProblem]:
    """Return the fixed benchmark: five problems, all labelled "very_hard".

    Each entry pairs a problem statement with the reference answer that is
    later given to the judge as ground truth.
    """
    # One row per problem: (problem_type, problem statement, expected answer).
    catalogue = [
        (
            "derivative",
            "Find the derivative of the Weierstrass function, defined as f(x) = Σ [from n=0 to ∞] aⁿ * cos(bⁿπx), for 0 < a < 1 and ab > 1 + (3/2)π.",
            "The function is continuous everywhere but differentiable nowhere. Therefore, the derivative does not exist for any value of x.",
        ),
        (
            "real_analysis",
            "Consider f_n(x) = 2nx*e^(-nx²) on [0, 1]. Evaluate lim[n→∞] ∫[0 to 1] f_n(x) dx and ∫[0 to 1] lim[n→∞] f_n(x) dx. Are they equal?",
            "They are not equal. The integral of the limit is ∫0 dx = 0. The limit of the integral is lim[n→∞] (1 - e⁻ⁿ) = 1. They differ because convergence is not uniform, so the limit and integral cannot be interchanged.",
        ),
        (
            "complex_analysis",
            "Using the residue theorem from complex analysis, prove that the sum of the reciprocals of the squares of the positive integers equals π²/6. (i.e., prove Σ [from n=1 to ∞] 1/n² = π²/6).",
            "The proof involves integrating f(z) = π cot(πz) / z² over a square contour C_N as N→∞. The function has a third-order pole at z=0 and simple poles at all non-zero integers z=n. The sum of the residues at all poles is 0 as the contour integral vanishes. The residue at z=n (n≠0) is 1/n². The residue at z=0 is -π²/3. Summing all residues gives -π²/3 + 2 * Σ[n=1 to ∞] 1/n² = 0, which implies Σ[n=1 to ∞] 1/n² = π²/6.",
        ),
        (
            "algebraic_topology",
            "Prove the Brouwer fixed-point theorem: every continuous function f mapping a closed n-dimensional ball Dⁿ to itself has at least one fixed point.",
            "The proof is by contradiction using homology theory. Assume a continuous map f: Dⁿ → Dⁿ has no fixed point. Then one can construct a retraction r: Dⁿ → Sⁿ⁻¹ (its boundary). Such a retraction is a continuous map that is the identity on the boundary. This leads to a contradiction because the (n-1)-th homology group of Dⁿ is trivial (H_{n-1}(Dⁿ) = 0), while that of Sⁿ⁻¹ is non-trivial (H_{n-1}(Sⁿ⁻¹) ≅ ℤ). A retraction would imply the identity map on Sⁿ⁻¹ is null-homotopic, which is false. Thus, a fixed point must exist.",
        ),
        (
            "functional_analysis",
            "Apply the Baire Category Theorem to the space of continuous functions C([0, 1]) to prove the existence of a continuous function that is nowhere differentiable.",
            "Let C([0, 1]) be the complete metric space of continuous functions with the sup norm. Let E_n = {f ∈ C([0,1]) | ∃x₀ s.t. |f(x)-f(x₀)| ≤ n|x-x₀| for all x}. Each E_n is a closed, nowhere dense set. The set of functions differentiable at at least one point is contained in the union ∪[n=1 to ∞] E_n. By the Baire Category Theorem, this union is a 'meager' set. Since C([0, 1]) is not meager, its complement—the set of continuous, nowhere differentiable functions—must be non-empty.",
        ),
    ]
    return [
        MathProblem(
            problem=statement,
            problem_type=kind,
            difficulty="very_hard",
            expected_answer=answer,
        )
        for kind, statement, answer in catalogue
    ]


def solve_math_problem_with_gemini(
    problem_text: str,
    model_name: str = 'gemini-1.5-flash-8b',
    timeout: int = 120,
) -> str:
    """
    Solves a math problem using the Gemini API with a direct prompt.
    This function acts as the 'learner' model.

    Args:
        problem_text: The problem statement; it is quoted verbatim in the prompt.
        model_name: Name passed to `genai.GenerativeModel`. Defaults to the
            model this function previously hard-coded.
        timeout: Value passed as `request_options={'timeout': ...}` to
            `generate_content`. Defaults to the previously hard-coded 120.

    Returns:
        The text of the model's reply. Failures are also reported as text: a
        fixed message when the API is not configured, or a message embedding
        the exception when the request (or reading its text) raises.
    """
    if not API_CONFIGURED:
        return "API is not configured. Cannot proceed."

    model = genai.GenerativeModel(model_name)
    prompt = f"""You are a brilliant mathematician. Your task is to solve the following math problem.
Please provide a clear, step-by-step solution and state the final answer explicitly.

Problem:
"{problem_text}"
"""
    try:
        response = model.generate_content(prompt, request_options={'timeout': timeout})
        # Reading `.text` stays inside the try so that any error it raises is
        # reported the same way as a failed request.
        return response.text
    except Exception as e:
        # Best effort by design: callers get a string back, never an exception.
        return f"An error occurred while contacting the Gemini API: {str(e)}"


def judge_solution_with_gemini(
    problem: str,
    expected_answer: str,
    student_solution: str,
    model_name: str = 'gemini-1.5-pro',
    timeout: int = 120,
) -> str:
    """
    Uses a powerful Gemini model to act as a judge, evaluating a solution.

    Args:
        problem: The original problem statement.
        expected_answer: The reference answer shown to the judge as ground truth.
        student_solution: The learner's text to be evaluated.
        model_name: Name passed to `genai.GenerativeModel`. Defaults to the
            model this function previously hard-coded.
        timeout: Value passed as `request_options={'timeout': ...}` to
            `generate_content`. Defaults to the previously hard-coded 120.

    Returns:
        The judge's reply text. The prompt asks for it to begin with the
        verdict (Correct / Partially Correct / Incorrect) followed by a
        justification. Failures are reported as text: a fixed message when the
        API is not configured, or a message embedding the exception.
    """
    if not API_CONFIGURED:
        return "API is not configured. Cannot proceed with judgment."

    judger_model = genai.GenerativeModel(model_name)
    prompt = f"""You are an expert mathematics professor acting as an impartial judge.
Your task is to evaluate a solution provided by a student AI.

You will be provided with:
1. The Original Problem
2. The Ground Truth / Expected Answer
3. The Student AI's Generated Solution

Your evaluation MUST begin with a single-word verdict: **Correct**, **Partially Correct**, or **Incorrect**.
This verdict should be the very first word of your response.
After the verdict, provide a brief, clear justification for your decision.

---
**1. Original Problem:**
{problem}

---
**2. Ground Truth / Expected Answer:**
{expected_answer}

---
**3. Student AI's Generated Solution:**
{student_solution}
---

**Your Judgment:**
"""
    try:
        response = judger_model.generate_content(prompt, request_options={'timeout': timeout})
        # Reading `.text` stays inside the try so that any error it raises is
        # reported the same way as a failed request.
        return response.text
    except Exception as e:
        # Best effort by design: callers get a string back, never an exception.
        return f"An error occurred while contacting the Gemini API for judgment: {str(e)}"


def run_evaluation() -> List[EvaluationResult]:
    """
    Drive the learner -> judge pipeline over every benchmark problem.

    For each problem the learner's solution is requested, that solution is
    handed to the judge together with the expected answer, and both texts are
    bundled into an EvaluationResult. Progress is printed along the way.

    Returns:
        One EvaluationResult per problem, in problem order; an empty list when
        the Gemini API has not been configured.
    """
    if not API_CONFIGURED:
        print("\nExiting because the Gemini API is not configured.")
        return []

    benchmark = create_hard_problems()
    print(f"\nFound {len(benchmark)} math problems to solve.")

    banner = "=" * 80
    collected = []

    for number, item in enumerate(benchmark, start=1):
        print("\n" + banner)
        print(f"               Processing Problem #{number} ({item.difficulty})")
        print(banner)
        print(f"📌 Problem Statement: {item.problem}")

        # Learner pass: only the raw problem text is sent.
        print("1. Sending problem to Learner (Gemini Flash)...")
        attempt = solve_math_problem_with_gemini(item.problem)
        print("...Received learner's response.")

        # Judge pass: problem, reference answer and the learner's text.
        print("2. Asking the Judger (Gemini Pro) to evaluate...")
        verdict = judge_solution_with_gemini(
            problem=item.problem,
            expected_answer=item.expected_answer,
            student_solution=attempt,
        )
        print("...Received judger's verdict.")

        # Keep everything needed for the later metrics report.
        collected.append(
            EvaluationResult(
                problem=item,
                learner_solution=attempt,
                judger_verdict=verdict,
            )
        )
        print(f"Problem #{number} processing complete.")

    return collected

# --- Main Execution Block for Cell 1 ---
# Run the evaluation and keep the results in the module-level variable
# `evaluation_results`, which the next notebook cell reads.
# The variable is initialised to an empty list first so that it exists even
# when the guarded call below does not run.
evaluation_results = []
if __name__ == "__main__":
    evaluation_results = run_evaluation()
    # Printed unconditionally, i.e. also when run_evaluation() returned an
    # empty list because the API was not configured.
    print("\n\n✅ Evaluation run complete. Results are stored in the 'evaluation_results' variable.")

✅ Successfully configured Gemini API from Kaggle Secrets.

Found 5 math problems to solve.

               Processing Problem #1 (very_hard)
📌 Problem Statement: Find the derivative of the Weierstrass function, defined as f(x) = Σ [from n=0 to ∞] aⁿ * cos(bⁿπx), for 0 < a < 1 and ab > 1 + (3/2)π.
1. Sending problem to Learner (Gemini Flash)...
...Received learner's response.
2. Asking the Judger (Gemini Pro) to evaluate...
...Received judger's verdict.
Problem #1 processing complete.

               Processing Problem #2 (very_hard)
📌 Problem Statement: Consider f_n(x) = 2nx*e^(-nx²) on [0, 1]. Evaluate lim[n→∞] ∫[0 to 1] f_n(x) dx and ∫[0 to 1] lim[n→∞] f_n(x) dx. Are they equal?
1. Sending problem to Learner (Gemini Flash)...
...Received learner's response.
2. Asking the Judger (Gemini Pro) to evaluate...
...Received judger's verdict.
Problem #2 processing complete.

               Processing Problem #3 (very_hard)
📌 Problem Statement: Using the residue theorem from complex analysis,

In [31]:
from typing import List
from dataclasses import dataclass
import collections

# NOTE: The dataclasses are redefined here for clarity and so this cell can be
# run independently, assuming `evaluation_results` exists. In a real project,
# you might import them from a shared .py file.
@dataclass
class MathProblem:
    """A math problem together with its category, difficulty and reference answer."""
    # Natural-language problem statement (its first 60 characters head each
    # per-problem section of the report).
    problem: str
    # Short category label.
    problem_type: str
    # Difficulty label.
    difficulty: str
    # Reference answer for the problem.
    expected_answer: str

@dataclass
class EvaluationResult:
    """The stored outcome for one problem: the problem, the learner's text and the judge's text."""
    # The problem that was posed.
    problem: MathProblem
    # Text produced by the learner for this problem.
    learner_solution: str
    # Full judge reply; the report parses the verdict from its beginning and
    # prints the whole text as the justification.
    judger_verdict: str


def calculate_and_print_metrics(results: "List[EvaluationResult]"):
    """
    Analyzes a list of EvaluationResult objects, calculates a performance
    score, and prints a detailed report.

    The verdict is read from the beginning of each judge reply. Leading
    whitespace and markdown/quote characters are skipped first, so a reply
    such as "**Incorrect**. ..." is recognised as "incorrect" instead of being
    reported as unscorable.

    Scoring: "correct" = 1.0, "partially correct" = 0.5, "incorrect" = 0.0.
    A reply with no recognisable verdict is labelled "unscorable"; it scores
    0.0 and still counts towards the average.

    Args:
        results: Objects exposing `judger_verdict` (str) and
            `problem.problem` (str). An empty list prints a hint and returns.

    Returns:
        None. The report is written to stdout.
    """
    if not results:
        print("No evaluation results to analyze. Please run the first cell.")
        return

    # Points per recognised verdict. No key is a prefix of another, so the
    # lookup order below does not affect which verdict is matched.
    SCORE_MAP = {
        "correct": 1.0,
        "partially correct": 0.5,
        "incorrect": 0.0,
    }
    # Characters a judge reply may put in front of the verdict word
    # (bold/italic markers, headings, block quotes, code ticks, quotes).
    LEADING_MARKUP = " \t\r\n*_#>`\"'"

    scores = []
    verdict_counts = collections.defaultdict(int)

    print("\n" + "="*80)
    print("                      PERFORMANCE METRICS REPORT")
    print("="*80)
    print(f"Analyzing {len(results)} results...\n")

    for i, result in enumerate(results):
        # --- Parsing Logic ---
        # A missing verdict is treated as an empty reply rather than crashing.
        verdict_text = (result.judger_verdict or "").lower().lstrip(LEADING_MARKUP)
        parsed_verdict = next(
            (label for label in SCORE_MAP if verdict_text.startswith(label)),
            "unscorable",  # Default if no keyword is found
        )

        # --- Scoring Logic ---
        score = SCORE_MAP.get(parsed_verdict, 0.0)
        scores.append(score)
        verdict_counts[parsed_verdict] += 1

        # --- Print Individual Result Summary ---
        print(f"--- Problem #{i+1}: {result.problem.problem[:60]}... ---")
        print(f"Judger's Verdict: {parsed_verdict.upper()}")
        print(f"Assigned Score: {score}")
        print(f"Judger's Justification: {result.judger_verdict}\n")

    # --- Aggregate Metrics ---
    # `results` is non-empty here (see the guard at the top), so the division
    # is always defined.
    total_problems = len(results)
    total_score = sum(scores)
    average_score = total_score / total_problems

    print("\n" + "="*80)
    print("                         OVERALL SUMMARY")
    print("="*80)
    print(f"Total Problems Evaluated: {total_problems}")
    print("-" * 30)
    print("Verdict Distribution:")
    for verdict, count in verdict_counts.items():
        percentage = (count / total_problems) * 100
        print(f"  - {verdict.title():<20}: {count} ({percentage:.1f}%)")
    print("-" * 30)
    print(f"Total Score Achieved: {total_score:.2f} / {total_problems:.2f}")
    print(f"AVERAGE PERFORMANCE SCORE: {average_score:.2%}")
    print("="*80)


# --- Execution Block for Cell 2 ---
# This code assumes the `evaluation_results` variable was created in the previous cell.
# The check below requires the name to exist AND the list to be non-empty, so
# an existing-but-empty list also takes the warning branch.
if 'evaluation_results' in locals() and evaluation_results:
    calculate_and_print_metrics(evaluation_results)
else:
    print("⚠️ 'evaluation_results' not found. Please run the main evaluation cell above first.")


                      PERFORMANCE METRICS REPORT
Analyzing 5 results...

--- Problem #1: Find the derivative of the Weierstrass function, defined as ... ---
Judger's Verdict: INCORRECT
Assigned Score: 0.0
Judger's Justification: Incorrect.

The student AI has missed the fundamental point of the Weierstrass function.  While the AI correctly performs the term-by-term differentiation, it incorrectly assumes that the conditions given justify this operation. The condition ab > 1 + (3/2)π is a sufficient condition for the Weierstrass function to be nowhere differentiable, *not* a condition that allows term-by-term differentiation to yield a valid derivative. The Weierstrass function is a classic example of a function that is continuous everywhere but differentiable nowhere. Therefore, the derivative does *not* exist, despite the AI's formal manipulation.  The AI demonstrates a significant misunderstanding of the concept by attempting to find a derivative where none exists.


--- Problem #2:

In [27]:
import numpy as np
import random
import json
import time
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass
from collections import defaultdict
import google.generativeai as genai

# Configure the Gemini client from the Kaggle secret "GEMINI_API_KEY".
# Unlike a flag-based setup, nothing here records whether configuration
# succeeded; a failure is only printed.
try:
    # Imported inside the `try` so a missing `kaggle_secrets` module is
    # reported by the handler below instead of aborting the cell.
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    api_key = user_secrets.get_secret("GEMINI_API_KEY")
    genai.configure(api_key=api_key)
    # The print statement is optional, but helpful for debugging
    # print("✅ Successfully configured Gemini API from Kaggle Secrets.")
except Exception as e:
    # Runs on any failure above (import, secret lookup, configure), e.g. when
    # the secret was never added. The error is printed and not re-raised.
    print(f"⚠️ Could not configure Gemini API. Please ensure you have added your key to Kaggle Secrets with the label 'GEMINI_API_KEY'. Error: {e}")

@dataclass
class MathProblem:
    """A math problem together with its category, difficulty and reference answer."""
    # Natural-language problem statement; inserted into learner and judge prompts.
    problem: str
    # Short category label; part of the Q-learning state string.
    problem_type: str
    # Difficulty label; part of the Q-learning state string.
    difficulty: str
    # Reference answer shown to the judge model.
    expected_answer: str

@dataclass
class StepResult:
    """The recorded outcome of one solution step (the attempt that was finally kept)."""
    # 1-based index of the step within the solution process.
    step_number: int
    # High-level action label chosen by the agent for the kept attempt.
    action: str
    # Learner's text for this step.
    step_content: str
    # Judge's verdict for the kept attempt.
    is_correct: bool
    # Reward assigned to the kept attempt.
    reward: float
    # Judge's reply text for the kept attempt.
    judge_feedback: str = ""
    # Number of the attempt that was kept (1 = first try).
    attempts: int = 1

class QLearningAgent:
    """
    Tabular Q-learning agent that picks one high-level action per solution step.

    Update rule (one-step Q-learning):
        Q(S, A) <- Q(S, A) + α * [R + γ * max_a Q(S', a) - Q(S, A)]

    Attributes:
        learning_rate: α in the update rule.
        discount_factor: γ in the update rule.
        epsilon: Probability of picking a random action in `choose_action`.
        q_table: state -> action -> Q-value. Entries are created only by
            `update_q_value`; reads never insert, so the table contains just
            the (state, action) pairs that were actually updated.
        action_space: step number -> list of actions allowed at that step.
    """
    def __init__(self, learning_rate=0.1, discount_factor=0.9, epsilon=0.2):
        self.learning_rate: float = learning_rate
        self.discount_factor: float = discount_factor
        self.epsilon: float = epsilon
        self.q_table: Dict[str, Dict[str, float]] = defaultdict(lambda: defaultdict(float))
        # A more structured action space for 5 steps
        self.action_space: Dict[int, List[str]] = {
            1: ["identify_and_setup", "initial_decomposition"],
            2: ["apply_core_rule", "select_technique"],
            3: ["execute_calculation", "algebraic_manipulation"],
            4: ["simplify_result", "check_for_edge_cases"],
            5: ["final_answer_and_conclusion", "verify_solution"]
        }

    def get_state(self, problem: "MathProblem", step: int) -> str:
        """Build the state key from problem type, difficulty and step number."""
        return f"{problem.problem_type}_{problem.difficulty}_step{step}"

    def _q(self, state: str, action: str) -> float:
        """Return Q(state, action) without inserting; unseen pairs read as 0.0."""
        # `.get` is used on purpose: indexing the nested defaultdicts would
        # create empty entries as a side effect of merely looking a value up.
        return self.q_table.get(state, {}).get(action, 0.0)

    def choose_action(self, state: str, step: int) -> str:
        """
        Choose an action for `step` with an epsilon-greedy policy.

        With probability `epsilon` a random available action is returned.
        Otherwise the action with the highest Q-value is returned; when every
        available action still has a Q-value of 0 the choice is random.

        Raises:
            ValueError: If no actions are defined for `step`.
        """
        available_actions = self.action_space.get(step)
        if not available_actions:
            raise ValueError(f"No actions are defined for step {step}.")

        # Exploration: choose a random action
        if random.random() < self.epsilon:
            return random.choice(available_actions)

        # Exploitation: choose the best-known action among those allowed here
        available_q_values = {action: self._q(state, action) for action in available_actions}

        if all(v == 0 for v in available_q_values.values()):
            # Nothing learned yet for this state, choose randomly
            return random.choice(available_actions)

        return max(available_q_values, key=available_q_values.get)

    def update_q_value(self, state: str, action: str, reward: float, next_state: str, next_step: int):
        """
        Apply one Q-learning update to Q(state, action).

        Q(S, A) <- Q(S, A) + α * [R + γ * max_a Q(S', a) - Q(S, A)]

        Args:
            state: State in which the action was taken.
            action: Action that was taken.
            reward: Reward observed for it.
            next_state: State reached afterwards.
            next_step: Step number of `next_state`; selects which actions are
                considered for the max. A step with no defined actions (such
                as the one after the last step) contributes 0.
        """
        # 1. Old value Q(S, A)
        current_q = self._q(state, action)

        # 2. max_a Q(S', a) over the actions available at the next step;
        #    untried actions count as 0.0, and so does an empty action list.
        available_next_actions = self.action_space.get(next_step, [])
        max_next_q = max(
            (self._q(next_state, act) for act in available_next_actions),
            default=0.0,
        )

        # 3. TD target: R + γ * max_a Q(S', a)
        td_target = reward + self.discount_factor * max_next_q

        # 4. TD error: td_target - Q(S, A)
        td_error = td_target - current_q

        # 5. New value: Q(S, A) + α * TD_error (the only place the table is written)
        self.q_table[state][action] = current_q + self.learning_rate * td_error

class GeminiMathSolver:
    """
    Solves a problem in a fixed number of steps using a learner model, a judge
    model and a Q-learning agent.

    For every step the agent picks a high-level action, the learner model
    writes that step, and the judge model labels it CORRECT or INCORRECT. The
    resulting reward is fed back into the agent's Q-table, and an incorrect
    step is retried with the judge's feedback up to `max_retries_per_step`
    attempts.
    """
    def __init__(self, learner_model_name="gemini-1.5-flash-8b", judge_model_name="gemini-1.5-pro"):
        """Create the agent and both model handles; fall back to mock mode if that raises."""
        self.agent = QLearningAgent()
        self.max_steps = 5
        self.max_retries_per_step = 4 # Allow the learner to try again if it makes a mistake
        self.api_configured = False

        # NOTE(review): `api_configured` only reflects whether constructing the
        # two model objects raised. Whether construction actually fails when no
        # API key has been configured is not visible here — confirm, otherwise
        # mock mode may never be entered.
        try:
            self.learner_model = genai.GenerativeModel(learner_model_name)
            self.judge_model = genai.GenerativeModel(judge_model_name)
            self.api_configured = True
            print("✅ Gemini API configured successfully.")
        except Exception as e:
            print(f"⚠️ Gemini API not configured. Running in MOCK mode. Error: {str(e)}")

    def get_learner_response(self, problem: MathProblem, step: int, action: str, previous_steps: List[str], feedback: Optional[str] = None) -> str:
        """
        Generates a response from the learner model, incorporating feedback if provided.

        Args:
            problem: The problem being solved.
            step: Current step number; must be a key of `step_instructions`
                (1-5), otherwise the prompt construction raises KeyError.
            action: High-level action label to mention in the prompt.
            previous_steps: Texts of the steps accepted so far, in order.
            feedback: Judge feedback from a failed attempt at this step, if any.

        Returns:
            The learner's text; a fixed "[MOCK] ..." string in mock mode; or an
            "Error generating response: ..." string if the API call raises.
        """
        if not self.api_configured:
            return f"[MOCK] Step {step}: Executing action '{action}' for problem type {problem.problem_type}."

        # Extra prompt section that is only filled in on a retry.
        feedback_prompt = ""
        if feedback:
            feedback_prompt = f"""
            Your previous attempt at this step was incorrect. Here is the feedback from the expert judge:
            ---
            {feedback}
            ---
            Please correct your mistake and provide a new, accurate response for this step.
            """

        # Earlier steps are listed as "Step N: ..." lines, or "None" on step 1.
        if previous_steps:
            previous_steps_str = "".join(f"Step {i+1}: {s}\n" for i, s in enumerate(previous_steps))
        else:
            previous_steps_str = "None"

        # Per-step guidance text; keyed by step number 1-5.
        step_instructions = {
            1: "Start by identifying the function/problem type and outlining the initial setup or first principle to apply.",
            2: "Apply the main mathematical rule or technique (e.g., chain rule, integration by parts, matrix inversion).",
            3: "Perform the necessary calculations and algebraic manipulations based on the previous step.",
            4: "Simplify the resulting expression and check for any intermediate errors or edge cases.",
            5: "State the complete, final answer clearly. This is your last step."
        }

        # NOTE(review): the prompt hard-codes "5" steps and "calculus" instead
        # of using self.max_steps / the problem type; it matches the current
        # configuration only.
        prompt = f"""
    You are an expert mathematician solving a calculus problem in a structured, 5-step process.
    You are on step {step} of 5.

    Problem: {problem.problem}

    Previous Steps:
    {previous_steps_str}

    Current Step Instructions ({step}/5): {step_instructions[step]}
    Your high-level action for this step is: '{action}'.

    {feedback_prompt}

    Provide only the mathematical work for this current step.
    {'This is the final step, you must provide the final answer.' if step == 5 else f'You have {5-step} steps remaining after this.'}
    """
        try:
            response = self.learner_model.generate_content(prompt)
            return response.text
        except Exception as e:
            # The error text is returned as the step content, so the caller
            # will pass it on to the judge like any other answer.
            print(f"❌ Error during learner generation: {str(e)}")
            return f"Error generating response: {e}"


    def get_judge_evaluation(self, problem: MathProblem, step_content: str, step_number: int) -> Tuple[bool, float, str]:
        """
        Evaluates a step using the powerful judge model.

        Returns:
            (is_correct, reward, feedback_text).
            Rewards for a real evaluation: correct -> 15.0 on the final step,
            otherwise 5.0 + step_number; incorrect -> -20.0 on the final step,
            otherwise -10.0. If the API call raises: (False, -15.0, message).
            In mock mode: a random verdict with reward 10 or -5.
        """
        if not self.api_configured:
            # Mock evaluation for testing without an API key
            is_correct = random.random() > 0.4 # 60% chance of being correct
            reward = (10 if is_correct else -5)
            feedback = "MOCK: This is a mock evaluation."
            return is_correct, reward, feedback

        is_final_step = (step_number == self.max_steps)

        prompt = f"""
    You are an expert mathematician and judge. Your task is to evaluate one step of a solution to a math problem.
    The problem is: "{problem.problem}"
    The expected final answer is: "{problem.expected_answer}"

    The current step being evaluated is Step {step_number}.
    The student's submission for this step is:
    ---
    {step_content}
    ---

    Based on the problem, the student's submission for this step, and the expected final answer, is this step correct?
    - A step is CORRECT if it is mathematically sound and makes logical progress towards the final answer.
    - A step is INCORRECT if it contains a mathematical error, a logical flaw, or is a step that doesn't lead to the correct solution.

    Start your response with the word "CORRECT" or "INCORRECT".
    Then, provide a brief, one-sentence explanation for your decision.

    Example 1:
    CORRECT: The application of the product rule is accurate.

    Example 2:
    INCORRECT: The derivative of sin(x) is cos(x), not -cos(x) as written.

    Example 3:
    INCORRECT: The calculation is correct, but this approach of integration by parts will not lead to the final answer.

    Now, evaluate the student's submission.
    """

        try:
            response = self.judge_model.generate_content(prompt)
            feedback_text = response.text.strip()

            # "INCORRECT..." does not start with "CORRECT", so this is True
            # only for replies that literally begin with CORRECT. Any other
            # opening (including formatting characters before the word) is
            # treated as incorrect.
            is_correct = feedback_text.upper().startswith('CORRECT')

            # Define rewards
            if is_correct:
                # Parses as: 15.0 if final else (5.0 + step_number)
                reward = 15.0 if is_final_step else 5.0 + step_number
            else:
                reward = -20.0 if is_final_step else -10.0

            return is_correct, reward, feedback_text

        except Exception as e:
            # An API failure is scored as an incorrect step with its own penalty.
            print(f"❌ Error during judge evaluation: {str(e)}")
            return False, -15.0, f"Evaluation failed due to an API error: {e}"

    def solve_problem(self, problem: MathProblem) -> List[StepResult]:
        """
        Solves a problem using the Q-learning guided, 5-step process with retries.

        Returns:
            One StepResult per step, in order. Each holds the attempt that was
            kept: the first correct attempt, or the last attempt once
            `max_retries_per_step` was reached.
        """
        print(f"\n" + "="*70)
        print(f"🧮 Solving Problem: {problem.problem}")
        print(f"🎯 Expected Answer: {problem.expected_answer}")
        print("-" * 70)

        results = []
        # Texts of the steps kept so far; passed to the learner as context.
        previous_steps_content = []

        for step in range(1, self.max_steps + 1):
            state = self.agent.get_state(problem, step)
            # Feedback is reset for every new step and filled after a failure.
            feedback_for_retry = None

            for attempt in range(1, self.max_retries_per_step + 1):
                # 1. Choose Action A from State S (re-chosen on every attempt)
                action = self.agent.choose_action(state, step)

                # 2. Take Action A, get Step Content
                step_content = self.get_learner_response(problem, step, action, previous_steps_content, feedback_for_retry)

                # 3. Observe Reward R and Next State S'
                is_correct, reward, judge_feedback = self.get_judge_evaluation(problem, step_content, step)

                status_icon = "✅" if is_correct else "❌"
                print(f"Step {step}/{self.max_steps} (Attempt {attempt}) | Action: {action} | Result: {status_icon}")
                print(f"  💬 Learner: {step_content.strip()}")
                print(f"  👨‍⚖️ Judge: {judge_feedback.strip()}")

                if is_correct:
                    # The step was correct, finalize and move to the next step
                    result = StepResult(step, action, step_content, is_correct, reward, judge_feedback, attempt)
                    results.append(result)
                    previous_steps_content.append(step_content)

                    # 4. Update Q-Table
                    # After the last step the next state is the literal
                    # "terminal" and next_step has no actions defined.
                    next_step = step + 1
                    next_state = self.agent.get_state(problem, next_step) if next_step <= self.max_steps else "terminal"
                    self.agent.update_q_value(state, action, reward, next_state, next_step)

                    time.sleep(2) # API rate limiting
                    break # Exit the retry loop
                else:
                    # The step was incorrect, prepare for another attempt
                    feedback_for_retry = judge_feedback
                    # Apply a penalty for the failed attempt and update Q-value to discourage this action
                    self.agent.update_q_value(state, action, reward, state, step) # A failed attempt stays in the same state

                    if attempt == self.max_retries_per_step:
                        # Max retries reached, accept the wrong answer and move on
                        print(f"  ⚠️ Max retries reached for step {step}. Accepting incorrect step.")
                        result = StepResult(step, action, step_content, is_correct, reward, judge_feedback, attempt)
                        results.append(result)
                        previous_steps_content.append(step_content)
                        break # Exit the retry loop

                    time.sleep(2) # API rate limiting

        # Final Summary
        # Only the kept attempt of each step is in `results`, so penalties of
        # discarded attempts are not part of this total.
        total_reward = sum(r.reward for r in results)
        correct_steps = sum(1 for r in results if r.is_correct)
        print("\n" + "-"*30 + " PROBLEM SUMMARY " + "-"*25)
        print(f"📊 Final Result: {correct_steps}/{self.max_steps} steps correct.")
        print(f"🏆 Total Reward for Problem: {total_reward:.1f}")
        print("="*70)
        return results

# --- Problem Set and Main Execution ---
def create_hard_problems() -> List[MathProblem]:
    """Build the fixed test set of five problems, each tagged "very_hard".

    Every problem carries a category label, its statement and the reference
    answer that the judge is shown.
    """
    # Only the fields that differ between problems are listed here; the shared
    # difficulty label is supplied when the objects are created below.
    entries = [
        {
            "problem_type": "derivative",
            "problem": "Find the derivative of the Weierstrass function, defined as f(x) = Σ [from n=0 to ∞] aⁿ * cos(bⁿπx), for 0 < a < 1 and ab > 1 + (3/2)π.",
            "expected_answer": "The function is continuous everywhere but differentiable nowhere. Therefore, the derivative does not exist for any value of x.",
        },
        {
            "problem_type": "real_analysis",
            "problem": "Consider f_n(x) = 2nx*e^(-nx²) on [0, 1]. Evaluate lim[n→∞] ∫[0 to 1] f_n(x) dx and ∫[0 to 1] lim[n→∞] f_n(x) dx. Are they equal?",
            "expected_answer": "They are not equal. The integral of the limit is ∫0 dx = 0. The limit of the integral is lim[n→∞] (1 - e⁻ⁿ) = 1. They differ because convergence is not uniform, so the limit and integral cannot be interchanged.",
        },
        {
            "problem_type": "complex_analysis",
            "problem": "Using the residue theorem from complex analysis, prove that the sum of the reciprocals of the squares of the positive integers equals π²/6. (i.e., prove Σ [from n=1 to ∞] 1/n² = π²/6).",
            "expected_answer": "The proof involves integrating f(z) = π cot(πz) / z² over a square contour C_N as N→∞. The function has a third-order pole at z=0 and simple poles at all non-zero integers z=n. The sum of the residues at all poles is 0 as the contour integral vanishes. The residue at z=n (n≠0) is 1/n². The residue at z=0 is -π²/3. Summing all residues gives -π²/3 + 2 * Σ[n=1 to ∞] 1/n² = 0, which implies Σ[n=1 to ∞] 1/n² = π²/6.",
        },
        {
            "problem_type": "algebraic_topology",
            "problem": "Prove the Brouwer fixed-point theorem: every continuous function f mapping a closed n-dimensional ball Dⁿ to itself has at least one fixed point.",
            "expected_answer": "The proof is by contradiction using homology theory. Assume a continuous map f: Dⁿ → Dⁿ has no fixed point. Then one can construct a retraction r: Dⁿ → Sⁿ⁻¹ (its boundary). Such a retraction is a continuous map that is the identity on the boundary. This leads to a contradiction because the (n-1)-th homology group of Dⁿ is trivial (H_{n-1}(Dⁿ) = 0), while that of Sⁿ⁻¹ is non-trivial (H_{n-1}(Sⁿ⁻¹) ≅ ℤ). A retraction would imply the identity map on Sⁿ⁻¹ is null-homotopic, which is false. Thus, a fixed point must exist.",
        },
        {
            "problem_type": "functional_analysis",
            "problem": "Apply the Baire Category Theorem to the space of continuous functions C([0, 1]) to prove the existence of a continuous function that is nowhere differentiable.",
            "expected_answer": "Let C([0, 1]) be the complete metric space of continuous functions with the sup norm. Let E_n = {f ∈ C([0,1]) | ∃x₀ s.t. |f(x)-f(x₀)| ≤ n|x-x₀| for all x}. Each E_n is a closed, nowhere dense set. The set of functions differentiable at at least one point is contained in the union ∪[n=1 to ∞] E_n. By the Baire Category Theorem, this union is a 'meager' set. Since C([0, 1]) is not meager, its complement—the set of continuous, nowhere differentiable functions—must be non-empty.",
        },
    ]

    problems = []
    for entry in entries:
        problems.append(MathProblem(difficulty="very_hard", **entry))
    return problems

def main():
    """Run every hard problem through the solver and print two reports.

    The first report counts how many problems had every one of their
    ``solver.max_steps`` steps judged correct; the second dumps up to ten
    states of the agent's Q-table with their action values.
    """
    solver = GeminiMathSolver()
    if not solver.api_configured:
        print("\n--- RUNNING IN MOCK MODE. NO REAL LEARNING WILL OCCUR. ---")
        print("--- Please configure your Gemini API key to run properly. ---")

    # One list of step results per problem, in problem order.
    outcomes = [solver.solve_problem(problem) for problem in create_hard_problems()]

    print("\n\n" + "=" * 30 + " OVERALL SCRIPT SUMMARY " + "=" * 24)
    attempted = len(outcomes)
    if attempted == 0:
        print("No problems were attempted.")
    else:
        # "Perfect" means the full number of steps was recorded and none failed.
        perfect = sum(
            1
            for steps in outcomes
            if len(steps) == solver.max_steps and all(s.is_correct for s in steps)
        )
        flawed = attempted - perfect

        print(f"Total Problems Attempted: {attempted}")
        print(f"✅ Problems Solved Perfectly (all {solver.max_steps} steps correct): {perfect}")
        print(f"❌ Problems with 1 or More Incorrect Steps: {flawed}")
    print("=" * 78)

    print("\n\n" + "=" * 30 + " FINAL Q-TABLE STATE " + "=" * 25)
    q_table = solver.agent.q_table
    if not q_table:
        print("Q-Table is empty (likely ran in mock mode or encountered API errors).")
        return

    # Only the first ten states are shown to keep the output readable.
    for state, action_values in list(q_table.items())[:10]:
        print(f"State: {state}")
        for action, value in action_values.items():
            print(f"  - Action: {action}, Q-Value: {value:.3f}")

# Run the experiment only when this cell/script is executed directly.
if __name__ == "__main__":
    main()
    # NOTE(review): "hihi" looks like leftover debug output — confirm whether it is still wanted.
    print("hihi")

✅ Gemini API configured successfully.

🧮 Solving Problem: Find the derivative of the Weierstrass function, defined as f(x) = Σ [from n=0 to ∞] aⁿ * cos(bⁿπx), for 0 < a < 1 and ab > 1 + (3/2)π.
🎯 Expected Answer: The function is continuous everywhere but differentiable nowhere. Therefore, the derivative does not exist for any value of x.
----------------------------------------------------------------------
Step 1/5 (Attempt 1) | Action: initial_decomposition | Result: ❌
  💬 Learner: Identify the function as an infinite sum of trigonometric functions.  The problem requires finding the derivative of a function defined by an infinite series.  The initial approach will be to apply the term-by-term differentiation rule to the series, assuming absolute convergence of the resulting series.
  👨‍⚖️ Judge: INCORRECT: While identifying the function as an infinite sum is correct, assuming term-by-term differentiation based on the *hope* of absolute convergence is a flawed approach for the Weierst

In [36]:
import numpy as np
import random
import json
import time
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass
from collections import defaultdict, deque

# --- New Imports for DQN ---
import torch
import torch.nn as nn
import torch.optim as optim
from sentence_transformers import SentenceTransformer

import google.generativeai as genai

# --- Gemini API Configuration (No Changes) ---
# Read the key stored under the Kaggle secret label "GEMINI_API_KEY" and hand
# it to the Gemini client. Any exception raised while importing
# kaggle_secrets, reading the secret, or configuring the client is caught and
# reported as a warning, so the rest of the cell still runs.
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    api_key = user_secrets.get_secret("GEMINI_API_KEY")
    genai.configure(api_key=api_key)
except Exception as e:
    print(f"⚠️ Could not configure Gemini API. Please ensure you have added your key to Kaggle Secrets with the label 'GEMINI_API_KEY'. Error: {e}")

# --- Data Classes (No Changes) ---
@dataclass
class MathProblem:
    """A math problem together with the metadata the solver and judge use."""
    problem: str  # full problem statement inserted into the learner and judge prompts
    problem_type: str  # topic label, e.g. "derivative"; part of the agent's state text
    difficulty: str  # difficulty label, e.g. "very_hard"; part of the agent's state text
    expected_answer: str  # reference answer shown to the judge (and strategist) prompts

@dataclass
class StepResult:
    """Outcome of one solution step as recorded by the solver."""
    step_number: int  # index of the step within the solution (the solver counts from 1)
    action: str  # high-level action text chosen by the agent for this step
    step_content: str  # the learner model's written work for the step
    is_correct: bool  # judge verdict for the step
    reward: float  # reward assigned from the judge verdict
    judge_feedback: str = ""  # judge's textual feedback
    attempts: int = 1  # tactical attempt number at which this result was recorded

# --- DQN Model and Agent (No Changes) ---
class Q_Network(nn.Module):
    """Three-layer MLP that scores a (state, action) embedding pair.

    Both embeddings have ``embedding_dim`` features; they are concatenated
    along dim 1 and mapped to a single Q-value per row.
    """

    def __init__(self, embedding_dim: int):
        super().__init__()
        # State and action embeddings sit side by side, hence twice the width.
        joint_width = 2 * embedding_dim
        self.fc1 = nn.Linear(joint_width, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 1)

    def forward(self, state_embedding: torch.Tensor, action_embedding: torch.Tensor):
        """Return the Q-value for each row of the paired embeddings."""
        joint = torch.cat((state_embedding, action_embedding), dim=1)
        hidden = self.fc1(joint).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden)

class DQNAgent:
    """DQN agent that scores textual actions against a textual state.

    States and actions are plain strings turned into vectors by a
    SentenceTransformer model; a Q-network scores each (state, action) pair.
    Transitions are kept in a bounded replay buffer and learned from in
    mini-batches, with a separate target network for the TD target.
    """

    def __init__(self, learning_rate=0.001, discount_factor=0.9, epsilon=1.0, epsilon_decay=0.995, min_epsilon=0.1, batch_size=32):
        """Build the embedding model, the two Q-networks and the optimizer.

        Args:
            learning_rate: Adam learning rate for the online Q-network.
            discount_factor: Discount applied to the next-state value.
            epsilon: Initial exploration probability.
            epsilon_decay: Factor applied to epsilon after each training step.
            min_epsilon: Floor below which epsilon is never decayed.
            batch_size: Number of transitions sampled per training step.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"🧠 DQN Agent using device: {self.device}")
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device=self.device)
        self.embedding_dim = self.embedding_model.get_sentence_embedding_dimension()
        self.q_network = Q_Network(self.embedding_dim).to(self.device)
        # The target network starts as an exact copy and is only refreshed
        # explicitly through update_target_network().
        self.target_network = Q_Network(self.embedding_dim).to(self.device)
        self.target_network.load_state_dict(self.q_network.state_dict())
        self.target_network.eval()
        self.optimizer = optim.Adam(self.q_network.parameters(), lr=learning_rate)
        self.loss_fn = nn.MSELoss()
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.min_epsilon = min_epsilon
        self.batch_size = batch_size
        self.replay_buffer = deque(maxlen=10000)

    def get_text_embedding(self, text: str) -> torch.Tensor:
        """Encode *text* and return it on the agent's device with a leading batch axis."""
        embedding = self.embedding_model.encode(text, convert_to_tensor=True, show_progress_bar=False)
        return embedding.to(self.device).unsqueeze(0)

    def get_state_representation(self, problem: MathProblem, step: int, previous_steps_content: List[str], max_steps: int = 5) -> str:
        """Describe the current solving state as a sentence suitable for embedding.

        Args:
            problem: Supplies ``problem_type`` and ``difficulty``.
            step: Step number the solver is currently on.
            previous_steps_content: Text of the steps accepted so far; only the
                first line of the most recent one is included.
            max_steps: Total number of steps in an episode (defaults to 5, the
                value that used to be hard-coded in the text).
        """
        last_step_text = "None"
        if previous_steps_content:
            last_step_text = previous_steps_content[-1].strip().split('\n')[0]
        return (f"Problem Type: {problem.problem_type}. Difficulty: {problem.difficulty}. "
                f"Currently on step {step} of {max_steps}. Last step result: {last_step_text}")

    def choose_action(self, state_embedding: torch.Tensor, candidate_actions: List[str]) -> str:
        """Pick an action epsilon-greedily from *candidate_actions*.

        With probability ``epsilon`` a random candidate is returned; otherwise
        the candidate with the highest Q-value (first one wins ties).

        Raises:
            ValueError: If *candidate_actions* is empty. Previously this either
                raised IndexError or silently returned None depending on the
                random branch taken.
        """
        if not candidate_actions:
            raise ValueError("candidate_actions must contain at least one action")

        if random.random() < self.epsilon:
            return random.choice(candidate_actions)

        with torch.no_grad():
            best_action = None
            max_q_value = -float('inf')
            for action_text in candidate_actions:
                action_embedding = self.get_text_embedding(action_text)
                q_value = self.q_network(state_embedding, action_embedding).item()
                if q_value > max_q_value:
                    max_q_value = q_value
                    best_action = action_text
            return best_action

    def remember(self, state_embedding, action_embedding, reward, next_state_embedding, done):
        """Append one transition to the replay buffer (oldest entries drop out at capacity)."""
        self.replay_buffer.append((state_embedding, action_embedding, reward, next_state_embedding, done))

    def replay(self):
        """Run one training step on a random mini-batch, then decay epsilon.

        Does nothing until the buffer holds at least ``batch_size`` transitions.
        """
        if len(self.replay_buffer) < self.batch_size:
            return

        minibatch = random.sample(self.replay_buffer, self.batch_size)
        state_batch, action_batch, reward_batch, next_state_batch, done_batch = zip(*minibatch)
        states = torch.cat(state_batch).to(self.device)
        actions = torch.cat(action_batch).to(self.device)
        rewards = torch.tensor(reward_batch, dtype=torch.float32).to(self.device).unsqueeze(1)
        next_states = torch.cat(next_state_batch).to(self.device)
        dones = torch.tensor(done_batch, dtype=torch.float32).to(self.device).unsqueeze(1)

        current_q_values = self.q_network(states, actions)
        with torch.no_grad():
            # The next state's candidate actions are not stored, so its value is
            # approximated by scoring one fixed generic action.
            pseudo_action_embedding = self.get_text_embedding("Proceed to next step.").repeat(self.batch_size, 1)
            next_q_values = self.target_network(next_states, pseudo_action_embedding)
        # Terminal transitions (done == 1) contribute the reward only.
        td_target = rewards + (1 - dones) * self.discount_factor * next_q_values

        loss = self.loss_fn(current_q_values, td_target)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        if self.epsilon > self.min_epsilon:
            # Clamp so the final decay step cannot land below the floor.
            self.epsilon = max(self.min_epsilon, self.epsilon * self.epsilon_decay)

    def update_target_network(self):
        """Copy the online Q-network's weights into the target network."""
        self.target_network.load_state_dict(self.q_network.state_dict())

# --- MODIFIED: GeminiMathSolver Class ---
class GeminiMathSolver:
    def __init__(self, learner_model_name="gemini-1.5-flash", judge_model_name="gemini-1.5-pro"):
        """Create the DQN agent and the learner/judge model handles.

        Args:
            learner_model_name: Gemini model used to propose and execute steps.
            judge_model_name: Gemini model used to grade steps and suggest
                new strategies.

        If creating either model handle raises, the solver stays in mock mode
        (``api_configured`` remains False) and a warning is printed.
        """
        self.agent = DQNAgent()

        # Episode length and how often main() refreshes the target network.
        self.max_steps = 5
        self.update_target_every = 5
        # Retry budgets: per step (tactical) and per whole problem (strategic).
        self.max_tactical_retries = 2
        self.max_strategic_retries = 2

        # Stay in mock mode until both model handles exist.
        self.api_configured = False
        try:
            self.learner_model = genai.GenerativeModel(learner_model_name)
            self.judge_model = genai.GenerativeModel(judge_model_name)
            self.api_configured = True
            print("✅ Gemini API configured successfully.")
        except Exception as e:
            print(f"⚠️ Gemini API not configured. Running in MOCK mode. Error: {e!s}")

    # --- NEW: Method to generate a new high-level strategy ---
    def generate_new_strategy(self, problem: MathProblem, failed_solution: List[StepResult]) -> str:
        """Analyzes a failed solution and proposes a new high-level strategy."""
        print("\n" + "🤔" * 35)
        print("🤔 Analyzing failed solution to generate a new high-level strategy...")
        print("🤔" * 35)

        if not self.api_configured:
            return "[MOCK] The previous attempt failed. Try a completely different method."

        # Format the failed attempt for the prompt
        failed_attempt_str = ""
        for step_res in failed_solution:
            failed_attempt_str += (f"Step {step_res.step_number} (Action: '{step_res.action}'):\n"
                                   f"{step_res.step_content}\n"
                                   f"Judge's Feedback: {step_res.judge_feedback}\n---\n")

        prompt = f"""
        You are a master mathematician and strategist. A student attempted to solve a problem but failed, even after several retries on the final step. Their entire method may be flawed.

        Problem: "{problem.problem}"
        Expected Final Answer: "{problem.expected_answer}"

        Here is the student's full, incorrect attempt:
        ---
        {failed_attempt_str}
        ---

        Your task is to analyze their entire failed solution and provide a single, concise, high-level strategic suggestion for a COMPLETELY DIFFERENT APPROACH.
        Do NOT solve the problem yourself. Just provide a guiding principle for the next attempt.

        Example suggestions:
        - "The previous approach using integration by parts led to a recursive loop. Suggest trying a u-substitution with u=cos(x) instead."
        - "The direct differentiation was too complex. Advise the student to first simplify the expression using logarithmic properties before differentiating."
        - "The series expansion failed. Suggest analyzing the function's properties for continuity and differentiability directly, as it might be a special case like the Weierstrass function."

        Now, provide your strategic guidance for the given problem and failed attempt.
        """
        try:
            response = self.judge_model.generate_content(prompt)
            strategy = response.text.strip()
            print(f"💡 New Strategy: {strategy}")
            return strategy
        except Exception as e:
            print(f"❌ Error generating new strategy: {e}")
            return "An error occurred. Try to be more careful with basic calculations."

    # --- MODIFIED: Prompt now includes strategic guidance ---
    def get_learner_candidate_actions(self, problem: MathProblem, step: int, previous_steps: List[str], strategic_guidance: Optional[str] = None, num_candidates=3) -> List[str]:
        if not self.api_configured:
            return [f"[MOCK] Action for step {step}" for _ in range(num_candidates)]

        previous_steps_str = "".join(f"Step {i+1}: {s}\n" for i, s in enumerate(previous_steps)) if previous_steps else "None"

        # Dynamically add the guidance section to the prompt if it exists
        guidance_prompt = ""
        if strategic_guidance:
            guidance_prompt = f"""
            **IMPORTANT STRATEGIC GUIDANCE FOR THIS ENTIRE ATTEMPT:**
            ---
            {strategic_guidance}
            ---
            You MUST generate actions that follow this new high-level strategy.
            """

        prompt = f"""
        You are a mathematician planning to solve a problem.
        Problem: {problem.problem}
        Previous Steps Taken:
        {previous_steps_str}

        {guidance_prompt}

        You are now on step {step} of 5.
        Your task is to propose {num_candidates} distinct, high-level strategic actions for THIS step that are consistent with any overall guidance provided.
        Each action should be a concise phrase. Provide the actions as a numbered list.
        """
        try:
            response = self.learner_model.generate_content(prompt)
            actions = [line.split('. ', 1)[1].strip() for line in response.text.strip().split('\n') if '. ' in line]
            return actions if actions else ["Perform the next calculation."]
        except Exception as e:
            print(f"Error generating candidate actions: {e}")
            return [f"Error-gen action {i}" for i in range(num_candidates)]

    def get_learner_step_execution(self, problem: MathProblem, step: int, chosen_action: str, previous_steps: List[str], feedback: Optional[str] = None) -> str:
        # This function does not need changes
        if not self.api_configured:
            return f"[MOCK] Step {step}: Executing action '{chosen_action}'"
        feedback_prompt = f"Your previous attempt was wrong. Feedback: {feedback}. Please correct your work." if feedback else ""
        previous_steps_str = "".join(f"Step {i+1}: {s}\n" for i, s in enumerate(previous_steps)) if previous_steps else "None"
        prompt = f"""
        You are an expert mathematician solving a problem step-by-step.
        On step {step}/5. Problem: {problem.problem}
        Previous Steps: {previous_steps_str}
        Your high-level instruction for this step is: '{chosen_action}'.
        {feedback_prompt}
        Now, perform the mathematical work for THIS step only.
        """
        response = self.learner_model.generate_content(prompt)
        return response.text

    def get_judge_evaluation(self, problem: MathProblem, step_content: str, step_number: int) -> Tuple[bool, float, str]:
        # This function does not need changes
        if not self.api_configured:
            is_correct = random.random() > 0.4
            reward = (10 if is_correct else -5)
            return is_correct, reward, "MOCK: Evaluation."
        is_final_step = (step_number == self.max_steps)
        prompt = f"""
        You are an expert math judge. Evaluate one step of a solution.
        Problem: "{problem.problem}"
        Expected Final Answer: "{problem.expected_answer}"
        The student is on Step {step_number} and submitted this:
        ---
        {step_content}
        ---
        Is this step mathematically correct AND a logical progression towards the final answer?
        Start your response with "CORRECT" or "INCORRECT", followed by a one-sentence explanation.
        """
        try:
            response = self.judge_model.generate_content(prompt)
            feedback_text = response.text.strip()
            is_correct = feedback_text.upper().startswith('CORRECT')
            reward = (15.0 if is_correct and is_final_step else 5.0 + step_number) if is_correct else (-20.0 if is_final_step else -10.0)
            return is_correct, reward, feedback_text
        except Exception as e:
            print(f"❌ Error during judge evaluation: {str(e)}")
            return False, -15.0, f"Evaluation failed due to an API error: {e}"

    # --- MODIFIED: This is now the "inner loop" ---
    def _solve_single_attempt(self, problem: MathProblem, strategic_guidance: Optional[str] = None) -> Tuple[List[StepResult], bool]:
        """Work through all steps of the problem once (the "inner loop").

        Each step gets up to ``max_tactical_retries`` attempts. A step is
        accepted as soon as the judge marks it correct; if every attempt
        fails, the last incorrect attempt is accepted anyway so the solution
        can continue. Every attempt is stored in the agent's replay buffer and
        followed by one training step.

        Args:
            problem: The problem being solved.
            strategic_guidance: Optional overall strategy passed on to the
                action generator.

        Returns:
            ``(results, final_step_was_correct)`` where ``results`` holds one
            StepResult per accepted step.
        """
        agent = self.agent
        step_results = []
        accepted_steps = []

        for step in range(1, self.max_steps + 1):
            state_embedding = agent.get_text_embedding(
                agent.get_state_representation(problem, step, accepted_steps)
            )
            retry_feedback = None

            for attempt in range(1, self.max_tactical_retries + 1):
                candidates = self.get_learner_candidate_actions(problem, step, accepted_steps, strategic_guidance)
                print(f"Step {step}/{self.max_steps} (Tac. Attempt {attempt}) | Generated Actions: {candidates}")

                chosen = agent.choose_action(state_embedding, candidates)
                action_embedding = agent.get_text_embedding(chosen)

                step_content = self.get_learner_step_execution(problem, step, chosen, accepted_steps, retry_feedback)
                is_correct, reward, judge_feedback = self.get_judge_evaluation(problem, step_content, step)

                print(f"  🤖 Chosen Action: '{chosen}' | Result: {'✅' if is_correct else '❌'}")
                print(f"  💬 Learner: {step_content.strip()}")
                print(f"  👨‍⚖️ Judge: {judge_feedback.strip()} (Reward: {reward:.1f})")

                # NOTE(review): the stored next state always describes step + 1,
                # even when this attempt failed and the same step is retried —
                # confirm this is the intended transition.
                following_step = step + 1
                done = following_step > self.max_steps
                if done:
                    next_state_text = "terminal"
                else:
                    next_state_text = agent.get_state_representation(problem, following_step, accepted_steps + [step_content])
                next_state_embedding = agent.get_text_embedding(next_state_text)

                agent.remember(state_embedding, action_embedding, reward, next_state_embedding, done)
                agent.replay()

                if not is_correct:
                    retry_feedback = judge_feedback
                    if attempt != self.max_tactical_retries:
                        # Retries remain: pause, then try this step again.
                        time.sleep(1)
                        continue
                    print(f"  ⚠️ Max tactical retries reached for step {step}. Accepting incorrect step.")

                # Reached for a correct step, or for the last failed attempt.
                step_results.append(StepResult(step, chosen, step_content, is_correct, reward, judge_feedback, attempt))
                accepted_steps.append(step_content)
                if is_correct:
                    time.sleep(1)
                break

        if step_results:
            final_step_was_correct = step_results[-1].is_correct
        else:
            final_step_was_correct = False
        return step_results, final_step_was_correct

    # --- NEW: The "outer loop" that handles strategic retries ---
    def solve_problem_with_strategic_retries(self, problem: MathProblem):
        """
        Solves a problem with a two-tiered retry system.
        If the entire solution fails, it generates a new strategy and starts over.
        """
        print(f"\n" + "="*80)
        print(f"🧮 Solving Problem: {problem.problem} | Expected: {problem.expected_answer}")
        print("="*80)

        strategic_guidance = None
        final_results = []
        
        for strat_attempt in range(1, self.max_strategic_retries + 1):
            print(f"\n--- STRATEGIC ATTEMPT #{strat_attempt}/{self.max_strategic_retries} ---")
            if strategic_guidance:
                print(f"GUIDANCE: {strategic_guidance}")
            print("-" * 55)

            results, final_step_success = self._solve_single_attempt(problem, strategic_guidance)
            
            if final_step_success:
                print("\n" + "🎉" * 20)
                print("🎉 Final step was correct! Problem solved successfully!")
                print("🎉" * 20)
                final_results = results
                break
            else:
                final_results = results # Store the latest failed attempt
                print("\n" + "🔥" * 20)
                print("🔥 Final step was incorrect after all tactical retries.")
                print("🔥 The overall strategy may be flawed.")
                print("🔥" * 20)
                if strat_attempt < self.max_strategic_retries:
                    # Generate a new strategy for the next loop iteration
                    strategic_guidance = self.generate_new_strategy(problem, results)
                else:
                    print("\n" + "🛑" * 20)
                    print("🛑 Max strategic retries reached. Unable to solve the problem.")
                    print("🛑" * 20)

        # Final Summary
        total_reward = sum(r.reward for r in final_results)
        correct_steps = sum(1 for r in final_results if r.is_correct)
        print("\n" + "="*30 + " FINAL SUMMARY " + "="*30)
        print(f"Problem: {problem.problem}")
        print(f"📊 Final Result: {correct_steps}/{self.max_steps} steps correct.")
        print(f"🏆 Total Reward from last attempt: {total_reward:.1f}")
        print(f"📉 Final Epsilon: {self.agent.epsilon:.3f}")
        print("="*74)
        return final_results

# --- Main Execution ---
def create_hard_problems() -> List[MathProblem]:
    """Return the fixed benchmark of five "very_hard" sample problems.

    Each entry below is ``(problem statement, problem type, expected answer)``;
    every problem shares the same difficulty label.
    """
    specs = [
        (
            "Find the derivative of the Weierstrass function, defined as f(x) = Σ [from n=0 to ∞] aⁿ * cos(bⁿπx), for 0 < a < 1 and ab > 1 + (3/2)π.",
            "derivative",
            "The function is continuous everywhere but differentiable nowhere. Therefore, the derivative does not exist for any value of x.",
        ),
        (
            "Consider f_n(x) = 2nx*e^(-nx²) on [0, 1]. Evaluate lim[n→∞] ∫[0 to 1] f_n(x) dx and ∫[0 to 1] lim[n→∞] f_n(x) dx. Are they equal?",
            "real_analysis",
            "They are not equal. The integral of the limit is ∫0 dx = 0. The limit of the integral is lim[n→∞] (1 - e⁻ⁿ) = 1. They differ because convergence is not uniform, so the limit and integral cannot be interchanged.",
        ),
        (
            "Using the residue theorem from complex analysis, prove that the sum of the reciprocals of the squares of the positive integers equals π²/6. (i.e., prove Σ [from n=1 to ∞] 1/n² = π²/6).",
            "complex_analysis",
            "The proof involves integrating f(z) = π cot(πz) / z² over a square contour C_N as N→∞. The function has a third-order pole at z=0 and simple poles at all non-zero integers z=n. The sum of the residues at all poles is 0 as the contour integral vanishes. The residue at z=n (n≠0) is 1/n². The residue at z=0 is -π²/3. Summing all residues gives -π²/3 + 2 * Σ[n=1 to ∞] 1/n² = 0, which implies Σ[n=1 to ∞] 1/n² = π²/6.",
        ),
        (
            "Prove the Brouwer fixed-point theorem: every continuous function f mapping a closed n-dimensional ball Dⁿ to itself has at least one fixed point.",
            "algebraic_topology",
            "The proof is by contradiction using homology theory. Assume a continuous map f: Dⁿ → Dⁿ has no fixed point. Then one can construct a retraction r: Dⁿ → Sⁿ⁻¹ (its boundary). Such a retraction is a continuous map that is the identity on the boundary. This leads to a contradiction because the (n-1)-th homology group of Dⁿ is trivial (H_{n-1}(Dⁿ) = 0), while that of Sⁿ⁻¹ is non-trivial (H_{n-1}(Sⁿ⁻¹) ≅ ℤ). A retraction would imply the identity map on Sⁿ⁻¹ is null-homotopic, which is false. Thus, a fixed point must exist.",
        ),
        (
            "Apply the Baire Category Theorem to the space of continuous functions C([0, 1]) to prove the existence of a continuous function that is nowhere differentiable.",
            "functional_analysis",
            "Let C([0, 1]) be the complete metric space of continuous functions with the sup norm. Let E_n = {f ∈ C([0,1]) | ∃x₀ s.t. |f(x)-f(x₀)| ≤ n|x-x₀| for all x}. Each E_n is a closed, nowhere dense set. The set of functions differentiable at at least one point is contained in the union ∪[n=1 to ∞] E_n. By the Baire Category Theorem, this union is a 'meager' set. Since C([0, 1]) is not meager, its complement—the set of continuous, nowhere differentiable functions—must be non-empty.",
        ),
    ]
    return [
        MathProblem(
            problem=statement,
            problem_type=topic,
            difficulty="very_hard",
            expected_answer=answer,
        )
        for statement, topic, answer in specs
    ]

def main():
    """Train the DQN-guided solver on the hard problems and save its weights.

    Each problem is solved with strategic retries; after every
    ``solver.update_target_every`` problems the target network is refreshed.
    The online Q-network's weights are written to ``dqn_math_solver_v2.pth``.
    """
    solver = GeminiMathSolver()
    if not solver.api_configured:
        print("\n--- RUNNING IN MOCK MODE. DQN WILL TRAIN ON MOCK DATA. ---")

    for problems_done, prob in enumerate(create_hard_problems(), start=1):
        # --- Call the new supervisory method ---
        solver.solve_problem_with_strategic_retries(prob)

        if problems_done % solver.update_target_every == 0:
            print("\n--- 🎯 Updating Target Network ---\n")
            solver.agent.update_target_network()

    print("\n\nDQN training complete.")
    weights_path = "dqn_math_solver_v2.pth"
    torch.save(solver.agent.q_network.state_dict(), weights_path)
    print(f"💾 Model weights saved to {weights_path}")

# Run the training loop only when this cell/script is executed directly.
if __name__ == "__main__":
    main()

2025-07-23 18:21:56.953556: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753294917.322467      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753294917.427540      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


🧠 DQN Agent using device: cpu


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ Gemini API configured successfully.

🧮 Solving Problem: Find the derivative of the Weierstrass function, defined as f(x) = Σ [from n=0 to ∞] aⁿ * cos(bⁿπx), for 0 < a < 1 and ab > 1 + (3/2)π. | Expected: The function is continuous everywhere but differentiable nowhere. Therefore, the derivative does not exist for any value of x.

--- STRATEGIC ATTEMPT #1/2 ---
-------------------------------------------------------
Step 1/5 (Tac. Attempt 1) | Generated Actions: ['Investigate pointwise differentiability.', 'Analyze the convergence of the series and its derivative.', 'Explore the Fourier series representation.']
  🤖 Chosen Action: 'Explore the Fourier series representation.' | Result: ✅
  💬 Learner: Step 1/5: Exploring the Fourier Series Representation

The Weierstrass function, as given, is already presented in a form resembling a Fourier series.  However, it's crucial to understand its properties and limitations concerning differentiation.  A standard Fourier series represents a peri