In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root_dir, _, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(root_dir, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Use the %pip magic (not !pip) so the package is installed into the environment
# of the running kernel.
# TODO: pin an exact transformers version once a known-good one is identified,
# so "Restart & Run All" stays reproducible.
%pip install --upgrade transformers

In [None]:
%%capture
# NOTE(review): %%capture swallows everything this cell writes to stdout/stderr,
# which presumably includes a failed install or a failing import further down in
# this cell - confirm, and drop %%capture while debugging setup problems.
# TODO: pin an exact unsloth version for reproducible runs.
%pip install unsloth

from unsloth import FastVisionModel
import torch
import os
import re
from tqdm import tqdm
from PIL import Image
from transformers import TextStreamer

In [None]:
# Load the 4-bit pre-quantized Qwen2.5-VL 7B Instruct checkpoint through Unsloth.
# The keyword arguments below are the T4x2-oriented settings: weights are spread
# over both GPUs with a per-device memory cap, and anything that does not fit is
# offloaded to ./offload_temp.
model, tokenizer = FastVisionModel.from_pretrained(
    "unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit",
    # Larger alternative: "unsloth/Qwen2.5-VL-32B-Instruct-bnb-4bit"
    load_in_4bit=True,                       # 4bit to reduce memory use; False for 16bit LoRA
    use_gradient_checkpointing="unsloth",    # True or "unsloth" for long context
    device_map="balanced",                   # distribute across both T4 GPUs
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    # 13GB per GPU leaves some buffer on each card; CPU gets a 20GB budget.
    max_memory={**{gpu_index: "13GB" for gpu_index in (0, 1)}, "cpu": "20GB"},
    offload_folder="./offload_temp",
    offload_state_dict=True,
)

In [None]:
# Add up the element count of every parameter tensor and report it in billions.
# NOTE(review): with load_in_4bit=True the quantized weights may be stored packed,
# in which case numel() would under-count the logical parameters - confirm.
total = sum(map(lambda weight: weight.numel(), model.parameters()))
print(f"Total params: {total/1e9:.2f}‚ÄØbillion")

In [None]:
# (stray scratch cell with no code - safe to delete)

In [None]:
# Prepare the loaded model for generation before any solver function is called:
# first Unsloth's own inference switch, then the standard eval() call.
# Both statements run when this cell executes and act on the module-level `model`.
FastVisionModel.for_inference(model)
model.eval()  # set to eval mode

def solve_geometry_problem_with_image_and_predicates(image_path, predicates, question):
    """
    Solve a geometry problem with the Qwen2.5-VL model using both the figure and its predicates.

    The figure is read from disk, combined with a long instruction prompt that embeds
    ``predicates`` and ``question``, and passed through the module-level ``model`` and
    ``tokenizer``. Sampling is enabled (``do_sample=True``), so repeated calls with the
    same inputs can return different text.

    Args:
        image_path (str): Path to the geometry problem image
        predicates (str): Formal logical predicates describing the geometry
        question (str): Question to solve

    Returns:
        dict: ``{'content': <generated text>}``; when the image cannot be loaded the
        value is the fixed string ``'Error loading image'`` and no generation happens.
    """
    # Load the image. The context manager closes the underlying file as soon as the
    # RGB copy exists, instead of leaving the handle open until garbage collection.
    try:
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
    except Exception as e:
        print(f"Error loading image {image_path}: {e}")
        return {'content': 'Error loading image'}
    
    # Prompt combining image and predicates with the answer-format requirements.
    # Rule 1 of the "MANDATORY FORMATTING RULES" used to ask the model for double
    # braces (an f-string escaping detail that leaked into the text); it now asks for
    # the same single-brace form that every example in the prompt shows.
    prompt = f"""You are an expert AI mathematician solving geometric problems through rigorous deductive reasoning.

GEOMETRIC FIGURE IMAGE:
The image shows a geometric figure. Use this visual information along with the formal predicates below to understand the complete geometric setup.

GIVEN PREDICATES: 
{predicates}

QUESTION: 
{question}

SOLVE THROUGH SYSTEMATIC DEDUCTION:

**STEP 1: CRITICAL INFORMATION ANALYSIS**
- Examine both the image and predicates to identify the most important geometric relationships
- List key measurements, angles, and special constructions (circles, perpendiculars, etc.)
- Clearly state what specific value the question asks for
- Cross-reference visual elements in the image with the formal predicates

**STEP 2: DEDUCTIVE REASONING CHAIN**
- Build a logical sequence where each inference follows from previous steps
- **JUSTIFY EVERY STEP** by citing specific predicates or geometric theorems
- Actively combine predicates to reveal deeper relationships
- Use both visual cues from the image and formal relationships from predicates
- Example format: "Since [Predicate A] and [Predicate B], by [Theorem Name], we can conclude [Result]"
- Show all mathematical calculations as part of this logical chain
- **NO ASSUMPTIONS** - every step must be explicitly supported

**STEP 3: CONCLUSION AND SELECTION**
- State your final calculated answer based on the deductive chain

**DEDUCTIVE REASONING GUIDELINES:**
- **Synthesize Information:** Don't just list predicates - combine them to find new relationships
- **Use Given Measurements:** Pay special attention to provided angle/length measurements
- **Apply Geometric Theorems:** Use inscribed angle, central angle, perpendicular, circle, and triangle theorems
- **Logical Flow:** Each step must logically follow from established facts
- **Explicit Justification:** Always state WHY each inference is valid
- **Visual-Predicate Integration:** Use the image to understand spatial relationships and predicates for precise logical reasoning

‚ö†Ô∏è CRITICAL FINAL ANSWER FORMAT REQUIREMENT ‚ö†Ô∏è

YOU MUST END YOUR RESPONSE WITH YOUR FINAL ANSWER IN THIS EXACT FORMAT:

Final Answer: \\[\\boxed{{your_answer_here}}\\]

üî¥ FORBIDDEN FORMATS - DO NOT USE:
‚ùå Final Answer: $answer$
‚ùå Final Answer: $$answer$$
‚ùå ### Final Answer:
‚ùå **Final Answer:**
‚ùå Final Answer: \\[\\boxed{{answer}}\\] followed by additional text
‚ùå Multiple \\boxed{{}} expressions in your response
‚ùå \\boxed{{}} expressions anywhere except the final answer line

‚úÖ REQUIRED FORMAT EXAMPLES:

For coordinates:
Final Answer: \\[\\boxed{{(2, -2)}}\\]

For angles with degrees:
Final Answer: \\[\\boxed{{230^\\circ}}\\]

For measurements with units:
Final Answer: \\[\\boxed{{4.4 \\text{{ m}}}}\\]

For areas:
Final Answer: \\[\\boxed{{\\text{{Area}} = 347.4248\\pi \\text{{ cm}}^2}}\\]

For surface areas:
Final Answer: \\[\\boxed{{\\text{{Surface Area}} = 9236.28 \\text{{ m}}^2}}\\]

For volumes:
Final Answer: \\[\\boxed{{\\text{{Volume}} = 113.10 \\text{{ cm}}^3}}\\]

For polar coordinates:
Final Answer: \\[\\boxed{{(x,y) = (270^\\circ, 5)}}\\]

For piecewise functions:
Final Answer: \\[\\boxed{{\\text{{When }} x \\leq -3, f(x) = -x-5; \\text{{ when }} x > 3, f(x) = x+1}}\\]

For function notation:
Final Answer: \\[\\boxed{{g(x) = (x + 4)^2 - 5}}\\]

For domain and range:
Final Answer: \\[\\boxed{{\\text{{Domain: }} [-4, 4], \\quad \\text{{Range: }} [0, 2]}}\\]

For constants:
Final Answer: \\[\\boxed{{\\pi}}\\]

For fractions:
Final Answer: \\[\\boxed{{\\frac{{22\\sqrt{{3}}}}{{3}}}}\\]

For simple numbers:
Final Answer: \\[\\boxed{{675}}\\]

üî• MANDATORY FORMATTING RULES:

1. **Use a single pair of braces**: \\boxed{{answer}} (do not add an extra layer of braces around the answer)
2. **No markdown formatting**: Do not use **bold** or *italic* in your final answer line
3. **One answer only**: Include only ONE \\boxed{{}} expression in your entire response
4. **No trailing content**: After "Final Answer: \\[\\boxed{{...}}\\]", do not add any additional text
5. **Proper LaTeX syntax**: Use \\text{{}} for text, \\frac{{}}{{}} for fractions, ^\\circ for degrees
6. **Escape special characters**: Use \\{{ and \\}} for literal braces in text
7. **No nested \\boxed**: Do not put \\boxed inside another \\boxed

FORMATTING GUIDELINES:
- Use proper LaTeX syntax within the \\boxed{{}} expression
- Include units when applicable using \\text{{unit}} for unit labels
- Use appropriate mathematical notation (^\\circ for degrees, \\pi for pi, etc.)
- For areas, volumes, and surface areas, clearly label what the measurement represents
- For coordinates, use parentheses: (x,y)
- For ranges and domains, use brackets and interval notation
- Round decimal answers to appropriate precision (typically 1-4 decimal places)
- Use \\quad for spacing between multiple parts of an answer within the \\boxed{{}}

üîß COMMON MISTAKES TO AVOID:

‚ùå Don't write: Final Answer: \\[\\boxed{{\\text{{The answer is 42}}}}\\]
‚úÖ Instead write: Final Answer: \\[\\boxed{{42}}\\]

‚ùå Don't write: Final Answer: \\[\\boxed{{x = 5}}\\] where x equals 5.
‚úÖ Instead write: Final Answer: \\[\\boxed{{x = 5}}\\]

‚ùå Don't write: Final Answer: \\[\\boxed{{\\frac{{22\\sqrt{{3}}}}{{3}}}}\\] square units
‚úÖ Instead write: Final Answer: \\[\\boxed{{\\frac{{22\\sqrt{{3}}}}{{3}} \\text{{ square units}}}}\\]

VALIDATION CHECKLIST:
Before submitting, verify your final answer:
‚ñ° Starts with exactly "Final Answer: \\[\\boxed{{"
‚ñ° Ends with exactly "}}\\]"
‚ñ° Contains only ONE \\boxed{{}} expression
‚ñ° Uses proper LaTeX syntax
‚ñ° No markdown formatting (**//**) in the final line
‚ñ° No additional text after the closing \\]

Begin your analysis now and remember: your response must end with exactly one properly formatted Final Answer line using the \\boxed{{}} format.
"""

    # Single user turn: an image placeholder followed by the text prompt
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": prompt}
            ]
        }
    ]

    # Render the conversation with the chat template, ending on the generation prompt
    input_text = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True
    )
    
    # Build the model inputs from the image and the templated text
    inputs = tokenizer(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to("cuda")

    with torch.no_grad():
        # Sampled generation (temperature 0.8, min_p 0.1), up to 5000 new tokens
        outputs = model.generate(
            **inputs,
            max_new_tokens=5000,
            temperature=0.8,
            min_p=0.1,
            use_cache=True,
            do_sample=True,
        )
    
    torch.cuda.empty_cache()

    # Decode only the generated tokens (everything after the prompt tokens)
    generated_tokens = outputs[:, inputs.input_ids.shape[-1]:]
    content = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0].strip()

    return {
        'content': content
    }

In [None]:
import re

def extract_answer_letter(content):
    """
    Extract the final mathematical answer from model output as a ``$...$`` string.

    Despite the historical name, the result is a LaTeX expression, not a
    multiple-choice letter. Strategies are tried in order and the first one that
    yields a non-empty answer wins:

    1. The contents of the last ``\\boxed{...}`` whose braces balance. Earlier boxes
       are tried when a later one is empty or never closed.
    2. ``Final Answer:`` followed by a boxed expression, matched leniently by regex
       (this also accepts some malformed brace nesting).
    3. ``Final Answer:`` / ``Answer:`` style phrases in the last five non-empty lines.
    4. Inline ``$...$`` math in the last line, or the last line itself when it
       looks mathematical.

    Args:
        content (str): Raw text generated by the model.

    Returns:
        str: The answer wrapped in single dollar signs, or "" when nothing usable
        was found.
    """

    def _balanced_brace_body(text, start):
        """Return the text inside the brace opened just before ``start``, or None if it never closes."""
        depth = 1  # the opening brace right before `start` is already counted
        for idx in range(start, len(text)):
            if text[idx] == '{':
                depth += 1
            elif text[idx] == '}':
                depth -= 1
                if depth == 0:
                    return text[start:idx]
        return None

    def clean_and_format_answer(answer):
        """Normalize an extracted answer to ``$...$``; return "" when nothing is left."""
        # Drop surrounding whitespace and trailing sentence punctuation
        answer = re.sub(r'[.,;!?]+$', '', answer.strip()).strip()

        # Only delimiters/whitespace left (e.g. an empty \boxed{}): no answer
        if not answer.strip('$').strip():
            return ""

        # Display math must be tested before inline math, because "$$x$$" also
        # starts and ends with a single "$" and would otherwise pass through as-is
        if len(answer) >= 4 and answer.startswith('$$') and answer.endswith('$$'):
            return f"${answer[2:-2].strip()}$"

        # Already inline LaTeX: keep unchanged
        if answer.startswith('$') and answer.endswith('$'):
            return answer

        # A \boxed{...} nested inside the captured text: unwrap it
        nested = re.search(r'\\boxed\{', answer)
        if nested:
            inner = _balanced_brace_body(answer, nested.end())
            if inner is not None:
                return f"${inner}$" if inner.strip() else ""

        # Remove markdown emphasis. The markers must hug the emphasized text, so
        # spaced multiplication signs such as "2 * 3 * 4" are left alone.
        answer = re.sub(r'\*\*(\S(?:[^*]*\S)?)\*\*', r'\1', answer)  # **bold**
        answer = re.sub(r'\*(\S(?:[^*]*\S)?)\*', r'\1', answer)      # *italic*
        # Dangling markers, e.g. the "**" left over from "**Final Answer:** 42"
        answer = answer.strip('*').strip()
        if not answer:
            return ""

        return f"${answer}$"

    def is_mathematical_expression(text):
        """Heuristically decide whether ``text`` looks like a mathematical expression."""
        math_indicators = [
            r'\d+',                    # digits
            r'[+\-*/=]',               # basic operators
            r'[(){}\[\]]',            # parentheses/brackets
            r'[œÄœÄ¬≤¬≥‚àö‚àû¬±‚â†‚âà‚â§‚â•]',           # common symbols
            r'%',                       # percent
            r'e[+-]?\d+',              # scientific notation
            r'\\[a-zA-Z]+',          # LaTeX commands
            r'\^|_',                   # superscript/subscript markers
            r'(sin|cos|tan|cot|sec|csc|log|ln|exp|sqrt|root|lim|sum|prod|int|frac)',
            r'(Area|Volume|Surface|Domain|Range|Radius|Diameter|Circumference)',
            r'[<>]=?',                  # inequalities
            r'‚àà|‚à™|‚à©|‚àß|‚à®|‚àÇ|‚àá',           # set/logic/differential operators
            r'‚áí|‚áî',                     # implication/equivalence
            r'\|',                     # absolute value bars
            r'\\text\{[^}]+\}',        # LaTeX text commands
            r'\\circ',                 # degree symbol
        ]

        # Any single indicator is enough
        if any(re.search(indicator, text, re.IGNORECASE) for indicator in math_indicators):
            return True

        # Otherwise accept text that consists mostly of "mathematical" characters
        math_chars = r'[\d\.\s\^\{\}\\\(\)\,\-\+\=\w¬∞œÄŒ∏Œ±Œ≤Œ≥Œ¥ŒªŒºœÉœÜœàœâ]'
        return len(text) > 0 and len(re.findall(math_chars, text)) / len(text) > 0.7

    if not content:
        return ""

    # Strategy 1: balanced \boxed{...}, preferring the last occurrence
    for boxed in reversed(list(re.finditer(r'\\boxed\{', content))):
        body = _balanced_brace_body(content, boxed.end())
        if body is None:
            continue
        formatted = clean_and_format_answer(body)
        if formatted:
            return formatted

    # Strategy 2: "Final Answer:" + boxed expression, possibly spanning lines
    multiline_patterns = [
        r'Final Answer:\s*\\\[\s*\\boxed\{([^}]*(?:\{[^}]*\}[^}]*)*)\}\s*\\\]',
        r'Final Answer:\s*\$\$\s*\\boxed\{([^}]*(?:\{[^}]*\}[^}]*)*)\}\s*\$\$',
        r'Final Answer:\s*\$\s*\\boxed\{([^}]*(?:\{[^}]*\}[^}]*)*)\}\s*\$',
    ]
    for pattern in multiline_patterns:
        match = re.search(pattern, content, re.IGNORECASE | re.DOTALL)
        if match:
            formatted = clean_and_format_answer(match.group(1))
            if formatted:
                return formatted

    # Non-empty lines, stripped
    lines = [line.strip() for line in content.strip().split('\n') if line.strip()]
    if not lines:
        return ""

    # Strategy 3: answer phrases in the last five lines, scanning from the bottom up
    final_answer_patterns = [
        r'Final Answer:\s*\$([^$]+)\$',
        r'Final Answer:\s*([^$\\\[\n]+)$',
        r'The Final Answer is:\s*\$([^$]+)\$',
        r'The correct answer is:\s*\$([^$]+)\$',
        r'Answer:\s*\$([^$]+)\$',
        r'The answer is:\s*\$([^$]+)\$',
    ]
    for line in reversed(lines[-5:]):
        for pattern in final_answer_patterns:
            match = re.search(pattern, line, re.IGNORECASE)
            if match:
                formatted = clean_and_format_answer(match.group(1))
                if formatted:
                    return formatted

    # Strategy 4: inline math in the last line ("$$x$$" is covered too, because the
    # single-dollar pattern matches inside it), then the last line as a whole
    last_line = lines[-1]
    inline = re.search(r'\$([^$]+)\$', last_line, re.DOTALL)
    if inline and inline.group(1).strip():
        return f"${inline.group(1).strip()}$"

    if is_mathematical_expression(last_line):
        return f"${last_line}$"

    return ""

In [None]:
def validate_and_retry_if_needed(image_path, predicates, question, max_retries=3):
    """
    Query the model until a valid mathematical answer can be extracted.

    The full solver is called up to ``max_retries`` times. If none of those
    responses yields an answer accepted by ``is_valid_mathematical_answer``, one
    last generation is made with a shorter prompt (no step-by-step instructions,
    same answer-format rules, at most 3000 new tokens).

    Args:
        image_path (str): Path to the geometry problem image
        predicates (str): Formal logical predicates describing the geometry
        question (str): Question to solve
        max_retries (int): Number of attempts with the full solver prompt

    Returns:
        tuple[str, str]: ``(model_text, answer)``. ``answer`` is the extracted
        ``$...$`` expression; when the last attempt gives nothing valid, or raises,
        it is the question-dependent default from ``get_default_mathematical_answer``
        (the text is ``"Error occurred"`` in the exception case).
    """
    for attempt in range(max_retries):
        result = solve_geometry_problem_with_image_and_predicates(image_path, predicates, question)
        content = result['content']
        answer_letter = extract_answer_letter(content)
        
        if is_valid_mathematical_answer(answer_letter):
            return content, answer_letter
        
        print(f"Attempt {attempt + 1} failed to extract valid answer letter")
    
    # If all attempts fail, try one more time with a very direct prompt
    try:
        # Close the image file as soon as the RGB copy exists
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        # Rule 1 of the "MANDATORY FORMATTING RULES" asks for a single pair of braces,
        # matching the examples (it previously asked for double braces, an f-string
        # escaping detail that had leaked into the prompt text).
        direct_prompt = f"""Look at this geometry problem image and use the given predicates to answer the question.

PREDICATES: {predicates}
QUESTION: {question}

‚ö†Ô∏è CRITICAL FINAL ANSWER FORMAT REQUIREMENT ‚ö†Ô∏è

YOU MUST END YOUR RESPONSE WITH YOUR FINAL ANSWER IN THIS EXACT FORMAT:

Final Answer: \\[\\boxed{{your_answer_here}}\\]

üî¥ FORBIDDEN FORMATS - DO NOT USE:
‚ùå Final Answer: $answer$
‚ùå Final Answer: $$answer$$
‚ùå ### Final Answer:
‚ùå **Final Answer:**
‚ùå Final Answer: \\[\\boxed{{answer}}\\] followed by additional text
‚ùå Multiple \\boxed{{}} expressions in your response
‚ùå \\boxed{{}} expressions anywhere except the final answer line

‚úÖ REQUIRED FORMAT EXAMPLES:

For coordinates:
Final Answer: \\[\\boxed{{(2, -2)}}\\]

For angles with degrees:
Final Answer: \\[\\boxed{{230^\\circ}}\\]

For measurements with units:
Final Answer: \\[\\boxed{{4.4 \\text{{ m}}}}\\]

For areas:
Final Answer: \\[\\boxed{{\\text{{Area}} = 347.4248\\pi \\text{{ cm}}^2}}\\]

For surface areas:
Final Answer: \\[\\boxed{{\\text{{Surface Area}} = 9236.28 \\text{{ m}}^2}}\\]

For volumes:
Final Answer: \\[\\boxed{{\\text{{Volume}} = 113.10 \\text{{ cm}}^3}}\\]

For polar coordinates:
Final Answer: \\[\\boxed{{(x,y) = (270^\\circ, 5)}}\\]

For piecewise functions:
Final Answer: \\[\\boxed{{\\text{{When }} x \\leq -3, f(x) = -x-5; \\text{{ when }} x > 3, f(x) = x+1}}\\]

For function notation:
Final Answer: \\[\\boxed{{g(x) = (x + 4)^2 - 5}}\\]

For domain and range:
Final Answer: \\[\\boxed{{\\text{{Domain: }} [-4, 4], \\quad \\text{{Range: }} [0, 2]}}\\]

For constants:
Final Answer: \\[\\boxed{{\\pi}}\\]

For fractions:
Final Answer: \\[\\boxed{{\\frac{{22\\sqrt{{3}}}}{{3}}}}\\]

For simple numbers:
Final Answer: \\[\\boxed{{675}}\\]

üî• MANDATORY FORMATTING RULES:

1. **Use a single pair of braces**: \\boxed{{answer}} (do not add an extra layer of braces around the answer)
2. **No markdown formatting**: Do not use **bold** or *italic* in your final answer line
3. **One answer only**: Include only ONE \\boxed{{}} expression in your entire response
4. **No trailing content**: After "Final Answer: \\[\\boxed{{...}}\\]", do not add any additional text
5. **Proper LaTeX syntax**: Use \\text{{}} for text, \\frac{{}}{{}} for fractions, ^\\circ for degrees
6. **Escape special characters**: Use \\{{ and \\}} for literal braces in text
7. **No nested \\boxed**: Do not put \\boxed inside another \\boxed

FORMATTING GUIDELINES:
- Use proper LaTeX syntax within the \\boxed{{}} expression
- Include units when applicable using \\text{{unit}} for unit labels
- Use appropriate mathematical notation (^\\circ for degrees, \\pi for pi, etc.)
- For areas, volumes, and surface areas, clearly label what the measurement represents
- For coordinates, use parentheses: (x,y)
- For ranges and domains, use brackets and interval notation
- Round decimal answers to appropriate precision (typically 1-4 decimal places)
- Use \\quad for spacing between multiple parts of an answer within the \\boxed{{}}

üîß COMMON MISTAKES TO AVOID:

‚ùå Don't write: Final Answer: \\[\\boxed{{\\text{{The answer is 42}}}}\\]
‚úÖ Instead write: Final Answer: \\[\\boxed{{42}}\\]

‚ùå Don't write: Final Answer: \\[\\boxed{{x = 5}}\\] where x equals 5.
‚úÖ Instead write: Final Answer: \\[\\boxed{{x = 5}}\\]

‚ùå Don't write: Final Answer: \\[\\boxed{{\\frac{{22\\sqrt{{3}}}}{{3}}}}\\] square units
‚úÖ Instead write: Final Answer: \\[\\boxed{{\\frac{{22\\sqrt{{3}}}}{{3}} \\text{{ square units}}}}\\]

VALIDATION CHECKLIST:
Before submitting, verify your final answer:
‚ñ° Starts with exactly "Final Answer: \\[\\boxed{{"
‚ñ° Ends with exactly "}}\\]"
‚ñ° Contains only ONE \\boxed{{}} expression
‚ñ° Uses proper LaTeX syntax
‚ñ° No markdown formatting (**//**) in the final line
‚ñ° No additional text after the closing \\]

Begin your analysis now and remember: your response must end with exactly one properly formatted Final Answer line using the \\boxed{{}} format.
"""
        
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image"},
                    {"type": "text", "text": direct_prompt}
                ]
            }
        ]
        
        input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
        inputs = tokenizer(image, input_text, add_special_tokens=False, return_tensors="pt").to("cuda")
        
        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=3000, temperature=0.8, min_p=0.1, use_cache=True, do_sample=True)
        
        # Same cache cleanup the main solver performs after generation
        torch.cuda.empty_cache()
        
        # Decode only the newly generated tokens
        generated_tokens = outputs[:, inputs.input_ids.shape[-1]:]
        final_content = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0].strip()
        final_letter = extract_answer_letter(final_content)
        
        # Apply the same validity check as the retry loop before trusting the answer
        if not is_valid_mathematical_answer(final_letter):
            final_letter = get_default_mathematical_answer(question)
        return final_content, final_letter
    
    except Exception as e:
        print(f"Error in final retry: {e}")
        # Fall back to a mathematical default rather than the multiple-choice letter 'A'
        return "Error occurred", get_default_mathematical_answer(question)

In [None]:
def is_valid_mathematical_answer(answer):
    """
    Decide whether an extracted answer looks like a mathematical expression.

    The answer is accepted as soon as any one of the regular expressions below
    matches somewhere in it (case-insensitively). Patterns are tried lazily, in the
    order listed.

    Args:
        answer (str): The extracted answer
    Returns:
        bool: True when at least one pattern matches; False for empty/blank input
        or when nothing matches
    """
    if not answer or not answer.strip():
        return False

    # Pattern families, keyed by a descriptive label (labels are for readers only)
    pattern_families = {
        "latex math blocks": (
            r'\$[^$]+\$',                                    # inline math
            r'\$\$[^$]+\$\$',                               # display math
            r'\\begin\{[^}]+\}.*?\\end\{[^}]+\}',          # environments (matrix, align, ...)
        ),
        "basic expressions": (
            r'\\frac\{[^}]+\}\{[^}]+\}',                    # \frac{a}{b}
            r'\\sqrt(\[[^\]]+\])?\{[^}]+\}',                # \sqrt{x}, \sqrt[n]{x}
            r'[a-zA-Z][a-zA-Z0-9]*\^[^{\s]+',              # x^2
            r'[a-zA-Z][a-zA-Z0-9]*\^\{[^}]+\}',            # x^{2n+1}
            r'[a-zA-Z][a-zA-Z0-9]*_[^{\s]+',               # x_i
            r'[a-zA-Z][a-zA-Z0-9]*_\{[^}]+\}',             # x_{i,j}
        ),
        "integrals and sums": (
            r'\\int[_^{}\s\w]*.*?d[a-zA-Z]',                # \int_a^b f(x) dx
            r'\\oint[_^{}\s\w]*.*?d[a-zA-Z]',               # contour integrals
            r'\\sum[_^{}\s\w]*[^\\]*',                      # \sum_{i=1}^n a_i
            r'\\prod[_^{}\s\w]*[^\\]*',                     # \prod_{i=1}^n a_i
            r'\\lim[_^{}\s\w]*[^\\]*',                      # \lim_{x \to 0}
        ),
        "functions and operators": (
            r'\\(?:sin|cos|tan|sec|csc|cot|arcsin|arccos|arctan)\b',  # trigonometric
            r'\\(?:sinh|cosh|tanh|sech|csch|coth)\b',       # hyperbolic
            r'\\(?:log|ln|exp|lg)\b',                       # logarithmic/exponential
            r'\\(?:max|min|sup|inf|arg|det|dim|ker|rank)\b', # operators
            r'\\(?:gcd|lcm|mod)\b',                         # number theory
            r'\\(?:Re|Im)\b',                               # real/imaginary parts
        ),
        "derivatives and calculus": (
            r'\\(?:nabla|partial|grad|div|curl)\b',         # vector calculus
            r'\\frac\{d[^}]*\}\{d[^}]*\}',                  # dy/dx
            r'\\frac\{\\partial[^}]*\}\{\\partial[^}]*\}',  # partial derivatives
            r'[a-zA-Z][a-zA-Z0-9]*\'\'*',                   # f', f''
            r'\\dot\{[^}]+\}',                              # \dot{x}
            r'\\ddot\{[^}]+\}',                             # \ddot{x}
        ),
        "greek letters and symbols": (
            r'\\(?:alpha|beta|gamma|delta|epsilon|varepsilon|zeta|eta|theta|vartheta|iota|kappa|lambda|mu|nu|xi|omicron|pi|varpi|rho|varrho|sigma|varsigma|tau|upsilon|phi|varphi|chi|psi|omega|Gamma|Delta|Theta|Lambda|Xi|Pi|Sigma|Upsilon|Phi|Psi|Omega)\b',
            r'\\(?:infty|pi|pm|mp|cdot|times|div|ast|star|circ|bullet|oplus|ominus|otimes|oslash|odot|bigcirc)\b',
            r'\\(?:hbar|ell|wp|partial|nabla|Box|triangle|clubsuit|diamondsuit|heartsuit|spadesuit)\b',
        ),
        "relations and inequalities": (
            r'\\(?:leq|geq|ll|gg|neq|equiv|approx|cong|simeq|sim|propto|parallel|perp|mid|nmid)\b',
            r'\\(?:subset|supset|subseteq|supseteq|subsetneq|supsetneq|in|notin|ni|owns)\b',
            r'\\(?:cup|cap|setminus|bigcup|bigcap|uplus|sqcup|sqcap|wedge|vee|bigwedge|bigvee)\b',
            r'[<>]=?',                                       # <, >, <=, >=
            r'\\(?:leftarrow|rightarrow|leftrightarrow|Leftarrow|Rightarrow|Leftrightarrow|mapsto|hookleftarrow|hookrightarrow)\b',
        ),
        "number systems and sets": (
            r'\\mathbb\{[A-Z]\}',                           # blackboard bold
            r'\\mathcal\{[A-Z]\}',                          # calligraphic
            r'\\mathfrak\{[a-zA-Z]\}',                      # fraktur
            r'\{[^}]*\}',                                   # {1,2,3}, {x | x > 0}
            r'\\(?:emptyset|varnothing)\b',                 # empty set
        ),
        "brackets and delimiters": (
            r'\\left[\(\[\{|].*?\\right[\)\]\}|]',          # \left( ... \right)
            r'\\(?:langle|rangle|lceil|rceil|lfloor|rfloor)\b', # angle brackets, ceiling, floor
            r'\\(?:big|Big|bigg|Bigg)[lr]?[\(\[\{|]',       # sized delimiters
        ),
        "coordinates and vectors": (
            r'\([^)]*[-+]?\d+(?:\.\d+)?[^)]*\)',            # (2, -1)
            r'\[[^\]]*[-+]?\d+(?:\.\d+)?[^\]]*\]',          # [0,1]
            r'\\(?:vec|overrightarrow|overline|underline|hat|tilde|bar|dot|ddot)\{[^}]+\}', # vector notation
            r'\\(?:binom|choose)\{[^}]+\}\{[^}]+\}',        # binomial coefficients
        ),
        "units and measurements": (
            r'\d+\.?\d*\s*(?:mm|cm|m|km|in|ft|yd|mi|kg|g|lb|oz|s|min|h|Hz|¬∞|rad|sr|mol|cd|A|V|W|J|N|Pa|¬∞C|¬∞F|K)\b',
            r'\d+\.?\d*\s*¬∞\s*\w*',                         # number followed by a degree sign
            r'\d+\.?\d*\s*\%',                              # percentages
        ),
        "numbers": (
            r'[-+]?\d+(?:\.\d+)?[eE][-+]?\d+',              # scientific notation
            r'[-+]?\d*\.\d+',                               # decimals
            r'[-+]?\d+',                                    # integers
        ),
        "definitions and equations": (
            r'[a-zA-Z][a-zA-Z0-9]*\([^)]*\)\s*=\s*[^,\n]+', # f(x) = ...
            r'[a-zA-Z][a-zA-Z0-9]*\s*=\s*[^,\n]+',         # x = ...
            r'(?:Area|Volume|Surface\s*Area|Perimeter|Circumference|Diameter|Radius)\s*=\s*[^,\n]+',
            r'(?:Domain|Range|Codomain)\s*[:=]\s*[^,\n]+',  # domain/range specifications
        ),
        "logic and proofs": (
            r'\\(?:forall|exists|nexists|therefore|because|implies|iff|land|lor|lnot|top|bot)\b',
            r'\\(?:models|vdash|dashv|vDash|Vdash|nvdash|nVdash|nvDash|nVDash)\b',
            r'\\(?:square|blacksquare|qed|QED)\b',          # end-of-proof symbols
        ),
        "probability and statistics": (
            r'\\(?:Pr|P|E|Var|Cov|Corr)\b',                # probability operators
            r'\\(?:mathbb\{E\}|mathbb\{P\}|mathbb\{V\})\b', # expectation, probability, variance
            r'\\(?:sim|stackrel\{d\}\{=\}|stackrel\{p\}\{\\to\})\b', # distributions, convergence
        ),
        "spacing and formatting": (
            r'\\(?:quad|qquad|,|;|:|\!|\\|\s)\b',           # spacing commands
            r'\\(?:text|mathrm|mathit|mathbf|mathsf|mathtt|mathcal|mathfrak|mathbb)\{[^}]+\}', # text formatting
        ),
    }

    # Lazy scan: stops at the first pattern that matches anywhere in the answer
    return any(
        re.search(pattern, answer, re.IGNORECASE)
        for family in pattern_families.values()
        for pattern in family
    )

def get_default_mathematical_answer(question):
    """
    Generate a default mathematical answer based on the question type.

    The question is lower-cased and scanned for keywords; the first rule that
    matches (in the order written below) decides which placeholder is returned.

    Args:
        question (str): The original question. None or an empty string is
            treated as a question without any recognizable keyword.
    Returns:
        str: A default mathematical expression
    """
    question_lower = (question or "").lower()

    if 'coordinate' in question_lower or 'point' in question_lower:
        return "$(0, 0)$"

    # Match "angle" only at the start of a word: a plain substring test would
    # also fire on "triangle" and "rectangle" and hijack area/length questions.
    if re.search(r'\bangle', question_lower) or 'degree' in question_lower:
        return "$0^\\circ$"

    if 'area' in question_lower:
        # "surface area" contains "area", so it has to be told apart here;
        # otherwise the surface-area default could never be chosen for it.
        if 'surface' in question_lower:
            return "$\\text{Surface Area} = 0 \\text{ cm}^2$"
        return "$\\text{Area} = 0 \\text{ cm}^2$"

    if 'volume' in question_lower:
        return "$\\text{Volume} = 0 \\text{ cm}^3$"
    if 'surface' in question_lower:
        return "$\\text{Surface Area} = 0 \\text{ cm}^2$"
    if 'domain' in question_lower:
        return "$\\text{Domain: } [0, 0]$"
    if 'range' in question_lower:
        return "$\\text{Range: } [0, 0]$"
    if 'function' in question_lower:
        return "$f(x) = 0$"
    if 'distance' in question_lower or 'length' in question_lower:
        return "$0 \\text{ units}$"

    # No keyword recognized: fall back to a bare zero.
    return "$0$"

In [None]:
# Problem numbers to process, in ascending order. The selection is written as
# inclusive (first, last) runs of consecutive numbers and expanded into a flat
# list of ints; numbers that fall between two runs are deliberately left out.
target_numbers = [
    number
    for first, last in (
        (327, 338), (341, 428), (431, 439), (441, 448), (450, 465),
        (467, 475), (481, 491), (493, 495), (497, 497), (500, 506),
        (508, 515), (519, 520), (522, 522), (529, 543), (547, 547),
        (549, 549), (556, 557), (568, 571), (573, 573), (578, 578),
        (580, 580), (588, 588), (591, 593), (595, 596), (599, 602),
        (604, 606), (608, 608), (610, 610), (619, 619), (623, 750),
        (765, 767), (770, 771), (775, 776),
    )
    for number in range(first, last + 1)
]

In [None]:
if __name__ == "__main__":
    # Batch driver: for every number in target_numbers, read the problem's
    # predicates and question, pass them (with the image path) to
    # validate_and_retry_if_needed, then write a full report file and a file
    # holding only the extracted answer.

    # Input directories
    image_dir = "image"
    predicates_dir = "predicates_output"
    questions_dir = "questions"
    
    # Output directories
    reasoning_output_dir = "/kaggle/working/reasoning_output_with_predicates"
    answer_literal_dir = "/kaggle/working/answer_literal_qwen2_5_vl_predicates"
    os.makedirs(reasoning_output_dir, exist_ok=True)
    os.makedirs(answer_literal_dir, exist_ok=True)
    
    # Horizontal rule used between the sections of the report file.
    separator = "=" * 100
    
    # Iterate over the problem numbers listed in target_numbers.
    # For a quick smoke test, loop over a slice such as target_numbers[:5].
    for num in tqdm(target_numbers):
        # File names are the problem number zero-padded to at least 3 digits.
        num_str = str(num).zfill(3)
        
        # Paths to input files
        image_path = os.path.join(image_dir, f"{num_str}.png")
        predicates_path = os.path.join(predicates_dir, f"{num_str}.txt")
        ques_path = os.path.join(questions_dir, f"{num_str}.txt")
        
        # Check if all required files exist
        if not all(os.path.exists(path) for path in [image_path, predicates_path, ques_path]):
            print(f"Skipping problem {num_str}: Missing input files")
            continue
        
        # Read inputs. The encoding is explicit because the text holds
        # mathematical notation and open()'s default depends on the locale.
        try:
            with open(predicates_path, "r", encoding="utf-8") as f:
                predicates = f.read().strip()
            with open(ques_path, "r", encoding="utf-8") as f:
                question = f.read().strip()
        except Exception as e:
            print(f"Error reading files for problem {num_str}: {e}")
            continue
        
        # Solve with validation and retry mechanism. The helper is given the
        # image path rather than a loaded image and returns a pair: the text
        # reported under "FINAL RESPONSE" and the extracted answer.
        simple_content, answer_letter = validate_and_retry_if_needed(image_path, predicates, question)
        
        # Build the reasoning file content
        reasoning_lines = [
            separator,
            "PROBLEM DETAILS:",
            separator,
            f"IMAGE: {image_path}",
            f"PREDICATES: {predicates_path}",
            "",
            f"PREDICATES CONTENT:\n{predicates}",
            "",
            f"QUESTION:\n{question}",
            "",
            separator,
            "FINAL RESPONSE:",
            separator,
            simple_content,
            "",
            separator,
            f"EXTRACTED MATHEMATICAL ANSWER: {answer_letter}",
            separator,
        ]
        reasoning_output = "\n".join(reasoning_lines)
        
        # Write reasoning output to file
        reasoning_out_path = os.path.join(reasoning_output_dir, f"{num_str}.txt")
        with open(reasoning_out_path, "w", encoding="utf-8") as f:
            f.write(reasoning_output)
        
        # Write just the extracted answer to a separate file
        letter_out_path = os.path.join(answer_literal_dir, f"{num_str}.txt")
        with open(letter_out_path, "w", encoding="utf-8") as f:
            f.write(answer_letter)
        
        print(f"Problem {num_str}: Answer = {answer_letter}")