In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Enumerate the Kaggle input tree and print the full path of every file found.
for folder, _subfolders, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
%%capture
# Install Unsloth with pip; the %%capture cell magic above hides pip's output.
# NOTE(review): the version is not pinned, so a rerun may install a different release.
!pip install unsloth
# Also get the latest nightly Unsloth if you want!
# !pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [None]:
from unsloth import FastLanguageModel  # Unsloth loader for text LLMs (FastVisionModel is its vision-model sibling)
import torch

# Loader settings, kept as module-level names so later cells can reuse them.
load_in_4bit = True    # 4-bit quantization to reduce memory usage; may be set to False
max_seq_length = 2048  # free to choose; the Unsloth template states RoPE scaling is handled internally

# NOTE(review): the generation cell requests up to 15000 new tokens, far more
# than max_seq_length — confirm Unsloth handles that as intended.
_load_kwargs = dict(
    model_name="unsloth/Qwen3-8B-unsloth-bnb-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=load_in_4bit,
    # token="hf_...",  # only needed for gated models such as meta-llama/Llama-2-7b-hf
)

# Returns the model together with its matching tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(**_load_kwargs)

In [None]:
# Wrap the loaded model with LoRA adapters through Unsloth's PEFT helper.
# NOTE(review): no training step is visible in this notebook — confirm the
# adapters are needed for the inference-only flow that follows. The cell also
# rebinds `model`, so running it twice wraps an already wrapped model.
_attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
_mlp_projections = ["gate_proj", "up_proj", "down_proj"]

model = FastLanguageModel.get_peft_model(
    model,
    r=16,                                   # LoRA rank: any number > 0 (8, 16, 32, 64, 128 suggested)
    target_modules=_attention_projections + _mlp_projections,
    lora_alpha=16,
    lora_dropout=0,                         # any value is supported, 0 is the optimized setting
    bias="none",                            # any value is supported, "none" is the optimized setting
    use_gradient_checkpointing="unsloth",   # True or "unsloth"; the latter is advertised as using less VRAM
    random_state=3407,
    use_rslora=False,                       # rank-stabilized LoRA is supported but switched off
    loftq_config=None,                      # LoftQ is supported but not configured
)

In [None]:
import os
import re
from tqdm import tqdm
import torch

In [None]:
# Switch the model to evaluation mode before any generation is run.
# NOTE(review): Unsloth documents FastLanguageModel.for_inference(model) for
# inference-only use — confirm whether it should be called here as well.
model.eval()  # set to eval mode

def solve_geometry_problem(predicates, question, choices, enable_thinking=True):
    """
    Solve a geometry multiple-choice problem with the loaded Qwen3 model.

    Builds one user prompt from the inputs, renders it with the tokenizer's
    chat template, samples a completion and splits the completion into the
    model's reasoning and its final answer text. Relies on the module-level
    `model` and `tokenizer` objects.

    Args:
        predicates (str): Geometric constraints from image
        question (str): Question to solve
        choices (str): Multiple choice options (A, B, C, D)
        enable_thinking (bool): Enable thinking mode for step-by-step reasoning

    Returns:
        dict: {'thinking_content': str, 'content': str}. 'thinking_content' is
        the text found between <think> and </think> ("" when thinking is
        disabled or no closing tag was produced); 'content' is the answer text.
        When no closing </think> is found the whole output is returned as
        'content'.
    """

    # Enhanced prompt to force choice selection
    prompt = f"""You are an expert AI mathematician specializing in geometry. Your task is to solve the following geometric problem using the provided predicates through systematic reasoning and theorem application.

GIVEN GEOMETRIC PREDICATES:
{predicates}

QUESTION:
{question}

ANSWER CHOICES:
{choices}

YOUR TASK:
Provide a complete step-by-step solution following the structured approach below, then select the correct answer choice.

STEP-BY-STEP SOLUTION PROCESS:

STEP 1: PREDICATE ANALYSIS AND SETUP
- Parse and categorize the given predicates into:
  * Geometric shapes (points, lines, circles, triangles, etc.)
  * Measurements and equalities (lengths, angles, areas)
  * Relationships (perpendicular, parallel, congruent, etc.)
  * Positioning (points on lines/circles, intersections, etc.)
- Identify what specific value or measurement the question is asking for
- Note any special geometric constructions or theorems that might apply

STEP 2: CONSTRAINT SYNTHESIS
- Combine related predicates to understand the complete geometric picture
- Identify key relationships that will be useful for solving
- Look for:
  * Equal lengths or angles that can be substituted
  * Perpendicular relationships that create right triangles
  * Circle properties (radii, chords, central/inscribed angles)
  * Congruent or similar triangles
  * Theorem applications (Pythagorean, inscribed angle, etc.)

STEP 3: SOLUTION STRATEGY
- Based on the predicates and question, determine the most direct solution path
- Identify which geometric theorems, properties, or formulas to apply
- Plan the sequence of logical steps needed to reach the answer

STEP 4: MATHEMATICAL DERIVATION
- Execute your solution strategy step by step
- Show all calculations clearly with proper mathematical notation
- Apply geometric theorems and properties systematically
- Use the relationships established in the predicates
- Substitute known values and solve for unknowns

STEP 5: VERIFICATION AND ANSWER SELECTION
- Verify your calculated result makes geometric sense
- Compare your result with the provided answer choices
- Select the choice that best matches your calculated answer
- If no exact match, select the closest reasonable option

GEOMETRIC REASONING GUIDANCE:
- Consider all relevant geometric theorems and properties
- Apply circle, triangle, quadrilateral, and angle theorems as appropriate
- Look for relationships between shapes, measurements, and positions
- Use both basic and advanced geometric principles as needed

PREDICATE USAGE GUIDANCE:
- Interpret predicates based on their geometric meaning and context
- Combine multiple predicates to understand complex relationships
- Consider both direct and derived information from predicate combinations

CRITICAL INSTRUCTIONS:
1. **USE THE PREDICATES SYSTEMATICALLY** - Every predicate provides important information
2. **APPLY RELEVANT GEOMETRIC KNOWLEDGE** - Use any geometric theorems, properties, or principles that help solve the problem
3. **REASON FLEXIBLY** - Adapt your approach based on the specific problem and predicates
4. **SHOW ALL WORK** - Make your reasoning clear and mathematical
5. **BE PRECISE** - Use exact values when possible, approximate only when necessary

⚠️ CRITICAL OUTPUT FORMAT REQUIREMENT ⚠️
YOU MUST END YOUR RESPONSE WITH EXACTLY ONE OF THESE FOUR LINES:
Final Answer: A
Final Answer: B
Final Answer: C
Final Answer: D

❌ ABSOLUTELY FORBIDDEN - DO NOT USE:
- "The final answer is $\\boxed{{14}}$"
- "The final answer is $\\boxed{{A}}$"
- "$\\boxed{{A}}$"
- "\\boxed{{A}}"
- "(A)"
- "A is correct."
- "Final Answer: The answer is A"
- Any LaTeX formatting
- Any mathematical notation
- Any additional text after the letter

✅ REQUIRED FORMAT EXAMPLES:
If you determine the answer is choice A: "Final Answer: A"
If you determine the answer is choice B: "Final Answer: B"
If you determine the answer is choice C: "Final Answer: C"
If you determine the answer is choice D: "Final Answer: D"

IMPORTANT: Your response must end with exactly "Final Answer: [SINGLE LETTER]" - nothing else on that line. Do not include any boxed notation, LaTeX, or mathematical formatting in your final line.

Begin your analysis now and remember to end with the exact required format.
"""

    # Single-turn conversation rendered through the chat template; the
    # enable_thinking flag is forwarded to the template.
    messages = [
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=enable_thinking
    )

    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # Only temperature and top_p differ between the two modes; every other
    # generation argument is shared, so there is a single generate() call.
    if enable_thinking:
        # Thinking mode: Temperature=0.6, TopP=0.95
        sampling = {"temperature": 0.6, "top_p": 0.95}
    else:
        # Non-thinking mode: Temperature=0.7, TopP=0.8
        sampling = {"temperature": 0.7, "top_p": 0.8}

    with torch.no_grad():
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=15000,    # upper bound on generated tokens
            top_k=20,
            min_p=0.0,
            do_sample=True,
            repetition_penalty=1.1,  # To reduce repetitions
            **sampling
        )
    torch.cuda.empty_cache()

    # Keep only the newly generated tokens (drop the echoed prompt).
    prompt_length = len(model_inputs.input_ids[0])
    output_ids = generated_ids[0][prompt_length:].tolist()
    full_output = tokenizer.decode(output_ids, skip_special_tokens=True).strip("\n")

    if not enable_thinking:
        # No thinking mode, entire output is content
        return {
            'thinking_content': "",
            'content': full_output
        }

    # Default when no reasoning block can be isolated: everything is content.
    thinking_content = ""
    content = full_output

    if "<think>" in full_output and "</think>" in full_output:
        # Text-based split: reasoning sits between the first <think> and the
        # first </think> after it. If the only </think> precedes <think>, the
        # remainder is treated as reasoning and the content is empty.
        after_open = full_output.split("<think>", 1)[1]
        reasoning, closing_tag, answer_text = after_open.partition("</think>")
        thinking_content = reasoning.strip()
        content = answer_text.strip() if closing_tag else ""
    else:
        # Token-based fallback, e.g. when only the closing tag was generated.
        try:
            think_end_token = tokenizer.convert_tokens_to_ids("</think>")
            if think_end_token in output_ids:
                # Index just past the LAST </think> token.
                index = len(output_ids) - output_ids[::-1].index(think_end_token)
                thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
                content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
                # The slice above includes the closing-tag token itself; if the
                # decoder kept it as text, drop it so the tag does not leak
                # into the reasoning.
                if thinking_content.endswith("</think>"):
                    thinking_content = thinking_content[:-len("</think>")].strip("\n")
        except (ValueError, KeyError):
            # Fallback: treat entire output as content
            thinking_content = ""
            content = full_output

    return {
        'thinking_content': thinking_content,
        'content': content
    }

In [None]:
def extract_answer_letter(content):
    """
    Extract the multiple-choice answer letter from a model's output.

    Patterns are tried in order of preference (LaTeX boxes, "Final Answer:",
    "Answer:", sentence forms, choice/option wording, concluding words). For
    the first pattern that occurs at all, the LAST occurrence in the text is
    used, because a response is expected to end with its committed answer.

    Keywords are matched case-insensitively. A lowercase letter is accepted
    only where it is clearly delimited (boxed, markdown emphasis, parentheses)
    or follows the "Final Answer:" marker; free-text patterns such as
    "Thus, X" require an uppercase letter so that the English article "a" is
    not mistaken for choice A.

    Args:
        content (str): The model's output content.
    Returns:
        str: The extracted answer letter (A, B, C, or D), or an empty string
        if nothing is found or `content` is not a non-empty string.
    """
    if not isinstance(content, str) or not content.strip():
        return ""

    ci = re.IGNORECASE
    # (regex, flags). "(?i:...)" makes only the keyword case-insensitive while
    # the captured letter stays uppercase-only. Patterns that could never win
    # (fully subsumed by an earlier entry, or capturing digits only) are omitted.
    patterns = [
        # LaTeX box patterns; also cover '$\boxed{A}$' and 'The final answer is \boxed{A}'
        (r"\\boxed\{([A-D])\}", ci),                    # \boxed{A}
        (r"\\boxed\{\\text\{([A-D])\}\}", ci),          # \boxed{\text{A}}

        # Standard patterns
        (r"Final Answer:\s*([A-D])\b", ci),             # Final Answer: A
        (r"Final Answer:\s*\*\*([A-D])\*\*", ci),       # Final Answer: **A**
        (r"(?i:Answer:)\s*([A-D])\b", 0),               # Answer: A  /  Answer: A.
        (r"Answer:\s*\*\*([A-D])\*\*", ci),             # Answer: **A**
        (r"Answer:\s*\*([A-D])\*", ci),                 # Answer: *A*
        (r"Answer:\s*_([A-D])_", ci),                   # Answer: _A_
        (r"Answer:\s*\(([A-D])\)", ci),                 # Answer: (A)

        # Sentence-based patterns
        (r"(?i:The answer is)\s*([A-D])\b", 0),         # The answer is A
        (r"(?i:The correct answer is)\s*([A-D])\b", 0), # The correct answer is A
        (r"\b([A-D])\s*(?i:is the correct)", 0),        # A is the correct

        # Choice/option patterns
        (r"(?i:choice)\s*([A-D])\b", 0),                # choice A
        (r"(?i:option)\s*([A-D])\b", 0),                # option A
        (r"(?i:select)\s*([A-D])\b", 0),                # select A
        (r"(?i:choose)\s*([A-D])\b", 0),                # choose A

        # Concluding word patterns
        (r"(?i:Therefore),?\s*([A-D])\b", 0),           # Therefore, A
        (r"(?i:Thus),?\s*([A-D])\b", 0),                # Thus, A
        (r"(?i:Hence),?\s*([A-D])\b", 0),               # Hence, A
    ]

    for pattern, flags in patterns:
        found = re.findall(pattern, content, flags)
        if found:
            # Last occurrence wins: it is the most recent statement of the answer.
            return found[-1].upper()

    # No explicit pattern matched: look for an isolated letter near the end.
    lines = content.strip().split('\n')
    for line in reversed(lines[-10:]):  # Check the last 10 lines
        line = line.strip()
        if line in ['A', 'B', 'C', 'D']:
            return line
        # Accept a line that mentions exactly one stand-alone answer letter
        letters_found = re.findall(r'\b([A-D])\b', line)
        if len(letters_found) == 1:
            return letters_found[0]

    # Last resort: the final stand-alone A-D anywhere in the text.
    all_letters = re.findall(r'\b([A-D])\b', content)
    if all_letters:
        return all_letters[-1]

    return ""

# Additional helper function to map numeric answers to letters if needed
def map_numeric_to_letter(numeric_value, answer_choices, tolerance=0.01):
    """
    Map a numeric value to the corresponding letter choice.

    Choice values may be numbers or numeric strings (e.g. "14" as read from a
    text file); values that cannot be converted to float are skipped.

    Args:
        numeric_value (str): The numeric value extracted
        answer_choices (dict): Dictionary mapping letters to values
        tolerance (float): Maximum absolute difference (exclusive) for a
            choice to count as a match. Defaults to 0.01.
    Returns:
        str: The first letter whose value is within `tolerance` of
        `numeric_value`, or empty string if no match (including when
        `numeric_value` is not numeric or `answer_choices` is empty/None)
    """
    try:
        target = float(numeric_value)
    except (ValueError, TypeError):
        return ""

    if not answer_choices:
        return ""

    for letter, choice_value in answer_choices.items():
        try:
            candidate = float(choice_value)
        except (ValueError, TypeError):
            # Non-numeric choice (e.g. an expression or text): cannot match.
            continue
        if abs(candidate - target) < tolerance:
            return letter
    return ""

In [None]:
def validate_and_retry_if_needed(predicates, question, choices, max_retries=3):
    """
    Try to get a valid answer letter, with retries if needed.

    Calls solve_geometry_problem up to `max_retries` times and returns as soon
    as extract_answer_letter yields A, B, C or D. If every attempt fails, one
    more generation is run with a shorter, more direct prompt; its reasoning
    block is separated from the answer text before extraction. If that also
    fails, the letter defaults to 'A' and a message is printed.

    Args:
        predicates (str): Geometric constraints for the problem.
        question (str): Question text.
        choices (str): Answer choices text.
        max_retries (int): Number of regular attempts before the direct prompt.

    Returns:
        tuple: (content, thinking_content, answer_letter); answer_letter is
        always one of 'A', 'B', 'C', 'D'.
    """
    valid_letters = ('A', 'B', 'C', 'D')

    for attempt in range(max_retries):
        result = solve_geometry_problem(predicates, question, choices, enable_thinking=True)
        content = result['content']
        thinking_content = result['thinking_content']
        answer_letter = extract_answer_letter(content)

        if answer_letter in valid_letters:
            return content, thinking_content, answer_letter

        print(f"Attempt {attempt + 1} failed to extract valid answer letter")

    # If all attempts fail, try one more time with a very direct prompt
    direct_prompt = f"""Given the following geometry problem, you must choose exactly one answer from A, B, C, or D.

CONSTRAINTS: {predicates}
QUESTION: {question}
CHOICES: {choices}

⚠️ CRITICAL OUTPUT FORMAT REQUIREMENT ⚠️
YOU MUST END YOUR RESPONSE WITH EXACTLY ONE OF THESE FOUR LINES:
Final Answer: A
Final Answer: B
Final Answer: C
Final Answer: D

❌ ABSOLUTELY FORBIDDEN - DO NOT USE:
- "The final answer is $\\boxed{{14}}$"
- "The final answer is $\\boxed{{A}}$"
- "$\\boxed{{A}}$"
- "\\boxed{{A}}"
- "(A)"
- "A is correct."
- "Final Answer: The answer is A"
- Any LaTeX formatting
- Any mathematical notation
- Any additional text after the letter

✅ REQUIRED FORMAT EXAMPLES:
If you determine the answer is choice A: "Final Answer: A"
If you determine the answer is choice B: "Final Answer: B"
If you determine the answer is choice C: "Final Answer: C"
If you determine the answer is choice D: "Final Answer: D"

IMPORTANT: Your response must end with exactly "Final Answer: [SINGLE LETTER]" - nothing else on that line. Do not include any boxed notation, LaTeX, or mathematical formatting in your final line.

Begin your analysis now and remember to end with the exact required format.
"""

    messages = [{"role": "user", "content": direct_prompt}]
    text = tokenizer.apply_chat_template(messages,
                                         tokenize=False,
                                         add_generation_prompt=True,
                                         enable_thinking=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    with torch.no_grad():
        # Use thinking mode parameters for the final retry
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=10000,
            temperature=0.6,
            top_p=0.95,
            top_k=20,
            min_p=0.0,
            do_sample=True,
            repetition_penalty=1.1
        )
    # Release cached GPU memory, as solve_geometry_problem does after generating.
    torch.cuda.empty_cache()

    # Keep only the newly generated tokens (drop the echoed prompt).
    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
    decoded = tokenizer.decode(output_ids, skip_special_tokens=True).strip()

    # Thinking is enabled here as well, so separate the reasoning from the
    # answer text; otherwise letters mentioned while reasoning could be picked
    # up as the answer. Split on the LAST closing tag.
    reasoning, closing_tag, answer_text = decoded.rpartition("</think>")
    if closing_tag:
        final_thinking = reasoning.strip()
        if final_thinking.startswith("<think>"):
            final_thinking = final_thinking[len("<think>"):].strip()
        final_content = answer_text.strip()
    else:
        # No closing tag: nothing can be isolated, use the whole output.
        final_thinking = ""
        final_content = decoded

    final_letter = extract_answer_letter(final_content)
    if final_letter not in valid_letters:
        # Default to A if still fails, but say so instead of doing it silently.
        print("Direct prompt also failed to produce a valid answer letter; defaulting to 'A'")
        final_letter = 'A'

    return final_content, final_thinking, final_letter

In [None]:
if __name__ == "__main__":
    # Input directories (relative to the current working directory); each is
    # expected to hold one "<id>.txt" file per problem.
    predicates_dir = "predicates_output"
    questions_dir = "questions"
    choices_dir = "choices"

    # Output directories
    reasoning_output_dir = "/kaggle/working/reasoning_output"
    answer_literal_dir = "/kaggle/working/answer_literal_qwen"
    os.makedirs(reasoning_output_dir, exist_ok=True)
    os.makedirs(answer_literal_dir, exist_ok=True)

    # All text files are read and written as UTF-8 so results do not depend on
    # the platform's default encoding.
    text_encoding = "utf-8"
    separator = "=" * 100

    # Process problem ids 50..99; ids are zero-padded to three digits to form
    # the file names (e.g. "050.txt").
    for num in tqdm(range(50, 100)):

        num_str = str(num).zfill(3)

        # Paths to input files
        pred_path = os.path.join(predicates_dir, f"{num_str}.txt")
        ques_path = os.path.join(questions_dir, f"{num_str}.txt")
        choice_path = os.path.join(choices_dir, f"{num_str}.txt")

        # Check if all required files exist
        if not all(os.path.exists(path) for path in [pred_path, ques_path, choice_path]):
            print(f"Skipping problem {num_str}: Missing input files")
            continue

        # Read inputs; a problem whose files cannot be read is skipped.
        try:
            with open(pred_path, "r", encoding=text_encoding) as f:
                predicates = f.read().strip()
            with open(ques_path, "r", encoding=text_encoding) as f:
                question = f.read().strip()
            with open(choice_path, "r", encoding=text_encoding) as f:
                choices = f.read().strip()
        except Exception as e:
            print(f"Error reading files for problem {num_str}: {e}")
            continue

        # Solve with validation and retry mechanism
        simple_content, thinking_content, answer_letter = validate_and_retry_if_needed(predicates, question, choices)

        # Build the reasoning file content with thinking content included
        reasoning_lines = [
            separator,
            "PROBLEM DETAILS:",
            separator,
            f"PREDICATES:\n{predicates}",
            "",
            f"QUESTION:\n{question}",
            "",
            f"CHOICES:\n{choices}",
            "",
        ]

        # Add thinking content if available
        if thinking_content.strip():
            reasoning_lines += [
                separator,
                "MODEL'S INTERNAL REASONING (THINKING MODE):",
                separator,
                thinking_content,
                "",
            ]

        reasoning_lines += [
            separator,
            "FINAL RESPONSE:",
            separator,
            simple_content,
            "",
            separator,
            f"EXTRACTED ANSWER: {answer_letter}",
            separator,
        ]

        reasoning_output = "\n".join(reasoning_lines)

        # Write reasoning output to file
        reasoning_out_path = os.path.join(reasoning_output_dir, f"{num_str}.txt")
        with open(reasoning_out_path, "w", encoding=text_encoding) as f:
            f.write(reasoning_output)

        # Validate answer letter (safety net in case the solver returns
        # something other than A-D).
        if not answer_letter or answer_letter not in ['A', 'B', 'C', 'D']:
            print(f"Warning: Invalid answer letter '{answer_letter}' for problem {num_str}")
            print(f"Content: {simple_content[:200]}...")
            # Force a default answer rather than empty
            answer_letter = 'A'  # Default fallback

        # Write just the answer letter to a separate file
        letter_out_path = os.path.join(answer_literal_dir, f"{num_str}.txt")
        with open(letter_out_path, "w", encoding=text_encoding) as f:
            f.write(answer_letter)

        print(f"Problem {num_str}: Answer = {answer_letter}")