In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk /kaggle/input recursively and print the full path of every file found,
# so the available input files are visible in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
%%capture
# Install unsloth, which provides the FastVisionModel class imported later in this notebook.
# The %%capture cell magic keeps pip's output out of the notebook.
!pip install unsloth

In [None]:
from unsloth import FastVisionModel
import torch
import os
import re
from tqdm import tqdm
from PIL import Image
from transformers import TextStreamer

# Reference list of pre-quantized 4-bit vision checkpoints published under the
# "unsloth" namespace (advertised as faster to download and less prone to OOM).
# NOTE: nothing visible in this notebook reads this list; the checkpoint that is
# actually loaded is passed to from_pretrained as a separate string literal.
fourbit_models = [
    "unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit", # Llama 3.2 vision support
    "unsloth/Llama-3.2-11B-Vision-bnb-4bit",
    "unsloth/Llama-3.2-90B-Vision-Instruct-bnb-4bit", # Can fit in a 80GB card!
    "unsloth/Llama-3.2-90B-Vision-bnb-4bit",
    "unsloth/Pixtral-12B-2409-bnb-4bit",              # Pixtral fits in 16GB!
    "unsloth/Pixtral-12B-Base-2409-bnb-4bit",         # Pixtral base model
    "unsloth/Qwen2-VL-2B-Instruct-bnb-4bit",          # Qwen2 VL support
    "unsloth/Qwen2-VL-7B-Instruct-bnb-4bit",
    "unsloth/Qwen2-VL-72B-Instruct-bnb-4bit",
    "unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit",        # Latest Qwen2.5 VL
    "unsloth/llava-v1.6-mistral-7b-hf-bnb-4bit",      # Any Llava variant works!
    "unsloth/llava-1.5-7b-hf-bnb-4bit",
] # More models at https://huggingface.co/unsloth

# Load the 4-bit Qwen2.5-VL 7B Instruct checkpoint through unsloth.
# The second return value is bound to `tokenizer`, but later cells call it with
# both an image and text, so it acts as a combined image+text processor.
model, tokenizer = FastVisionModel.from_pretrained(
    "unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit",
    # "unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit",
    load_in_4bit = True, # Use 4bit to reduce memory use. False for 16bit LoRA.
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for long context
    device_map="balanced",  # Distribute across both T4 GPUs
    low_cpu_mem_usage=True,
    # NOTE(review): trust_remote_code presumably allows code shipped with the
    # checkpoint repository to run locally - confirm it is actually required.
    trust_remote_code=True,
    # Aggressive memory limits for T4x2
    max_memory={
        0: "13GB",  # GPU 0 - leave some buffer
        1: "13GB",  # GPU 1 - leave some buffer
        "cpu": "20GB"
    },
    # NOTE(review): the offload options below are presumably forwarded to the
    # underlying Hugging Face loader for weights that exceed max_memory - confirm.
    offload_folder="./offload_temp",
    offload_state_dict=True,
)

In [None]:
# Switch the loaded model to unsloth's inference configuration before any generation
FastVisionModel.for_inference(model)
model.eval()  # put the module in eval mode (no training-time behaviour)

def generate_predicates_from_image(image_path, question):
    """
    Ask the loaded vision-language model to describe a geometry diagram as predicates.

    The image is opened and converted to RGB, paired with a long instruction
    prompt that embeds ``question``, and sent to ``model.generate`` with sampling
    enabled (temperature 0.8, min_p 0.1), so repeated calls may return different
    text for the same inputs.

    Args:
        image_path (str): Path to the geometry problem image
        question (str): Question text inserted into the prompt

    Returns:
        dict: ``{'content': text}`` where ``text`` is the stripped, decoded model
            output. If the image cannot be opened, the error is printed (not
            raised) and ``text`` is the literal 'Error loading image'.
    """
    # Guard clause: without a readable image there is nothing to send to the model
    try:
        rgb_image = Image.open(image_path).convert('RGB')
    except Exception as load_error:
        print(f"Error loading image {image_path}: {load_error}")
        return {'content': 'Error loading image'}

    # Instruction prompt (no ground-truth dependency); only {question} is interpolated
    instructions = f"""You are an expert AI mathematician specializing in geometry. Your task is to analyze the geometric figure in the provided image and generate accurate geometric predicates (literals) that represent all the relationships, measurements, and properties shown in the diagram.

GEOMETRY PROBLEM IMAGE:
The image shows a geometric figure with various shapes, lines, angles, and measurements. Analyze this image carefully to understand all geometric relationships and constraints.

QUESTION:
{question}

YOUR TASK:
1. First, provide step-by-step reasoning showing your analysis process
2. Then, generate geometric predicates based on your analysis using the Guidelines below

STEP-BY-STEP ANALYSIS (Required):
Please follow this format for your reasoning:

STEP 1: IMAGE ANALYSIS
- Describe what geometric shapes, points, lines, and angles you can identify in the image
- List all visible measurements, labels, and annotations
- Note any special markings (right angle symbols, parallel marks, congruent marks, etc.)

STEP 2: RELATIONSHIP IDENTIFICATION
- Identify geometric relationships between elements (parallel, perpendicular, congruent, etc.)
- Determine what constraints and properties are implied by the figure
- Analyze how different elements connect and interact

STEP 3: MEASUREMENT ANALYSIS
- Extract all numerical values shown in the diagram
- Identify what these measurements represent (lengths, angles, areas, etc.)
- Determine how measurements relate to each other

STEP 4: QUESTION ANALYSIS
- Analyze what the question is asking for
- Identify which geometric properties are relevant to solving the problem
- Determine what information is needed to answer the question

STEP 5: PREDICATE PLANNING
- Plan which predicates are needed to represent the identified relationships
- Ensure predicates capture all visible information and constraints
- Consider completeness - are all important relationships captured?

GUIDELINES:
***Follow these predicates to represent diagram literals.

**GEOMETRIC SHAPES:**
- Point: Point(A), Point($)
- Line: Line(A,B), Line(m), Line($)
- Angle: Angle(A,B,C), Angle(A), Angle(1), Angle($)
- Triangle: Triangle(A,B,C), Triangle($), Triangle($1,$2,$3)
- Quadrilateral: Quadrilateral(A,B,C,D), Quadrilateral($)
- Parallelogram: Parallelogram(A,B,C,D), Parallelogram(1), Parallelogram($)
- Square: Square(A,B,C,D), Square(1), Square($)
- Rectangle: Rectangle(A,B,C,D), Rectangle(1), Rectangle($)
- Rhombus: Rhombus(A,B,C,D), Rhombus(1), Rhombus($)
- Trapezoid: Trapezoid(A,B,C,D), Trapezoid(1), Trapezoid($)
- Kite: Kite(A,B,C,D), Kite(1), Kite($)
- Polygon: Polygon($)
- Pentagon: Pentagon(A,B,C,D,E), Pentagon($)
- Hexagon: Hexagon(A,B,C,D,E,F), Hexagon($)
- Heptagon: Heptagon(A,B,C,D,E,F,G), Heptagon($)
- Octagon: Octagon(A,B,C,D,E,F,G,H), Octagon($)
- Circle: Circle(A), Circle(1), Circle($)
- Arc: Arc(A,B), Arc(A,B,C), Arc($)
- Sector: Sector(O,A,B), Sector($)
- Shape: Shape($) // For unknown shapes or regions

**UNARY GEOMETRIC ATTRIBUTES:**
- RightAngle: RightAngle(Angle($))
- Right: Right(Triangle($)) // Right triangle
- Isosceles: Isosceles(Polygon($)) // Isosceles polygon
- Equilateral: Equilateral(Polygon($)) // Equilateral polygon
- Regular: Regular(Polygon($))
- Red: Red(Shape($))
- Blue: Blue(Shape($))
- Green: Green(Shape($))
- Shaded: Shaded(Shape($))

**GEOMETRIC ATTRIBUTES:**
- AreaOf: AreaOf(A)
- PerimeterOf: PerimeterOf(A) // Perimeter of polygon A
- RadiusOf: RadiusOf(A)
- DiameterOf: DiameterOf(A)
- CircumferenceOf: CircumferenceOf(A) // Perimeter of circle A
- AltitudeOf: AltitudeOf(A) // Altitude of polygon A
- HypotenuseOf: HypotenuseOf(A) // Hypotenuse of triangle A
- SideOf: SideOf(A) // Side of square A
- WidthOf: WidthOf(A) // Width of quadrilateral A
- HeightOf: HeightOf(A) // Height of quadrilateral A
- LegOf: LegOf(A) // Leg of trapezoid A
- BaseOf: BaseOf(A) // Base of polygon A
- MedianOf: MedianOf(A) // Median of polygon A
- IntersectionOf: IntersectionOf(A,B) // Intersection of shapes A and B
- MeasureOf: MeasureOf(A) // Measure of angle A
- LengthOf: LengthOf(A) // Length of line A
- ScaleFactorOf: ScaleFactorOf(A,B) // Scale factor of shape A to shape B

**BINARY GEOMETRIC RELATIONS:**
- PointLiesOnLine: PointLiesOnLine(Point($),Line($1,$2))
- PointLiesOnCircle: PointLiesOnCircle(Point($),Circle($))
- Parallel: Parallel(Line($),Line($))
- Perpendicular: Perpendicular(Line($),Line($))
- IntersectAt: IntersectAt(Line($),Line($),Line($),Point($))
- BisectsAngle: BisectsAngle(Line($),Angle($))
- Congruent: Congruent(Polygon($),Polygon($))
- Similar: Similar(Polygon($),Polygon($))
- Tangent: Tangent(Line($),Circle($))
- Secant: Secant(Line($),Circle($))
- CircumscribedTo: CircumscribedTo(Shape($),Shape($))
- InscribedIn: InscribedIn(Shape($),Shape($))

**A-IsXOf-B GEOMETRIC RELATIONS:**
- IsMidpointOf: IsMidpointOf(Point($),Line($)) // Point A is midpoint of line B
- IsCentroidOf: IsCentroidOf(Point($),Shape($)) // Point A is centroid of shape B
- IsIncenterOf: IsIncenterOf(Point($),Shape($)) // Point A is incenter of shape B
- IsRadiusOf: IsRadiusOf(Line($),Circle($)) // Line A is radius of circle B
- IsDiameterOf: IsDiameterOf(Line($),Circle($)) // Line A is diameter of circle B
- IsMidsegmentOf: IsMidsegmentOf(Line($),Triangle($)) // Line A is midsegment of triangle B
- IsChordOf: IsChordOf(Line($),Circle($)) // Line A is chord of circle B
- IsSideOf: IsSideOf(Line($),Polygon($)) // Line A is side of polygon B
- IsHypotenuseOf: IsHypotenuseOf(Line($),Triangle($)) // Line A is hypotenuse of triangle B
- IsPerpendicularBisectorOf: IsPerpendicularBisectorOf(Line($),Triangle($)) // Line A is perpendicular bisector of triangle B
- IsAltitudeOf: IsAltitudeOf(Line($),Triangle($)) // Line A is altitude of triangle B
- IsMedianOf: IsMedianOf(Line($),Quadrilateral($)) // Line A is median of quadrilateral B
- IsBaseOf: IsBaseOf(Line($),Quadrilateral($)) // Line A is base of quadrilateral B
- IsDiagonalOf: IsDiagonalOf(Line($),Quadrilateral($)) // Line A is diagonal of quadrilateral B
- IsLegOf: IsLegOf(Line($),Trapezoid($)) // Line A is leg of trapezoid B

**NUMERICAL ATTRIBUTES AND RELATIONS:**
- SinOf: SinOf(Var)
- CosOf: CosOf(Var)
- TanOf: TanOf(Var)
- CotOf: CotOf(Var)
- HalfOf: HalfOf(Var)
- SquareOf: SquareOf(Var)
- SqrtOf: SqrtOf(Var)
- RatioOf: RatioOf(Var), RatioOf(Var1,Var2)
- SumOf: SumOf(Var1,Var2,...)
- AverageOf: AverageOf(Var1,Var2,...)
- Add: Add(Var1,Var2,...)
- Mul: Mul(Var1,Var2,...)
- Sub: Sub(Var1,Var2,...)
- Div: Div(Var1,Var2,...)
- Pow: Pow(Var1,Var2)
- Equals: Equals(Var1,Var2)
- Find: Find(Var) // Find the value of the variable
- UseTheorem: UseTheorem(A_B_C)

VARIABLE NAMING CONVENTIONS:
- Use capital letters for points: A, B, C, D, etc.
- Use lowercase letters for lines when not defined by points: m, n, l, etc.
- Use numbers for unnamed shapes: 1, 2, 3, etc.
- Use $ for generic variables: $, $1, $2, etc.
- Use descriptive names when appropriate: base, height, radius, etc.

CRITICAL INSTRUCTIONS:
1. Carefully examine the geometric figure in the image
2. Identify all points, lines, angles, shapes, and measurements shown
3. **MAKE EACH PREDICATE AS ATOMIC AS POSSIBLE** 
   – Decompose any complex or compound relationship into the simplest, individual geometric 
     statements (e.g., replace "Perpendicular(Line(A,B),Line(C,D))" with separate vector and dot-product or angle-equals-90° predicates)
4. Generate predicates that represent:
   - All geometric shapes present
   - All given measurements and their relationships
   - All geometric properties and constraints
   - Relationships between different elements
5. Always provide the step-by-step reasoning first
6. Then provide the predicates section with clear section header
7. Follow the Guidelines above - these predicates are crucial for representing diagram literals
8. Each predicate must be on a separate line
9. Do not include quotation marks, extra symbols, or explanatory text in predicates
10. Only output predicates in the exact format: PredicateName(arguments)
11. **IMPORTANT: Do NOT include predicates that directly state the final answer or solution**
12. **IMPORTANT: Do NOT include Find(...) predicates or any question-related predicates**
13. Include only the given information, constraints, and geometric relationships visible in the diagram
14. Represent all visible geometric relationships, not derived solutions
15. The predicates should provide sufficient information for another system to solve the problem, but not the solution itself

EXAMPLE OUTPUT FORMAT:
STEP-BY-STEP REASONING:

STEP 1: IMAGE ANALYSIS
- I can see a right triangle ABC with vertices labeled A, B, C
- Side AB is marked as 5 units
- Side BC is marked as 12 units
- There is a right angle symbol at vertex B

STEP 2: RELATIONSHIP IDENTIFICATION
- Triangle ABC is a right triangle with right angle at B
- Sides AB and BC are perpendicular
- Side AC is the hypotenuse

STEP 3: MEASUREMENT ANALYSIS
- AB = 5 units (given)
- BC = 12 units (given)
- AC length is what the question is asking for

STEP 4: QUESTION ANALYSIS
- The question asks for the length of AC
- This requires using the geometric relationships visible in the triangle
- The problem involves finding the hypotenuse of a right triangle

STEP 5: PREDICATE PLANNING
- Need to represent the triangle, right angle, measurements, and relationships
- Must include all visible geometric constraints and properties

GENERATED PREDICATES:
Triangle(A,B,C)
Point(A)
Point(B)
Point(C)
Line(A,B)
Line(B,C)
Line(A,C)
Angle(A,B,C)
RightAngle(Angle(A,B,C))
Right(Triangle(A,B,C))
Equals(LengthOf(Line(A,B)), 5)
Equals(LengthOf(Line(B,C)), 12)
Perpendicular(Line(A,B), Line(B,C))
IsHypotenuseOf(Line(A,C), Triangle(A,B,C))
IsSideOf(Line(A,B), Triangle(A,B,C))
IsSideOf(Line(B,C), Triangle(A,B,C))
IsSideOf(Line(A,C), Triangle(A,B,C))
Equals(SquareOf(LengthOf(Line(A,C))), Add(SquareOf(LengthOf(Line(A,B))), SquareOf(LengthOf(Line(B,C)))))

Generate your complete analysis and predicates now:
"""

    # Single user turn: an image placeholder followed by the instruction text
    user_turn = {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": instructions},
        ],
    }

    # Render the conversation through the chat template, ending with the generation prompt
    chat_text = tokenizer.apply_chat_template([user_turn], add_generation_prompt=True)

    # Combine image and rendered text into model inputs and move them to the GPU
    model_inputs = tokenizer(
        rgb_image,
        chat_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to("cuda")

    # Sampling configuration for generation
    generation_settings = dict(
        max_new_tokens=4000,
        temperature=0.8,
        min_p=0.1,
        use_cache=True,
        do_sample=True,
    )

    with torch.no_grad():
        generated = model.generate(**model_inputs, **generation_settings)

    # Release cached GPU memory before decoding
    torch.cuda.empty_cache()

    # Drop the echoed prompt tokens so only the newly generated text is decoded
    prompt_length = model_inputs.input_ids.shape[-1]
    new_tokens = generated[:, prompt_length:]
    decoded = tokenizer.batch_decode(new_tokens, skip_special_tokens=True)

    return {'content': decoded[0].strip()}

In [None]:
def extract_reasoning_and_predicates(content):
    """
    Split generated model output into its reasoning text and its predicate list.

    The text is scanned line by line (case-insensitively):
      * a line containing "STEP-BY-STEP REASONING", "STEP 1:" or "REASONING"
        opens (or continues) the reasoning section;
      * once reasoning has started, a line containing "GENERATED PREDICATES" or
        "PREDICATES:" switches to the predicates section; that header line
        itself is dropped.

    If neither section captured anything (for example the output has a
    predicates header but no reasoning marker), the text is split at the first
    predicates header instead. The split keeps the original letter case, so
    recovered predicates are not upper-cased.

    Args:
        content (str): Generated content containing both reasoning and predicates

    Returns:
        tuple: (reasoning_content, predicates_list) - the stripped reasoning
            text and the list produced by extract_predicates for the
            predicates section.
    """
    reasoning_lines = []
    predicate_lines = []

    reasoning_started = False
    predicates_started = False

    for line in content.split('\n'):
        line_upper = line.upper().strip()

        # Reasoning markers only count before the predicates section has begun
        if not predicates_started and (
            "STEP-BY-STEP REASONING" in line_upper
            or "STEP 1:" in line_upper
            or "REASONING" in line_upper
        ):
            reasoning_started = True
            reasoning_lines.append(line)
            continue

        # A predicates header is only honoured after some reasoning was seen
        if reasoning_started and (
            "GENERATED PREDICATES" in line_upper or "PREDICATES:" in line_upper
        ):
            predicates_started = True
            reasoning_started = False
            continue

        # Route ordinary lines to whichever section is currently open
        if predicates_started:
            predicate_lines.append(line)
        elif reasoning_started:
            reasoning_lines.append(line)

    reasoning_section = '\n'.join(reasoning_lines)
    predicates_section = '\n'.join(predicate_lines)

    # Fallback: nothing was captured, so split the raw text at the first header.
    # The header is located case-insensitively, but the ORIGINAL text is sliced;
    # splitting an upper-cased copy would corrupt predicate names.
    if not reasoning_section.strip() and not predicates_section.strip():
        for header in ("GENERATED PREDICATES", "PREDICATES:"):
            found = re.search(re.escape(header), content, flags=re.IGNORECASE)
            if found:
                reasoning_section = content[:found.start()]
                predicates_section = content[found.end():]
                break

    # Keep only the lines that look like predicates
    clean_predicates = extract_predicates(predicates_section)

    return reasoning_section.strip(), clean_predicates


def extract_predicates(predicates_content):
    """
    Pull the predicate-shaped lines out of a block of model output.

    Each line is trimmed and stripped of single and double quote characters.
    A line is kept only if it then starts with an uppercase letter, continues
    with letters only up to an opening parenthesis, and ends with a closing
    parenthesis. Everything else (blank lines, bullets, prose) is discarded.

    Args:
        predicates_content (str): The predicates section content

    Returns:
        list: Cleaned predicate strings, in their original order
    """
    is_predicate = re.compile(r'^[A-Z][a-zA-Z]*\(.*\)$').match

    # Trim, drop every quote character, then trim again in case quotes hid whitespace
    unquoted_lines = (
        raw.strip().replace('"', '').replace("'", "").strip()
        for raw in predicates_content.strip().split('\n')
    )

    # Empty strings never match the pattern, so blank lines fall out here as well
    return [text for text in unquoted_lines if is_predicate(text)]


def _deduplicate_predicates(predicates):
    """
    Drop repeated predicates, keeping the first occurrence of each in order.

    Two predicates count as the same when they are equal after trimming and
    collapsing every run of whitespace to a single space. The returned strings
    are the trimmed originals, not the whitespace-collapsed keys.

    Args:
        predicates (list): List of predicates that may contain duplicates

    Returns:
        list: Trimmed predicates with duplicates removed
    """
    # dict preserves insertion order, so setdefault keeps the first spelling per key
    first_seen = {}
    for entry in predicates:
        trimmed = entry.strip()
        key = re.sub(r'\s+', ' ', trimmed)
        first_seen.setdefault(key, trimmed)

    return list(first_seen.values())

In [None]:
if __name__ == "__main__":
    # Batch driver: for every problem number in the configured range, read the
    # question text, run the vision model on the matching image, and write the
    # reasoning and the cleaned predicates to separate per-problem files.

    # Input directories (relative to the current working directory)
    image_dir = "image"
    questions_dir = "question"

    # Output directories
    reasoning_output_dir = "/kaggle/working/reasoning_output"
    predicates_output_dir = "/kaggle/working/predicates_output"

    # Create output directories
    os.makedirs(reasoning_output_dir, exist_ok=True)
    os.makedirs(predicates_output_dir, exist_ok=True)

    # Problem numbers to process, both ends inclusive
    first_problem, last_problem = 2401, 3001

    for num in tqdm(range(first_problem, last_problem + 1)):
        num_str = str(num)

        # Paths to input files
        image_path = os.path.join(image_dir, f"{num_str}.png")
        ques_path = os.path.join(questions_dir, f"{num_str}.txt")

        # Output paths are computed once so the success and failure branches
        # below always write to the same files.
        reasoning_out_path = os.path.join(reasoning_output_dir, f"{num_str}.txt")
        predicates_out_path = os.path.join(predicates_output_dir, f"{num_str}.txt")

        # Check if all required files exist
        if not all(os.path.exists(path) for path in [image_path, ques_path]):
            print(f"Skipping problem {num_str}: Missing input files")
            continue

        # Read inputs (explicit UTF-8 so the result does not depend on the locale)
        try:
            with open(ques_path, "r", encoding="utf-8") as f:
                question = f.read().strip()
        except Exception as e:
            print(f"Error reading files for problem {num_str}: {e}")
            continue

        # Generate predicates and reasoning
        try:
            # The returned dict always carries a 'content' string, which is
            # parsed like any other text (even if it only holds an error note).
            result = generate_predicates_from_image(image_path, question)
            generated_content = result['content']

            # Extract reasoning and predicates separately
            reasoning_content, predicates_list = extract_reasoning_and_predicates(generated_content)

            # Write reasoning output to file; model text may contain non-ASCII
            # symbols, hence the explicit encoding.
            with open(reasoning_out_path, "w", encoding="utf-8") as f:
                f.write("=" * 80 + "\n")
                f.write(f"STEP-BY-STEP REASONING FOR PROBLEM {num_str}\n")
                f.write("=" * 80 + "\n")
                f.write(f"QUESTION: {question}\n\n")
                f.write("REASONING:\n")
                f.write("=" * 80 + "\n")
                f.write(reasoning_content)
                f.write("\n" + "=" * 80)

            # Write clean predicates to file, one per line
            with open(predicates_out_path, "w", encoding="utf-8") as f:
                for pred in predicates_list:
                    f.write(pred + "\n")

            print(f"Problem {num_str}: Generated {len(predicates_list)} predicates")

        except Exception as e:
            print(f"Error processing problem {num_str}: {e}")

            # Record the failure in both output files so the problem is not
            # mistaken for one that simply produced no output.
            error_content = f"ERROR: {str(e)}\n"

            for out_path in (reasoning_out_path, predicates_out_path):
                with open(out_path, "w", encoding="utf-8") as f:
                    f.write(error_content)

            continue

    print("Processing complete!")
    print("Outputs saved to:")
    print(f"- Reasoning: {reasoning_output_dir}")
    print(f"- Predicates: {predicates_output_dir}")