In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of every file found
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Predicates Generation


In [None]:
import os
import re
import time
import queue
import logging
import threading
from tqdm import tqdm
from collections import deque
from datetime import datetime
from google import genai

In [None]:
# Create the output directory up front; an already-existing folder is not an error
os.makedirs("content", exist_ok=True)

# Send INFO-and-above records to a log file in the working directory and to the
# default stream handler, using a timestamped "time - level - message" layout
logging.basicConfig(
    handlers=[
        logging.FileHandler("gemini_predicate_generator.log"),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

In [None]:
class GeminiVisionApiManager:
    """
    Manages multiple Gemini API keys with rotation and rate limiting for vision tasks.
    Handles file uploads and content generation with automatic key switching.

    Every API call is placed on an internal queue and executed by a single daemon
    worker thread, which sleeps ``rate_limit_delay`` seconds after each attempted
    call. The public ``upload_file`` / ``generate_content`` methods block until the
    worker has produced a result for their request.
    """

    def __init__(self, api_keys, calls_per_day=200, rate_limit_delay=4):
        """
        Initialize the API manager with multiple API keys.

        Args:
            api_keys (list): List of Gemini API keys.
            calls_per_day (int): Maximum number of calls allowed per key per day.
            rate_limit_delay (int): Seconds to wait between API calls (4 seconds for 15 RPM).

        Raises:
            ValueError: If ``api_keys`` is empty.
        """
        if not api_keys:
            # Fail with a clear message instead of an opaque IndexError below
            raise ValueError("At least one Gemini API key is required")

        self.api_keys = deque(api_keys)
        self.calls_per_day = calls_per_day
        self.rate_limit_delay = rate_limit_delay

        # Track usage for each key
        self.usage_count = {key: 0 for key in api_keys}
        self.current_key = self.api_keys[0]
        self.client = genai.Client(api_key=self.current_key)

        # Cache for uploaded files to avoid re-uploading
        # (created before the worker starts so the worker never sees a half-built object)
        self.file_cache = {}

        # Set up a queue for API calls
        self.call_queue = queue.Queue()
        self.worker_thread = threading.Thread(target=self._process_queue)
        self.worker_thread.daemon = True
        self.worker_thread.start()

        logging.info(f"Vision API Manager initialized with {len(api_keys)} keys")

    def _rotate_key(self):
        """Rotate to the next available API key."""
        self.api_keys.rotate(1)
        self.current_key = self.api_keys[0]
        self.client = genai.Client(api_key=self.current_key)
        # Clear file cache when rotating keys as files are key-specific
        self.file_cache.clear()
        logging.info(f"Rotated to new API key (usage: {self.usage_count[self.current_key]})")

    def _find_available_key(self):
        """
        Find an API key that hasn't reached the daily limit.

        Returns:
            bool: True if ``self.current_key`` (possibly after rotating) is below
            ``calls_per_day``; False if every key is exhausted.
        """
        initial_key = self.current_key

        if self.usage_count[self.current_key] < self.calls_per_day:
            return True

        for _ in range(len(self.api_keys)):
            self._rotate_key()
            if self.usage_count[self.current_key] < self.calls_per_day:
                return True
            if self.current_key == initial_key:
                # Came full circle without finding a usable key
                return False

        return False

    def _execute_task(self, task_type, args, kwargs):
        """
        Run one queued API call with the current client.

        Args:
            task_type (str): "upload" or "generate".
            args (sequence): Positional arguments for the generate call.
            kwargs (dict): Keyword arguments for the API call.

        Returns:
            dict: ``{"response": ..., "key": <key used>}`` on success, otherwise
            ``{"error": <message>}``.
        """
        try:
            if task_type == "upload":
                response = self.client.files.upload(**kwargs)
            elif task_type == "generate":
                response = self.client.models.generate_content(*args, **kwargs)
            else:
                # Always answer, otherwise the caller would block forever on its result queue
                return {"error": f"Unknown task type: {task_type}"}
        except Exception as e:
            message = str(e)
            lowered = message.lower()
            if "quota" in lowered or "rate limit" in lowered:
                # Mark the key as exhausted so the next task rotates away from it
                self.usage_count[self.current_key] = self.calls_per_day
                logging.warning(f"API key reached rate limit: {message}")
                return {"error": f"Rate limit: {message}"}
            logging.error(f"API call error: {message}")
            return {"error": message}

        # Count the call before the result is published so callers observe a consistent counter
        self.usage_count[self.current_key] += 1
        return {"response": response, "key": self.current_key}

    def _process_queue(self):
        """Process the queue of API calls (runs forever on the daemon worker thread)."""
        while True:
            task = self.call_queue.get()
            result_queue = None
            try:
                task_type, args, kwargs, result_queue = task

                if not self._find_available_key():
                    result_queue.put({"error": "All API keys have reached their daily limit"})
                    continue

                result_queue.put(self._execute_task(task_type, args, kwargs))

                # Throttle after every attempted call, successful or not
                time.sleep(self.rate_limit_delay)

            except Exception as e:
                logging.error(f"Queue processing error: {str(e)}")
                # Unblock the waiting caller; an extra entry on its private queue is harmless
                if result_queue is not None:
                    result_queue.put({"error": str(e)})
            finally:
                self.call_queue.task_done()

    def upload_file(self, file_path):
        """
        Upload a file to Gemini, with caching to avoid re-uploads.

        Args:
            file_path (str): Path to the file to upload

        Returns:
            The uploaded file object

        Raises:
            Exception: If the worker reports an error for the upload.
        """
        # Use current key + file path as cache key
        cache_key = f"{self.current_key}:{file_path}"

        if cache_key in self.file_cache:
            return self.file_cache[cache_key]

        result_queue = queue.Queue()
        self.call_queue.put(("upload", [], {"file": file_path}, result_queue))
        result = result_queue.get()

        if "error" in result:
            raise Exception(result["error"])

        # The worker may have rotated keys before uploading, so cache under the key
        # that actually performed the upload rather than the one seen before queuing.
        # NOTE(review): a later generate call can still run under a different key if a
        # rotation happens in between; the existing comment in _rotate_key says files
        # are key-specific, so confirm how callers should handle that case.
        key_used = result.get("key", self.current_key)
        self.file_cache[f"{key_used}:{file_path}"] = result["response"]
        return result["response"]

    def generate_content(self, *args, **kwargs):
        """
        Make an API call to generate content, automatically handling key rotation.

        Returns:
            The response from the API call.

        Raises:
            Exception: If the worker reports an error for the call.
        """
        result_queue = queue.Queue()
        self.call_queue.put(("generate", args, kwargs, result_queue))
        result = result_queue.get()

        if "error" in result:
            raise Exception(result["error"])
        return result["response"]

    def reset_usage_counts(self):
        """Reset the usage counts for all keys (e.g., at the start of a new day)."""
        self.usage_count = {key: 0 for key in self.api_keys}
        self.file_cache.clear()  # Clear file cache when resetting
        logging.info("Reset API key usage counts and file cache")

    def get_usage_stats(self):
        """
        Get usage statistics for all keys.

        Returns:
            dict: ``per_key`` (a snapshot copy of the per-key counters), ``total_used``,
            ``total_available`` and ``percent_used``.
        """
        total_used = sum(self.usage_count.values())
        total_available = len(self.api_keys) * self.calls_per_day
        return {
            # Copy so callers cannot mutate the manager's live counters
            "per_key": dict(self.usage_count),
            "total_used": total_used,
            "total_available": total_available,
            "percent_used": (total_used / total_available) * 100 if total_available > 0 else 0
        }

In [None]:
def generate_predicates_with_reasoning(api_manager, image_path, question, model="gemini-2.0-flash"):
    """
    Generate geometric predicates with step-by-step reasoning using Gemini Vision.

    The prompt asks the model to answer under two fixed section headers
    ("STEP-BY-STEP REASONING:" and "GENERATED PREDICATES:"), which are then used to
    split the response. If the predicates header is missing, the whole response is
    returned as both reasoning and predicates.

    Args:
        api_manager: GeminiVisionApiManager instance
        image_path (str): Path to the geometry problem image
        question (str): Question to analyze
        model (str): Name of the Gemini model to call

    Returns:
        dict: Contains 'reasoning', 'predicates', and 'raw_content'. On any error the
        three values are error-message strings instead (no exception is raised).
    """
    try:
        # Upload the image file
        uploaded_file = api_manager.upload_file(image_path)
        
        # Enhanced prompt with reasoning requirements
        prompt = f"""You are an expert AI mathematician specializing in geometry. Your task is to analyze the geometric figure in the provided image and generate accurate geometric predicates (literals) that represent ALL the relationships, measurements, and properties shown in the diagram.

GEOMETRY PROBLEM IMAGE:
The image shows a geometric figure with various shapes, lines, angles, and measurements. Analyze this image carefully to understand all geometric relationships and constraints.

QUESTION:
{question}

YOUR TASK:
1. First, provide step-by-step reasoning showing your analysis process
2. Then, generate geometric predicates based on your analysis using the Guidelines below

STEP-BY-STEP ANALYSIS (Required):
Please follow this format for your reasoning:

STEP 1: QUESTION ANALYSIS AND REQUIREMENTS
- Carefully read and understand what the question is asking
- Identify the target variable(s) or property to be found
- Determine what geometric relationships are needed to solve the problem
- Note any specific theorems, formulas, or concepts the question hints at
- Identify which elements in the diagram are most relevant to the question

STEP 2: COMPREHENSIVE IMAGE ANALYSIS
- Identify ALL geometric shapes (circles, triangles, quadrilaterals, etc.)
- List ALL points, lines, and their labels/names
- Note ALL visible measurements, angles, and numerical values
- Identify ALL special markings (right angle symbols, parallel marks, congruent marks, equal marks, etc.)
- Look for implied constructions (perpendiculars, bisectors, tangents, chords, radii, etc.)

STEP 3: CIRCLE-SPECIFIC ANALYSIS (If circles are present)
- Identify the center and all points on the circle
- Determine which lines are radii, chords, diameters, or tangents
- Look for inscribed angles, central angles, and arc relationships
- Check for perpendicular relationships involving radii and chords
- Identify any equal radius relationships

STEP 4: ANGLE AND PERPENDICULARITY ANALYSIS
- Examine ALL angles shown in the diagram, both marked and unmarked
- Look for right angle indicators or perpendicular relationships
- Check for angle bisectors or special angle relationships
- Identify complementary, supplementary, or vertical angles
- Look for inscribed angles and their corresponding arcs

STEP 5: CONGRUENCE AND EQUALITY ANALYSIS
- Identify ALL equal lengths, angles, or shapes (look for tick marks, identical measurements)
- Check for congruent triangles or similar figures
- Look for equal radii in circles
- Identify parallel lines or equal distances

STEP 6: INTERSECTION AND POSITIONING ANALYSIS
- Determine where lines intersect and at what points
- Check if points lie on specific lines or circles
- Identify midpoints, centroids, or other special points
- Look for points that divide segments in specific ratios

STEP 7: CONSTRAINT AND RELATIONSHIP SYNTHESIS
- Combine observations to identify implicit relationships
- Look for theorem applications (Pythagorean, inscribed angle, etc.)
- Identify geometric constructions that create specific relationships
- Check for properties that follow from the given constraints

STEP 8: QUESTION-DRIVEN COMPLETENESS CHECK
- Ensure all information needed to solve the problem is captured
- Verify that key relationships for the solution are represented
- Double-check that no critical geometric properties are missed
- Confirm that the predicates will provide sufficient information for problem-solving
- **CROSS-REFERENCE WITH QUESTION**: Make sure all geometric elements mentioned in or required by the question are fully represented in your predicates

STEP 9: SOLUTION PATH VALIDATION
- Mentally trace through how the question could be solved using your identified relationships
- Ensure no missing links in the logical chain from given information to solution
- Add any implicit relationships that are necessary for the solution process
- Verify that common geometric theorems and principles are adequately represented

CRITICAL ANALYSIS GUIDELINES:
⚠️ **QUESTION-FIRST APPROACH**: Let the question guide your analysis - if the question asks about area, ensure all length relationships are captured; if about angles, ensure all angle relationships are identified.

⚠️ **LOOK FOR HIDDEN RELATIONSHIPS**: Many geometric problems have implicit perpendicular relationships, equal lengths, or special angle properties that aren't explicitly marked but are crucial for solving.

⚠️ **CIRCLE GEOMETRY FOCUS**: If the diagram contains circles, pay special attention to:
- Which points lie on the circle vs. inside/outside
- Perpendicular relationships between radii and chords
- Equal radius lengths
- Inscribed vs. central angles
- Tangent-radius perpendicularity

⚠️ **CONSTRUCTION INDICATORS**: Look for:
- Lines that appear to be perpendicular even without explicit markings
- Points that appear to be midpoints or special positions
- Equal lengths suggested by visual symmetry
- Angle relationships implied by the construction

⚠️ **SOLUTION-CRITICAL RELATIONSHIPS**: Based on the question, prioritize identifying:
- Relationships that directly connect to what's being asked
- Intermediate relationships needed for multi-step solutions
- Theorem prerequisites (e.g., if Pythagorean theorem is needed, ensure right angles are identified)

GUIDELINES:
***Follow these predicates to represent diagram literals.

**GEOMETRIC SHAPES:**
- Point: Point(A), Point($)
- Line: Line(A,B), Line(m), Line($)
- Angle: Angle(A,B,C), Angle(A), Angle(1), Angle($)
- Triangle: Triangle(A,B,C), Triangle($), Triangle($1,$2,$3)
- Quadrilateral: Quadrilateral(A,B,C,D), Quadrilateral($)
- Parallelogram: Parallelogram(A,B,C,D), Parallelogram(1), Parallelogram($)
- Square: Square(A,B,C,D), Square(1), Square($)
- Rectangle: Rectangle(A,B,C,D), Rectangle(1), Rectangle($)
- Rhombus: Rhombus(A,B,C,D), Rhombus(1), Rhombus($)
- Trapezoid: Trapezoid(A,B,C,D), Trapezoid(1), Trapezoid($)
- Kite: Kite(A,B,C,D), Kite(1), Kite($)
- Polygon: Polygon($)
- Pentagon: Pentagon(A,B,C,D,E), Pentagon($)
- Hexagon: Hexagon(A,B,C,D,E,F), Hexagon($)
- Heptagon: Heptagon(A,B,C,D,E,F,G), Heptagon($)
- Octagon: Octagon(A,B,C,D,E,F,G,H), Octagon($)
- Circle: Circle(A), Circle(1), Circle($)
- Arc: Arc(A,B), Arc(A,B,C), Arc($)
- Sector: Sector(O,A,B), Sector($)
- Shape: Shape($) // For unknown shapes or regions

**UNARY GEOMETRIC ATTRIBUTES:**
- RightAngle: RightAngle(Angle($))
- Right: Right(Triangle($)) // Right triangle
- Isosceles: Isosceles(Polygon($)) // Isosceles polygon
- Equilateral: Equilateral(Polygon($)) // Equilateral polygon
- Regular: Regular(Polygon($))
- Red: Red(Shape($))
- Blue: Blue(Shape($))
- Green: Green(Shape($))
- Shaded: Shaded(Shape($))

**GEOMETRIC ATTRIBUTES:**
- AreaOf: AreaOf(A)
- PerimeterOf: PerimeterOf(A) // Perimeter of polygon A
- RadiusOf: RadiusOf(A)
- DiameterOf: DiameterOf(A)
- CircumferenceOf: CircumferenceOf(A) // Perimeter of circle A
- AltitudeOf: AltitudeOf(A) // Altitude of polygon A
- HypotenuseOf: HypotenuseOf(A) // Hypotenuse of triangle A
- SideOf: SideOf(A) // Side of square A
- WidthOf: WidthOf(A) // Width of quadrilateral A
- HeightOf: HeightOf(A) // Height of quadrilateral A
- LegOf: LegOf(A) // Leg of trapezoid A
- BaseOf: BaseOf(A) // Base of polygon A
- MedianOf: MedianOf(A) // Median of polygon A
- IntersectionOf: IntersectionOf(A,B) // Intersection of shapes A and B
- MeasureOf: MeasureOf(A) // Measure of angle A
- LengthOf: LengthOf(A) // Length of line A
- ScaleFactorOf: ScaleFactorOf(A,B) // Scale factor of shape A to shape B

**BINARY GEOMETRIC RELATIONS:**
- PointLiesOnLine: PointLiesOnLine(Point($),Line($1,$2))
- PointLiesOnCircle: PointLiesOnCircle(Point($),Circle($))
- Parallel: Parallel(Line($),Line($))
- Perpendicular: Perpendicular(Line($),Line($))
- IntersectAt: IntersectAt(Line($),Line($),Line($),Point($))
- BisectsAngle: BisectsAngle(Line($),Angle($))
- Congruent: Congruent(Polygon($),Polygon($))
- Similar: Similar(Polygon($),Polygon($))
- Tangent: Tangent(Line($),Circle($))
- Secant: Secant(Line($),Circle($))
- CircumscribedTo: CircumscribedTo(Shape($),Shape($))
- InscribedIn: InscribedIn(Shape($),Shape($))

**A-IsXOf-B GEOMETRIC RELATIONS:**
- IsMidpointOf: IsMidpointOf(Point($),Line($)) // Point A is midpoint of line B
- IsCentroidOf: IsCentroidOf(Point($),Shape($)) // Point A is centroid of shape B
- IsIncenterOf: IsIncenterOf(Point($),Shape($)) // Point A is incenter of shape B
- IsRadiusOf: IsRadiusOf(Line($),Circle($)) // Line A is radius of circle B
- IsDiameterOf: IsDiameterOf(Line($),Circle($)) // Line A is diameter of circle B
- IsMidsegmentOf: IsMidsegmentOf(Line($),Triangle($)) // Line A is midsegment of triangle B
- IsChordOf: IsChordOf(Line($),Circle($)) // Line A is chord of circle B
- IsSideOf: IsSideOf(Line($),Polygon($)) // Line A is side of polygon B
- IsHypotenuseOf: IsHypotenuseOf(Line($),Triangle($)) // Line A is hypotenuse of triangle B
- IsPerpendicularBisectorOf: IsPerpendicularBisectorOf(Line($),Triangle($)) // Line A is perpendicular bisector of triangle B
- IsAltitudeOf: IsAltitudeOf(Line($),Triangle($)) // Line A is altitude of triangle B
- IsMedianOf: IsMedianOf(Line($),Quadrilateral($)) // Line A is median of quadrilateral B
- IsBaseOf: IsBaseOf(Line($),Quadrilateral($)) // Line A is base of quadrilateral B
- IsDiagonalOf: IsDiagonalOf(Line($),Quadrilateral($)) // Line A is diagonal of quadrilateral B
- IsLegOf: IsLegOf(Line($),Trapezoid($)) // Line A is leg of trapezoid B

**NUMERICAL ATTRIBUTES AND RELATIONS:**
- SinOf: SinOf(Var)
- CosOf: CosOf(Var)
- TanOf: TanOf(Var)
- CotOf: CotOf(Var)
- HalfOf: HalfOf(Var)
- SquareOf: SquareOf(Var)
- SqrtOf: SqrtOf(Var)
- RatioOf: RatioOf(Var), RatioOf(Var1,Var2)
- SumOf: SumOf(Var1,Var2,...)
- AverageOf: AverageOf(Var1,Var2,...)
- Add: Add(Var1,Var2,...)
- Mul: Mul(Var1,Var2,...)
- Sub: Sub(Var1,Var2,...)
- Div: Div(Var1,Var2,...)
- Pow: Pow(Var1,Var2)
- Equals: Equals(Var1,Var2)
- Find: Find(Var) // Find the value of the variable
- UseTheorem: UseTheorem(A_B_C)

VARIABLE NAMING CONVENTIONS:
- Use capital letters for points: A, B, C, D, etc.
- Use lowercase letters for lines when not defined by points: m, n, l, etc.
- Use numbers for unnamed shapes: 1, 2, 3, etc.
- Use $ for generic variables: $, $1, $2, etc.
- Use descriptive names when appropriate: base, height, radius, etc.

CRITICAL INSTRUCTIONS:
1. **BE EXTREMELY THOROUGH** - Missing relationships are the main cause of poor problem-solving performance
2. **QUESTION-DRIVEN ANALYSIS** - Use the question to guide which relationships are most critical to identify
3. **LOOK BEYOND THE OBVIOUS** - Many critical relationships are implied, not explicitly marked
4. Carefully examine the geometric figure in the image
5. Identify all points, lines, angles, shapes, and measurements shown
6. **MAKE EACH PREDICATE AS ATOMIC AS POSSIBLE** 
   – Decompose any complex or compound relationship into the simplest, individual geometric 
     statements (e.g., replace "Perpendicular(Line(A,B),Line(C,D))" with separate vector and dot-product or angle-equals-90° predicates)
7. Generate predicates that represent:
   - All geometric shapes present
   - All given measurements and their relationships
   - All geometric properties and constraints (including implied ones)
   - ALL relationships between different elements
   - All perpendicular relationships (marked and implied)
   - All equal lengths and angles (marked and implied)
   - **All relationships specifically needed to answer the question**
8. Always provide the step-by-step reasoning first
9. Then provide the predicates section with a clear section header
10. Follow the Guidelines above - these predicates are crucial for representing diagram literals
11. Each predicate must be on a separate line
12. Do not include quotation marks, extra symbols, or explanatory text in predicates
13. Only output predicates in the exact format: PredicateName(arguments)
14. **IMPORTANT: Do NOT include any question-related predicates**
15. Include only the given information, constraints, and geometric relationships visible in the diagram
16. Represent all visible geometric relationships, not derived solutions
17. The predicates should provide sufficient information for another system to solve the problem, but not the solution itself
18. **COMPLETENESS IS KEY** - Better to include extra relationships than miss critical ones
19. **QUESTION COMPLETENESS** - Ensure your predicates capture all relationships necessary for solving the specific question asked

OUTPUT FORMAT (Required):
Structure your response using exactly these two section headers, each on its own line:

STEP-BY-STEP REASONING:
<your step-by-step analysis, STEP 1 through STEP 9>

GENERATED PREDICATES:
<one predicate per line, with no other text>

Generate your complete analysis and predicates now:
"""

        # Generate content with the uploaded image and prompt
        response = api_manager.generate_content(
            model=model,
            contents=[uploaded_file, prompt]
        )

        if response.text is None:
            # Surface a readable message instead of an AttributeError on None.strip()
            raise ValueError("Model response contained no text")

        raw_content = response.text.strip()

        # Parse the response to separate reasoning and predicates
        reasoning_section, predicates_section = _split_reasoning_and_predicates(raw_content)

        return {
            'reasoning': reasoning_section,
            'predicates': predicates_section,
            'raw_content': raw_content
        }
        
    except Exception as e:
        logging.error(f"Error processing image {image_path}: {e}")
        return {
            'reasoning': f'Error during reasoning: {str(e)}',
            'predicates': f'Error processing image: {str(e)}',
            'raw_content': f'Error: {str(e)}'
        }


def _split_reasoning_and_predicates(raw_content):
    """
    Split a model response into its reasoning and predicates sections.

    Args:
        raw_content (str): Stripped text of the model response

    Returns:
        tuple: (reasoning_section, predicates_section). Without a
        "GENERATED PREDICATES:" header both elements are the full response, so
        predicates can still be searched for in the entire text.
    """
    # Split on the FIRST header only, so a repeated header does not truncate the predicates
    before, header, after = raw_content.partition("GENERATED PREDICATES:")

    if not header:
        # Try to extract predicates from the entire content
        return raw_content, raw_content

    # Whatever precedes the predicates header is the reasoning, labelled or not
    reasoning_section = before.replace("STEP-BY-STEP REASONING:", "").strip()
    if not reasoning_section:
        reasoning_section = "No explicit reasoning provided"

    return reasoning_section, after.strip()

In [None]:
def self_validate_predicates(api_manager, image_path, question, generated_predicates, reasoning,
                             model="gemini-2.0-flash"):
    """
    Have the LLM validate its own generated predicates.

    The model is asked for six 1-10 scores plus an overall assessment and a final
    recommendation; these are pulled out of the free-text answer with regexes that
    tolerate brackets and markdown emphasis around the values.

    Args:
        api_manager: GeminiVisionApiManager instance
        image_path (str): Path to the geometry problem image
        question (str): Original question
        generated_predicates (str): The predicates generated by the model
        reasoning (str): The reasoning process used
        model (str): Name of the Gemini model to call

    Returns:
        dict: Contains 'validation_content', 'scores' (a score that cannot be found is
        0), 'overall_assessment' and 'final_recommendation' (upper-cased, "UNKNOWN"
        when not found, "ERROR" on failure) and 'average_score'.
    """
    try:
        # Upload the image file (reuse cache if available)
        uploaded_file = api_manager.upload_file(image_path)
        
        validation_prompt = f"""You are now acting as a critical reviewer of your own work. You previously analyzed a geometry problem and generated predicates. Now you must validate your own output.

ORIGINAL PROBLEM:
Question: {question}

YOUR PREVIOUS REASONING:
{reasoning}

YOUR GENERATED PREDICATES:
{generated_predicates}

SELF-VALIDATION TASK:
Critically evaluate your own work by analyzing the following aspects:

1. COMPLETENESS ANALYSIS:
   - Does the image contain geometric elements that you missed?
   - Are all visible measurements and relationships captured?
   - Are there implicit relationships you should have included?

2. ACCURACY ANALYSIS:
   - Are all predicates mathematically correct?
   - Do the predicates accurately represent what's shown in the image?
   - Are there any contradictory or inconsistent predicates?

3. FORMAT ANALYSIS:
   - Are all predicates in the correct format: PredicateName(arguments)?
   - Are predicate names from the valid set of allowed predicates?
   - Are there any syntax errors or typos?

4. CONSISTENCY ANALYSIS:
   - Do the predicates logically support solving the problem?
   - Are there mathematical relationships that contradict each other?
   - Do numerical values make sense in context?

5. RELEVANCE ANALYSIS:
   - Are all predicates relevant to solving the problem?
   - Are there unnecessary or redundant predicates?
   - Do the predicates focus on what the question is asking?

PROVIDE YOUR VALIDATION JUDGMENT:

OVERALL ASSESSMENT: [EXCELLENT/GOOD/FAIR/POOR]

DETAILED ANALYSIS:

COMPLETENESS SCORE: [1-10] /10
Explanation: [Your assessment of completeness]

ACCURACY SCORE: [1-10] /10
Explanation: [Your assessment of accuracy]

FORMAT SCORE: [1-10] /10
Explanation: [Your assessment of format correctness]

CONSISTENCY SCORE: [1-10] /10
Explanation: [Your assessment of logical consistency]

RELEVANCE SCORE: [1-10] /10
Explanation: [Your assessment of relevance]

SPECIFIC ISSUES IDENTIFIED:
[List any specific problems you found]

MISSING ELEMENTS:
[List anything important that was missed]

INCORRECT ELEMENTS:
[List anything that appears to be wrong]

SUGGESTED IMPROVEMENTS:
[Provide specific suggestions for improvement]

CONFIDENCE IN PROBLEM SOLVING: [1-10] /10
Explanation: [How well do your predicates support solving the problem]

FINAL RECOMMENDATION: [ACCEPT/REVISE/REJECT]
Reasoning: [Brief explanation of your recommendation]
"""

        # Generate validation content
        response = api_manager.generate_content(
            model=model,
            contents=[uploaded_file, validation_prompt]
        )

        if response.text is None:
            # Surface a readable message instead of an AttributeError on None.strip()
            raise ValueError("Model response contained no text")

        validation_content = response.text.strip()

        # Between a label and its value, allow whitespace, the colon, markdown
        # asterisks and an opening bracket, e.g. "**FORMAT SCORE:** [9] /10"
        filler = r'[\s*:\[]*'

        # Parse scores from the validation content
        scores = {}
        try:
            # Extract scores using regex
            score_labels = {
                'completeness': 'COMPLETENESS SCORE',
                'accuracy': 'ACCURACY SCORE',
                'format': 'FORMAT SCORE',
                'consistency': 'CONSISTENCY SCORE',
                'relevance': 'RELEVANCE SCORE',
                'confidence': 'CONFIDENCE IN PROBLEM SOLVING'
            }

            for score_name, label in score_labels.items():
                match = re.search(label + filler + r'(\d+)', validation_content, re.IGNORECASE)
                # A score the model did not report counts as 0
                scores[score_name] = int(match.group(1)) if match else 0

            # Extract overall assessment (normalised to upper case for comparisons)
            overall_match = re.search(
                'OVERALL ASSESSMENT' + filler + r'([A-Z]+)', validation_content, re.IGNORECASE
            )
            overall_assessment = overall_match.group(1).upper() if overall_match else "UNKNOWN"

            # Extract final recommendation (normalised to upper case for comparisons)
            recommendation_match = re.search(
                'FINAL RECOMMENDATION' + filler + r'([A-Z]+)', validation_content, re.IGNORECASE
            )
            final_recommendation = (
                recommendation_match.group(1).upper() if recommendation_match else "UNKNOWN"
            )

        except Exception as e:
            logging.warning(f"Error parsing validation scores: {e}")
            scores = {
                'completeness': 0, 'accuracy': 0, 'format': 0, 
                'consistency': 0, 'relevance': 0, 'confidence': 0
            }
            overall_assessment = "ERROR"
            final_recommendation = "ERROR"

        return {
            'validation_content': validation_content,
            'scores': scores,
            'overall_assessment': overall_assessment,
            'final_recommendation': final_recommendation,
            'average_score': sum(scores.values()) / len(scores) if scores else 0
        }
        
    except Exception as e:
        logging.error(f"Error in self-validation: {e}")
        return {
            'validation_content': f'Error during self-validation: {str(e)}',
            'scores': {'completeness': 0, 'accuracy': 0, 'format': 0, 'consistency': 0, 'relevance': 0, 'confidence': 0},
            'overall_assessment': 'ERROR',
            'final_recommendation': 'ERROR',
            'average_score': 0
        }

In [None]:
def extract_predicates(predicates_content):
    """
    Extract clean predicates from the model output.
    
    Args:
        predicates_content (str): The predicates section content
        
    Returns:
        list: List of clean predicates
    """
    if not predicates_content or not predicates_content.strip():
        return []
    
    # Clean the input: remove excessive whitespace but preserve structure
    content = re.sub(r'\s+', ' ', predicates_content.strip())
    
    predicates = []
    i = 0
    n = len(content)
    
    while i < n:
        # Skip whitespace and common separators
        while i < n and content[i] in ' \t\n\r,;.':
            i += 1
        
        if i >= n:
            break
            
        # Skip comments
        if i < n - 1 and content[i:i+2] == '//':
            # Skip C-style comments
            while i < n and content[i] != '\n':
                i += 1
            continue
        elif content[i] == '#':
            # Skip Python-style comments
            while i < n and content[i] != '\n':
                i += 1
            continue
        
        # Look for a potential predicate start
        start_pos = i
        predicate_buffer = ""
        
        # Collect characters until we find a complete predicate or reach end
        paren_count = 0
        in_string = False
        string_char = None
        escape_next = False
        found_opening_paren = False
        
        while i < n:
            char = content[i]
            predicate_buffer += char
            
            if escape_next:
                escape_next = False
            elif char == '\\':
                escape_next = True
            elif in_string:
                if char == string_char:
                    in_string = False
                    string_char = None
            elif char in '"\'':
                in_string = True
                string_char = char
            elif char == '(':
                paren_count += 1
                found_opening_paren = True
            elif char == ')':
                paren_count -= 1
                
                # Check if we have a complete predicate
                if paren_count == 0 and found_opening_paren:
                    candidate = predicate_buffer.strip()
                    if _is_valid_predicate(candidate):
                        predicates.append(candidate)
                    break
            elif not found_opening_paren and char in ',;\n' and paren_count == 0:
                # We hit a separator before finding a predicate, abandon this candidate
                break
            
            i += 1
        
        # If we didn't find a complete predicate, move to next character
        if paren_count != 0 or not found_opening_paren:
            i = start_pos + 1
        else:
            i += 1
    
    return _deduplicate_predicates(predicates)

def _is_valid_predicate(candidate):
    """
    Validates if a candidate string is a valid predicate.
    Optimized for geometric predicates like Point(A), Triangle(A,B,C), etc.
    
    Args:
        candidate (str): The candidate predicate string
        
    Returns:
        bool: True if valid predicate, False otherwise
    """
    if not candidate or not candidate.strip():
        return False
    
    candidate = candidate.strip()
    
    # Must start and end with proper characters
    if not candidate.endswith(')'):
        return False
    
    # Find the opening parenthesis
    paren_pos = candidate.find('(')
    if paren_pos == -1:
        return False
    
    # Extract predicate name
    predicate_name = candidate[:paren_pos].strip()
    
    # Validate predicate name: must start with letter, can contain letters, numbers, underscores
    if not re.match(r'^[A-Za-z][A-Za-z0-9_]*$', predicate_name):
        return False
    
    # Check for minimum reasonable length (avoid single char false positives)
    if len(predicate_name) < 2 and predicate_name.lower() not in ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']:
        return False
    
    # Extract arguments to ensure they're not empty
    args_part = candidate[paren_pos+1:-1].strip()
    if not args_part:  # Empty parentheses are invalid for geometric predicates
        return False
    
    # Check that parentheses are balanced
    return _check_balanced_parentheses(candidate)

def _check_balanced_parentheses(text):
    """
    Check if parentheses are properly balanced, considering string literals.
    
    Args:
        text (str): Text to check
        
    Returns:
        bool: True if balanced, False otherwise
    """
    paren_count = 0
    in_string = False
    string_char = None
    escape_next = False
    
    for char in text:
        if escape_next:
            escape_next = False
        elif char == '\\':
            escape_next = True
        elif in_string:
            if char == string_char:
                in_string = False
                string_char = None
        elif char in '"\'':
            in_string = True
            string_char = char
        elif char == '(':
            paren_count += 1
        elif char == ')':
            paren_count -= 1
            if paren_count < 0:
                return False
    
    return paren_count == 0 and not in_string

def _deduplicate_predicates(predicates):
    """
    Remove duplicate predicates while preserving order.
    
    Args:
        predicates (list): List of predicates that may contain duplicates
        
    Returns:
        list: List with duplicates removed
    """
    seen = set()
    result = []
    
    for predicate in predicates:
        # Normalize for comparison (remove extra spaces)
        normalized = re.sub(r'\s+', ' ', predicate.strip())
        if normalized not in seen:
            seen.add(normalized)
            result.append(predicate.strip())
    
    return result

In [None]:
def generate_predicates_with_retry(api_manager, image_path, question, max_retries=3):
    """
    Generate predicates with retry mechanism, reasoning, and self-validation.

    Each attempt generates predicates with reasoning, extracts the clean
    predicate list and, when that list is non-empty, asks the model to
    validate it. An attempt whose average validation score is at least 6.0 is
    returned immediately. Otherwise the best-scoring validated attempt is
    remembered and returned once the retries are exhausted, so a usable
    low-scoring result is not thrown away just because a later attempt raised
    or produced no predicates.

    Args:
        api_manager: GeminiVisionApiManager instance
        image_path (str): Path to the geometry problem image
        question (str): Question to analyze
        max_retries (int): Maximum number of retry attempts

    Returns:
        tuple: (predicates_list, reasoning, validation_result, full_content).
        When no attempt produced validated predicates, the tuple holds an
        empty list, "All attempts failed", a zero-scored validation dict
        marked 'FAILED', and "Generation failed".
    """
    # Used only to label log messages.
    problem_num = os.path.basename(image_path).split('.')[0]
    min_acceptable_score = 6.0

    # Best validated attempt seen so far; returned if nothing reaches the threshold.
    best_attempt = None
    best_score = None

    for attempt in range(max_retries):
        try:
            # Generate predicates with reasoning
            result = generate_predicates_with_reasoning(api_manager, image_path, question)
            reasoning = result['reasoning']
            predicates_content = result['predicates']
            raw_content = result['raw_content']

            # Extract clean predicates
            predicates = extract_predicates(predicates_content)

            if not predicates:
                logging.warning(f"Attempt {attempt + 1} for problem {problem_num}: No valid predicates extracted")
                continue

            # Self-validate the generated predicates
            validation_result = self_validate_predicates(
                api_manager, image_path, question,
                predicates_content, reasoning
            )
            score = validation_result['average_score']

            logging.info(f"Attempt {attempt + 1} for problem {problem_num}: Generated {len(predicates)} predicates, Average score: {score:.1f}")

            # Good enough: stop retrying and hand this attempt back.
            if score >= min_acceptable_score:
                return predicates, reasoning, validation_result, raw_content

            if best_score is None or score > best_score:
                best_score = score
                best_attempt = (predicates, reasoning, validation_result, raw_content)

            if attempt < max_retries - 1:
                logging.warning(f"Attempt {attempt + 1} for problem {problem_num}: Low validation score ({score:.1f}), retrying...")

        except Exception as e:
            # Any failure (API error, malformed result, ...) only costs this attempt.
            logging.error(f"Attempt {attempt + 1} failed for problem {problem_num}: {str(e)}")

    # Retries exhausted: prefer a low-scoring but validated result over nothing.
    if best_attempt is not None:
        logging.warning(f"Problem {problem_num}: no attempt reached the score threshold, returning best attempt (score {best_score:.1f})")
        return best_attempt

    # If all attempts failed, return empty results
    return [], "All attempts failed", {
        'validation_content': 'All generation attempts failed',
        'scores': {'completeness': 0, 'accuracy': 0, 'format': 0, 'consistency': 0, 'relevance': 0, 'confidence': 0},
        'overall_assessment': 'FAILED',
        'final_recommendation': 'FAILED',
        'average_score': 0
    }, "Generation failed"

In [None]:
def _write_validation_summary(f, validation_result):
    """Write the assessment, recommendation and average-score lines to an open file."""
    f.write(f"Overall Assessment: {validation_result['overall_assessment']}\n")
    f.write(f"Final Recommendation: {validation_result['final_recommendation']}\n")
    f.write(f"Average Score: {validation_result['average_score']:.2f}/10\n\n")


def _write_predicates_report(path, predicates):
    """Write the clean predicate list, one predicate per line."""
    with open(path, "w", encoding='utf-8') as f:
        for predicate in predicates:
            f.write(predicate + "\n")


def _write_reasoning_report(path, num_str, question, reasoning):
    """Write the question followed by the model's step-by-step reasoning."""
    with open(path, "w", encoding='utf-8') as f:
        f.write("=" * 80 + "\n")
        f.write(f"PROBLEM {num_str} - STEP-BY-STEP REASONING\n")
        f.write("=" * 80 + "\n")
        f.write(f"Question: {question}\n")
        f.write("=" * 80 + "\n\n")
        f.write(reasoning)


def _write_validation_report(path, num_str, validation_result):
    """Write the self-validation summary, per-criterion scores and full validation text."""
    with open(path, "w", encoding='utf-8') as f:
        f.write("=" * 80 + "\n")
        f.write(f"PROBLEM {num_str} - SELF-VALIDATION RESULTS\n")
        f.write("=" * 80 + "\n")
        _write_validation_summary(f, validation_result)

        f.write("DETAILED SCORES:\n")
        for score_name, score_value in validation_result['scores'].items():
            f.write(f"  {score_name.capitalize()}: {score_value}/10\n")

        f.write("\nFULL VALIDATION CONTENT:\n")
        f.write("-" * 50 + "\n")
        f.write(validation_result['validation_content'])


def _write_combined_report(path, num_str, question, image_path, predicates,
                           reasoning, validation_result, raw_content):
    """Write a single file holding the problem, predicates, reasoning, validation and raw output."""
    with open(path, "w", encoding='utf-8') as f:
        f.write("=" * 100 + "\n")
        f.write(f"COMPLETE ANALYSIS FOR PROBLEM {num_str}\n")
        f.write("=" * 100 + "\n\n")

        f.write("PROBLEM DETAILS:\n")
        f.write("-" * 30 + "\n")
        f.write(f"Question: {question}\n")
        f.write(f"Image Path: {image_path}\n\n")

        f.write("GENERATED PREDICATES:\n")
        f.write("-" * 30 + "\n")
        if predicates:
            for i, predicate in enumerate(predicates, 1):
                f.write(f"{i:2d}. {predicate}\n")
        else:
            f.write("No predicates generated\n")
        f.write(f"\nTotal Predicates: {len(predicates)}\n\n")

        f.write("STEP-BY-STEP REASONING:\n")
        f.write("-" * 30 + "\n")
        f.write(reasoning + "\n\n")

        f.write("SELF-VALIDATION SUMMARY:\n")
        f.write("-" * 30 + "\n")
        _write_validation_summary(f, validation_result)

        f.write("VALIDATION SCORES BREAKDOWN:\n")
        for score_name, score_value in validation_result['scores'].items():
            f.write(f"  • {score_name.capitalize()}: {score_value}/10\n")

        f.write("\nDETAILED VALIDATION ANALYSIS:\n")
        f.write("-" * 30 + "\n")
        f.write(validation_result['validation_content'] + "\n\n")

        f.write("RAW LLM OUTPUT:\n")
        f.write("-" * 30 + "\n")
        f.write(raw_content)


def _write_error_reports(output_dirs, num_str, error):
    """Overwrite the problem's file in every output directory with an error notice."""
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    for output_dir in output_dirs:
        error_file = os.path.join(output_dir, f"{num_str}.txt")
        with open(error_file, "w", encoding='utf-8') as f:
            f.write(f"ERROR PROCESSING PROBLEM {num_str}\n")
            f.write("=" * 50 + "\n")
            f.write(f"Error: {str(error)}\n")
            f.write(f"Timestamp: {timestamp}\n")


def main(start_num=2401, end_num=3001):
    """
    Main function to process geometry problems and generate predicates using Gemini Vision API.

    For every problem number in the inclusive range, the matching
    ``<num>.png`` image and ``<num>.txt`` question are looked up in the input
    directories. Problems with missing inputs are skipped. For the rest,
    predicates are generated via `generate_predicates_with_retry` and four
    report files (predicates, reasoning, validation, combined analysis) are
    written under /kaggle/working. If processing a problem raises, those four
    files are replaced with an error notice and the loop moves on.

    Args:
        start_num (int): First problem number to process (inclusive).
        end_num (int): Last problem number to process (inclusive).
    """

    # Initialize API keys
    # NOTE(review): placeholder value - real keys have to be filled in here.
    # Prefer loading them from a secrets store or environment variable rather
    # than saving them in the notebook.
    api_keys = [
        "your api keys"
    ]

    # Initialize API manager
    api_manager = GeminiVisionApiManager(
        api_keys=api_keys,
        calls_per_day=200,
        rate_limit_delay=4  # 4 seconds to stay under 15 RPM
    )

    # Input directories (placeholders to be replaced with real paths)
    image_dir = "image directory"
    questions_dir = "question directory"

    # Output directories
    predicates_output_dir = "/kaggle/working/predicates_output"
    reasoning_output_dir = "/kaggle/working/reasoning_output"
    validation_output_dir = "/kaggle/working/validation_output"
    combined_analysis_dir = "/kaggle/working/combined_analysis"

    output_dirs = [predicates_output_dir, reasoning_output_dir, validation_output_dir, combined_analysis_dir]
    for dir_path in output_dirs:
        os.makedirs(dir_path, exist_ok=True)

    logging.info("Starting Gemini Vision geometry predicate generation with reasoning and self-validation...")

    # Outcome counters for the final summary.
    succeeded = 0
    failed = 0
    skipped = 0

    # Process problems from start_num to end_num, both inclusive
    for num in tqdm(range(start_num, end_num + 1), desc="Processing problems"):
        num_str = str(num)

        # Paths to input files
        image_path = os.path.join(image_dir, f"{num_str}.png")
        ques_path = os.path.join(questions_dir, f"{num_str}.txt")

        # Check if all required files exist
        if not all(os.path.exists(path) for path in [image_path, ques_path]):
            print(f"Skipping problem {num_str}: Missing input files")
            logging.warning(f"Skipping problem {num_str}: Missing input files")
            skipped += 1
            continue

        # Read inputs
        try:
            with open(ques_path, "r", encoding='utf-8') as f:
                question = f.read().strip()
        except Exception as e:
            print(f"Error reading files for problem {num_str}: {e}")
            logging.error(f"Error reading files for problem {num_str}: {e}")
            failed += 1
            continue

        # Generate predicates with reasoning and validation
        try:
            predicates, reasoning, validation_result, raw_content = generate_predicates_with_retry(
                api_manager, image_path, question
            )

            report_name = f"{num_str}.txt"

            # Write predicates to output file (clean format)
            _write_predicates_report(
                os.path.join(predicates_output_dir, report_name), predicates
            )

            # Write reasoning to separate file
            _write_reasoning_report(
                os.path.join(reasoning_output_dir, report_name), num_str, question, reasoning
            )

            # Write validation results to separate file
            _write_validation_report(
                os.path.join(validation_output_dir, report_name), num_str, validation_result
            )

            # Write combined analysis (all information in one file)
            _write_combined_report(
                os.path.join(combined_analysis_dir, report_name),
                num_str, question, image_path, predicates,
                reasoning, validation_result, raw_content
            )

            # Log progress
            print(f"✓ Problem {num_str}: {len(predicates)} predicates, Score: {validation_result['average_score']:.1f}/10")
            logging.info(f"Successfully processed problem {num_str}: {len(predicates)} predicates generated")
            succeeded += 1

        except Exception as e:
            error_msg = f"Error processing problem {num_str}: {str(e)}"
            print(f"✗ {error_msg}")
            logging.error(error_msg)
            failed += 1

            # Write error information to files. If the failure above was itself
            # an I/O problem this write may fail too; log it instead of letting
            # it abort the remaining problems.
            try:
                _write_error_reports(output_dirs, num_str, e)
            except OSError as write_error:
                logging.error(f"Could not write error report for problem {num_str}: {write_error}")

        # Print usage statistics periodically. Skipped and unreadable problems
        # `continue` before this point, so they never trigger the printout.
        if num % 10 == 0:
            stats = api_manager.get_usage_stats()
            print(f"\nAPI Usage Statistics after problem {num}:")
            print(f"  Total calls used: {stats['total_used']}/{stats['total_available']}")
            print(f"  Usage percentage: {stats['percent_used']:.1f}%")
            # NOTE(review): if the per_key dict is keyed by raw API keys this
            # prints them in full - confirm against get_usage_stats().
            print(f"  Per-key usage: {stats['per_key']}")
            print()

    # Final statistics and summary
    print("\n" + "=" * 80)
    print("PROCESSING COMPLETE")
    print("=" * 80)
    print(f"Problems: {succeeded} succeeded, {failed} failed, {skipped} skipped")

    final_stats = api_manager.get_usage_stats()
    print(f"Final API Usage Statistics:")
    print(f"  Total calls used: {final_stats['total_used']}/{final_stats['total_available']}")
    print(f"  Usage percentage: {final_stats['percent_used']:.1f}%")
    print(f"  Per-key usage:")
    for key, usage in final_stats['per_key'].items():
        # Only the last 10 characters of each key label are shown.
        print(f"    {key[-10:]}: {usage} calls")

    print(f"\nOutput directories:")
    print(f"  Predicates: {predicates_output_dir}")
    print(f"  Reasoning: {reasoning_output_dir}")
    print(f"  Validation: {validation_output_dir}")
    print(f"  Combined Analysis: {combined_analysis_dir}")

    # Report real outcome counts instead of claiming success unconditionally.
    logging.info(f"Geometry predicate generation process completed: {succeeded} succeeded, {failed} failed, {skipped} skipped")

In [None]:
# Entry point: run the full predicate-generation pipeline when this cell (or
# the exported script) is executed directly rather than imported.
if __name__ == "__main__":
    main()