In [None]:
import json
import os
import re
import requests
import time
from typing import Dict, List, Any

# SECURITY: API keys must never be committed to source control. The key is
# read from the environment; any key that was ever hard-coded in this file
# should be treated as compromised and revoked.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
import google.generativeai as genai

# REST endpoints for the Gemini and Claude API helper functions
GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent"
CLAUDE_URL = "https://api.anthropic.com/v1/messages"

# API keys are read from the environment; export GEMINI_API_KEY and
# CLAUDE_API_KEY before running. Either value is None when its variable is unset.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
CLAUDE_API_KEY = os.getenv("CLAUDE_API_KEY")

def load_problems(file_path: str) -> List[Dict[str, Any]]:
    """Load the problem records from a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The parsed JSON document (callers iterate it as a list of dicts).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; do not rely on the platform's default
    # encoding, which would garble non-ASCII math symbols on some systems.
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)

def call_gemini_api(problem: str) -> str:
    """Ask Gemini to solve a math problem and return the text of its reply.

    The request is attempted up to three times, sleeping 1s and then 2s
    between attempts. Failures are reported through the return value rather
    than by raising.

    Args:
        problem: The problem statement to embed in the prompt.

    Returns:
        The text of the first part of the first candidate, or a string
        beginning with "Error" when the key is missing, the request fails,
        or no text can be found in the response.
    """
    if not GEMINI_API_KEY:
        # Fail fast instead of sending three unauthenticated requests.
        return "Error calling Gemini API: GEMINI_API_KEY is not set"

    prompt = f"""
    Please solve this math problem step by step and provide the final answer clearly:
    {problem}
    
    Make sure to explain your reasoning and highlight the final answer at the end.
    """

    headers = {
        "Content-Type": "application/json",
        # The key travels in a header rather than the query string so that it
        # never appears in the URL embedded in exception messages, which this
        # function returns to the caller (and which get written to disk).
        "x-goog-api-key": GEMINI_API_KEY,
    }

    data = {
        "contents": [{
            "parts": [{
                "text": prompt
            }]
        }],
        "generationConfig": {
            "temperature": 0.2,
            "maxOutputTokens": 1024
        }
    }

    # Retries with exponential backoff
    max_retries = 3
    retry_delay = 1
    # Without a timeout, requests waits indefinitely on a stalled connection.
    timeout_seconds = 60

    for attempt in range(max_retries):
        try:
            response = requests.post(
                GEMINI_URL, headers=headers, json=data, timeout=timeout_seconds
            )
            response.raise_for_status()
            response_json = response.json()

            # Extract the text from the response, tolerating missing or empty
            # fields (e.g. a candidate that carries no text part).
            candidates = response_json.get("candidates") or []
            if candidates:
                content = candidates[0].get("content") or {}
                parts = content.get("parts") or []
                if parts and "text" in parts[0]:
                    return parts[0]["text"]

            return "Error: Unable to extract answer from response"

        # ValueError covers a non-JSON body on requests versions whose
        # decode error is not a RequestException subclass.
        except (requests.exceptions.RequestException, ValueError) as e:
            if attempt == max_retries - 1:
                return f"Error calling Gemini API: {str(e)}"

            time.sleep(retry_delay)
            retry_delay *= 2  # Exponential backoff

    return "Error: Maximum retries exceeded"

# A signed integer or decimal such as "42", "-7" or "3.14".
_ANSWER_NUMBER = r"-?\d+(?:\.\d+)?"
# Text allowed between an answer phrase and its number: colons, whitespace,
# markdown emphasis ("**") and LaTeX math delimiters ("$").
_ANSWER_SEPARATOR = r"[:\s*$]+"
# Tried in priority order against the lower-cased response; each pattern has
# exactly one capturing group holding the number.
_ANSWER_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"\\boxed\{\s*(" + _ANSWER_NUMBER + r")\s*\}",
        r"final answer(?:\s+is)?" + _ANSWER_SEPARATOR + "(" + _ANSWER_NUMBER + ")",
        r"the answer is" + _ANSWER_SEPARATOR + "(" + _ANSWER_NUMBER + ")",
        r"result is" + _ANSWER_SEPARATOR + "(" + _ANSWER_NUMBER + ")",
        r"we get" + _ANSWER_SEPARATOR + "(" + _ANSWER_NUMBER + ")",
        r"equals" + _ANSWER_SEPARATOR + "(" + _ANSWER_NUMBER + ")",
    )
)
# Fallback: any standalone number. The look-behind keeps the "-" of "10-5"
# from being read as a sign and skips digits glued to letters (e.g. "x2").
_ANY_NUMBER = re.compile(r"(?<![\w.])" + _ANSWER_NUMBER + r"\b")


def extract_answer(response: str) -> str:
    """Extract the numerical answer from a model response.

    Answer phrases ("final answer", "the answer is", "result is", "we get",
    "equals") and LaTeX ``\\boxed{...}`` are searched in priority order. For
    the first pattern that matches, the LAST occurrence is returned, because
    earlier occurrences are typically intermediate steps. If no phrase
    matches, the last standalone number in the text is returned.

    Args:
        response: Free-form text of the model's solution.

    Returns:
        The number as it appears in the text (sign and decimals preserved),
        or "Unable to extract answer" when the text contains no number.
    """
    lowered = response.lower()

    for pattern in _ANSWER_PATTERNS:
        matches = pattern.findall(lowered)
        if matches:
            return matches[-1]

    # If no clear pattern, fall back to the last number in the text
    numbers = _ANY_NUMBER.findall(response)
    if numbers:
        return numbers[-1]

    return "Unable to extract answer"

def call_claude_api(problem: str, gemini_answer: str) -> str:
    """Ask Claude to verify a proposed answer to a math problem.

    The request is attempted up to three times, sleeping 1s and then 2s
    between attempts. Failures are reported through the return value rather
    than by raising.

    Args:
        problem: The problem statement.
        gemini_answer: The proposed answer to be checked.

    Returns:
        The text of the first text block in Claude's reply, or a string
        beginning with "Error" when the key is missing, the request fails,
        or the reply contains no text.
    """
    if not CLAUDE_API_KEY:
        # Fail fast instead of sending three unauthenticated requests.
        return "Error calling Claude API: CLAUDE_API_KEY is not set"

    prompt = f"""
    Please verify if this solution to the math problem is correct. 
    
    Problem:
    {problem}
    
    Proposed answer: {gemini_answer}
    
    Is this answer correct? Please solve the problem step by step and confirm whether the answer is right or wrong.
    If it's wrong, provide the correct answer.
    """

    headers = {
        "Content-Type": "application/json",
        "x-api-key": CLAUDE_API_KEY,
        "anthropic-version": "2023-06-01"
    }

    data = {
        "model": "claude-3-haiku-20240307",
        "max_tokens": 1000,
        "messages": [
            {"role": "user", "content": prompt}
        ]
    }

    # Retries with exponential backoff
    max_retries = 3
    retry_delay = 1
    # Without a timeout, requests waits indefinitely on a stalled connection.
    timeout_seconds = 60

    for attempt in range(max_retries):
        try:
            response = requests.post(
                CLAUDE_URL, headers=headers, json=data, timeout=timeout_seconds
            )
            response.raise_for_status()
            response_json = response.json()

            # Return the first block that actually carries text instead of
            # assuming the first block has a "text" key.
            for block in response_json.get("content") or []:
                if isinstance(block, dict) and "text" in block:
                    return block["text"]

            return "Error: Unable to extract answer from Claude response"

        # ValueError covers a non-JSON body on requests versions whose
        # decode error is not a RequestException subclass.
        except (requests.exceptions.RequestException, ValueError) as e:
            if attempt == max_retries - 1:
                return f"Error calling Claude API: {str(e)}"

            time.sleep(retry_delay)
            retry_delay *= 2  # Exponential backoff

    return "Error: Maximum retries exceeded"

# "not correct", "isn't quite right", "cannot be accurate": a negated positive
# word (optionally with one word in between) is a negative verdict.
_NEGATED_POSITIVE = re.compile(
    r"(?:\b(?:can)?not|n't)\s+(?:\w+\s+)?(?:correct|right|accurate)\b"
)
# "no errors", "not wrong", "without mistakes": a negated negative word is a
# positive verdict.
_NEGATED_NEGATIVE = re.compile(
    r"(?:\bno|\bnot|n't|\bwithout)\s+(?:\w+\s+)?"
    r"(?:incorrect|wrong|inaccurate|errors?|mistakes?)\b"
)
# Whole-word indicators. Word boundaries stop "incorrect" from counting as
# "correct", "inaccurate" as "accurate", or "Bayes" as "yes".
_POSITIVE_INDICATORS = tuple(
    re.compile(pattern)
    for pattern in (
        r"\bcorrect(?:ly)?\b",
        # Geometry vocabulary ("right triangle", "right-hand side") is not a verdict.
        r"\bright\b(?![\s-]+(?:triangles?|angles?|angled|hand|side|circular))",
        r"\baccurate(?:ly)?\b",
        r"\byes\b",
        r"\bconfirmed\b",
        r"\bverification successful\b",
    )
)
_NEGATIVE_INDICATORS = tuple(
    re.compile(pattern)
    for pattern in (
        r"\bincorrect(?:ly)?\b",
        r"\bwrong(?:ly)?\b",
        r"\binaccurate(?:ly)?\b",
        r"\berrors?\b",
        r"\bmistakes?\b",
    )
)


def is_answer_correct(claude_response: str) -> bool:
    """Heuristically decide whether Claude confirmed the answer as correct.

    Counts how many distinct positive and negative indicator words occur in
    the response (case-insensitive, whole words only, each indicator counted
    at most once). Negated phrases are resolved first: "not correct" counts
    as one negative indicator and "no errors" as one positive indicator, and
    neither contributes its inner word to the opposite tally.

    This is keyword matching, not language understanding; it can misjudge
    responses that discuss both right and wrong answers.

    Args:
        claude_response: Text of the verification response.

    Returns:
        True when strictly more positive than negative indicators are found;
        False otherwise (including for empty text and ties).
    """
    text = claude_response.lower()

    # Resolve negations first and blank them out so their inner words are not
    # counted again below.
    negative_count = 1 if _NEGATED_POSITIVE.search(text) else 0
    text = _NEGATED_POSITIVE.sub(" ", text)
    positive_count = 1 if _NEGATED_NEGATIVE.search(text) else 0
    text = _NEGATED_NEGATIVE.sub(" ", text)

    positive_count += sum(1 for pattern in _POSITIVE_INDICATORS if pattern.search(text))
    negative_count += sum(1 for pattern in _NEGATIVE_INDICATORS if pattern.search(text))

    # If there are more positive than negative indicators, consider it correct
    return positive_count > negative_count

def _is_api_error(response: str) -> bool:
    """Return True if *response* is an error string returned by an API helper."""
    # call_gemini_api / call_claude_api signal failure with strings that start
    # with "Error:" or "Error calling ...", never by raising.
    return response.startswith(("Error:", "Error calling"))


def _build_summary(results: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Compute overall and per-problem-type accuracy for a list of result records."""
    correct_count = sum(1 for r in results if r["is_correct"])
    total_count = len(results)

    # Group in one pass; dict insertion order keeps the breakdown in the order
    # problem types first appear, so the output file is deterministic.
    per_type: Dict[Any, Dict[str, Any]] = {}
    for r in results:
        bucket = per_type.setdefault(r["problem_type"], {"total": 0, "correct": 0})
        bucket["total"] += 1
        if r["is_correct"]:
            bucket["correct"] += 1
    for bucket in per_type.values():
        bucket["accuracy"] = (bucket["correct"] / bucket["total"]) * 100

    return {
        "total_problems": total_count,
        "correct_solutions": correct_count,
        "accuracy_percentage": (correct_count / total_count) * 100 if total_count > 0 else 0,
        "problem_type_breakdown": per_type,
    }


def main(json_file_path: str):
    """Solve every problem with Gemini, verify with Claude, and write reports.

    Writes "math_problem_results.json" (rewritten after every problem so an
    interrupted run keeps its progress) and "math_problems_summary.json" to
    the current working directory, and prints progress to stdout.

    Args:
        json_file_path: Path of the JSON file with the problem records. Each
            record must provide "unique_id", "modified_problem",
            "original_problem", "original_answer" and "problem_type".
    """
    # Load problems
    problems = load_problems(json_file_path)

    # Prepare results
    results = []

    for i, problem_data in enumerate(problems):
        print(f"Processing problem {i+1}/{len(problems)}: {problem_data['unique_id']}")

        # Extract problem
        modified_problem = problem_data["modified_problem"]
        original_problem = problem_data["original_problem"]
        original_answer = problem_data["original_answer"]
        problem_type = problem_data["problem_type"]

        # Solve with Gemini
        gemini_response = call_gemini_api(modified_problem)

        if _is_api_error(gemini_response):
            # Do not mine an error message for digits (an HTTP status such as
            # "429" would otherwise be recorded and verified as the answer).
            gemini_answer = "Unable to extract answer"
            claude_response = "Skipped: no Gemini answer to verify"
            is_correct = False
        else:
            gemini_answer = extract_answer(gemini_response)

            # Verify with Claude; a failed verification call is never a pass.
            claude_response = call_claude_api(modified_problem, gemini_answer)
            is_correct = (
                not _is_api_error(claude_response)
                and is_answer_correct(claude_response)
            )

        # Store results
        results.append({
            "problem_id": problem_data["unique_id"],
            "problem_type": problem_type,
            "modified_problem": modified_problem,
            "original_problem": original_problem,
            "original_answer": original_answer,
            "gemini_answer": gemini_answer,
            "gemini_response": gemini_response,
            "claude_verification": claude_response,
            "is_correct": is_correct
        })

        # Save results after each problem (in case of interruption)
        with open("math_problem_results.json", "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2)

        # Avoid rate limiting
        time.sleep(1)

    # Generate and save summary
    summary = _build_summary(results)
    with open("math_problems_summary.json", "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2)

    accuracy = summary["accuracy_percentage"]
    correct_count = summary["correct_solutions"]
    total_count = summary["total_problems"]
    print(f"Processing complete. Accuracy: {accuracy:.2f}% ({correct_count}/{total_count})")


In [None]:
if __name__ == "__main__":
    import sys

    # Exactly one command-line argument is expected: the problems JSON file.
    if len(sys.argv) == 2:
        json_file_path = sys.argv[1]
        main(json_file_path)
    else:
        print("Usage: python solve_math_problems.py <json_file_path>")
        sys.exit(1)