In [1]:
!pip install langchain langchain-openai langchain-anthropic langchain-mistralai langgraph requests python-dotenv

# Import required libraries
import os
import requests
import json
from typing import Dict, List, Any, TypedDict
from langgraph.graph import StateGraph, START, END
from langchain.tools import tool
import time
!pip install langchain-google-genai python-dotenv





In [2]:

import getpass
import os
os.environ["GEMINI_API_KEY"] = getpass.getpass("Enter your Gemini API key: ")



Enter your Gemini API key: ··········


In [3]:
DEFECT_CATEGORIES = [
    "off_by_one", "incorrect_operator", "missing_condition", "wrong_variable",
    "incorrect_loop", "missing_statement", "wrong_order", "incorrect_comparison",
    "missing_initialization", "wrong_data_structure", "incorrect_return",
    "missing_edge_case", "wrong_algorithm", "other"
]

QUIXBUGS_PROGRAMS = [
    "breadth_first_search", "depth_first_search", "detect_cycle",
    "find_first_in_sorted", "find_in_sorted", "gcd", "get_factors",
    "hanoi", "is_valid_parenthesization", "kheapsort", "knapsack",
    "kth", "levenshtein", "lis", "longest_common_subsequence",
    "max_sublist_sum", "mergesort", "next_palindrome", "next_permutation",
    "pascal", "possible_change", "powerset", "quicksort", "reverse_linked_list",
    "rpn_eval", "shortest_path_length", "shortest_path_lengths", "sieve",
    "sqrt", "subsequences", "substring", "surrogate_count", "to_base",
    "topological_ordering", "wrap", "bitcount", "bucketsort", "flatten",
    "shunting_yard", "node"
]

print(f" Configuration loaded with {len(DEFECT_CATEGORIES)} defect categories")
print(f" {len(QUIXBUGS_PROGRAMS)} QuixBugs programs available for processing")


 Configuration loaded with 14 defect categories
 40 QuixBugs programs available for processing


In [4]:
class AgentState(TypedDict):
    program_name: str
    buggy_code: str
    defect_category: str
    code_understanding: str
    fixed_code: str
    fix_explanation: str
    messages: List[Dict[str, Any]]
    current_step: str

print("State management class defined")


State management class defined


In [5]:
from langchain_google_genai import ChatGoogleGenerativeAI
import os

if "GOOGLE_APPLICATION_CREDENTIALS" in os.environ:
    del os.environ["GOOGLE_APPLICATION_CREDENTIALS"]

os.environ["GEMINI_API_KEY"] = "API_KEY"

gemini_llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    google_api_key=os.getenv("GEMINI_API_KEY"),
    temperature=0.2
)


try:
    response = gemini_llm.invoke("Hello, test message")
    print("Success! Connection working.")
except Exception as e:
    print(f"Still getting error: {e}")


Success! Connection working.


In [6]:
import time

def invoke_with_retry(llm, prompt, max_retries=5, base_delay=5):
    for attempt in range(max_retries):
        try:
            response = llm.invoke(prompt)
            return response
        except Exception as e:
            if "429" in str(e) or "Too Many Requests" in str(e):
                delay = base_delay * (2 ** attempt)
                print(f"429 error encountered. Waiting {delay} seconds before retrying...")
                time.sleep(delay)
            else:
                raise
    raise Exception("Max retries exceeded due to repeated 429 errors.")


In [7]:
@tool
def fetch_buggy_code(program_name: str) -> Dict[str, Any]:
    """Fetch buggy code and test cases from the QuixBugs repository"""
    base_url = "https://raw.githubusercontent.com/RumbleJack56/Code-Refactoring-QuixBugs/master"

    try:
        code_url = f"{base_url}/python_programs/{program_name}.py"
        code_response = requests.get(code_url)

        if code_response.status_code != 200:
            return {"error": f"Could not fetch code for {program_name}", "success": False}

        buggy_code = code_response.text

        test_url = f"{base_url}/json_testcases/{program_name}.json"
        test_response = requests.get(test_url)

        test_cases = []
        if test_response.status_code == 200:
            test_lines = test_response.text.strip().splitlines()
            test_cases = [json.loads(line) for line in test_lines if line.strip()]


        return {
            "buggy_code": buggy_code,
            "test_cases": test_cases,
            "success": True
        }

    except Exception as e:
        return {"error": str(e), "success": False}

print(" GitHub data fetcher tools defined")



 GitHub data fetcher tools defined


In [8]:
def agent1_fetch_and_categorize(state: AgentState) -> AgentState:
    """Agent 1: Fetch buggy code and categorize defect"""

    print(f"Agent 1: Fetching and categorizing {state['program_name']}...")

    fetch_result = fetch_buggy_code.invoke(state["program_name"])

    if not fetch_result.get("success", False):
        raise Exception(fetch_result.get("error", "Unknown error during fetch"))

    state["buggy_code"] = fetch_result["buggy_code"]
    state["test_cases"] = fetch_result["test_cases"]

    categorization_prompt = f"""You are a defect clssifying agent. Remembering the 14 defects from {DEFECT_CATEGORIES},
     I need you to very carefully assess the code and return the defect category in the following:


**Code:**
{state['buggy_code']}
"""

    response = invoke_with_retry(gemini_llm, categorization_prompt)

    defect_category = None
    for category in DEFECT_CATEGORIES:
        if category in response.content.lower():
            defect_category = category
            break

    if not defect_category:
        defect_category = "other"

    state["defect_category"] = defect_category
    return state


In [9]:
def agent2_understand_code(state: AgentState) -> AgentState:
    print(f"Agent 2: Understanding code structure...")

    prompt = f"""You are an assisstant used only for understanding the context of the given code, what it is meant to do by using the defect category
    analyzed earlier. You are meant to understand the purpose of the code and the structure. Make use of comments in the given code:
**Program Name:** {state['program_name']}
**Defect Category:** {state['defect_category']}

**Buggy Code:**
{state['buggy_code']}
"""
    try:
        response = invoke_with_retry(gemini_llm, prompt)

        state['code_understanding'] = response.content
        state['understanding_prompt'] = prompt
    except Exception as e:
        state['code_understanding'] = f"Error in understanding: {str(e)}"

    return state


In [10]:
def agent3_fix_code(state: AgentState) -> AgentState:
    """Agent 3: Fix code and provide explanation using Mistral"""
    print(f"Agent 3: Fixing the code...")

    prompt = f"""You are a code-fixing assisstant. I will give you examples of single-line fixes for the reocurring defect categories that I want you to understand to help in fixing the program. But do not solely rely on these as there might be exceptions and the example fixes I will be giving are generalizations.
    1. Defect: Off-by-one
       Example: arr = [10, 20, 30]
                for i in range(len(arr) + 1):
                print(arr[i])
                fix: to iterate through the entire list the code will be fixed by replacing 'len(arr)+1' with len(arr)
    2.Defect: Incorrect Variable
      Example:def bucketsort(arr, k):
                   counts = [0] * k
                   for x in arr:
                      counts[x] += 1
                   sorted_arr = []
                   for i, count in enumerate(arr):
                      sorted_arr.extend([i] * count)

                   return sorted_arr
      Fix:counts tells how many times each number appears. so replaace 'enumerate(arr)' with 'enumerate(counts)'
    3.Defect: Missing Function call
      Example:
      def max_sublist_sum(arr):
            max_ending_here = 0
            max_so_far = 0
            for x in arr:
                  max_ending_here = max_ending_here + x
                  max_so_far = max(max_so_far, max_ending_here)
            return max_so_far
            Fix: max_ending_here = max(0, max_ending_here + x). this function call is missing
     4.Defect: Variable swap
       Example: def gcd(a, b):
                  if b == 0:
                     return a
                  else:
                     return gcd(a % b, b)
              Fix: Here, variables a & b are swapped in the final return statement. The correct order is: return gcd(b, a % b)
      5.Defect: Missing edge case
        Example: def detect_cycle(node):
                   hare = tortoise = node
                   while True:
                   if hare.successor is None:      (this should be: if hare is None or hare.successor is None)
                         return False
                   tortoise = tortoise.successor
                   hare = hare.successor.successor
                   if hare is tortoise:
                         return True
              Fix: You're trying to access .successor.successor without checking if it exists. This can crash if the list ends (i.e., there's no cycle).


**Program Name:** {state['program_name']}
**Defect Category:** {state['defect_category']}
**Code Analysis:** {state['code_understanding']}

**Buggy Code:**
{state['buggy_code']}

Please provide:
- The fixed code
- A brief explanation of the fix
"""

    try:
        response = invoke_with_retry(gemini_llm, prompt)

        parts = response.content.split("Explanation:")
        state['fixed_code'] = parts[0].strip()
        state['fix_explanation'] = parts[1].strip() if len(parts) > 1 else "No explanation provided"
        state['fix_prompt'] = prompt
    except Exception as e:
        state['fixed_code'] = f"Error in fixing: {str(e)}"

    return state


In [11]:
def create_code_correction_workflow():
    """Create the multi-agent workflow using LangGraph"""

    workflow = StateGraph(AgentState)

    workflow.add_node("fetch_categorize", agent1_fetch_and_categorize)
    workflow.add_node("understand_code", agent2_understand_code)
    workflow.add_node("fix_code", agent3_fix_code)

    workflow.add_edge(START, "fetch_categorize")
    workflow.add_edge("fetch_categorize", "understand_code")
    workflow.add_edge("understand_code", "fix_code")
    workflow.add_edge("fix_code", END)

    return workflow.compile()

code_correction_graph = create_code_correction_workflow()
print("Multi-agent workflow created successfully")


Multi-agent workflow created successfully


In [12]:
def process_buggy_program(program_name: str) -> Dict[str, Any]:
    """Process a single buggy program through the multi-agent pipeline"""

    print(f"\n Starting processing for: {program_name}")

    initial_state = AgentState(
        program_name=program_name,
        buggy_code="",
        defect_category="",
        code_understanding="",
        fixed_code="",
        fix_explanation="",
        messages=[],
        current_step="initialized"
    )

    try:
        final_state = code_correction_graph.invoke(initial_state)

        print(f"Processing completed for {program_name}")

        return {
            "success": True,
            "program_name": program_name,
            "defect_category": final_state["defect_category"],
            "fixed_code": final_state["fixed_code"],
            "fix_explanation": final_state["fix_explanation"],
            "workflow_messages": final_state["messages"]
        }

    except Exception as e:
        print(f"Processing failed for {program_name}: {str(e)}")
        return {
            "success": False,
            "program_name": program_name,
            "error": str(e)
        }



print("Execution functions defined")


Execution functions defined


In [13]:
import os

def save_fixed_code(program_name, fixed_code):
    os.makedirs('fixed_programs', exist_ok=True)

    code = fixed_code.strip()
    if code.startswith("```"):
        code = code.split('```')[1]
        if code.startswith('python'):
            code = code[len('python'):].lstrip('\n')

    file_path = os.path.join('fixed_programs', f'{program_name}.py')
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(code)
    print(f"Saved fixed code to: {file_path}")


In [14]:
!git clone https://github.com/RumbleJack56/Code-Refactoring-QuixBugs.git
%cd Code-Refactoring-QuixBugs


Cloning into 'Code-Refactoring-QuixBugs'...
remote: Enumerating objects: 1278, done.[K
remote: Counting objects: 100% (480/480), done.[K
remote: Compressing objects: 100% (239/239), done.[K
remote: Total 1278 (delta 324), reused 280 (delta 240), pack-reused 798 (from 1)[K
Receiving objects: 100% (1278/1278), 1.17 MiB | 7.69 MiB/s, done.
Resolving deltas: 100% (719/719), done.
/content/Code-Refactoring-QuixBugs


In [17]:
test_program = "hanoi"
result = process_buggy_program(test_program)

print(f"Program: {result['program_name']}")
print(f"Success: {result['success']}")

if result['success']:
    print(f"Defect Category: {result['defect_category']}")
    print(f"\nFixed Code:\n{result['fixed_code']}")
    save_fixed_code(result['program_name'], result['fixed_code'])
else:
    print(f"Error: {result.get('error', 'Unknown error')}")



 Starting processing for: hanoi
Agent 1: Fetching and categorizing hanoi...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...
Processing completed for hanoi
Program: hanoi
Success: True
Defect Category: incorrect_return

Fixed Code:
```python
def hanoi(height, start=1, end=3):
    steps = []
    if height > 0:
        helper = ({1, 2, 3} - {start} - {end}).pop()
        steps.extend(hanoi(height - 1, start, helper))
        steps.append((start, end))
        steps.extend(hanoi(height - 1, helper, end))

    return steps


"""
Towers of Hanoi
hanoi
 

An algorithm for solving the Towers of Hanoi puzzle.  Three pegs exist, with a stack of differently-sized
disks beginning on one peg, ordered from smallest on top to largest on bottom.  The goal is to move the
entire stack to a different peg via a series of steps.  Each step must move a single disk from one peg to
another. At no point may a disk be placed on top of another smaller disk.

Input:
    height: The height of

In [19]:
import json
from tqdm import tqdm
import subprocess
from typing import List, Dict, Any

def process_all_programs() -> List[Dict[str, Any]]:
    """Process all QuixBugs programs through the pipeline"""
    results = []


    for program in tqdm(QUIXBUGS_PROGRAMS, desc="Processing Programs"):
        try:
            result = process_buggy_program(program)

            if result.get("success") and "fixed_code" in result:
                save_fixed_code(program, result["fixed_code"])
                #run_tester_for_program(program)
            success = result.get("success", False)
            print(f"[{program}] - {'SUCCESS' if success else 'FAILURE'}")

            results.append({
                "program_name": program,
                "success": result.get("success", False),
            })

        except Exception as e:
            results.append({
                "program_name": program,
                "success": False,
                "error": str(e)
            })

    return results


def calculate_metrics(results: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Calculate basic success/failure metrics"""
    total_programs = len(results)
    successful_fixes = sum(1 for r in results if r.get("success"))
    failed_programs = total_programs - successful_fixes
    success_rate = (successful_fixes / total_programs) * 100 if total_programs > 0 else 0

    return {
        "total_programs": total_programs,
        "successful_fixes": successful_fixes,
        "failed_programs": failed_programs,
        "success_rate_percent": round(success_rate, 2),
    }


def main():
    results = process_all_programs()

    metrics = calculate_metrics(results)

    with open("quixbugs_results.json", "w") as f:
        json.dump({
            "metrics": metrics,
            "detailed_results": results
        }, f, indent=2)

    print(f"Programs Processed: {metrics['total_programs']}")
    print(f"Successful Fixes: {metrics['successful_fixes']} ({metrics['success_rate_percent']}%)")
    print(f"Failed Fixes: {metrics['failed_programs']}")


if __name__ == "__main__":
    main()


Processing Programs:   0%|          | 0/40 [00:00<?, ?it/s]


 Starting processing for: breadth_first_search
Agent 1: Fetching and categorizing breadth_first_search...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:   2%|▎         | 1/40 [00:08<05:36,  8.62s/it]

Processing completed for breadth_first_search
Saved fixed code to: fixed_programs/breadth_first_search.py
[breadth_first_search] - SUCCESS

 Starting processing for: depth_first_search
Agent 1: Fetching and categorizing depth_first_search...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:   5%|▌         | 2/40 [00:15<04:37,  7.31s/it]

Processing completed for depth_first_search
Saved fixed code to: fixed_programs/depth_first_search.py
[depth_first_search] - SUCCESS

 Starting processing for: detect_cycle
Agent 1: Fetching and categorizing detect_cycle...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:   8%|▊         | 3/40 [00:20<03:59,  6.47s/it]

Processing completed for detect_cycle
Saved fixed code to: fixed_programs/detect_cycle.py
[detect_cycle] - SUCCESS

 Starting processing for: find_first_in_sorted
Agent 1: Fetching and categorizing find_first_in_sorted...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  10%|█         | 4/40 [00:30<04:37,  7.72s/it]

Processing completed for find_first_in_sorted
Saved fixed code to: fixed_programs/find_first_in_sorted.py
[find_first_in_sorted] - SUCCESS

 Starting processing for: find_in_sorted
Agent 1: Fetching and categorizing find_in_sorted...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  12%|█▎        | 5/40 [00:34<03:52,  6.65s/it]

Processing completed for find_in_sorted
Saved fixed code to: fixed_programs/find_in_sorted.py
[find_in_sorted] - SUCCESS

 Starting processing for: gcd
Agent 1: Fetching and categorizing gcd...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  15%|█▌        | 6/40 [00:39<03:27,  6.10s/it]

Processing completed for gcd
Saved fixed code to: fixed_programs/gcd.py
[gcd] - SUCCESS

 Starting processing for: get_factors
Agent 1: Fetching and categorizing get_factors...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  18%|█▊        | 7/40 [00:45<03:19,  6.04s/it]

Processing completed for get_factors
Saved fixed code to: fixed_programs/get_factors.py
[get_factors] - SUCCESS

 Starting processing for: hanoi
Agent 1: Fetching and categorizing hanoi...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 23
}
].


Agent 2: Understanding code structure...
429 error encountered. Waiting 5 seconds before retrying...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 16
}
].


429 error encountered. Waiting 10 seconds before retrying...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 4
}
].


429 error encountered. Waiting 20 seconds before retrying...
Agent 3: Fixing the code...


Processing Programs:  20%|██        | 8/40 [01:34<10:31, 19.74s/it]

Processing completed for hanoi
Saved fixed code to: fixed_programs/hanoi.py
[hanoi] - SUCCESS

 Starting processing for: is_valid_parenthesization
Agent 1: Fetching and categorizing is_valid_parenthesization...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  22%|██▎       | 9/40 [01:39<07:41, 14.88s/it]

Processing completed for is_valid_parenthesization
Saved fixed code to: fixed_programs/is_valid_parenthesization.py
[is_valid_parenthesization] - SUCCESS

 Starting processing for: kheapsort
Agent 1: Fetching and categorizing kheapsort...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  25%|██▌       | 10/40 [01:47<06:24, 12.83s/it]

Processing completed for kheapsort
Saved fixed code to: fixed_programs/kheapsort.py
[kheapsort] - SUCCESS

 Starting processing for: knapsack
Agent 1: Fetching and categorizing knapsack...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  28%|██▊       | 11/40 [01:56<05:36, 11.62s/it]

Processing completed for knapsack
Saved fixed code to: fixed_programs/knapsack.py
[knapsack] - SUCCESS

 Starting processing for: kth
Agent 1: Fetching and categorizing kth...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  30%|███       | 12/40 [02:03<04:50, 10.36s/it]

Processing completed for kth
Saved fixed code to: fixed_programs/kth.py
[kth] - SUCCESS

 Starting processing for: levenshtein
Agent 1: Fetching and categorizing levenshtein...
Agent 2: Understanding code structure...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 1
}
].


Agent 3: Fixing the code...
429 error encountered. Waiting 5 seconds before retrying...


Processing Programs:  32%|███▎      | 13/40 [02:18<05:16, 11.71s/it]

Processing completed for levenshtein
Saved fixed code to: fixed_programs/levenshtein.py
[levenshtein] - SUCCESS

 Starting processing for: lis
Agent 1: Fetching and categorizing lis...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  35%|███▌      | 14/40 [02:26<04:32, 10.48s/it]

Processing completed for lis
Saved fixed code to: fixed_programs/lis.py
[lis] - SUCCESS

 Starting processing for: longest_common_subsequence
Agent 1: Fetching and categorizing longest_common_subsequence...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  38%|███▊      | 15/40 [02:31<03:45,  9.01s/it]

Processing completed for longest_common_subsequence
Saved fixed code to: fixed_programs/longest_common_subsequence.py
[longest_common_subsequence] - SUCCESS

 Starting processing for: max_sublist_sum
Agent 1: Fetching and categorizing max_sublist_sum...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  40%|████      | 16/40 [02:38<03:16,  8.20s/it]

Processing completed for max_sublist_sum
Saved fixed code to: fixed_programs/max_sublist_sum.py
[max_sublist_sum] - SUCCESS

 Starting processing for: mergesort
Agent 1: Fetching and categorizing mergesort...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  42%|████▎     | 17/40 [02:46<03:07,  8.14s/it]

Processing completed for mergesort
Saved fixed code to: fixed_programs/mergesort.py
[mergesort] - SUCCESS

 Starting processing for: next_palindrome
Agent 1: Fetching and categorizing next_palindrome...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  45%|████▌     | 18/40 [02:52<02:47,  7.60s/it]

Processing completed for next_palindrome
Saved fixed code to: fixed_programs/next_palindrome.py
[next_palindrome] - SUCCESS

 Starting processing for: next_permutation
Agent 1: Fetching and categorizing next_permutation...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 17
}
].


Agent 2: Understanding code structure...
429 error encountered. Waiting 5 seconds before retrying...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 10
}
].


429 error encountered. Waiting 10 seconds before retrying...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 58
}
].


Agent 3: Fixing the code...


Processing Programs:  48%|████▊     | 19/40 [03:21<04:52, 13.93s/it]

Processing completed for next_permutation
Saved fixed code to: fixed_programs/next_permutation.py
[next_permutation] - SUCCESS

 Starting processing for: pascal
Agent 1: Fetching and categorizing pascal...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  50%|█████     | 20/40 [03:29<04:03, 12.19s/it]

Processing completed for pascal
Saved fixed code to: fixed_programs/pascal.py
[pascal] - SUCCESS

 Starting processing for: possible_change
Agent 1: Fetching and categorizing possible_change...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  52%|█████▎    | 21/40 [03:35<03:16, 10.34s/it]

Processing completed for possible_change
Saved fixed code to: fixed_programs/possible_change.py
[possible_change] - SUCCESS

 Starting processing for: powerset
Agent 1: Fetching and categorizing powerset...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  55%|█████▌    | 22/40 [03:41<02:43,  9.06s/it]

Processing completed for powerset
Saved fixed code to: fixed_programs/powerset.py
[powerset] - SUCCESS

 Starting processing for: quicksort
Agent 1: Fetching and categorizing quicksort...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  57%|█████▊    | 23/40 [03:46<02:15,  7.96s/it]

Processing completed for quicksort
Saved fixed code to: fixed_programs/quicksort.py
[quicksort] - SUCCESS

 Starting processing for: reverse_linked_list
Agent 1: Fetching and categorizing reverse_linked_list...
Agent 2: Understanding code structure...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 22
}
].


Agent 3: Fixing the code...
429 error encountered. Waiting 5 seconds before retrying...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 15
}
].


429 error encountered. Waiting 10 seconds before retrying...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 3
}
].


429 error encountered. Waiting 20 seconds before retrying...


Processing Programs:  60%|██████    | 24/40 [04:31<05:05, 19.12s/it]

Processing completed for reverse_linked_list
Saved fixed code to: fixed_programs/reverse_linked_list.py
[reverse_linked_list] - SUCCESS

 Starting processing for: rpn_eval
Agent 1: Fetching and categorizing rpn_eval...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  62%|██████▎   | 25/40 [04:39<03:53, 15.58s/it]

Processing completed for rpn_eval
Saved fixed code to: fixed_programs/rpn_eval.py
[rpn_eval] - SUCCESS

 Starting processing for: shortest_path_length
Agent 1: Fetching and categorizing shortest_path_length...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  65%|██████▌   | 26/40 [04:49<03:16, 14.07s/it]

Processing completed for shortest_path_length
Saved fixed code to: fixed_programs/shortest_path_length.py
[shortest_path_length] - SUCCESS

 Starting processing for: shortest_path_lengths
Agent 1: Fetching and categorizing shortest_path_lengths...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  68%|██████▊   | 27/40 [04:59<02:45, 12.70s/it]

Processing completed for shortest_path_lengths
Saved fixed code to: fixed_programs/shortest_path_lengths.py
[shortest_path_lengths] - SUCCESS

 Starting processing for: sieve
Agent 1: Fetching and categorizing sieve...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  70%|███████   | 28/40 [05:07<02:16, 11.36s/it]

Processing completed for sieve
Saved fixed code to: fixed_programs/sieve.py
[sieve] - SUCCESS

 Starting processing for: sqrt
Agent 1: Fetching and categorizing sqrt...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  72%|███████▎  | 29/40 [05:14<01:49,  9.95s/it]

Processing completed for sqrt
Saved fixed code to: fixed_programs/sqrt.py
[sqrt] - SUCCESS

 Starting processing for: subsequences
Agent 1: Fetching and categorizing subsequences...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 56
}
].


Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  75%|███████▌  | 30/40 [05:25<01:45, 10.51s/it]

Processing completed for subsequences
Saved fixed code to: fixed_programs/subsequences.py
[subsequences] - SUCCESS

 Starting processing for: substring
Agent 1: Fetching and categorizing substring...


Processing Programs:  80%|████████  | 32/40 [05:26<00:41,  5.23s/it]

Processing failed for substring: Could not fetch code for substring
[substring] - FAILURE

 Starting processing for: surrogate_count
Agent 1: Fetching and categorizing surrogate_count...
Processing failed for surrogate_count: Could not fetch code for surrogate_count
[surrogate_count] - FAILURE

 Starting processing for: to_base
Agent 1: Fetching and categorizing to_base...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  82%|████████▎ | 33/40 [05:32<00:39,  5.61s/it]

Processing completed for to_base
Saved fixed code to: fixed_programs/to_base.py
[to_base] - SUCCESS

 Starting processing for: topological_ordering
Agent 1: Fetching and categorizing topological_ordering...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  85%|████████▌ | 34/40 [05:41<00:38,  6.45s/it]

Processing completed for topological_ordering
Saved fixed code to: fixed_programs/topological_ordering.py
[topological_ordering] - SUCCESS

 Starting processing for: wrap
Agent 1: Fetching and categorizing wrap...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  88%|████████▊ | 35/40 [05:47<00:31,  6.37s/it]

Processing completed for wrap
Saved fixed code to: fixed_programs/wrap.py
[wrap] - SUCCESS

 Starting processing for: bitcount
Agent 1: Fetching and categorizing bitcount...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  90%|█████████ | 36/40 [05:52<00:24,  6.07s/it]

Processing completed for bitcount
Saved fixed code to: fixed_programs/bitcount.py
[bitcount] - SUCCESS

 Starting processing for: bucketsort
Agent 1: Fetching and categorizing bucketsort...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 18
}
].


429 error encountered. Waiting 5 seconds before retrying...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 11
}
].


429 error encountered. Waiting 10 seconds before retrying...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 59
}
].


429 error encountered. Waiting 20 seconds before retrying...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  92%|█████████▎| 37/40 [06:40<00:55, 18.43s/it]

Processing completed for bucketsort
Saved fixed code to: fixed_programs/bucketsort.py
[bucketsort] - SUCCESS

 Starting processing for: flatten
Agent 1: Fetching and categorizing flatten...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  95%|█████████▌| 38/40 [06:45<00:29, 14.56s/it]

Processing completed for flatten
Saved fixed code to: fixed_programs/flatten.py
[flatten] - SUCCESS

 Starting processing for: shunting_yard
Agent 1: Fetching and categorizing shunting_yard...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs:  98%|█████████▊| 39/40 [07:31<00:24, 24.09s/it]

Processing completed for shunting_yard
Saved fixed code to: fixed_programs/shunting_yard.py
[shunting_yard] - SUCCESS

 Starting processing for: node
Agent 1: Fetching and categorizing node...
Agent 2: Understanding code structure...
Agent 3: Fixing the code...


Processing Programs: 100%|██████████| 40/40 [07:38<00:00, 11.45s/it]

Processing completed for node
Saved fixed code to: fixed_programs/node.py
[node] - SUCCESS
Programs Processed: 40
Successful Fixes: 38 (95.0%)
Failed Fixes: 2





In [20]:
import os
import subprocess
import sys

FIXED_DIR = "fixed_programs"  # or whatever your fixed code directory is

def run_tester_on_all_fixed():
    # List all fixed program files
    for fname in os.listdir(FIXED_DIR):
        if fname.endswith(".py") and not fname.startswith('.'):
            program_name = fname[:-3]  # Remove '.py'
            print(f"Running tester.py on {program_name}...")
            try:
                # Call tester.py with the program name as argument
                result = subprocess.run(
                    [sys.executable, "tester.py", program_name],
                    capture_output=True, text=True, check=True
                )
                print(result.stdout)
            except subprocess.CalledProcessError as e:
                print(f"Tester failed for {program_name}:\n{e.stderr}")

# Call this function after all fixing is complete
run_tester_on_all_fixed()


Running tester.py on sieve...
[[1], []]
Correct Python: []
Bad Python: []
Fixed Python: []
[[2], [2]]
Correct Python: []
Bad Python: []
Fixed Python: [2]
[[4], [2, 3]]
Correct Python: []
Bad Python: []
Fixed Python: [2, 3]
[[7], [2, 3, 5, 7]]
Correct Python: []
Bad Python: []
Fixed Python: [2, 3, 5, 7]
[[20], [2, 3, 5, 7, 11, 13, 17, 19]]
Correct Python: []
Bad Python: []
Fixed Python: [2, 3, 5, 7, 11, 13, 17, 19]
[[50], [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]]
Correct Python: []
Bad Python: []
Fixed Python: [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]

Running tester.py on shunting_yard...
[[[]], []]
Correct Python: []
Bad Python: []
Fixed Python: []
[[[30]], [30]]
Correct Python: [30]
Bad Python: [30]
Fixed Python: [30]
[[[10, '-', 5, '-', 2]], [10, 5, '-', 2, '-']]
Correct Python: [10, 5, 2]
Bad Python: [10, 5, 2]
Fixed Python: [10, 5, '-', 2, '-']
[[[34, '-', 12, '/', 5]], [34, 12, 5, '/', '-']]
Correct Python: [34, 12, 5]
Bad Python: [34, 12, 5]
Fixed 