In [1]:
def add_line_numbers(input_path, output_path=None):
    with open(input_path, 'r') as file:
        lines = file.readlines()

    numbered_lines = [f"{i+1:4}: {line}" for i, line in enumerate(lines)]

   
    with open(output_path, 'w') as file:
        file.writelines(numbered_lines)
    

add_line_numbers("commit.py", "numbered_output.py")


In [3]:
from typing import Annotated, List, Dict
from langchain_core.tools import tool

@tool
def insert_code_at_line(
    file_path: Annotated[str, "Path to the Python file to insert code into."],
    input_dict: Annotated[
        Dict[str, Annotated[object, "Contains 'start' (int) and 'code' (list of strings)."]],
        "Dictionary with 'start' (1-based line index) and 'code' (lines to insert)."
    ]
) -> str:
    """
    Inserts a list of code lines into a file at a specified line number.

    The inserted block is automatically wrapped with:
        - '# Added by AI' at the beginning
        - '# !!!' at the end

    Args:
        file_path (str): Path to the file being modified.
        input_dict (dict): A dictionary with:
            - 'start' (int): 1-based line number where code is inserted.
            - 'code' (List[str]): Code lines to insert.

    Returns:
        str: Status message indicating where the code was inserted.
    """
    try:
        start = input_dict["start"]
        new_code = input_dict["code"]
        if not isinstance(start, int) or not isinstance(new_code, list):
            return "Invalid input_dict format. 'start' should be int, 'code' should be a list of strings."
    except (KeyError, TypeError):
        return "Error: input_dict must contain keys 'start' (int) and 'code' (list of strings)."

    new_code = ["# Added by AI"] + new_code + ["# !!!"]

    try:
        with open(file_path, 'r') as f:
            lines = f.readlines()
    except FileNotFoundError:
        return f"Error: File not found - {file_path}"
    except Exception as e:
        return f"Error reading file: {e}"

    if start > len(lines):
        lines.extend(['\n'] * (start - len(lines)))

    for i, line in enumerate(new_code):
        lines.insert(start - 1 + i, line + '\n')

    try:
        with open(file_path, 'w') as f:
            f.writelines(lines)
    except Exception as e:
        return f"Error writing to file: {e}"

    return f"Inserted code at line {start} in {file_path}."


In [4]:
from typing import Annotated


@tool
def delete_lines(
    file_path: Annotated[str, "Path to the file from which lines will be deleted."],
    start_line: Annotated[int, "The starting line number (1-based, inclusive)."],
    end_line: Annotated[int, "The ending line number (1-based, exclusive)."]
) -> str:
    """
    Deletes a range of lines from a file, specified by line numbers.

    The function modifies the file in-place, removing lines from start_line (inclusive)
    up to end_line (exclusive). Line numbers are 1-based.

    Args:
        file_path (str): Path to the file to edit.
        start_line (int): 1-based index of the first line to delete.
        end_line (int): 1-based index of the line after the last one to delete.

    Returns:
        str: A message indicating the outcome of the operation.
    """
    try:
        with open(file_path, 'r') as f:
            lines = f.readlines()
    except FileNotFoundError:
        return f"Error: File not found - {file_path}"
    except Exception as e:
        return f"Error reading file: {e}"

    start_idx = max(start_line - 1, 0)
    end_idx = min(end_line, len(lines))

    if start_idx >= end_idx:
        return "Invalid range: no lines deleted."

    del lines[start_idx:end_idx]

    try:
        with open(file_path, 'w') as f:
            f.writelines(lines)
    except Exception as e:
        return f"Error writing to file: {e}"

    return f"Deleted lines {start_line} to {end_line - 1} from {file_path}."


In [5]:
import re

@tool
def generate_code_skeleton(
    file_path: Annotated[str, "Path to the Python file whose skeleton is to be extracted."]
) -> str:
    """
    Extracts a readable skeleton from a Python source file by identifying:
      - Top-level imports
      - Class definitions
      - Function definitions (including async functions)
      - AI-injected blocks demarcated by '# Added by AI' and ending at '# !!!'

    Returns a string containing these elements, each prefixed with their original line numbers.

    Args:
        file_path (str): Path to the Python source file.

    Returns:
        str: A multi-line string showing the skeleton of the file with line numbers.
    """
    skeleton_lines = []
    inside_ai_block = False

    try:
        with open(file_path, 'r') as f:
            lines = f.readlines()
    except FileNotFoundError:
        return f"Error: File not found - {file_path}"
    except Exception as e:
        return f"Error while reading file: {e}"

    for idx, line in enumerate(lines, start=1):
        stripped = line.strip()

        # Basic structure: imports, class, def
        if (
            stripped.startswith("import") or
            stripped.startswith("from") or
            stripped.startswith("class ") or
            re.match(r"(async\s+)?def\s+\w+\s*\(", stripped)
        ):
            skeleton_lines.append(f"{idx:4}: {line.rstrip()}")

        # Detect AI-generated blocks
        if "# Added by AI" in stripped:
            inside_ai_block = True
            skeleton_lines.append(f"{idx:4}: {line.rstrip()}")
            continue

        if inside_ai_block:
            skeleton_lines.append(f"{idx:4}: {line.rstrip()}")
            if "# !!!" in stripped:
                inside_ai_block = False

    return "\n".join(skeleton_lines) if skeleton_lines else "No skeleton elements found."


In [16]:
print(generate_code_skeleton("commit.py"))

  10: # Added by AI  11: def foo():  11: def foo():  12:     print('Hello')  13:     return  14: # !!!  16: import git  17: import sys  18: import networkx as nx  19: import pickle  20: import re  21: import os  22: from utils import utils  23: from utils.embed_skeleton import get_skeleton  24: import ast  25: import faiss  26: from dotenv import load_dotenv  27: from langchain.embeddings import OpenAIEmbeddings  28: from langchain_community.docstore.in_memory import InMemoryDocstore  29: from langchain_community.vectorstores import FAISS  30: from langchain_core.documents import Document  33: def neighbors_by_relation(G, node, relation_type):  45: def find_function_or_class(path, name, graph):  52: def filter_import_lines(code_lines):  58:     import_pattern = re.compile(r'^\s*(import\s+\w|from\s+\w+(\.\w+)*\s+import\s+)')  61: def extract_imports(base, file_path, graph):  67:     imports = set()  82:                         imports.add(base + node.module.replace(".", "/")+ "/" + alia

In [6]:
@tool
def view_code_block(
    file_path: Annotated[str, "Path to the file to view a block from."],
    start_line: Annotated[int, "1-based starting line number (inclusive)."],
    end_line: Annotated[int, "1-based ending line number (inclusive)."]
) -> str:
    """
    Returns a specific block of lines from a file, between start_line and end_line (both inclusive).

    This is useful for inspecting a portion of code without loading the entire file content,
    especially within code manipulation agents.

    Args:
        file_path (str): The path to the file.
        start_line (int): 1-based line number to start viewing from (inclusive).
        end_line (int): 1-based line number to end viewing at (inclusive).

    Returns:
        str: The extracted code block as a string, or an error message if inputs are invalid.
    """
    if start_line > end_line:
        return "Error: start_line must be less than or equal to end_line."

    try:
        with open(file_path, 'r') as f:
            lines = f.readlines()
    except FileNotFoundError:
        return f"Error: File not found - {file_path}"
    except Exception as e:
        return f"Error reading file: {e}"

    if start_line < 1 or end_line > len(lines):
        return f"Error: Line numbers out of bounds (file has {len(lines)} lines)."

    block = lines[start_line - 1:end_line]
    return ''.join(block)


In [None]:

from typing_extensions import TypedDict
from typing import Any, List, Dict
class AgentState(TypedDict):
    """
    Represents the state of an agent, including its name and the current file being edited.
    """
    current_file: str
    candidates: List[Dict[str, Any]]
    instructions: str
    

In [None]:
from typing import Annotated, Dict, Any
from langgraph.graph import State


@tool
def get_next_candidate(
    state: AgentState
) -> AgentState:
    
    try:
        candidates = state.get("candidates", [])
        if not isinstance(candidates, list) or len(candidates) == 0:
            return {"error": "State must contain a non-empty 'candidates' list."}

        top = max(candidates, key=lambda c: float(c.get("score", float("-inf"))))
        return top

    except Exception as e:
        return {"error": f"Failed to select top candidate: {str(e)}"}


In [19]:
print(extract_code_block_as_string("commit.py", 10, 12))

# Added by AI
def foo():
    print('Hello')



In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_community.callbacks import get_openai_callback
from langchain.utils.openai_functions import convert_pydantic_to_openai_function
from langchain.agents import tool
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())
from datasets import load_dataset


llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.1,
    #max_retries=2,
)

dataset = load_dataset("lahirum/SWE_Experimental", split="train")