From bce4249f3617c995edf804f0f56a2f747e54e555 Mon Sep 17 00:00:00 2001
From: Vishal Shenoy <vishal.nshenoy@icloud.com>
Date: Tue, 11 Feb 2025 10:39:02 -0800
Subject: [PATCH 1/3] Register SemanticSearchTool in codebase agent and add tinygen.py

---
 src/codegen/extensions/langchain/agent.py |   4 +-
 src/codegen/extensions/langchain/tools.py |  21 +--
 tinygen.py                                | 213 ++++++++++++++++++++++
 3 files changed, 226 insertions(+), 12 deletions(-)
 create mode 100644 tinygen.py

diff --git a/src/codegen/extensions/langchain/agent.py b/src/codegen/extensions/langchain/agent.py
index 458903c24..57a9b146f 100644
--- a/src/codegen/extensions/langchain/agent.py
+++ b/src/codegen/extensions/langchain/agent.py
@@ -3,7 +3,7 @@
 from langchain import hub
 from langchain.agents import AgentExecutor
 from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
-from langchain_core.chat_history import ChatMessageHistory
+from langchain_community.chat_message_histories import ChatMessageHistory
 from langchain_core.runnables.history import RunnableWithMessageHistory
 from langchain_openai import ChatOpenAI
 
@@ -21,6 +21,7 @@
     SearchTool,
     SemanticEditTool,
     ViewFileTool,
+    SemanticSearchTool,
 )
 
 
@@ -59,6 +60,7 @@ def create_codebase_agent(
         MoveSymbolTool(codebase),
         RevealSymbolTool(codebase),
         SemanticEditTool(codebase),
+        SemanticSearchTool(codebase),
         CommitTool(codebase),
     ]
 
diff --git a/src/codegen/extensions/langchain/tools.py b/src/codegen/extensions/langchain/tools.py
index fcfcd2997..1b9d4c752 100644
--- a/src/codegen/extensions/langchain/tools.py
+++ b/src/codegen/extensions/langchain/tools.py
@@ -1,7 +1,7 @@
 """Langchain tools for workspace operations."""
 
 import json
-from typing import ClassVar, Literal, Optional
+from typing import ClassVar, Literal, Optional, Type
 
 from langchain.tools import BaseTool
 from pydantic import BaseModel, Field
@@ -233,6 +233,8 @@ def new_function():
     )
 
 
+
+
 class SemanticEditTool(BaseTool):
     """Tool for semantic editing of files."""
 
@@ -312,20 +314,17 @@ def _run(
         return json.dumps(result, indent=2)
 
 
+class SemanticSearchInput(BaseModel):
+    query: str = Field(..., description="The natural language search query")
+    k: int = Field(default=5, description="Number of results to return")
+    preview_length: int = Field(default=200, description="Length of content preview in characters")
+
 class SemanticSearchTool(BaseTool):
     """Tool for semantic code search."""
 
     name: ClassVar[str] = "semantic_search"
     description: ClassVar[str] = "Search the codebase using natural language queries and semantic similarity"
-    args_schema: ClassVar[type[BaseModel]] = type(
-        "SemanticSearchInput",
-        (BaseModel,),
-        {
-            "query": (str, Field(..., description="The natural language search query")),
-            "k": (int, Field(default=5, description="Number of results to return")),
-            "preview_length": (int, Field(default=200, description="Length of content preview in characters")),
-        },
-    )
+    args_schema: ClassVar[Type[BaseModel]] = SemanticSearchInput
     codebase: Codebase = Field(exclude=True)
 
     def __init__(self, codebase: Codebase) -> None:
@@ -333,4 +332,4 @@ def __init__(self, codebase: Codebase) -> None:
 
     def _run(self, query: str, k: int = 5, preview_length: int = 200) -> str:
         result = semantic_search(self.codebase, query, k=k, preview_length=preview_length)
-        return json.dumps(result, indent=2)
+        return json.dumps(result, indent=2)
\ No newline at end of file
diff --git a/tinygen.py b/tinygen.py
new file mode 100644
index 000000000..40fe5da56
--- /dev/null
+++ b/tinygen.py
@@ -0,0 +1,213 @@
+from typing import List, Tuple
+from codegen import Codebase
+from codegen.extensions.vector_index import VectorIndex
+from codegen.extensions.langchain.agent import Agent, create_codebase_agent
+import shutil
+import tempfile
+import git
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+def setup_vector_search(repo_path: str) -> VectorIndex:
+    """Initialize and create vector index for a codebase."""
+    # Initialize codebase
+    codebase = Codebase(repo_path)
+
+    # Create vector index
+    index = VectorIndex(codebase)
+
+    # Try to load existing index, create if not found
+    try:
+        index.load()
+        print("✓ Loaded existing vector index")
+    except FileNotFoundError:
+        print("⚡ Creating new vector index...")
+        index.create()
+        index.save()
+        print("✓ Created and saved vector index")
+
+    return index
+
+
+def find_relevant_files(
+    index: VectorIndex, query: str, k: int = 10, min_similarity: float = 0.1
+) -> List[Tuple[str, float, str]]:
+    """Find most relevant files for a query with previews."""
+    # Perform semantic search
+    results = index.similarity_search(query, k=k)
+
+    relevant_files = []
+    for filepath, similarity in results:
+        print(filepath, similarity)
+        # Skip if similarity is too low
+        if similarity < min_similarity:
+            continue
+
+        # Get file content preview
+        try:
+            file = index.codebase.get_file(filepath)
+            preview = file.content[:200].replace("\n", " ").strip()
+            if len(file.content) > 200:
+                preview += "..."
+
+            relevant_files.append((filepath, similarity, preview))
+        except Exception as e:
+            print(f"Warning: Could not read file {filepath}: {e}")
+
+    return relevant_files
+
+
+def clone_repo(repo_url: str) -> str:
+    temp_dir = tempfile.mkdtemp()
+    try:
+        print(f"Cloning repository from {repo_url} to {temp_dir}")
+        git.Repo.clone_from(repo_url, temp_dir)
+    except git.exc.GitError as e:
+        shutil.rmtree(temp_dir)
+        raise ValueError(f"Failed to clone repository: {e}")
+    return temp_dir
+
+
+def process_files_with_agent(
+    codebase: Codebase,
+    files_to_process: List[Tuple[str, float, str]],
+    query: str,
+    model_name: str = "gpt-4",
+    temperature: float = 0,
+) -> None:
+    """Process relevant files using the LangChain agent.
+
+    Args:
+        codebase: The codebase object containing the files
+        files_to_process: List of tuples containing (filepath, similarity, preview)
+        query: The original query/prompt describing the changes to make
+        model_name: Name of the model to use
+        temperature: Model temperature
+    """
+    # Create the agent with the codebase tools
+    print("\nInitializing AI agent...")
+    agent = create_codebase_agent(
+        codebase=codebase,
+        model_name=model_name,
+        temperature=temperature,
+        verbose=True  # Enable verbose mode to see agent's thought process
+    )
+
+    print("\nProcessing files with AI agent...")
+    print("=" * 80)
+
+    modifications_made = False
+
+    # Process each file
+    for filepath, similarity, preview in files_to_process:
+        if not codebase.has_file(filepath):
+            print(f"⚠️ File not found: {filepath}")
+            continue
+
+        print(f"\nProcessing file: {filepath}")
+        print(f"Similarity score: {similarity:.2f}")
+        print(f"Preview: {preview[:100]}...")
+
+        # Create a specific prompt for this file
+        file_prompt = f"""
+Analyze and modify the file {filepath} based on this request: "{query}"
+
+Follow these steps:
+1. First use the view_file tool to see the current content
+2. Analyze the changes needed based on the request
+3. Use the edit_file tool to make the necessary modifications
+4. Explain what changes were made and why
+
+Guidelines:
+- Preserve the overall structure and functionality
+- Only make changes that align with the request
+- Ensure code quality and consistency
+"""
+
+        try:
+            # Invoke the agent with the file-specific prompt
+            result = agent.invoke(
+                {
+                    "input": file_prompt,
+                    "config": {"configurable": {"session_id": filepath}},
+                }
+            )
+
+            print(f"✅ Processed {filepath}")
+            print(f"Agent output: {result['output']}")
+            print("-" * 80)
+            modifications_made = True
+
+        except Exception as e:
+            print(f"❌ Error processing {filepath}: {str(e)}")
+            print("-" * 80)
+
+    # Only commit if changes were made
+    if modifications_made:
+        print("\nCommitting changes to disk...")
+        try:
+            codebase.commit()
+            print("✅ Changes committed successfully")
+        except Exception as e:
+            print(f"❌ Error committing changes: {str(e)}")
+    else:
+        print("\nℹ️ No changes were made to commit")
+
+
+def main():
+    # Example usage
+    repo_url = "https://github.com/Textualize/rich"
+    query = "Delete dead code"
+
+    repo_path = clone_repo(repo_url)
+
+    # Setup vector search
+    index = setup_vector_search(repo_path)
+
+    # Find relevant files
+    print("\nSearching for relevant files...")
+    results = find_relevant_files(index, query)
+
+    # Display results
+    print("\nMost relevant files:")
+    print("-" * 80)
+    for filepath, similarity, preview in results:
+        print(f"\n📄 {filepath}")
+        print(f"Similarity: {similarity:.2f}")
+        print(f"Preview: {preview}")
+        print("-" * 80)
+
+    # Store original file contents for diff
+    original_contents = {}
+    for filepath, _, _ in results:
+        if index.codebase.has_file(filepath):
+            original_contents[filepath] = index.codebase.get_file(filepath).content
+
+    # Process files with AI agent
+    process_files_with_agent(index.codebase, results, query)
+
+    # Print diffs for modified files
+    print("\nFile modifications:")
+    print("=" * 80)
+    for filepath in original_contents:
+        if index.codebase.has_file(filepath):
+            new_content = index.codebase.get_file(filepath).content
+            if new_content != original_contents[filepath]:
+                from difflib import unified_diff
+
+                diff = unified_diff(
+                    original_contents[filepath].splitlines(keepends=True),
+                    new_content.splitlines(keepends=True),
+                    fromfile=f"a/{filepath}",
+                    tofile=f"b/{filepath}",
+                )
+                print(f"\nChanges in {filepath}:")
+                print("".join(diff))
+            else:
+                print(f"\nNo changes made to {filepath}")
+
+
+if __name__ == "__main__":
+    main()

From 6f9c9958a8414a5a3733d29859d7bfdcfac602c0 Mon Sep 17 00:00:00 2001
From: Vishal Shenoy <vishal.nshenoy@icloud.com>
Date: Tue, 11 Feb 2025 11:32:49 -0800
Subject: [PATCH 2/3] Delete tinygen.py

---
 tinygen.py | 213 -----------------------------------------------------
 1 file changed, 213 deletions(-)
 delete mode 100644 tinygen.py

diff --git a/tinygen.py b/tinygen.py
deleted file mode 100644
index 40fe5da56..000000000
--- a/tinygen.py
+++ /dev/null
@@ -1,213 +0,0 @@
-from typing import List, Tuple
-from codegen import Codebase
-from codegen.extensions.vector_index import VectorIndex
-from codegen.extensions.langchain.agent import Agent, create_codebase_agent
-import shutil
-import tempfile
-import git
-from dotenv import load_dotenv
-
-load_dotenv()
-
-
-def setup_vector_search(repo_path: str) -> VectorIndex:
-    """Initialize and create vector index for a codebase."""
-    # Initialize codebase
-    codebase = Codebase(repo_path)
-
-    # Create vector index
-    index = VectorIndex(codebase)
-
-    # Try to load existing index, create if not found
-    try:
-        index.load()
-        print("✓ Loaded existing vector index")
-    except FileNotFoundError:
-        print("⚡ Creating new vector index...")
-        index.create()
-        index.save()
-        print("✓ Created and saved vector index")
-
-    return index
-
-
-def find_relevant_files(
-    index: VectorIndex, query: str, k: int = 10, min_similarity: float = 0.1
-) -> List[Tuple[str, float, str]]:
-    """Find most relevant files for a query with previews."""
-    # Perform semantic search
-    results = index.similarity_search(query, k=k)
-
-    relevant_files = []
-    for filepath, similarity in results:
-        print(filepath, similarity)
-        # Skip if similarity is too low
-        if similarity < min_similarity:
-            continue
-
-        # Get file content preview
-        try:
-            file = index.codebase.get_file(filepath)
-            preview = file.content[:200].replace("\n", " ").strip()
-            if len(file.content) > 200:
-                preview += "..."
-
-            relevant_files.append((filepath, similarity, preview))
-        except Exception as e:
-            print(f"Warning: Could not read file {filepath}: {e}")
-
-    return relevant_files
-
-
-def clone_repo(repo_url: str) -> str:
-    temp_dir = tempfile.mkdtemp()
-    try:
-        print(f"Cloning repository from {repo_url} to {temp_dir}")
-        git.Repo.clone_from(repo_url, temp_dir)
-    except git.exc.GitError as e:
-        shutil.rmtree(temp_dir)
-        raise ValueError(f"Failed to clone repository: {e}")
-    return temp_dir
-
-
-def process_files_with_agent(
-    codebase: Codebase,
-    files_to_process: List[Tuple[str, float, str]],
-    query: str,
-    model_name: str = "gpt-4",
-    temperature: float = 0,
-) -> None:
-    """Process relevant files using the LangChain agent.
-
-    Args:
-        codebase: The codebase object containing the files
-        files_to_process: List of tuples containing (filepath, similarity, preview)
-        query: The original query/prompt describing the changes to make
-        model_name: Name of the model to use
-        temperature: Model temperature
-    """
-    # Create the agent with the codebase tools
-    print("\nInitializing AI agent...")
-    agent = create_codebase_agent(
-        codebase=codebase,
-        model_name=model_name,
-        temperature=temperature,
-        verbose=True  # Enable verbose mode to see agent's thought process
-    )
-
-    print("\nProcessing files with AI agent...")
-    print("=" * 80)
-
-    modifications_made = False
-
-    # Process each file
-    for filepath, similarity, preview in files_to_process:
-        if not codebase.has_file(filepath):
-            print(f"⚠️ File not found: {filepath}")
-            continue
-
-        print(f"\nProcessing file: {filepath}")
-        print(f"Similarity score: {similarity:.2f}")
-        print(f"Preview: {preview[:100]}...")
-
-        # Create a specific prompt for this file
-        file_prompt = f"""
-Analyze and modify the file {filepath} based on this request: "{query}"
-
-Follow these steps:
-1. First use the view_file tool to see the current content
-2. Analyze the changes needed based on the request
-3. Use the edit_file tool to make the necessary modifications
-4. Explain what changes were made and why
-
-Guidelines:
-- Preserve the overall structure and functionality
-- Only make changes that align with the request
-- Ensure code quality and consistency
-"""
-
-        try:
-            # Invoke the agent with the file-specific prompt
-            result = agent.invoke(
-                {
-                    "input": file_prompt,
-                    "config": {"configurable": {"session_id": filepath}},
-                }
-            )
-
-            print(f"✅ Processed {filepath}")
-            print(f"Agent output: {result['output']}")
-            print("-" * 80)
-            modifications_made = True
-
-        except Exception as e:
-            print(f"❌ Error processing {filepath}: {str(e)}")
-            print("-" * 80)
-
-    # Only commit if changes were made
-    if modifications_made:
-        print("\nCommitting changes to disk...")
-        try:
-            codebase.commit()
-            print("✅ Changes committed successfully")
-        except Exception as e:
-            print(f"❌ Error committing changes: {str(e)}")
-    else:
-        print("\nℹ️ No changes were made to commit")
-
-
-def main():
-    # Example usage
-    repo_url = "https://github.com/Textualize/rich"
-    query = "Delete dead code"
-
-    repo_path = clone_repo(repo_url)
-
-    # Setup vector search
-    index = setup_vector_search(repo_path)
-
-    # Find relevant files
-    print("\nSearching for relevant files...")
-    results = find_relevant_files(index, query)
-
-    # Display results
-    print("\nMost relevant files:")
-    print("-" * 80)
-    for filepath, similarity, preview in results:
-        print(f"\n📄 {filepath}")
-        print(f"Similarity: {similarity:.2f}")
-        print(f"Preview: {preview}")
-        print("-" * 80)
-
-    # Store original file contents for diff
-    original_contents = {}
-    for filepath, _, _ in results:
-        if index.codebase.has_file(filepath):
-            original_contents[filepath] = index.codebase.get_file(filepath).content
-
-    # Process files with AI agent
-    process_files_with_agent(index.codebase, results, query)
-
-    # Print diffs for modified files
-    print("\nFile modifications:")
-    print("=" * 80)
-    for filepath in original_contents:
-        if index.codebase.has_file(filepath):
-            new_content = index.codebase.get_file(filepath).content
-            if new_content != original_contents[filepath]:
-                from difflib import unified_diff
-
-                diff = unified_diff(
-                    original_contents[filepath].splitlines(keepends=True),
-                    new_content.splitlines(keepends=True),
-                    fromfile=f"a/{filepath}",
-                    tofile=f"b/{filepath}",
-                )
-                print(f"\nChanges in {filepath}:")
-                print("".join(diff))
-            else:
-                print(f"\nNo changes made to {filepath}")
-
-
-if __name__ == "__main__":
-    main()

From 7ec8c50aab4b6f59e31a86704ba8db1590fda844 Mon Sep 17 00:00:00 2001
From: vishalshenoy <34020235+vishalshenoy@users.noreply.github.com>
Date: Tue, 11 Feb 2025 19:35:35 +0000
Subject: [PATCH 3/3] Automated pre-commit update

---
 src/codegen/extensions/langchain/agent.py | 2 +-
 src/codegen/extensions/langchain/tools.py | 9 ++++-----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/codegen/extensions/langchain/agent.py b/src/codegen/extensions/langchain/agent.py
index 57a9b146f..dc1a1b47d 100644
--- a/src/codegen/extensions/langchain/agent.py
+++ b/src/codegen/extensions/langchain/agent.py
@@ -20,8 +20,8 @@
     RevealSymbolTool,
     SearchTool,
     SemanticEditTool,
-    ViewFileTool,
     SemanticSearchTool,
+    ViewFileTool,
 )
 
 
diff --git a/src/codegen/extensions/langchain/tools.py b/src/codegen/extensions/langchain/tools.py
index 1b9d4c752..f00b193a3 100644
--- a/src/codegen/extensions/langchain/tools.py
+++ b/src/codegen/extensions/langchain/tools.py
@@ -1,7 +1,7 @@
 """Langchain tools for workspace operations."""
 
 import json
-from typing import ClassVar, Literal, Optional, Type
+from typing import ClassVar, Literal, Optional
 
 from langchain.tools import BaseTool
 from pydantic import BaseModel, Field
@@ -233,8 +233,6 @@ def new_function():
     )
 
 
-
-
 class SemanticEditTool(BaseTool):
     """Tool for semantic editing of files."""
 
@@ -319,12 +317,13 @@ class SemanticSearchInput(BaseModel):
     k: int = Field(default=5, description="Number of results to return")
     preview_length: int = Field(default=200, description="Length of content preview in characters")
 
+
 class SemanticSearchTool(BaseTool):
     """Tool for semantic code search."""
 
     name: ClassVar[str] = "semantic_search"
     description: ClassVar[str] = "Search the codebase using natural language queries and semantic similarity"
-    args_schema: ClassVar[Type[BaseModel]] = SemanticSearchInput
+    args_schema: ClassVar[type[BaseModel]] = SemanticSearchInput
     codebase: Codebase = Field(exclude=True)
 
     def __init__(self, codebase: Codebase) -> None:
@@ -332,4 +331,4 @@ def __init__(self, codebase: Codebase) -> None:
 
     def _run(self, query: str, k: int = 5, preview_length: int = 200) -> str:
         result = semantic_search(self.codebase, query, k=k, preview_length=preview_length)
-        return json.dumps(result, indent=2)
\ No newline at end of file
+        return json.dumps(result, indent=2)